├── .eslintignore ├── .eslintrc.json ├── .github ├── release-please.yml └── workflows │ ├── release.yml │ └── test.yml ├── .gitignore ├── .gitmodules ├── .prettierrc.js ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── docs ├── code-of-conduct.md ├── contributing.md └── intermediate_API_design.md ├── package.json ├── src └── re2.ts ├── third_party ├── README.md └── node-re2 │ ├── LICENSE │ └── tests │ ├── test_exec.js │ ├── test_general.js │ ├── test_groups.js │ ├── test_invalid.js │ ├── test_match.js │ ├── test_matchAll.js │ ├── test_prototype.js │ ├── test_replace.js │ ├── test_search.js │ ├── test_source.js │ ├── test_split.js │ ├── test_symbols.js │ ├── test_test.js │ ├── test_toString.js │ ├── tests.js │ └── worker.js ├── tsconfig.json ├── wasm └── re2.d.ts └── wrap └── re2_wrap.cc /.eslintignore: -------------------------------------------------------------------------------- 1 | build/ 2 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./node_modules/gts/" 3 | } 4 | -------------------------------------------------------------------------------- /.github/release-please.yml: -------------------------------------------------------------------------------- 1 | releaseType: node 2 | handleGHRelease: true 3 | primaryBranch: main 4 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | release: 3 | types: [published] 4 | name: release 5 | jobs: 6 | release-please: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v2 10 | with: 11 | submodules: recursive 12 | - uses: actions/setup-node@v2 13 | with: 14 | node-version: 14 15 | registry-url: 'https://wombat-dressing-room.appspot.com' 16 | - uses: mymindstorm/setup-emsdk@v7 17 | with: 18 | 
version: 2.0.9 19 | no-cache: true 20 | - run: npm install 21 | - run: npm publish 22 | env: 23 | NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}} -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - 'main' 7 | 8 | jobs: 9 | tests: 10 | strategy: 11 | matrix: 12 | node: [10, 12, 14] 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | with: 17 | submodules: recursive 18 | - uses: actions/setup-node@v2 19 | with: 20 | node-version: ${{matrix.node}} 21 | - uses: mymindstorm/setup-emsdk@v7 22 | with: 23 | version: 2.0.9 24 | no-cache: true 25 | - name: Install 26 | run: npm install 27 | - name: Build 28 | run: npm run compile 29 | - name: Test 30 | run: npm test 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | build -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "deps/re2"] 2 | path = deps/re2 3 | url = https://github.com/google/re2.git 4 | -------------------------------------------------------------------------------- /.prettierrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | ...require('gts/.prettierrc.json') 3 | } 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [1.1.0](https://github.com/google/re2-wasm/compare/v1.0.2...v1.1.0) (2022-11-30) 4 | 5 | 6 | ### Features 7 | 8 | * Add support for String.prototype.matchAll() 
([f8dfe27](https://github.com/google/re2-wasm/commit/f8dfe27716747914585482f6b70f353b2f2507ce)) 9 | 10 | ### [1.0.2](https://www.github.com/google/re2-wasm/compare/v1.0.1...v1.0.2) (2021-09-14) 11 | 12 | 13 | ### Bug Fixes 14 | 15 | * Don't generate unhandled exception and rejection handlers ([47df5f5](https://www.github.com/google/re2-wasm/commit/47df5f581089c4f9210188f54374b2285446936b)) 16 | 17 | ### [1.0.1](https://www.github.com/google/re2-wasm/compare/v1.0.0...v1.0.1) (2021-02-11) 18 | 19 | 20 | ### Bug Fixes 21 | 22 | * Use correct package name in README ([7aed127](https://www.github.com/google/re2-wasm/commit/7aed12756162a005b75c63e115ce1a78098c2a10)) 23 | 24 | ## 1.0.0 (2021-02-05) 25 | 26 | 27 | ### Features 28 | 29 | * initial release ([65d7c80](https://www.github.com/google/re2-wasm/commit/65d7c805511af0d95e3252bb7933020cbe7b0d12)) 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: wasm/re2.js 2 | 3 | wasm/re2.js: wrap/re2_wrap.cc deps/re2/re2/bitstate.cc deps/re2/re2/compile.cc deps/re2/re2/dfa.cc deps/re2/re2/filtered_re2.cc deps/re2/re2/mimics_pcre.cc deps/re2/re2/nfa.cc deps/re2/re2/onepass.cc deps/re2/re2/parse.cc deps/re2/re2/perl_groups.cc deps/re2/re2/prefilter.cc deps/re2/re2/prefilter_tree.cc deps/re2/re2/prog.cc deps/re2/re2/re2.cc deps/re2/re2/regexp.cc deps/re2/re2/set.cc deps/re2/re2/simplify.cc deps/re2/re2/stringpiece.cc deps/re2/re2/tostring.cc deps/re2/re2/unicode_casefold.cc deps/re2/re2/unicode_groups.cc deps/re2/util/rune.cc deps/re2/util/strutil.cc 4 | mkdir -p wasm 5 | emcc --bind -s WASM=1 -s WASM_ASYNC_COMPILATION=0 -s NODEJS_CATCH_EXIT=0 -s NODEJS_CATCH_REJECTION=0 -I deps/re2 -o wasm/re2.js wrap/re2_wrap.cc deps/re2/re2/bitstate.cc deps/re2/re2/compile.cc deps/re2/re2/dfa.cc 
deps/re2/re2/filtered_re2.cc deps/re2/re2/mimics_pcre.cc deps/re2/re2/nfa.cc deps/re2/re2/onepass.cc deps/re2/re2/parse.cc deps/re2/re2/perl_groups.cc deps/re2/re2/prefilter.cc deps/re2/re2/prefilter_tree.cc deps/re2/re2/prog.cc deps/re2/re2/re2.cc deps/re2/re2/regexp.cc deps/re2/re2/set.cc deps/re2/re2/simplify.cc deps/re2/re2/stringpiece.cc deps/re2/re2/tostring.cc deps/re2/re2/unicode_casefold.cc deps/re2/re2/unicode_groups.cc deps/re2/util/rune.cc deps/re2/util/strutil.cc 6 | 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # re2-wasm [![NPM version][npm-img]][npm-url] 2 | 3 | [npm-img]: https://img.shields.io/npm/v/re2-wasm.svg 4 | [npm-url]: https://npmjs.org/package/re2-wasm 5 | 6 | **This is not an officially supported Google product.** 7 | 8 | This README is modified from the node-re2 README, licensed under The "New" BSD License 9 | 10 | This project provides bindings for [RE2](https://github.com/google/re2): 11 | fast, safe alternative to backtracking regular expression engines written by [Russ Cox](http://swtch.com/~rsc/). 12 | To learn more about RE2, start with an overview 13 | [Regular Expression Matching in the Wild](http://swtch.com/~rsc/regexp/regexp3.html). More resources can be found 14 | at his [Implementing Regular Expressions](http://swtch.com/~rsc/regexp/) page. 15 | 16 | `RE2`'s regular expression language is almost a superset of what is provided by `RegExp` 17 | (see [Syntax](https://github.com/google/re2/wiki/Syntax)), 18 | but it lacks two features: backreferences and lookahead assertions. See below for more details. 19 | 20 | `RE2` object emulates standard `RegExp` making it a practical drop-in replacement in most cases. 21 | `RE2` is extended to provide `String`-based regular expression methods as well. 
To help convert 22 | `RegExp` objects to `RE2`, its constructor can take a `RegExp` directly, honoring all properties. 23 | 24 | ## Why use re2-wasm? 25 | 26 | The built-in Node.js regular expression engine can run in exponential time with a special combination: 27 | - A vulnerable regular expression 28 | - "Evil input" 29 | 30 | This can lead to what is known as a [Regular Expression Denial of Service (ReDoS)](https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS). 31 | To tell if your regular expressions are vulnerable, you might try one of these projects: 32 | - [rxxr2](http://www.cs.bham.ac.uk/~hxt/research/rxxr2/) 33 | - [safe-regex](https://github.com/substack/safe-regex) 34 | 35 | However, neither project is perfect. 36 | 37 | re2-wasm can protect your Node.js application from ReDoS. 38 | re2-wasm makes vulnerable regular expression patterns safe by evaluating them in `RE2` instead of the built-in Node.js regex engine. 39 | 40 | ## Standard features 41 | 42 | `RE2` object can be created just like `RegExp`: 43 | 44 | * [`new RE2(pattern[, flags])`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp) 45 | 46 | Supported properties: 47 | 48 | * [`re2.lastIndex`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/lastIndex) 49 | * [`re2.global`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/global) 50 | * [`re2.ignoreCase`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/ignoreCase) 51 | * [`re2.multiline`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/multiline) 52 | * [`re2.unicode`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode) 53 | * `RE2` engine always works in Unicode mode. See details below. 
54 | * [`re2.sticky`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/sticky) 55 | * [`re2.source`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/source) 56 | * [`re2.flags`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/flags) 57 | 58 | Supported methods: 59 | 60 | * [`re2.exec(str)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec) 61 | * [`re2.test(str)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/test) 62 | * [`re2.toString()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/toString) 63 | 64 | The following well-known symbol-based methods are supported (see [Symbols](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol)): 65 | 66 | * [`re2[Symbol.match](str)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/match) 67 | * [`re2[Symbol.matchAll](str)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/matchAll) 68 | * [`re2[Symbol.search](str)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/search) 69 | * [`re2[Symbol.replace](str, newSubStr|function)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/replace) 70 | * [`re2[Symbol.split](str[, limit])`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/split) 71 | 72 | This allows `RE2` instances to be used on strings directly, just like `RegExp` instances: 73 | 74 | ```js 75 | var re = new RE2("1", 'u'); 76 | "213".match(re); // [ '1', index: 1, input: '213' ] 77 | "213".search(re); // 1 78 | "213".replace(re, "+"); // 2+3 79 | "213".split(re); // [ '2', '3' ] 80 | ``` 81 | 82 | [Named 
groups](https://tc39.github.io/proposal-regexp-named-groups/) are supported. 83 | 84 | ## Extensions 85 | 86 | ### Shortcut construction 87 | 88 | `RE2` object can be created from a regular expression: 89 | 90 | ```js 91 | var re1 = new RE2(/ab*/igu); // from a RegExp object 92 | var re2 = new RE2(re1); // from another RE2 object 93 | ``` 94 | 95 | ### `String` methods 96 | 97 | Standard `String` defines four more methods that can use regular expressions. `RE2` provides them as methods 98 | with the positions of the string and the regular expression exchanged: 99 | 100 | * `re2.match(str)` 101 | * See [`str.match(regexp)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match) 102 | * `re2.replace(str, newSubStr|function)` 103 | * See [`str.replace(regexp, newSubStr|function)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace) 104 | * `re2.search(str)` 105 | * See [`str.search(regexp)`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/search) 106 | * `re2.split(str[, limit])` 107 | * See [`str.split(regexp[, limit])`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split) 108 | 109 | ### Property: `internalSource` 110 | 111 | Starting with 1.8.0, the `source` property emulates the same property of `RegExp`, meaning that it can be used to create an identical `RE2` or `RegExp` instance. Sometimes, for troubleshooting purposes, a user may want to inspect the source as translated for `RE2`. It is available as a read-only property called `internalSource`. 112 | 113 | ### Unicode Mode 114 | 115 | The `RE2` engine only works in Unicode mode, so the `RE2` class must always be constructed with the `u` flag to enable unicode mode. 116 | 117 | ## How to install 118 | 119 | Installation: 120 | 121 | ``` 122 | npm install --save re2-wasm 123 | ``` 124 | 125 | ## How to use 126 | 127 | It is used just like a `RegExp` object. 
128 | 129 | ```js 130 | var { RE2 } = require("re2-wasm"); 131 | 132 | // with default flags 133 | var re = new RE2("a(b*)", 'u'); 134 | var result = re.exec("abbc"); 135 | console.log(result[0]); // "abb" 136 | console.log(result[1]); // "bb" 137 | 138 | result = re.exec("aBbC"); 139 | console.log(result[0]); // "a" 140 | console.log(result[1]); // "" 141 | 142 | // with explicit flags 143 | re = new RE2("a(b*)", "iu"); 144 | result = re.exec("aBbC"); 145 | console.log(result[0]); // "aBb" 146 | console.log(result[1]); // "Bb" 147 | 148 | // from regular expression object 149 | var regexp = new RegExp("a(b*)", "iu"); 150 | re = new RE2(regexp); 151 | result = re.exec("aBbC"); 152 | console.log(result[0]); // "aBb" 153 | console.log(result[1]); // "Bb" 154 | 155 | // from regular expression literal 156 | re = new RE2(/a(b*)/iu); 157 | result = re.exec("aBbC"); 158 | console.log(result[0]); // "aBb" 159 | console.log(result[1]); // "Bb" 160 | 161 | // from another RE2 object 162 | var rex = new RE2(re); 163 | result = rex.exec("aBbC"); 164 | console.log(result[0]); // "aBb" 165 | console.log(result[1]); // "Bb" 166 | 167 | // shortcut 168 | result = new RE2("ab*", 'u').exec("abba"); 169 | ``` 170 | 171 | ## Limitations (things RE2 does not support) 172 | 173 | `RE2` consciously avoids any regular expression features that require worst-case exponential time to evaluate. 174 | These features are essentially those that describe a Context-Free Language (CFL) rather than a Regular Expression, 175 | and are extensions to the traditional regular expression language because some people don't know when enough is enough. 176 | 177 | The most noteworthy missing features are backreferences and lookahead assertions. 178 | If your application uses these features, you should continue to use `RegExp`. 
179 | But since these features are fundamentally vulnerable to 180 | [ReDoS](https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS), 181 | you should strongly consider replacing them. 182 | 183 | `RE2` will throw a `SyntaxError` if you try to declare a regular expression using these features. 184 | If you are evaluating an externally-provided regular expression, wrap your `RE2` declarations in a try-catch block. This allows falling back to `RegExp` when `RE2` lacks a feature: 185 | 186 | ```js 187 | var re = /(a)+(b)*/u; 188 | try { 189 | re = new RE2(re); 190 | // use RE2 as a drop-in replacement 191 | } catch (e) { 192 | // suppress an error, and use 193 | // the original RegExp 194 | } 195 | var result = re.exec(sample); 196 | ``` 197 | 198 | In addition to these missing features, `RE2` also behaves somewhat differently from the built-in regular expression engine in corner cases. 199 | 200 | ### Backreferences 201 | 202 | `RE2` doesn't support backreferences, which are numbered references to previously 203 | matched groups, like so: `\1`, `\2`, and so on. Example of backreferences: 204 | 205 | ```js 206 | /(cat|dog)\1/.test("catcat"); // true 207 | /(cat|dog)\1/.test("dogdog"); // true 208 | /(cat|dog)\1/.test("catdog"); // false 209 | /(cat|dog)\1/.test("dogcat"); // false 210 | ``` 211 | 212 | ### Lookahead assertions 213 | 214 | `RE2` doesn't support lookahead assertions, which are ways to make a match dependent on subsequent contents. 215 | 216 | ```js 217 | /abc(?=def)/; // match abc only if it is followed by def 218 | /abc(?!def)/; // match abc only if it is not followed by def 219 | ``` 220 | 221 | ### Mismatched behavior 222 | 223 | `RE2` and the built-in regex engines disagree a bit. Before you switch to `RE2`, verify that your regular expressions continue to work as expected. They should do so in the vast majority of cases. 
224 | 225 | Here is an example of a case where they may not: 226 | 227 | ```js 228 | var { RE2 } = require("re2-wasm"); 229 | 230 | var pattern = '(?:(a)|(b)|(c))+'; 231 | 232 | var built_in = new RegExp(pattern); 233 | var re2 = new RE2(pattern, 'u'); 234 | 235 | var input = 'abc'; 236 | 237 | var bi_res = built_in.exec(input); 238 | var re2_res = re2.exec(input); 239 | 240 | console.log('bi_res: ' + bi_res); // prints: bi_res: abc,,,c 241 | console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c 242 | ``` 243 | 244 | ### Unicode 245 | 246 | `RE2` only works in the Unicode mode. The `u` flag must be passed to the `RE2` constructor. 247 | 248 | ## License 249 | 250 | Apache 2.0 251 | -------------------------------------------------------------------------------- /docs/code-of-conduct.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of 9 | experience, education, socio-economic status, nationality, personal appearance, 10 | race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or reject 41 | comments, commits, code, wiki edits, issues, and other contributions that are 42 | not aligned to this Code of Conduct, or to ban temporarily or permanently any 43 | contributor for other behaviors that they deem inappropriate, threatening, 44 | offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when the Project 56 | Steward has a reasonable belief that an individual's behavior may have a 57 | negative impact on the project or its community. 58 | 59 | ## Conflict Resolution 60 | 61 | We do not believe that all conflict is bad; healthy debate and disagreement 62 | often yield positive results. However, it is never okay to be disrespectful or 63 | to engage in behavior that violates the project’s code of conduct. 64 | 65 | If you see someone violating the code of conduct, you are encouraged to address 66 | the behavior directly with those involved. Many issues can be resolved quickly 67 | and easily, and this gives people more control over the outcome of their 68 | dispute. If you are unable to resolve the matter for any reason, or if the 69 | behavior is threatening or harassing, report it. We are dedicated to providing 70 | an environment where participants feel welcome and safe. 71 | 72 | Reports should be directed to Michael Lumish at mlumish@google.com, the 73 | Project Steward(s) for RE2-WASM. It is the Project Steward’s duty to 74 | receive and address reported violations of the code of conduct. They will then 75 | work with a committee consisting of representatives from the Open Source 76 | Programs Office and the Google Open Source Strategy team. If for any reason you 77 | are uncomfortable reaching out to the Project Steward, please email 78 | opensource@google.com. 79 | 80 | We will investigate every complaint, but you may not receive a direct response. 81 | We will use our discretion in determining when and how to follow up on reported 82 | incidents, which may range from not taking action to permanent expulsion from 83 | the project and project-sponsored spaces. We will notify the accused of the 84 | report and provide them an opportunity to discuss it before any action is taken. 
85 | The identity of the reporter will be omitted from the details of the report 86 | supplied to the accused. In potentially harmful situations, such as ongoing 87 | harassment or threats to anyone's safety, we may take action without notice. 88 | 89 | ## Attribution 90 | 91 | This Code of Conduct is adapted from the Contributor Covenant, version 1.4, 92 | available at 93 | https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 94 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code Reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/).
29 | -------------------------------------------------------------------------------- /docs/intermediate_API_design.md: -------------------------------------------------------------------------------- 1 | ## Background 2 | 3 | The objective of the re2-wasm library is to implement the JavaScript [RegExp API](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp) 4 | using the RE2 C++ API compiled into a WASM module. Unfortunately, there are some interface compatibility problems that require intermediate wrapping code to resolve. 5 | In particular, the RE2 API has functions with "out" parameters, which cannot be used in WASM interfaces, so some C++ code is needed to present an API that provides all 6 | of the necessary functionality without using any out parameters. On the other side, the RegExp API uses some types such as RegExp itself that cannot be represented 7 | effectively using WASM, so some JavaScript code is needed to bridge that gap. 8 | 9 | ## Design 10 | 11 | The `RegExp` API functions require the following information/functionality: 12 | 13 | - [`exec`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec): 14 | - The full match 15 | - All sub matches 16 | - The index of the match 17 | - The original input string 18 | - If the `g` or `y` flag is set: The RE2 object must record the `lastIndex` where the next search will start, and start searches from `lastIndex` 19 | - [`test`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/test): 20 | - Whether a match was found 21 | - If the `g` or `y` flag is set: The RE2 object must record the `lastIndex` where the next search will start 22 | - [`match`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match): 23 | - With the `g` flag and without the `y` flag: 24 | - A list of all disjoint full matches (equivalent to the full match result from repeatedly calling 
`exec`, but without modifying `lastIndex`) 25 | - Without the `g` flag: 26 | - The full match 27 | - All sub matches 28 | - The index of the match 29 | - The original input string 30 | - If the `y` flag is set: The RE2 object must record the `lastIndex` where the next search will start, and start searches from `lastIndex` 31 | - [`search`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/search): 32 | - The index of the first match (not modified by the `g` or `y` flags) 33 | - [`replace`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace): 34 | - The full match 35 | - All sub matches 36 | - The index of the match 37 | - The last index or length of the match 38 | - The original input string 39 | - With the `g` flag and without the `y` flag, all of this information is needed for every disjoint full match 40 | - With the `y` flag, `lastIndex` is used and updated 41 | - [`split`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split): 42 | - The index and length of any number of full matches (can be all of them, bounded above by `limit` if set) 43 | - All sub matches for each match 44 | - The `g` and `y` flags are irrelevant 45 | 46 | This can be achieved with a `match` function that takes as arguments the input string, the starting search index, and a boolean indicating whether to look for capture groups, 47 | and returns a single match object containing the full match, its index, and an array of sub match strings. The returned index is `-1` on failure. The JavaScript layer above it can track the 48 | `lastIndex`.
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "re2-wasm", 3 | "version": "1.1.0", 4 | "keywords": [ 5 | "regexp", 6 | "re2", 7 | "wasm" 8 | ], 9 | "description": "Google's RE2 library distributed as a WASM module", 10 | "homepage": "https://github.com/google/re2-wasm", 11 | "bugs": "https://github.com/google/re2-wasm/issues", 12 | "repository": "https://github.com/google/re2-wasm", 13 | "main": "build/src/re2.js", 14 | "scripts": { 15 | "test": "node ./third_party/node-re2/tests/tests.js", 16 | "lint": "gts lint src/*.ts", 17 | "clean": "gts clean", 18 | "compile": "make -j12 && tsc && cp -r wasm build/", 19 | "fix": "gts fix src/*.ts", 20 | "prepare": "npm run compile", 21 | "pretest": "npm run compile", 22 | "posttest": "npm run lint" 23 | }, 24 | "author": "", 25 | "license": "Apache-2.0", 26 | "devDependencies": { 27 | "@types/node": "^14.11.2", 28 | "gts": "^3.1.0", 29 | "heya-unit": "^0.3.0", 30 | "typescript": "~4.1.3" 31 | }, 32 | "engines": { 33 | "node": ">=10" 34 | }, 35 | "files": [ 36 | "src/*.ts", 37 | "wasm/re2.d.ts", 38 | "build/**/*.{d.ts,js,js.map,wasm}" 39 | ], 40 | "types": "./build/src/re2.d.ts" 41 | } 42 | -------------------------------------------------------------------------------- /src/re2.ts: -------------------------------------------------------------------------------- 1 | /* Copyright 2021 Google LLC 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * Array returned by `RE2.prototype.exec`: element 0 is the full match and the
 * following elements are the capture groups (`undefined` for a group that did
 * not participate in the match).
 *
 * NOTE(review): the element type was missing from the visible source (a bare
 * `extends Array` does not compile); `string | undefined` is reconstructed
 * from how the arrays are built from the match result — confirm against the
 * published typings.
 */
export interface RE2ExecArray extends Array<string | undefined> {
  /** Index in `input` at which the match starts. */
  index: number;
  /** The string that was searched. */
  input: string;
  /** Named capture groups, present only when at least one of them matched. */
  groups?: {
    [key: string]: string;
  };
}

/**
 * Array returned by `RE2.prototype.match`. With the `g` flag it holds only the
 * full matches and the extra properties are absent; without it the shape is
 * the same as `RE2ExecArray`.
 */
export interface RE2MatchArray extends Array<string | undefined> {
  index?: number;
  input?: string;
  groups?: {
    [key: string]: string;
  };
}

const ALPHA_UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
const HEX = '0123456789ABCDEF';

/**
 * Report whether `char` is a single hexadecimal digit (case-insensitive).
 * @param char a one-character string
 */
function isHexadecimal(char: string): boolean {
  // The length guard matters: ''.indexOf-style lookups of an empty string
  // succeed at position 0 and would otherwise be reported as a hex digit.
  return char.length === 1 && HEX.indexOf(char.toUpperCase()) !== -1;
}

/**
 * Translate a string from Node RegExp syntax to RE2 syntax. The algorithm is
 * translated from
 * https://github.com/uhop/node-re2/blob/master/lib/new.cc#L21-L142
 *
 * Rewrites performed:
 *  - `\cX` (X an upper case ASCII letter) becomes the `\xHH` control code
 *  - `\uHHHH` becomes `\x{HHHH}` and `\u{...}` becomes `\x{...}`
 *  - an unescaped `/` becomes `\/`
 *  - `(?<name>` becomes `(?P<name>` (lookbehind syntax is left untouched)
 *
 * @param pattern the pattern in JavaScript RegExp syntax
 * @param multiline whether to prefix the result with the `(?m)` flag group
 * @returns the pattern in RE2 syntax; `(?:)` for an empty pattern
 */
function translateRegExp(pattern: string, multiline: boolean): string {
  if (pattern === '') {
    return '(?:)';
  }
  const result: string[] = [];
  if (multiline) {
    result.push('(?m)');
  }
  for (let i = 0; i < pattern.length; ) {
    if (pattern[i] === '\\') {
      // A trailing lone backslash has nothing to escape; it falls through to
      // the plain copy at the bottom of the loop.
      if (i + 1 < pattern.length) {
        switch (pattern[i + 1]) {
          case '\\':
            // Consume "\\", output "\\"
            result.push('\\\\');
            i += 2;
            break;
          case 'c':
            if (i + 2 < pattern.length) {
              // 1 for "A" through 26 for "Z"; 0 when the character is not an
              // upper case letter (indexOf returned -1).
              const controlCode = ALPHA_UPPER.indexOf(pattern[i + 2]) + 1;
              if (controlCode > 0) {
                // Consume "\c[upper case character]", output "\x[hex digit][hex digit]"
                result.push(
                  '\\x',
                  HEX[Math.floor(controlCode / 16)],
                  HEX[controlCode % 16]
                );
                i += 3;
                break;
              }
            }
            // Consume "\c", output "\c"; the following character (if any) is
            // handled by the next iteration instead of being swallowed.
            result.push('\\c');
            i += 2;
            break;
          case 'u':
            if (i + 2 < pattern.length) {
              const ch2 = pattern[i + 2];
              if (isHexadecimal(ch2)) {
                // Consume "\u[hex digit]", output "\x{[hex digit]"
                result.push('\\x{');
                result.push(ch2);
                i += 3;
                // Consume and output up to 3 more hex digits
                for (
                  let j = 0;
                  j < 3 && i < pattern.length && isHexadecimal(pattern[i]);
                  i++, j++
                ) {
                  result.push(pattern[i]);
                }
                // Output "}"
                result.push('}');
                break;
              } else if (ch2 === '{') {
                // Consume "\u" followed by "{", output "\x"
                // The plain copy below handles the subsequent characters
                result.push('\\x');
                i += 2;
                break;
              }
            }
            // Consume and output "\u"
            result.push('\\u');
            i += 2;
            break;
          default:
            // Consume and output "\[char]"
            result.push('\\', pattern[i + 1]);
            i += 2;
        }
        continue;
      }
    } else if (pattern[i] === '/') {
      // Consume "/" and output "\/"
      // An existing "\/" would have been handled by the above default case
      result.push('\\/');
      i += 1;
      continue;
    } else if (pattern.substring(i, i + 3) === '(?<') {
      // "(?<=" and "(?<!" are lookbehind assertions, not named groups
      if (pattern[i + 3] !== '=' && pattern[i + 3] !== '!') {
        // Consume "(?<" and output "(?P<"
        result.push('(?P<');
        i += 3;
        continue;
      }
    }
    // Consume and output the next character
    result.push(pattern[i]);
    i += 1;
  }
  return result.join('');
}
/**
 * Escape a RegExp pattern by ensuring that any instance of "/" in the string
 * is preceded by an odd number of backslashes.
 * @param pattern
 */
function escapeRegExp(pattern: string): string {
  let result = '';
  // True while the previous character is a backslash that is not itself
  // escaped, i.e. the current character is already escaped.
  let escaped = false;
  for (let i = 0; i < pattern.length; i++) {
    const char = pattern[i];
    if (char === '/' && !escaped) {
      result += '\\/';
    } else {
      result += char;
    }
    escaped = char === '\\' && !escaped;
  }
  return result;
}

/**
 * Report whether `char` is a single ASCII decimal digit.
 * @param char a one-character string, or undefined when past the end
 */
function isDecimalDigit(char: string | undefined): boolean {
  return (
    char !== undefined && char.length === 1 && char >= '0' && char <= '9'
  );
}

/**
 * Return the index following the code point that starts at `index`, so that
 * a search restarted after an empty match never lands in the middle of a
 * surrogate pair.
 * @param input the string being searched
 * @param index a UTF-16 code unit index into `input`
 */
function advanceStringIndex(input: string, index: number): number {
  const first = input.charCodeAt(index);
  if (first >= 0xd800 && first <= 0xdbff && index + 1 < input.length) {
    const second = input.charCodeAt(index + 1);
    if (second >= 0xdc00 && second <= 0xdfff) {
      return index + 2;
    }
  }
  return index + 1;
}

/* This class should implement the RegExp interface, but it can't because of
 * https://github.com/microsoft/TypeScript/issues/42307 */
export class RE2 {
  private _global = false;
  private _ignoreCase = false;
  private _multiline = false;
  private _dotAll = false;
  private _unicode = false;
  private _sticky = false;
  lastIndex = 0;

  private pattern = '(?:)';
  private wrapper: WrappedRE2;

  // Capture group number -> name, and the reverse mapping
  private groupNames: {[group: number]: string} = {};
  private namedGroups: {[name: string]: number} = {};

  /**
   * @param pattern pattern source, or an existing RegExp/RE2 to copy
   * @param flags any of "gimsuy"; "u" is mandatory. Defaults to the flags of
   *     `pattern` when that is a RegExp or RE2 instance.
   * @throws Error when the "u" flag is missing
   * @throws SyntaxError when the pattern is rejected by RE2 or uses the same
   *     capture group name twice
   */
  constructor(pattern: string | RegExp | RE2, flags?: string) {
    if (typeof pattern !== 'string') {
      if (pattern instanceof RegExp || pattern instanceof RE2) {
        flags = flags ?? pattern.flags;
        pattern = pattern.source;
      } else {
        if (pattern === undefined) {
          pattern = '(?:)';
        } else {
          pattern = pattern + '';
        }
      }
    }
    if (pattern === '') {
      pattern = '(?:)';
    }
    pattern = escapeRegExp(pattern);
    flags = flags ?? '';
    for (const flag of flags) {
      switch (flag) {
        case 'g':
          this._global = true;
          break;
        case 'i':
          this._ignoreCase = true;
          break;
        case 'm':
          this._multiline = true;
          break;
        case 's':
          this._dotAll = true;
          break;
        case 'u':
          this._unicode = true;
          break;
        case 'y':
          this._sticky = true;
          break;
      }
    }
    if (!this._unicode) {
      throw new Error(
        'RE2 only works in unicode mode. The "u" flag must be passed when constructing a RE2 instance'
      );
    }
    this.pattern = pattern;
    this.wrapper = new WrappedRE2(
      translateRegExp(pattern, this._multiline),
      this._ignoreCase,
      this._multiline,
      this._dotAll
    );
    if (!this.wrapper.ok()) {
      throw new SyntaxError(
        `Invalid regular expression: /${pattern}/${flags}: ${this.wrapper.error()}`
      );
    }
    // Verify that all named groups have unique names
    const groupNames = this.wrapper.capturingGroupNames();
    const groupNumbers = groupNames.keys();
    for (let i = 0; i < groupNumbers.size(); i++) {
      const num = groupNumbers.get(i);
      const name = groupNames.get(num);
      // Own-property check: `in` would also see Object.prototype members, so
      // a group named e.g. "constructor" would be reported as a duplicate.
      if (Object.prototype.hasOwnProperty.call(this.namedGroups, name)) {
        throw new SyntaxError(
          `Invalid regular expression: /${pattern}/${flags}: Duplicate capture group name`
        );
      }
      this.groupNames[num] = name;
      this.namedGroups[name] = num;
    }
  }

  /** The pattern as given by the caller, with every "/" escaped. */
  get source() {
    return this.pattern;
  }
  /** The pattern as reported by the underlying wrapper. */
  get internalSource() {
    return this.wrapper.pattern();
  }
  /** The active flags, in the fixed order "gimsuy". */
  get flags() {
    return (
      (this._global ? 'g' : '') +
      (this._ignoreCase ? 'i' : '') +
      (this._multiline ? 'm' : '') +
      (this._dotAll ? 's' : '') +
      (this._unicode ? 'u' : '') +
      (this._sticky ? 'y' : '')
    );
  }
  get global() {
    return this._global;
  }
  get ignoreCase() {
    return this._ignoreCase;
  }
  get multiline() {
    return this._multiline;
  }
  get dotAll() {
    return this._dotAll;
  }
  get unicode() {
    return this._unicode;
  }
  get sticky() {
    return this._sticky;
  }
  toString() {
    return `/${this.pattern}/${this.flags}`;
  }

  /** Index at which a search starts: `lastIndex` with "g" or "y", else 0. */
  private getMaybeStickyIndex() {
    if (this._global || this._sticky) {
      return this.lastIndex;
    } else {
      return 0;
    }
  }

  /**
   * A match counts as successful when it was found at all (index >= 0) and,
   * in sticky mode, starts exactly where the search started.
   */
  private isMatchSuccessful(
    match: InternalMatchResult,
    searchStart: number
  ): boolean {
    return match.index === searchStart || (!this._sticky && match.index >= 0);
  }

  /**
   * With "g" or "y", move `lastIndex` to the end of a successful match or
   * reset it to 0 after a failed one. Without those flags this is a no-op.
   */
  private maybeUpdateLastIndex(match: InternalMatchResult, start: number) {
    if (this._global || this._sticky) {
      if (this.isMatchSuccessful(match, start)) {
        this.lastIndex = match.index + match.match.length;
      } else {
        this.lastIndex = 0;
      }
    }
  }

  /** Collect the named groups that took part in `match`, keyed by name. */
  private getNamedGroups(match: InternalMatchResult): {[name: string]: string} {
    const groups: {[name: string]: string} = {};
    for (const [groupName, groupNum] of Object.entries(this.namedGroups)) {
      // Group numbers are 1-based, match.groups is 0-based
      if (match.groups[groupNum - 1] !== undefined) {
        groups[groupName] = match.groups[groupNum - 1]!;
      }
    }
    return groups;
  }

  /**
   * Search `input` for one match, honouring and updating `lastIndex` when the
   * "g" or "y" flag is set.
   * @returns the match array, or null when there is no match
   */
  exec(input: string): RE2ExecArray | null {
    if (typeof input !== 'string') {
      input = input + '';
    }
    const startIndex = this.getMaybeStickyIndex();
    const match = this.wrapper.match(input, startIndex, true);
    this.maybeUpdateLastIndex(match, startIndex);
    if (!this.isMatchSuccessful(match, startIndex)) {
      return null;
    }
    const result: RE2ExecArray = [match.match, ...match.groups] as RE2ExecArray;
    result.index = match.index;
    result.input = input;
    const groups = this.getNamedGroups(match);
    if (Object.keys(groups).length > 0) {
      result.groups = groups;
    }
    return result;
  }

  /**
   * Like `exec`, but only reports whether a match was found.
   */
  test(input: string): boolean {
    if (typeof input !== 'string') {
      input = input + '';
    }
    const startIndex = this.getMaybeStickyIndex();
    const match = this.wrapper.match(input, startIndex, false);
    this.maybeUpdateLastIndex(match, startIndex);
    return this.isMatchSuccessful(match, startIndex);
  }
  compile(): this {
    // This method is deprecated on RegExp, so it is intentionally not implemented here
    throw new Error(
      'Deprecated RegExp method compile is not implemented in RE2.'
    );
  }

  /**
   * With the "g" flag: return every full match in `input` (searching from
   * index 0), or null when there is none. Without it: behave like `exec`.
   */
  [Symbol.match](input: string): RE2MatchArray | null {
    if (typeof input !== 'string') {
      input = input + '';
    }
    if (this._global) {
      const result: string[] = [];
      let nextIndex = 0;
      while (nextIndex <= input.length) {
        const match = this.wrapper.match(input, nextIndex, false);
        if (!this.isMatchSuccessful(match, nextIndex)) {
          break;
        }
        result.push(match.match);
        const matchEnd = match.index + match.match.length;
        // An empty match does not consume anything; step over one code point
        // so the same position is not matched again forever.
        nextIndex =
          match.match.length === 0
            ? advanceStringIndex(input, matchEnd)
            : matchEnd;
      }
      if (result.length === 0) {
        return null;
      } else {
        return result;
      }
    } else {
      const startIndex = this.getMaybeStickyIndex();
      const match = this.wrapper.match(input, startIndex, true);
      this.maybeUpdateLastIndex(match, startIndex);
      if (!this.isMatchSuccessful(match, startIndex)) {
        return null;
      }
      const result: RE2MatchArray = [match.match, ...match.groups];
      result.index = match.index;
      result.input = input;
      const groups = this.getNamedGroups(match);
      if (Object.keys(groups).length > 0) {
        result.groups = groups;
      }
      return result;
    }
  }

  match(input: string): RE2MatchArray | null {
    return this[Symbol.match](input);
  }

  /**
   * Iterate over the matches in `input`, starting from this object's
   * `lastIndex`. The iteration runs on a copy, so `lastIndex` of this object
   * is left untouched. Without the "g" flag at most one match is produced.
   */
  *[(Symbol as SymbolConstructor & {matchAll?: symbol}).matchAll || Symbol()](
    input: string
  ): Generator<RE2ExecArray> {
    if (typeof input !== 'string') {
      input = input + '';
    }
    const copy = new RE2(this);
    copy.lastIndex = this.lastIndex;

    for (;;) {
      const match = copy.exec(input);
      if (match === null) {
        return;
      }
      yield match;
      if (!copy.global) {
        // Without "g" a further exec would not move on from this match
        return;
      }
      if (match[0] === '') {
        // exec leaves lastIndex in place after an empty match; step over one
        // code point so the iteration makes progress.
        copy.lastIndex = advanceStringIndex(input, copy.lastIndex);
        if (copy.lastIndex > input.length) {
          return;
        }
      }
    }
  }

  /**
   * Outputs the replacement for the matched part of the string.
   *
   * A string replacer supports the same patterns as String.prototype.replace:
   * "$$", "$&", "$`", "$'", "$n"/"$nn" and "$<name>". A "$" that does not
   * start one of those patterns is copied literally.
   * @param input the string being searched
   * @param match the match to substitute
   * @param replacer replacement template, or a function computing the
   *     replacement from (match, ...groups, index, input, namedGroups)
   */
  private replaceMatch(
    input: string,
    match: InternalMatchResult,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    replacer: string | ((substring: string, ...args: any[]) => string)
  ): string {
    if (typeof replacer !== 'string') {
      return replacer(
        match.match,
        ...match.groups,
        match.index,
        input,
        this.getNamedGroups(match)
      );
    }
    let result = '';
    for (let i = 0; i < replacer.length; i++) {
      if (replacer[i] !== '$') {
        result += replacer[i];
        continue;
      }
      const next = replacer[i + 1];
      switch (next) {
        case '$':
          result += '$';
          i++;
          break;
        case '&':
          result += match.match;
          i++;
          break;
        case '`':
          result += input.substring(0, match.index);
          i++;
          break;
        case "'":
          result += input.substring(match.index + match.match.length);
          i++;
          break;
        case '<': {
          const endCaret = replacer.indexOf('>', i);
          if (endCaret < 0) {
            // Unterminated "$<": keep the "$" and let the rest be copied
            result += '$';
            break;
          }
          const groupName = replacer.substring(i + 2, endCaret);
          if (
            Object.prototype.hasOwnProperty.call(this.namedGroups, groupName)
          ) {
            result += match.groups[this.namedGroups[groupName] - 1] ?? '';
          }
          i = endCaret;
          break;
        }
        default: {
          // "$nn" wins when group nn exists, otherwise "$n" followed by a
          // literal digit, otherwise the "$" is literal text.
          const groupCount = match.groups.length;
          let groupNum = 0;
          let digitsConsumed = 0;
          if (isDecimalDigit(next)) {
            const second = replacer[i + 2];
            if (isDecimalDigit(second)) {
              const twoDigit = Number.parseInt(next + second, 10);
              if (twoDigit >= 1 && twoDigit <= groupCount) {
                groupNum = twoDigit;
                digitsConsumed = 2;
              }
            }
            if (digitsConsumed === 0) {
              const oneDigit = Number.parseInt(next, 10);
              if (oneDigit >= 1 && oneDigit <= groupCount) {
                groupNum = oneDigit;
                digitsConsumed = 1;
              }
            }
          }
          if (digitsConsumed > 0) {
            // Subtract 1 because groups are 1-indexed in replacement strings
            result += match.groups[groupNum - 1] ?? '';
            i += digitsConsumed;
          } else {
            result += '$';
          }
        }
      }
    }
    return result;
  }

  /**
   * Replace the first match in `input` (every match with the "g" flag) with
   * the result of `replacer`.
   */
  [Symbol.replace](
    input: string,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    replacer: string | ((substring: string, ...args: any[]) => string)
  ): string {
    if (typeof input !== 'string') {
      input = input + '';
    }
    if (typeof replacer !== 'function') {
      replacer = replacer + '';
    }
    if (this._global) {
      let result = '';
      // End of the part of `input` already written to `result`
      let copiedUpTo = 0;
      // Where the next search starts; runs ahead of copiedUpTo by one code
      // point after an empty match so that the loop makes progress.
      let searchFrom = 0;
      while (searchFrom <= input.length) {
        const match = this.wrapper.match(input, searchFrom, true);
        if (!this.isMatchSuccessful(match, searchFrom)) {
          break;
        }
        result +=
          input.substring(copiedUpTo, match.index) +
          this.replaceMatch(input, match, replacer);
        copiedUpTo = match.index + match.match.length;
        searchFrom =
          match.match.length === 0
            ? advanceStringIndex(input, copiedUpTo)
            : copiedUpTo;
      }
      result += input.substring(copiedUpTo);
      this.lastIndex = 0;
      return result;
    } else {
      const startIndex = this.getMaybeStickyIndex();
      const match = this.wrapper.match(input, startIndex, true);
      this.maybeUpdateLastIndex(match, startIndex);
      if (this.isMatchSuccessful(match, startIndex)) {
        return (
          input.substring(0, match.index) +
          this.replaceMatch(input, match, replacer) +
          input.substring(match.index + match.match.length)
        );
      } else {
        return input;
      }
    }
  }
  replace(
    input: string,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    replacer: string | ((substring: string, ...args: any[]) => string)
  ): string {
    return this[Symbol.replace](input, replacer);
  }

  /**
   * Return the index of the first match in `input`, or -1. The search always
   * starts at 0; in sticky mode only a match at index 0 counts.
   */
  [Symbol.search](input: string): number {
    if (typeof input !== 'string') {
      input = input + '';
    }
    const result = this.wrapper.match(input, 0, false).index;
    if (this._sticky && result !== 0) {
      return -1;
    } else {
      return result;
    }
  }
  search(input: string): number {
    return this[Symbol.search](input);
  }

  /**
   * Split `input` around the matches of this expression, interleaving the
   * capture groups of each match, and stop once `limit` pieces exist.
   *
   * NOTE(review): an empty match is handled by emitting the single code unit
   * at the current position, even when the empty match was found further
   * along the string — confirm this is the intended deviation from
   * String.prototype.split.
   */
  [Symbol.split](input: string, limit?: number): (string | undefined)[] {
    if (typeof input !== 'string') {
      input = input + '';
    }
    const output = [];
    let nextIndex = 0;
    limit = limit ?? Infinity;
    while (output.length < limit) {
      const nextMatch = this.wrapper.match(input, nextIndex, true);
      if (nextMatch.index >= 0) {
        if (nextMatch.match.length === 0) {
          output.push(input.substring(nextIndex, nextIndex + 1));
          nextIndex = nextIndex + 1;
        } else {
          output.push(input.substring(nextIndex, nextMatch.index));
          nextIndex = nextMatch.index + nextMatch.match.length;
        }
        for (const group of nextMatch.groups) {
          if (output.length >= limit) {
            break;
          }
          output.push(group);
        }
      } else {
        // No further match: the remainder is the last piece
        output.push(input.substring(nextIndex));
        break;
      }
    }
    return output;
  }
  split(input: string, limit?: number): (string | undefined)[] {
    return this[Symbol.split](input, limit);
  }
}
3 | 4 | The text of the BSD license is reproduced below. 5 | 6 | ------------------------------------------------------------------------------- 7 | The "New" BSD License: 8 | ********************** 9 | 10 | Copyright (c) 2005-2020, Eugene Lazutkin 11 | All rights reserved. 12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | * Redistributions of source code must retain the above copyright notice, this 17 | list of conditions and the following disclaimer. 18 | * Redistributions in binary form must reproduce the above copyright notice, 19 | this list of conditions and the following disclaimer in the documentation 20 | and/or other materials provided with the distribution. 21 | * Neither the name of Eugene Lazutkin nor the names of other contributors 22 | may be used to endorse or promote products derived from this software 23 | without specific prior written permission. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 26 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 27 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 29 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
35 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_exec.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | 12 | // These tests are copied from MDN: 13 | // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec 14 | 15 | function test_execBasic(t) { 16 | "use strict"; 17 | 18 | var re = new RE2("quick\\s(brown).+?(jumps)", "igu"); 19 | 20 | eval(t.TEST("re.source === 'quick\\\\s(brown).+?(jumps)'")); 21 | eval(t.TEST("re.ignoreCase")); 22 | eval(t.TEST("re.global")); 23 | eval(t.TEST("!re.multiline")); 24 | 25 | var result = re.exec("The Quick Brown Fox Jumps Over The Lazy Dog"); 26 | 27 | eval(t.TEST("t.unify(result, ['Quick Brown Fox Jumps', 'Brown', 'Jumps'])")); 28 | eval(t.TEST("result.index === 4")); 29 | eval(t.TEST("result.input === 'The Quick Brown Fox Jumps Over The Lazy Dog'")); 30 | eval(t.TEST("re.lastIndex === 25")); 31 | }, 32 | function test_execSucc(t) { 33 | "use strict"; 34 | 35 | var str = "abbcdefabh"; 36 | 37 | var re = new RE2("ab*", "gu"); 38 | var result = re.exec(str); 39 | 40 | eval(t.TEST("!!result")); 41 | eval(t.TEST("result[0] === 'abb'")); 42 | eval(t.TEST("result.index === 0")); 43 | eval(t.TEST("re.lastIndex === 3")); 44 | 45 | result = re.exec(str); 46 | 47 | eval(t.TEST("!!result")); 48 | eval(t.TEST("result[0] === 'ab'")); 49 | eval(t.TEST("result.index === 7")); 50 | eval(t.TEST("re.lastIndex === 9")); 51 | 52 | result = re.exec(str); 53 | 54 | eval(t.TEST("!result")); 55 | }, 56 | function test_execSimple(t) { 57 | "use strict"; 58 | 59 | var re = new RE2("(hello \\S+)", "u"); 60 | var result = re.exec("This is a hello world!"); 61 | 62 | eval(t.TEST("result[1] === 'hello world!'")); 63 | }, 64 | function test_execFail(t) { 65 | "use 
strict"; 66 | 67 | var re = new RE2("(a+)?(b+)?", "u"); 68 | var result = re.exec("aaabb"); 69 | 70 | eval(t.TEST("result[1] === 'aaa'")); 71 | eval(t.TEST("result[2] === 'bb'")); 72 | 73 | result = re.exec("aaacbb"); 74 | 75 | eval(t.TEST("result[1] === 'aaa'")); 76 | eval(t.TEST("result[2] === undefined")); 77 | eval(t.TEST("result.length === 3")); 78 | }, 79 | function test_execAnchoredToBeginning(t) { 80 | "use strict"; 81 | 82 | var re = new RE2('^hello', 'gu'); 83 | 84 | var result = re.exec("hellohello"); 85 | 86 | eval(t.TEST("t.unify(result, ['hello'])")); 87 | eval(t.TEST("result.index === 0")); 88 | eval(t.TEST("re.lastIndex === 5")); 89 | 90 | eval(t.TEST("re.exec('hellohello') === null")); 91 | }, 92 | function test_execInvalid(t) { 93 | "use strict"; 94 | 95 | var re = new RE2('', 'u'); 96 | 97 | try { 98 | re.exec({ toString() { throw "corner"; } }); 99 | t.test(false); // shouldn't be here 100 | } catch(e) { 101 | eval(t.TEST("e === 'corner'")); 102 | } 103 | }, 104 | function test_execAnchor1(t) { 105 | "use strict"; 106 | 107 | var re = new RE2("b|^a", "gu"); 108 | 109 | var result = re.exec("aabc"); 110 | eval(t.TEST("!!result")); 111 | eval(t.TEST("result.index === 0")); 112 | eval(t.TEST("re.lastIndex === 1")); 113 | 114 | result = re.exec("aabc"); 115 | eval(t.TEST("!!result")); 116 | eval(t.TEST("result.index === 2")); 117 | eval(t.TEST("re.lastIndex === 3")); 118 | 119 | result = re.exec("aabc"); 120 | eval(t.TEST("!result")); 121 | }, 122 | function test_execAnchor2(t) { 123 | "use strict"; 124 | 125 | var re = new RE2("(?:^a)", "gu"); 126 | 127 | var result = re.exec("aabc"); 128 | eval(t.TEST("!!result")); 129 | eval(t.TEST("result.index === 0")); 130 | eval(t.TEST("re.lastIndex === 1")); 131 | 132 | result = re.exec("aabc"); 133 | eval(t.TEST("!result")); 134 | }, 135 | 136 | // Unicode tests 137 | 138 | function test_execUnicode(t) { 139 | "use strict"; 140 | 141 | var re = new RE2("охотник\\s(желает).+?(где)", "igu"); 142 | 143 | 
eval(t.TEST("re.source === 'охотник\\\\s(желает).+?(где)'")); 144 | eval(t.TEST("re.ignoreCase")); 145 | eval(t.TEST("re.global")); 146 | eval(t.TEST("!re.multiline")); 147 | 148 | var result = re.exec("Каждый Охотник Желает Знать Где Сидит Фазан"); 149 | 150 | eval(t.TEST("t.unify(result, ['Охотник Желает Знать Где', 'Желает', 'Где'])")); 151 | eval(t.TEST("result.index === 7")); 152 | eval(t.TEST("result.input === 'Каждый Охотник Желает Знать Где Сидит Фазан'")); 153 | eval(t.TEST("re.lastIndex === 31")); 154 | 155 | eval(t.TEST("result.input.substr(result.index) === 'Охотник Желает Знать Где Сидит Фазан'")); 156 | eval(t.TEST("result.input.substr(re.lastIndex) === ' Сидит Фазан'")); 157 | }, 158 | function test_execUnicodeSubsequent(t) { 159 | "use strict"; 160 | 161 | var str = "аббвгдеабё"; 162 | 163 | var re = new RE2("аб*", "gu"); 164 | var result = re.exec(str); 165 | 166 | eval(t.TEST("!!result")); 167 | eval(t.TEST("result[0] === 'абб'")); 168 | eval(t.TEST("result.index === 0")); 169 | eval(t.TEST("re.lastIndex === 3")); 170 | 171 | result = re.exec(str); 172 | 173 | eval(t.TEST("!!result")); 174 | eval(t.TEST("result[0] === 'аб'")); 175 | eval(t.TEST("result.index === 7")); 176 | eval(t.TEST("re.lastIndex === 9")); 177 | 178 | result = re.exec(str); 179 | 180 | eval(t.TEST("!result")); 181 | }, 182 | function test_execUnicodeSupplementary(t) { 183 | "use strict"; 184 | 185 | var re = new RE2("\\u{1F603}", "gu"); 186 | 187 | eval(t.TEST("re.source === '\\\\u{1F603}'")); 188 | eval(t.TEST("re.internalSource === '\\\\x{1F603}'")); 189 | eval(t.TEST("!re.ignoreCase")); 190 | eval(t.TEST("re.global")); 191 | eval(t.TEST("!re.multiline")); 192 | 193 | var result = re.exec("\u{1F603}"); // 1F603 is the SMILING FACE WITH OPEN MOUTH emoji 194 | 195 | eval(t.TEST("t.unify(result, ['\\u{1F603}'])")); 196 | eval(t.TEST("result.index === 0")); 197 | eval(t.TEST("result.input === '\\u{1F603}'")); 198 | eval(t.TEST("re.lastIndex === 2")); 199 | 200 | var re2 = new 
RE2(".", "gu"); 201 | 202 | eval(t.TEST("re2.source === '.'")); 203 | eval(t.TEST("!re2.ignoreCase")); 204 | eval(t.TEST("re2.global")); 205 | eval(t.TEST("!re2.multiline")); 206 | 207 | var result2 = re2.exec("\u{1F603}"); 208 | 209 | eval(t.TEST("t.unify(result2, ['\\u{1F603}'])")); 210 | eval(t.TEST("result2.index === 0")); 211 | eval(t.TEST("result2.input === '\\u{1F603}'")); 212 | eval(t.TEST("re2.lastIndex === 2")); 213 | 214 | var re3 = new RE2("[\u{1F603}-\u{1F605}]", "gu"); 215 | 216 | eval(t.TEST("re3.source === '[\u{1F603}-\u{1F605}]'")); 217 | eval(t.TEST("!re3.ignoreCase")); 218 | eval(t.TEST("re3.global")); 219 | eval(t.TEST("!re3.multiline")); 220 | 221 | var result3 = re3.exec("\u{1F604}"); 222 | 223 | eval(t.TEST("t.unify(result3, ['\\u{1F604}'])")); 224 | eval(t.TEST("result3.index === 0")); 225 | eval(t.TEST("result3.input === '\\u{1F604}'")); 226 | eval(t.TEST("re3.lastIndex === 2")); 227 | }, 228 | 229 | // Sticky tests 230 | 231 | function test_execSticky(t) { 232 | "use strict"; 233 | 234 | var re = new RE2("\\s+", "yu"); 235 | 236 | eval(t.TEST("re.exec('Hello world, how are you?') === null")); 237 | 238 | re.lastIndex = 5; 239 | 240 | var result = re.exec("Hello world, how are you?"); 241 | 242 | eval(t.TEST("t.unify(result, [' '])")); 243 | eval(t.TEST("result.index === 5")); 244 | eval(t.TEST("re.lastIndex === 6")); 245 | 246 | var re2 = new RE2("\\s+", "gyu"); 247 | 248 | eval(t.TEST("re2.exec('Hello world, how are you?') === null")); 249 | 250 | re2.lastIndex = 5; 251 | 252 | var result2 = re2.exec("Hello world, how are you?"); 253 | 254 | eval(t.TEST("t.unify(result2, [' '])")); 255 | eval(t.TEST("result2.index === 5")); 256 | eval(t.TEST("re2.lastIndex === 6")); 257 | }, 258 | 259 | // Multiline test 260 | 261 | function test_execMultiline(t) { 262 | "use strict"; 263 | 264 | const re = new RE2("^xy", "mu"), 265 | pattern = ` xy1 266 | xy2 (at start of line) 267 | xy3`; 268 | 269 | const result = re.exec(pattern); 270 | 271 | 
eval(t.TEST("!!result")); 272 | eval(t.TEST("result[0] === 'xy'")); 273 | eval(t.TEST("result.index > 3")); 274 | eval(t.TEST("result.index < pattern.length - 4")); 275 | eval(t.TEST("result[0] === pattern.substr(result.index, result[0].length)")); 276 | } 277 | ]); 278 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_general.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | function test_generalCtr(t) { 12 | "use strict"; 13 | 14 | eval(t.TEST("!!RE2")); 15 | eval(t.TEST("!!RE2.prototype")); 16 | }, 17 | function test_generalInst(t) { 18 | "use strict"; 19 | 20 | var re1 = new RE2("\\d+", 'u'); 21 | 22 | eval(t.TEST("!!re1")); 23 | eval(t.TEST("re1 instanceof RE2")); 24 | 25 | var re2 = new RE2("\\d+", 'u'); 26 | 27 | eval(t.TEST("!!re2")); 28 | eval(t.TEST("re2 instanceof RE2")); 29 | compare(re1, re2, t); 30 | 31 | re1 = new RE2("\\d+", "mu"); 32 | 33 | eval(t.TEST("!!re1")); 34 | eval(t.TEST("re1 instanceof RE2")); 35 | 36 | re2 = new RE2("\\d+", "mu"); 37 | 38 | eval(t.TEST("!!re2")); 39 | eval(t.TEST("re2 instanceof RE2")); 40 | compare(re1, re2, t); 41 | }, 42 | /* 43 | function test_instErrors(t) { 44 | try { 45 | var re = new RE2([], 'u'); 46 | t.test(false); // shouldn't be here 47 | } catch(e) { 48 | eval(t.TEST("e instanceof TypeError")); 49 | } 50 | 51 | try { 52 | var re = new RE2({}, 'u'); 53 | t.test(false); // shouldn't be here 54 | } catch(e) { 55 | eval(t.TEST("e instanceof TypeError")); 56 | } 57 | 58 | try { 59 | var re = new RE2(new Date(), 'u'); 60 | t.test(false); // shouldn't be here 61 | } catch(e) { 62 | eval(t.TEST("e instanceof TypeError")); 63 | } 64 | 65 | try { 66 | var re = new RE2(null); 67 | t.test(false); // shouldn't be here 68 | } catch(e) { 69 | eval(t.TEST("e instanceof 
TypeError")); 70 | } 71 | 72 | try { 73 | var re = new RE2(); 74 | t.test(false); // shouldn't be here 75 | } catch(e) { 76 | eval(t.TEST("e instanceof TypeError")); 77 | } 78 | 79 | try { 80 | var re = RE2(); 81 | t.test(false); // shouldn't be here 82 | } catch(e) { 83 | eval(t.TEST("e instanceof TypeError")); 84 | } 85 | 86 | try { 87 | var re = RE2({ toString() { throw "corner"; } }); 88 | t.test(false); // shouldn't be here 89 | } catch(e) { 90 | eval(t.TEST("e instanceof TypeError")); 91 | } 92 | }, 93 | */ 94 | function test_generalIn(t) { 95 | "use strict"; 96 | 97 | var re = new RE2("\\d+", 'u'); 98 | 99 | eval(t.TEST("'exec' in re")); 100 | eval(t.TEST("'test' in re")); 101 | eval(t.TEST("'match' in re")); 102 | eval(t.TEST("'replace' in re")); 103 | eval(t.TEST("'search' in re")); 104 | eval(t.TEST("'split' in re")); 105 | eval(t.TEST("'source' in re")); 106 | eval(t.TEST("'flags' in re")); 107 | eval(t.TEST("'global' in re")); 108 | eval(t.TEST("'ignoreCase' in re")); 109 | eval(t.TEST("'multiline' in re")); 110 | eval(t.TEST("'sticky' in re")); 111 | eval(t.TEST("'lastIndex' in re")); 112 | }, 113 | function test_generalPresent(t) { 114 | "use strict"; 115 | 116 | var re = new RE2("\\d+", 'u'); 117 | 118 | eval(t.TEST("typeof re.exec == 'function'")); 119 | eval(t.TEST("typeof re.test == 'function'")); 120 | eval(t.TEST("typeof re.match == 'function'")); 121 | eval(t.TEST("typeof re.replace == 'function'")); 122 | eval(t.TEST("typeof re.search == 'function'")); 123 | eval(t.TEST("typeof re.split == 'function'")); 124 | eval(t.TEST("typeof re.source == 'string'")); 125 | eval(t.TEST("typeof re.flags == 'string'")); 126 | eval(t.TEST("typeof re.global == 'boolean'")); 127 | eval(t.TEST("typeof re.ignoreCase == 'boolean'")); 128 | eval(t.TEST("typeof re.multiline == 'boolean'")); 129 | eval(t.TEST("typeof re.sticky == 'boolean'")); 130 | eval(t.TEST("typeof re.lastIndex == 'number'")); 131 | }, 132 | function test_generalLastIndex(t) { 133 | "use strict"; 
134 | 135 | var re = new RE2("\\d+", 'u'); 136 | 137 | eval(t.TEST("re.lastIndex === 0")); 138 | 139 | re.lastIndex = 5; 140 | 141 | eval(t.TEST("re.lastIndex === 5")); 142 | 143 | re.lastIndex = 0; 144 | 145 | eval(t.TEST("re.lastIndex === 0")); 146 | }, 147 | function test_generalRegExp(t) { 148 | "use strict"; 149 | 150 | var re1 = new RegExp("\\d+", 'u'); 151 | var re2 = new RE2("\\d+", 'u'); 152 | 153 | compare(re1, re2, t); 154 | 155 | re2 = new RE2(re1); 156 | 157 | compare(re1, re2, t); 158 | 159 | re1 = new RegExp("a", "igu"); 160 | re2 = new RE2("a", "igu"); 161 | 162 | compare(re1, re2, t); 163 | 164 | re2 = new RE2(re1); 165 | 166 | compare(re1, re2, t); 167 | 168 | re1 = /\s/gmu; 169 | re2 = new RE2("\\s", "mgu"); 170 | 171 | compare(re1, re2, t); 172 | 173 | re2 = new RE2(re1); 174 | 175 | compare(re1, re2, t); 176 | 177 | re2 = new RE2(/\s/gmu); 178 | 179 | compare(/\s/gm, re2, t); 180 | 181 | re1 = new RE2("b", "gmu"); 182 | re2 = new RE2(re1); 183 | 184 | compare(re1, re2, t); 185 | }, 186 | /* 187 | function test_utf8(t) { 188 | "use strict"; 189 | 190 | var s = "Привет!"; 191 | 192 | eval(t.TEST("s.length === 7")); 193 | eval(t.TEST("RE2.getUtf8Length(s) === 13")); 194 | 195 | var b = new Buffer(s); 196 | eval(t.TEST("b.length === 13")); 197 | eval(t.TEST("RE2.getUtf16Length(b) === 7")); 198 | 199 | var s2 = "\u{1F603}"; 200 | 201 | eval(t.TEST("s2.length === 2")); 202 | eval(t.TEST("RE2.getUtf8Length(s2) === 4")); 203 | 204 | var b2 = new Buffer(s2); 205 | 206 | eval(t.TEST("b2.length === 4")); 207 | eval(t.TEST("RE2.getUtf16Length(b2) === 2")); 208 | 209 | var s3 = "\uD83D"; 210 | 211 | eval(t.TEST("s3.length === 1")); 212 | eval(t.TEST("RE2.getUtf8Length(s3) === 3")); 213 | 214 | var b3 = new Buffer([0xF0]); 215 | 216 | eval(t.TEST("b3.length === 1")); 217 | eval(t.TEST("RE2.getUtf16Length(b3) === 2")); 218 | 219 | try { 220 | RE2.getUtf8Length({ toString() { throw "corner"; } }); 221 | t.test(false); // shouldn't be here 222 | } catch(e) { 
223 | eval(t.TEST("e === 'corner'")); 224 | } 225 | 226 | eval(t.TEST("RE2.getUtf16Length({ toString() { throw 'corner'; } }) === -1")); 227 | }, 228 | */ 229 | function test_flags(t) { 230 | "use strict"; 231 | 232 | var re = new RE2("a", "u"); 233 | eval(t.TEST("re.flags === 'u'")); 234 | 235 | re = new RE2("a", "iu"); 236 | eval(t.TEST("re.flags === 'iu'")); 237 | 238 | re = new RE2("a", "mu"); 239 | eval(t.TEST("re.flags === 'mu'")); 240 | 241 | re = new RE2("a", "gu"); 242 | eval(t.TEST("re.flags === 'gu'")); 243 | 244 | re = new RE2("a", "yu"); 245 | eval(t.TEST("re.flags === 'uy'")); 246 | 247 | re = new RE2("a", "yiu"); 248 | eval(t.TEST("re.flags === 'iuy'")); 249 | 250 | re = new RE2("a", "yigu"); 251 | eval(t.TEST("re.flags === 'giuy'")); 252 | 253 | re = new RE2("a", "miu"); 254 | eval(t.TEST("re.flags === 'imu'")); 255 | 256 | re = new RE2("a", "ygu"); 257 | eval(t.TEST("re.flags === 'guy'")); 258 | 259 | re = new RE2("a", "myu"); 260 | eval(t.TEST("re.flags === 'muy'")); 261 | 262 | re = new RE2("a", "migyu"); 263 | eval(t.TEST("re.flags === 'gimuy'")); 264 | } 265 | ]); 266 | 267 | 268 | // utilities 269 | 270 | function compare(re1, re2, t) { 271 | // compares regular expression objects 272 | eval(t.TEST("re1.source === re2.source")); 273 | eval(t.TEST("re1.global === re2.global")); 274 | eval(t.TEST("re1.ignoreCase === re2.ignoreCase")); 275 | eval(t.TEST("re1.multiline === re2.multiline")); 276 | // eval(t.TEST("re1.unicode === re2.unicode")); 277 | eval(t.TEST("re1.sticky === re2.sticky")); 278 | } 279 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_groups.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | function test_groupsNormal(t) { 12 | "use strict"; 13 | 14 | eval(t.TEST("(new
RE2('(?\\\\d)', 'u')).test('9')")); 15 | eval(t.TEST("t.unify((new RE2('(?-)', 'gu')).match('a-b-c'), ['-', '-'])")); 16 | eval(t.TEST("t.unify((new RE2('(?-)', 'u')).split('a-b-c'), ['a', '-', 'b', '-', 'c'])")); 17 | eval(t.TEST("(new RE2('(?-)', 'gu')).search('a-b-c') === 1")); 18 | }, 19 | function test_groupsExec(t) { 20 | "use strict"; 21 | 22 | var result = (new RE2('(\\d)', 'u')).exec('k9'); 23 | eval(t.TEST("result")); 24 | eval(t.TEST("result[0] === '9'")); 25 | eval(t.TEST("result[1] === '9'")); 26 | eval(t.TEST("result.index === 1")); 27 | eval(t.TEST("result.input === 'k9'")); 28 | eval(t.TEST("typeof result.groups == 'undefined'")); 29 | 30 | result = (new RE2('(?\\d)', 'u')).exec('k9'); 31 | eval(t.TEST("result")); 32 | eval(t.TEST("result[0] === '9'")); 33 | eval(t.TEST("result[1] === '9'")); 34 | eval(t.TEST("result.index === 1")); 35 | eval(t.TEST("result.input === 'k9'")); 36 | eval(t.TEST("t.unify(result.groups, {a: '9'})")); 37 | }, 38 | function test_groupsMatch(t) { 39 | "use strict"; 40 | 41 | var result = (new RE2('(\\d)', 'u')).match('k9'); 42 | eval(t.TEST("result")); 43 | eval(t.TEST("result[0] === '9'")); 44 | eval(t.TEST("result[1] === '9'")); 45 | eval(t.TEST("result.index === 1")); 46 | eval(t.TEST("result.input === 'k9'")); 47 | eval(t.TEST("typeof result.groups == 'undefined'")); 48 | 49 | result = (new RE2('(?\\d)', 'u')).match('k9'); 50 | eval(t.TEST("result")); 51 | eval(t.TEST("result[0] === '9'")); 52 | eval(t.TEST("result[1] === '9'")); 53 | eval(t.TEST("result.index === 1")); 54 | eval(t.TEST("result.input === 'k9'")); 55 | eval(t.TEST("t.unify(result.groups, {a: '9'})")); 56 | }, 57 | function test_groupsMatch(t) { 58 | "use strict"; 59 | 60 | eval(t.TEST("(new RE2('(?\\\\w)(?\\\\d)', 'gu')).replace('a1b2c', '$2$1') === '1a2bc'")); 61 | eval(t.TEST("(new RE2('(?\\\\w)(?\\\\d)', 'gu')).replace('a1b2c', '$$') === '1a2bc'")); 62 | 63 | eval(t.TEST("(new RE2('(?\\\\w)(?\\\\d)', 'gu')).replace('a1b2c', replacerByNumbers) === 
'1a2bc'")); 64 | eval(t.TEST("(new RE2('(?\\\\w)(?\\\\d)', 'gu')).replace('a1b2c', replacerByNames) === '1a2bc'")); 65 | 66 | function replacerByNumbers(match, group1, group2, index, source, groups) { 67 | return group2 + group1; 68 | } 69 | function replacerByNames(match, group1, group2, index, source, groups) { 70 | return groups.d + groups.w; 71 | } 72 | }, 73 | function test_groupsInvalid(t) { 74 | "use strict"; 75 | 76 | try { 77 | new RE2('(?<>.)', 'u'); 78 | t.test(false); // shouldn'be here 79 | } catch(e) { 80 | eval(t.TEST("e instanceof SyntaxError")); 81 | } 82 | 83 | // TODO: do we need to enforce the correct id? 84 | // try { 85 | // RE2('(?<1>.)'); 86 | // t.test(false); // shouldn'be here 87 | // } catch(e) { 88 | // eval(t.TEST("e instanceof SyntaxError")); 89 | // } 90 | 91 | try { 92 | new RE2('(?.)(?.)', 'u'); 93 | t.test(false); // shouldn'be here 94 | } catch(e) { 95 | eval(t.TEST("e instanceof SyntaxError")); 96 | } 97 | } 98 | ]); 99 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_invalid.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | 12 | function test_inval(t) { 13 | "use strict"; 14 | 15 | var threw; 16 | 17 | // Backreferences 18 | threw = false; 19 | try { 20 | new RE2(/(a)\1/u); 21 | } catch (e) { 22 | threw = true; 23 | eval(t.TEST("e instanceof SyntaxError")); 24 | eval(t.TEST("e.message.endsWith('invalid escape sequence: \\\\1')")); 25 | } 26 | t.test(threw); 27 | 28 | // Lookahead assertions 29 | 30 | // Positive 31 | threw = false; 32 | try { 33 | new RE2(/a(?=b)/u); 34 | } catch (e) { 35 | threw = true; 36 | eval(t.TEST("e instanceof SyntaxError")); 37 | eval(t.TEST("e.message.endsWith('invalid perl operator: (?=')")); 38 | } 39 | t.test(threw); 40 | 41 | // Negative 42 | 
threw = false; 43 | try { 44 | new RE2(/a(?!b)/u); 45 | } catch (e) { 46 | threw = true; 47 | eval(t.TEST("e instanceof SyntaxError")); 48 | eval(t.TEST("e.message.endsWith('invalid perl operator: (?!')")); 49 | } 50 | t.test(threw); 51 | }, 52 | ]); 53 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_match.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | 12 | // These tests are copied from MDN: 13 | // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match 14 | 15 | function test_match(t) { 16 | "use strict"; 17 | 18 | var str = "For more information, see Chapter 3.4.5.1"; 19 | 20 | var re = new RE2(/(chapter \d+(\.\d)*)/iu); 21 | var result = re.match(str); 22 | 23 | eval(t.TEST("result.input === str")); 24 | eval(t.TEST("result.index === 26")); 25 | eval(t.TEST("result.length === 3")); 26 | eval(t.TEST("result[0] === 'Chapter 3.4.5.1'")); 27 | eval(t.TEST("result[1] === 'Chapter 3.4.5.1'")); 28 | eval(t.TEST("result[2] === '.1'")); 29 | }, 30 | function test_matchGlobal(t) { 31 | "use strict"; 32 | 33 | var re = new RE2(/[A-E]/giu); 34 | var result = re.match("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); 35 | 36 | eval(t.TEST("t.unify(result, ['A', 'B', 'C', 'D', 'E', 'a', 'b', 'c', 'd', 'e'])")); 37 | }, 38 | function test_matchFail(t) { 39 | "use strict"; 40 | 41 | var re = new RE2("(a+)?(b+)?", "u"); 42 | var result = re.match("aaabb"); 43 | 44 | eval(t.TEST("result[1] === 'aaa'")); 45 | eval(t.TEST("result[2] === 'bb'")); 46 | 47 | result = re.match("aaacbb"); 48 | 49 | eval(t.TEST("result[1] === 'aaa'")); 50 | eval(t.TEST("result[2] === undefined")); 51 | }, 52 | function test_matchInvalid(t) { 53 | "use strict"; 54 | 55 | var re = new RE2('', 'u'); 56 | 
57 | try { 58 | re.match({ toString() { throw "corner"; } }); 59 | t.test(false); // shouldn't be here 60 | } catch(e) { 61 | eval(t.TEST("e === 'corner'")); 62 | } 63 | }, 64 | 65 | // Unicode tests 66 | 67 | function test_matchUnicode(t) { 68 | "use strict"; 69 | 70 | var str = "Это ГЛАВА 3.4.5.1"; 71 | 72 | var re = new RE2(/(глава \d+(\.\d)*)/iu); 73 | var result = re.match(str); 74 | 75 | eval(t.TEST("result.input === str")); 76 | eval(t.TEST("result.index === 4")); 77 | eval(t.TEST("result.length === 3")); 78 | eval(t.TEST("result[0] === 'ГЛАВА 3.4.5.1'")); 79 | eval(t.TEST("result[1] === 'ГЛАВА 3.4.5.1'")); 80 | eval(t.TEST("result[2] === '.1'")); 81 | }, 82 | 83 | // Buffer tests 84 | 85 | /* 86 | function test_matchBuffer(t) { 87 | "use strict"; 88 | 89 | var buf = new Buffer("Это ГЛАВА 3.4.5.1"); 90 | 91 | var re = new RE2(/(глава \d+(\.\d)*)/iu); 92 | var result = re.match(buf); 93 | 94 | eval(t.TEST("result.input instanceof Buffer")); 95 | eval(t.TEST("result.length === 3")); 96 | eval(t.TEST("result[0] instanceof Buffer")); 97 | eval(t.TEST("result[1] instanceof Buffer")); 98 | eval(t.TEST("result[2] instanceof Buffer")); 99 | 100 | eval(t.TEST("result.input === buf")); 101 | eval(t.TEST("result.index === 7")); 102 | eval(t.TEST("result.input.toString('utf8', result.index) === 'ГЛАВА 3.4.5.1'")); 103 | eval(t.TEST("result[0].toString() === 'ГЛАВА 3.4.5.1'")); 104 | eval(t.TEST("result[1].toString() === 'ГЛАВА 3.4.5.1'")); 105 | eval(t.TEST("result[2].toString() === '.1'")); 106 | }, 107 | */ 108 | 109 | // Sticky tests 110 | 111 | function test_matchSticky(t) { 112 | "use strict"; 113 | 114 | var re = new RE2("\\s+", "yu"); 115 | 116 | eval(t.TEST("re.match('Hello world, how are you?') === null")); 117 | 118 | re.lastIndex = 5; 119 | 120 | var result = re.match("Hello world, how are you?"); 121 | 122 | eval(t.TEST("t.unify(result, [' '])")); 123 | eval(t.TEST("result.index === 5")); 124 | eval(t.TEST("re.lastIndex === 6")); 125 | 126 | var re2 = new 
RE2("\\s+", "gyu"); 127 | 128 | eval(t.TEST("re2.match('Hello world, how are you?') === null")); 129 | 130 | re2.lastIndex = 5; 131 | 132 | eval(t.TEST("re2.match('Hello world, how are you?') === null")); 133 | 134 | var re3 = new RE2(/[A-E]/giyu); 135 | var result3 = re3.match("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); 136 | 137 | eval(t.TEST("t.unify(result3, ['A', 'B', 'C', 'D', 'E'])")); 138 | } 139 | ]); 140 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_matchAll.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const unit = require('heya-unit'); 4 | const RE2 = require('../../..').RE2; 5 | 6 | // tests 7 | 8 | unit.add(module, [ 9 | // These tests are copied from MDN: 10 | // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/matchAll 11 | 12 | function test_matchAll(t) { 13 | 'use strict'; 14 | 15 | const str = 'test1test2'; 16 | const re = new RE2(/t(e)(st(\d?))/gu); 17 | const result = Array.from(str.matchAll(re)); 18 | 19 | eval(t.TEST('result.length === 2')); 20 | eval(t.TEST('result[0].input === str')); 21 | eval(t.TEST('result[0].index === 0')); 22 | eval(t.TEST('result[0].length === 4')); 23 | eval(t.TEST("result[0][0] === 'test1'")); 24 | eval(t.TEST("result[0][1] === 'e'")); 25 | eval(t.TEST("result[0][2] === 'st1'")); 26 | eval(t.TEST("result[0][3] === '1'")); 27 | eval(t.TEST('result[1].input === str')); 28 | eval(t.TEST('result[1].index === 5')); 29 | eval(t.TEST('result[1].length === 4')); 30 | eval(t.TEST("result[1][0] === 'test2'")); 31 | eval(t.TEST("result[1][1] === 'e'")); 32 | eval(t.TEST("result[1][2] === 'st2'")); 33 | eval(t.TEST("result[1][3] === '2'")); 34 | }, 35 | 36 | function test_matchAll_iterator(t) { 37 | 'use strict'; 38 | 39 | const str = 'table football, foosball'; 40 | const re = new RE2('foo[a-z]*', 'gu'); 41 | 42 | const expected = [ 43 | 
{start: 6, finish: 14}, 44 | {start: 16, finish: 24}, 45 | ]; 46 | let i = 0; 47 | for (const match of str.matchAll(re)) { 48 | eval(t.TEST('match.index === expected[i].start')); 49 | eval(t.TEST('match.index + match[0].length === expected[i].finish')); 50 | ++i; 51 | } 52 | }, 53 | 54 | function test_matchAll_non_global(t) { 55 | 'use strict'; 56 | 57 | const re = new RE2('b', 'u'); 58 | 59 | try { 60 | 'abc'.matchAll(re); 61 | t.test(false); // shouldn't be here 62 | } catch (e) { 63 | eval(t.TEST('e instanceof TypeError')); 64 | } 65 | }, 66 | 67 | function test_matchAll_lastIndex(t) { 68 | 'use strict'; 69 | 70 | const re = new RE2('[a-c]', 'gu'); 71 | re.lastIndex = 1; 72 | 73 | const expected = ['b', 'c']; 74 | let i = 0; 75 | for (const match of 'abc'.matchAll(re)) { 76 | eval(t.TEST('re.lastIndex === 1')); 77 | eval(t.TEST('match[0] === expected[i]')); 78 | ++i; 79 | } 80 | }, 81 | ]); 82 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_prototype.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | function test_prototype(t) { 12 | "use strict"; 13 | 14 | // Can't easily modify the prototype in TypeScript 15 | //eval(t.TEST("RE2.prototype.source === '(?:)'")); 16 | eval(t.TEST("RE2.prototype.flags === ''")); 17 | eval(t.TEST("RE2.prototype.global === undefined")); 18 | eval(t.TEST("RE2.prototype.ignoreCase === undefined")); 19 | eval(t.TEST("RE2.prototype.multiline === undefined")); 20 | eval(t.TEST("RE2.prototype.sticky === undefined")); 21 | eval(t.TEST("RE2.prototype.lastIndex === undefined")); 22 | } 23 | ]); 24 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_replace.js: 
-------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | 12 | // These tests are copied from MDN: 13 | // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace 14 | 15 | function test_replaceString(t) { 16 | "use strict"; 17 | 18 | var re = new RE2(/apples/giu); 19 | var result = re.replace("Apples are round, and apples are juicy.", "oranges"); 20 | eval(t.TEST("result === 'oranges are round, and oranges are juicy.'")); 21 | 22 | re = new RE2(/xmas/iu); 23 | result = re.replace("Twas the night before Xmas...", "Christmas"); 24 | eval(t.TEST("result === 'Twas the night before Christmas...'")); 25 | 26 | re = new RE2(/(\w+)\s(\w+)/u); 27 | result = re.replace("John Smith", "$2, $1"); 28 | eval(t.TEST("result === 'Smith, John'")); 29 | }, 30 | function test_replaceFunReplacer(t) { 31 | "use strict"; 32 | 33 | function replacer(match, p1, p2, p3, offset, string) { 34 | // p1 is nondigits, p2 digits, and p3 non-alphanumerics 35 | return [p1, p2, p3].join(' - '); 36 | } 37 | 38 | var re = new RE2(/([^\d]*)(\d*)([^\w]*)/u); 39 | var result = re.replace("abc12345#$*%", replacer); 40 | eval(t.TEST("result === 'abc - 12345 - #$*%'")); 41 | }, 42 | function test_replaceFunUpper(t) { 43 | "use strict"; 44 | 45 | function upperToHyphenLower(match) { 46 | return '-' + match.toLowerCase(); 47 | } 48 | 49 | var re = new RE2(/[A-Z]/gu); 50 | var result = re.replace("borderTop", upperToHyphenLower); 51 | eval(t.TEST("result === 'border-top'")); 52 | }, 53 | function test_replaceFunConvert(t) { 54 | "use strict"; 55 | 56 | function convert(str, p1, offset, s) { 57 | return ((p1 - 32) * 5/9) + 'C'; 58 | } 59 | 60 | var re = new RE2(/(\d+(?:\.\d*)?)F\b/gu); 61 | 62 | eval(t.TEST("re.replace('32F', convert) === '0C'")); 63 | eval(t.TEST("re.replace('41F', convert) === 
'5C'")); 64 | eval(t.TEST("re.replace('50F', convert) === '10C'")); 65 | eval(t.TEST("re.replace('59F', convert) === '15C'")); 66 | eval(t.TEST("re.replace('68F', convert) === '20C'")); 67 | eval(t.TEST("re.replace('77F', convert) === '25C'")); 68 | eval(t.TEST("re.replace('86F', convert) === '30C'")); 69 | eval(t.TEST("re.replace('95F', convert) === '35C'")); 70 | eval(t.TEST("re.replace('104F', convert) === '40C'")); 71 | eval(t.TEST("re.replace('113F', convert) === '45C'")); 72 | eval(t.TEST("re.replace('212F', convert) === '100C'")); 73 | }, 74 | { 75 | test: function test_replaceFunLoop(t) { 76 | "use strict"; 77 | 78 | new RE2(/(x_*)|(-)/gu).replace("x-x_", function(match, p1, p2) { 79 | if (p1) { t.info("on: " + p1.length); } 80 | if (p2) { t.info("off: 1"); } 81 | }); 82 | }, 83 | logs: [ 84 | {text: "on: 1"}, 85 | {text: "off: 1"}, 86 | {text: "on: 2"} 87 | ] 88 | }, 89 | function test_replaceInvalid(t) { 90 | "use strict"; 91 | 92 | var re = new RE2('', 'u'); 93 | 94 | try { 95 | re.replace({ toString() { throw "corner1"; } }, ''); 96 | t.test(false); // shouldn't be here 97 | } catch(e) { 98 | eval(t.TEST("e === 'corner1'")); 99 | } 100 | 101 | try { 102 | re.replace('', { toString() { throw "corner2"; } }); 103 | t.test(false); // shouldn't be here 104 | } catch(e) { 105 | eval(t.TEST("e === 'corner2'")); 106 | } 107 | 108 | var arg2Stringified = false; 109 | 110 | try { 111 | re.replace({ toString() { throw "corner1"; } }, { toString() { arg2Stringified = true; throw "corner2"; } }); 112 | t.test(false); // shouldn't be here 113 | } catch(e) { 114 | eval(t.TEST("e === 'corner1'")); 115 | eval(t.TEST("!arg2Stringified")); 116 | } 117 | 118 | try { 119 | re.replace('', () => { throw "corner2"; }); 120 | t.test(false); // shouldn't be here 121 | } catch(e) { 122 | eval(t.TEST("e === 'corner2'")); 123 | } 124 | 125 | try { 126 | re.replace('', () => ({ toString() { throw "corner2"; } })); 127 | t.test(false); // shouldn't be here 128 | } catch(e) { 129 | 
eval(t.TEST("e === 'corner2'")); 130 | } 131 | }, 132 | 133 | // Unicode tests 134 | 135 | function test_replaceStrUnicode(t) { 136 | "use strict"; 137 | 138 | var re = new RE2(/яблоки/giu); 139 | var result = re.replace("Яблоки красны, яблоки сочны.", "апельсины"); 140 | eval(t.TEST("result === 'апельсины красны, апельсины сочны.'")); 141 | 142 | re = new RE2(/иван/iu); 143 | result = re.replace("Могуч Иван Иванов...", "Сидор"); 144 | eval(t.TEST("result === 'Могуч Сидор Иванов...'")); 145 | 146 | re = new RE2(/иван/igu); 147 | result = re.replace("Могуч Иван Иванов...", "Сидор"); 148 | eval(t.TEST("result === 'Могуч Сидор Сидоров...'")); 149 | 150 | re = new RE2(/([а-яё]+)\s+([а-яё]+)/iu); 151 | result = re.replace("Пётр Петров", "$2, $1"); 152 | eval(t.TEST("result === 'Петров, Пётр'")); 153 | }, 154 | function test_replaceFunUnicode(t) { 155 | "use strict"; 156 | 157 | function replacer(match, offset, string) { 158 | t.test(typeof offset == "number"); 159 | t.test(typeof string == "string"); 160 | t.test(offset === 0 || offset === 7); 161 | t.test(string === "ИВАН и пЁтр"); 162 | return match.charAt(0).toUpperCase() + match.substr(1).toLowerCase(); 163 | } 164 | 165 | var re = new RE2(/(?:иван|пётр|сидор)/igu); 166 | var result = re.replace("ИВАН и пЁтр", replacer); 167 | eval(t.TEST("result === 'Иван и Пётр'")); 168 | }, 169 | 170 | // Sticky tests 171 | 172 | function test_replaceSticky(t) { 173 | "use strict"; 174 | 175 | var re = new RE2(/[A-E]/yu); 176 | 177 | eval(t.TEST("re.replace('ABCDEFABCDEF', '!') === '!BCDEFABCDEF'")); 178 | eval(t.TEST("re.replace('ABCDEFABCDEF', '!') === 'A!CDEFABCDEF'")); 179 | eval(t.TEST("re.replace('ABCDEFABCDEF', '!') === 'AB!DEFABCDEF'")); 180 | eval(t.TEST("re.replace('ABCDEFABCDEF', '!') === 'ABC!EFABCDEF'")); 181 | eval(t.TEST("re.replace('ABCDEFABCDEF', '!') === 'ABCD!FABCDEF'")); 182 | eval(t.TEST("re.replace('ABCDEFABCDEF', '!') === 'ABCDEFABCDEF'")); 183 | eval(t.TEST("re.replace('ABCDEFABCDEF', '!') === 
'!BCDEFABCDEF'")); 184 | 185 | var re2 = new RE2(/[A-E]/gyu); 186 | 187 | eval(t.TEST("re2.replace('ABCDEFABCDEF', '!') === '!!!!!FABCDEF'")); 188 | eval(t.TEST("re2.replace('FABCDEFABCDE', '!') === 'FABCDEFABCDE'")); 189 | 190 | re2.lastIndex = 3; 191 | 192 | eval(t.TEST("re2.replace('ABCDEFABCDEF', '!') === '!!!!!FABCDEF'")); 193 | eval(t.TEST("re2.lastIndex === 0")); 194 | } 195 | ]); 196 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_search.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | function test_search(t) { 12 | "use strict"; 13 | 14 | var str = "Total is 42 units."; 15 | 16 | var re = new RE2(/\d+/iu); 17 | var result = re.search(str); 18 | eval(t.TEST("result === 9")); 19 | 20 | re = new RE2("\\b[a-z]+\\b", 'u'); 21 | result = re.search(str); 22 | eval(t.TEST("result === 6")); 23 | 24 | re = new RE2("\\b\\w+\\b", 'u'); 25 | result = re.search(str); 26 | eval(t.TEST("result === 0")); 27 | 28 | re = new RE2("z", "gmu"); 29 | result = re.search(str); 30 | eval(t.TEST("result === -1")); 31 | }, 32 | function test_searchInvalid(t) { 33 | "use strict"; 34 | 35 | var re = new RE2('', 'u'); 36 | 37 | try { 38 | re.search({ toString() { throw "corner"; } }); 39 | t.test(false); // shouldn't be here 40 | } catch(e) { 41 | eval(t.TEST("e === 'corner'")); 42 | } 43 | }, 44 | function test_searchUnicode(t) { 45 | "use strict"; 46 | 47 | var str = "Всего 42 штуки."; 48 | 49 | var re = new RE2(/\d+/iu); 50 | var result = re.search(str); 51 | eval(t.TEST("result === 6")); 52 | 53 | re = new RE2("\\s[а-я]+", 'u'); 54 | result = re.search(str); 55 | eval(t.TEST("result === 8")); 56 | 57 | re = new RE2("[а-яА-Я]+", 'u'); 58 | result = re.search(str); 59 | eval(t.TEST("result === 0")); 60 | 61 | re = new RE2("z", 
"gmu"); 62 | result = re.search(str); 63 | eval(t.TEST("result === -1")); 64 | }, 65 | function test_searchSticky(t) { 66 | "use strict"; 67 | 68 | var str = "Total is 42 units."; 69 | 70 | var re = new RE2(/\d+/yu); 71 | var result = re.search(str); 72 | eval(t.TEST("result === -1")); 73 | 74 | re = new RE2("\\b[a-z]+\\b", "yu"); 75 | result = re.search(str); 76 | eval(t.TEST("result === -1")); 77 | 78 | re = new RE2("\\b\\w+\\b", "yu"); 79 | result = re.search(str); 80 | eval(t.TEST("result === 0")); 81 | 82 | re = new RE2("z", "gmyu"); 83 | result = re.search(str); 84 | eval(t.TEST("result === -1")); 85 | } 86 | ]); 87 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_source.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | function test_sourceIdentity(t) { 12 | "use strict"; 13 | 14 | var re = new RE2("a\\cM\\u34\\u1234\\u10abcdz", 'u'); 15 | eval(t.TEST("re.source === 'a\\\\cM\\\\u34\\\\u1234\\\\u10abcdz'")); 16 | 17 | re = new RE2("a\\cM\\u34\\u1234\\u{10abcd}z", 'u'); 18 | eval(t.TEST("re.source === 'a\\\\cM\\\\u34\\\\u1234\\\\u{10abcd}z'")); 19 | 20 | re = new RE2("", 'u'); 21 | eval(t.TEST("re.source === '(?:)'")); 22 | 23 | re = new RE2("foo/bar", 'u'); 24 | eval(t.TEST("re.source === 'foo\\\\/bar'")); 25 | 26 | re = new RE2("foo\\/bar", 'u'); 27 | eval(t.TEST("re.source === 'foo\\\\/bar'")); 28 | 29 | re = new RE2("(?bar)", "u"); 30 | eval(t.TEST("re.source === '(?bar)'")); 31 | }, 32 | function test_sourceTranslation(t) { 33 | "use strict"; 34 | 35 | var re = new RE2("a\\cM\\u34\\u1234\\u10abcdz", 'u'); 36 | eval(t.TEST("re.internalSource === 'a\\\\x0D\\\\x{34}\\\\x{1234}\\\\x{10ab}cdz'")); 37 | 38 | re = new RE2("a\\cM\\u34\\u1234\\u{10abcd}z", 'u'); 39 | eval(t.TEST("re.internalSource === 
'a\\\\x0D\\\\x{34}\\\\x{1234}\\\\x{10abcd}z'")); 40 | 41 | re = new RE2("", 'u'); 42 | eval(t.TEST("re.internalSource === '(?:)'")); 43 | 44 | re = new RE2("foo/bar", 'u'); 45 | eval(t.TEST("re.internalSource === 'foo\\\\/bar'")); 46 | 47 | re = new RE2("foo\\/bar", 'u'); 48 | eval(t.TEST("re.internalSource === 'foo\\\\/bar'")); 49 | 50 | re = new RE2("(?bar)", "u"); 51 | eval(t.TEST("re.internalSource === '(?Pbar)'")); 52 | 53 | re = new RE2("foo\\/bar", "mu"); 54 | eval(t.TEST("re.internalSource === '(?m)foo\\\\/bar'")); 55 | }, 56 | function test_sourceBackSlashes(t) { 57 | "use strict"; 58 | 59 | function compare(source, expected) { 60 | var s = new RE2(source, 'u').source; 61 | eval(t.TEST("s === expected")); 62 | } 63 | 64 | compare("a/b", "a\\/b"); 65 | compare("a\/b", "a\\/b"); 66 | compare("a\\/b", "a\\/b"); 67 | compare("a\\\/b", "a\\/b"); 68 | compare("a\\\\/b", "a\\\\\\/b"); 69 | compare("a\\\\\/b", "a\\\\\\/b"); 70 | 71 | compare("/a/b", "\\/a\\/b"); 72 | compare("\\/a/b", "\\/a\\/b"); 73 | compare("\\/a\\/b", "\\/a\\/b"); 74 | compare("\\/a\\\\/b", "\\/a\\\\\\/b"); 75 | } 76 | ]); 77 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_split.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | 12 | // These tests are copied from MDN: 13 | // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split 14 | 15 | function test_split(t) { 16 | "use strict"; 17 | 18 | var re = new RE2(/\s+/u); 19 | var result = re.split("Oh brave new world that has such people in it."); 20 | eval(t.TEST("t.unify(result, ['Oh', 'brave', 'new', 'world', 'that', 'has', 'such', 'people', 'in', 'it.'])")); 21 | 22 | re = new RE2(",", 'u'); 23 | result = 
re.split("Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec"); 24 | eval(t.TEST("t.unify(result, ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])")); 25 | 26 | re = new RE2(",", 'u'); 27 | result = re.split(",Jan,Feb,Mar,Apr,May,Jun,,Jul,Aug,Sep,Oct,Nov,Dec,"); 28 | eval(t.TEST("t.unify(result, ['','Jan','Feb','Mar','Apr','May','Jun','','Jul','Aug','Sep','Oct','Nov','Dec',''])")); 29 | 30 | re = new RE2(/\s*;\s*/u); 31 | result = re.split("Harry Trump ;Fred Barney; Helen Rigby ; Bill Abel ;Chris Hand "); 32 | eval(t.TEST("t.unify(result, ['Harry Trump', 'Fred Barney', 'Helen Rigby', 'Bill Abel', 'Chris Hand '])")); 33 | 34 | re = new RE2(/\s+/u); 35 | result = re.split("Hello World. How are you doing?", 3); 36 | eval(t.TEST("t.unify(result, ['Hello', 'World.', 'How'])")); 37 | 38 | re = new RE2(/(\d)/u); 39 | result = re.split("Hello 1 word. Sentence number 2."); 40 | eval(t.TEST("t.unify(result, ['Hello ', '1', ' word. Sentence number ', '2', '.'])")); 41 | 42 | eval(t.TEST("(new RE2(/[x-z]*/u)).split('asdfghjkl').reverse().join('') === 'lkjhgfdsa'")); 43 | }, 44 | function test_splitInvalid(t) { 45 | "use strict"; 46 | 47 | var re = new RE2('', 'u'); 48 | 49 | try { 50 | re.split({ toString() { throw "corner"; } }); 51 | t.test(false); // shouldn't be here 52 | } catch(e) { 53 | eval(t.TEST("e === 'corner'")); 54 | } 55 | }, 56 | 57 | function test_cornerCases(t) { 58 | "use strict"; 59 | 60 | var re = new RE2(/1/u); 61 | var result = re.split("23456"); 62 | eval(t.TEST("t.unify(result, ['23456'])")); 63 | }, 64 | 65 | // Unicode tests 66 | 67 | function test_splitUnicode(t) { 68 | "use strict"; 69 | 70 | var re = new RE2(/\s+/u); 71 | var result = re.split("Она не понимает, что этим убивает меня."); 72 | eval(t.TEST("t.unify(result, ['Она', 'не', 'понимает,', 'что', 'этим', 'убивает', 'меня.'])")); 73 | 74 | re = new RE2(",", 'u'); 75 | result = re.split("Пн,Вт,Ср,Чт,Пт,Сб,Вс"); 76 | eval(t.TEST("t.unify(result, 
['Пн','Вт','Ср','Чт','Пт','Сб','Вс'])")); 77 | 78 | re = new RE2(/\s*;\s*/u); 79 | result = re.split("Ваня Иванов ;Петро Петренко; Саша Машин ; Маша Сашина"); 80 | eval(t.TEST("t.unify(result, ['Ваня Иванов', 'Петро Петренко', 'Саша Машин', 'Маша Сашина'])")); 81 | 82 | re = new RE2(/\s+/u); 83 | result = re.split("Привет мир. Как дела?", 3); 84 | eval(t.TEST("t.unify(result, ['Привет', 'мир.', 'Как'])")); 85 | 86 | re = new RE2(/(\d)/u); 87 | result = re.split("Привет 1 слово. Предложение номер 2."); 88 | eval(t.TEST("t.unify(result, ['Привет ', '1', ' слово. Предложение номер ', '2', '.'])")); 89 | 90 | eval(t.TEST("(new RE2(/[э-я]*/u)).split('фывапролд').reverse().join('') === 'длорпавыф'")); 91 | }, 92 | 93 | // Sticky tests 94 | 95 | function test_splitSticky(t) { 96 | "use strict"; 97 | 98 | var re = new RE2(/\s+/yu); // sticky is ignored 99 | 100 | var result = re.split("Oh brave new world that has such people in it."); 101 | eval(t.TEST("t.unify(result, ['Oh', 'brave', 'new', 'world', 'that', 'has', 'such', 'people', 'in', 'it.'])")); 102 | 103 | var result2 = re.split(" Oh brave new world that has such people in it."); 104 | eval(t.TEST("t.unify(result2, ['', 'Oh', 'brave', 'new', 'world', 'that', 'has', 'such', 'people', 'in', 'it.'])")); 105 | } 106 | ]); 107 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_symbols.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | function test_match_symbol (t) { 12 | "use strict"; 13 | 14 | if (typeof Symbol == 'undefined' || !Symbol.match) return; 15 | 16 | var str = "For more information, see Chapter 3.4.5.1"; 17 | 18 | var re = new RE2(/(chapter \d+(\.\d)*)/iu); 19 | var result = str.match(re); 20 | 21 | eval(t.TEST("result.input === str")); 22 | 
eval(t.TEST("result.index === 26")); 23 | eval(t.TEST("result.length === 3")); 24 | eval(t.TEST("result[0] === 'Chapter 3.4.5.1'")); 25 | eval(t.TEST("result[1] === 'Chapter 3.4.5.1'")); 26 | eval(t.TEST("result[2] === '.1'")); 27 | }, 28 | function test_search_symbol (t) { 29 | "use strict"; 30 | 31 | if (typeof Symbol == 'undefined' || !Symbol.search) return; 32 | 33 | var str = "Total is 42 units."; 34 | 35 | var re = new RE2(/\d+/iu); 36 | var result = str.search(re); 37 | eval(t.TEST("result === 9")); 38 | 39 | re = new RE2("\\b[a-z]+\\b", 'u'); 40 | result = str.search(re); 41 | eval(t.TEST("result === 6")); 42 | 43 | re = new RE2("\\b\\w+\\b", 'u'); 44 | result = str.search(re); 45 | eval(t.TEST("result === 0")); 46 | 47 | re = new RE2("z", "gmu"); 48 | result = str.search(re); 49 | eval(t.TEST("result === -1")); 50 | }, 51 | function test_replace_symbol (t) { 52 | "use strict"; 53 | 54 | if (typeof Symbol == 'undefined' || !Symbol.replace) return; 55 | 56 | var re = new RE2(/apples/giu); 57 | var result = "Apples are round, and apples are juicy.".replace(re, "oranges"); 58 | eval(t.TEST("result === 'oranges are round, and oranges are juicy.'")); 59 | 60 | re = new RE2(/xmas/iu); 61 | result = "Twas the night before Xmas...".replace(re, "Christmas"); 62 | eval(t.TEST("result === 'Twas the night before Christmas...'")); 63 | 64 | re = new RE2(/(\w+)\s(\w+)/u); 65 | result = "John Smith".replace(re, "$2, $1"); 66 | eval(t.TEST("result === 'Smith, John'")); 67 | }, 68 | function test_split(t) { 69 | "use strict"; 70 | 71 | if (typeof Symbol == 'undefined' || !Symbol.split) return; 72 | 73 | var re = new RE2(/\s+/u); 74 | var result = "Oh brave new world that has such people in it.".split(re); 75 | eval(t.TEST("t.unify(result, ['Oh', 'brave', 'new', 'world', 'that', 'has', 'such', 'people', 'in', 'it.'])")); 76 | 77 | re = new RE2(",", 'u'); 78 | result = "Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec".split(re); 79 | eval(t.TEST("t.unify(result, 
['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])")); 80 | 81 | re = new RE2(/\s*;\s*/u); 82 | result = "Harry Trump ;Fred Barney; Helen Rigby ; Bill Abel ;Chris Hand ".split(re); 83 | eval(t.TEST("t.unify(result, ['Harry Trump', 'Fred Barney', 'Helen Rigby', 'Bill Abel', 'Chris Hand '])")); 84 | 85 | re = new RE2(/\s+/u); 86 | result = "Hello World. How are you doing?".split(re, 3); 87 | eval(t.TEST("t.unify(result, ['Hello', 'World.', 'How'])")); 88 | 89 | re = new RE2(/(\d)/u); 90 | result = "Hello 1 word. Sentence number 2.".split(re); 91 | eval(t.TEST("t.unify(result, ['Hello ', '1', ' word. Sentence number ', '2', '.'])")); 92 | 93 | eval(t.TEST("'asdfghjkl'.split(new RE2(/[x-z]*/u)).reverse().join('') === 'lkjhgfdsa'")); 94 | } 95 | ]); 96 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/test_test.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | 12 | // These tests are copied from MDN: 13 | // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/test 14 | 15 | function test_testFromExec(t) { 16 | "use strict"; 17 | 18 | var re = new RE2("quick\\s(brown).+?(jumps)", "iu"); 19 | 20 | eval(t.TEST("re.test('The Quick Brown Fox Jumps Over The Lazy Dog')")); 21 | eval(t.TEST("re.test('tHE qUICK bROWN fOX jUMPS oVER tHE lAZY dOG')")); 22 | eval(t.TEST("re.test('the quick brown fox jumps over the lazy dog')")); 23 | eval(t.TEST("re.test('THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG')")); 24 | eval(t.TEST("!re.test('THE KWIK BROWN FOX JUMPS OVER THE LAZY DOG')")); 25 | 26 | re = new RE2("ab*", "gu"); 27 | 28 | eval(t.TEST("re.test('abbcdefabh')")); 29 | eval(t.TEST("!re.test('qwerty')")); 30 | 31 | re = new RE2("(hello \\S+)", 'u'); 32 | 33 | 
eval(t.TEST("re.test('This is a hello world!')")); 34 | eval(t.TEST("!re.test('This is a Hello world!')")); 35 | }, 36 | function test_testSucc(t) { 37 | "use strict"; 38 | 39 | var str = "abbcdefabh"; 40 | 41 | var re = new RE2("ab*", "gu"); 42 | var result = re.test(str); 43 | 44 | eval(t.TEST("result")); 45 | eval(t.TEST("re.lastIndex === 3")); 46 | 47 | result = re.test(str); 48 | 49 | eval(t.TEST("result")); 50 | eval(t.TEST("re.lastIndex === 9")); 51 | 52 | result = re.test(str); 53 | 54 | eval(t.TEST("!result")); 55 | }, 56 | function test_testSimple(t) { 57 | "use strict"; 58 | 59 | var str = "abbcdefabh"; 60 | 61 | var re1 = new RE2("ab*", "gu"); 62 | 63 | eval(t.TEST("re1.test(str)")); 64 | 65 | var re2 = new RE2("ab*", 'u'); 66 | 67 | eval(t.TEST("re2.test(str)")); 68 | 69 | var re3 = new RE2("abc", 'u'); 70 | 71 | eval(t.TEST("!re3.test(str)")); 72 | }, 73 | function test_testAnchoredToBeginning(t) { 74 | "use strict"; 75 | 76 | var re = new RE2('^hello', 'gu'); 77 | 78 | eval(t.TEST("re.test('hellohello')")); 79 | eval(t.TEST("!re.test('hellohello')")); 80 | }, 81 | function test_testInvalid(t) { 82 | "use strict"; 83 | 84 | var re = new RE2('', 'u'); 85 | 86 | try { 87 | re.test({ toString() { throw "corner"; } }); 88 | t.test(false); // shouldn't be here 89 | } catch(e) { 90 | eval(t.TEST("e === 'corner'")); 91 | } 92 | }, 93 | function test_testAnchor1(t) { 94 | "use strict"; 95 | 96 | var re = new RE2("b|^a", "gu"); 97 | 98 | var result = re.test("aabc"); 99 | eval(t.TEST("result")); 100 | eval(t.TEST("re.lastIndex === 1")); 101 | 102 | result = re.test("aabc"); 103 | eval(t.TEST("result")); 104 | eval(t.TEST("re.lastIndex === 3")); 105 | 106 | result = re.test("aabc"); 107 | eval(t.TEST("!result")); 108 | }, 109 | function test_testAnchor2(t) { 110 | "use strict"; 111 | 112 | var re = new RE2("(?:^a)", "gu"); 113 | 114 | var result = re.test("aabc"); 115 | eval(t.TEST("result")); 116 | eval(t.TEST("re.lastIndex === 1")); 117 | 118 | result = 
re.test("aabc"); 119 | eval(t.TEST("!result")); 120 | }, 121 | 122 | // Unicode tests 123 | 124 | function test_testUnicode(t) { 125 | "use strict"; 126 | 127 | var re = new RE2("охотник\\s(желает).+?(где)", "iu"); 128 | 129 | eval(t.TEST("re.test('Каждый Охотник Желает Знать Где Сидит Фазан')")); 130 | eval(t.TEST("re.test('кАЖДЫЙ оХОТНИК жЕЛАЕТ зНАТЬ гДЕ сИДИТ фАЗАН')")); 131 | eval(t.TEST("re.test('каждый охотник желает знать где сидит фазан')")); 132 | eval(t.TEST("re.test('КАЖДЫЙ ОХОТНИК ЖЕЛАЕТ ЗНАТЬ ГДЕ СИДИТ ФАЗАН')")); 133 | eval(t.TEST("!re.test('Кажный Стрелок Хочет Найти Иде Прячется Птица')")); 134 | 135 | re = new RE2("аб*", "gu"); 136 | 137 | eval(t.TEST("re.test('аббвгдеабё')")); 138 | eval(t.TEST("!re.test('йцукен')")); 139 | 140 | re = new RE2("(привет \\S+)", 'u'); 141 | 142 | eval(t.TEST("re.test('Это просто привет всем.')")); 143 | eval(t.TEST("!re.test('Это просто Привет всем.')")); 144 | }, 145 | function test_testUnicodeSubsequent(t) { 146 | "use strict"; 147 | 148 | var str = "аббвгдеабё"; 149 | 150 | var re = new RE2("аб*", "gu"); 151 | var result = re.test(str); 152 | 153 | eval(t.TEST("result")); 154 | eval(t.TEST("re.lastIndex === 3")); 155 | 156 | result = re.test(str); 157 | 158 | eval(t.TEST("result")); 159 | eval(t.TEST("re.lastIndex === 9")); 160 | 161 | result = re.test(str); 162 | 163 | eval(t.TEST("!result")); 164 | }, 165 | 166 | // Sticky tests 167 | 168 | function test_testSticky(t) { 169 | "use strict"; 170 | 171 | var re = new RE2("\\s+", "yu"); 172 | 173 | eval(t.TEST("!re.test('Hello world, how are you?')")); 174 | 175 | re.lastIndex = 5; 176 | 177 | eval(t.TEST("re.test('Hello world, how are you?')")); 178 | eval(t.TEST("re.lastIndex === 6")); 179 | 180 | var re2 = new RE2("\\s+", "gyu"); 181 | 182 | eval(t.TEST("!re2.test('Hello world, how are you?')")); 183 | 184 | re2.lastIndex = 5; 185 | 186 | eval(t.TEST("re2.test('Hello world, how are you?')")); 187 | eval(t.TEST("re2.lastIndex === 6")); 188 | } 189 | ]); 190 | 
-------------------------------------------------------------------------------- /third_party/node-re2/tests/test_toString.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | var RE2 = require("../../..").RE2; 6 | 7 | 8 | // tests 9 | 10 | unit.add(module, [ 11 | function test_toString(t) { 12 | "use strict"; 13 | 14 | eval(t.TEST("(new RE2('', 'u')).toString() === '/(?:)/u'")); 15 | eval(t.TEST("(new RE2('a', 'u')).toString() === '/a/u'")); 16 | eval(t.TEST("(new RE2('b', 'iu')).toString() === '/b/iu'")); 17 | eval(t.TEST("(new RE2('c', 'gu')).toString() === '/c/gu'")); 18 | eval(t.TEST("(new RE2('d', 'mu')).toString() === '/d/mu'")); 19 | eval(t.TEST("(new RE2('\\\\d+', 'giu')) + '' === '/\\\\d+/giu'")); 20 | eval(t.TEST("(new RE2('\\\\s*', 'gmu')) + '' === '/\\\\s*/gmu'")); 21 | eval(t.TEST("(new RE2('\\\\S{1,3}', 'igu')) + '' === '/\\\\S{1,3}/giu'")); 22 | eval(t.TEST("(new RE2('\\\\D{,2}', 'migu')) + '' === '/\\\\D{,2}/gimu'")); 23 | eval(t.TEST("(new RE2('^a{2,}', 'miu')) + '' === '/^a{2,}/imu'")); 24 | eval(t.TEST("(new RE2('^a{5}$', 'gimu')) + '' === '/^a{5}$/gimu'")); 25 | eval(t.TEST("(new RE2('\\\\u{1F603}/', 'iyu')) + '' === '/\\\\u{1F603}\\\\//iuy'")); 26 | 27 | eval(t.TEST("(new RE2('c', 'ug')).toString() === '/c/gu'")); 28 | eval(t.TEST("(new RE2('d', 'um')).toString() === '/d/mu'")); 29 | } 30 | ]); 31 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/tests.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | 4 | var unit = require("heya-unit"); 5 | 6 | require("./test_general"); 7 | require("./test_source"); 8 | require("./test_exec"); 9 | require("./test_test"); 10 | require("./test_toString"); 11 | require("./test_match"); 12 | require("./test_replace"); 13 | require("./test_search"); 14 | require("./test_split"); 15 | 
require("./test_invalid"); 16 | require("./test_symbols"); 17 | require("./test_prototype"); 18 | require("./test_groups"); 19 | require("./test_matchAll"); 20 | 21 | unit.run(); 22 | -------------------------------------------------------------------------------- /third_party/node-re2/tests/worker.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const {Worker, isMainThread} = require('worker_threads'); 4 | 5 | var RE2 = require("../../..").RE2; 6 | 7 | if (isMainThread) { 8 | // This re-loads the current file inside a Worker instance. 9 | console.log('Inside Master!'); 10 | const worker = new Worker(__filename); 11 | worker.on('exit', code => { 12 | console.log('Exit code:', code); 13 | test('#2'); 14 | }); 15 | test('#1'); 16 | } else { 17 | console.log('Inside Worker!'); 18 | test(); 19 | } 20 | 21 | function test(msg) { 22 | msg && console.log(isMainThread ? 'Main' : 'Worker', msg); 23 | 24 | const a = new RE2('^\\d+$', 'u'); 25 | console.log(isMainThread, a.test('123'), a.test('abc'), a.test('123abc'), a instanceof RE2); 26 | 27 | const b = RE2('^\\d+$', 'u'); 28 | console.log(isMainThread, b.test('123'), b.test('abc'), b.test('123abc'), b instanceof RE2); 29 | } 30 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./node_modules/gts/tsconfig-google.json", 3 | "compilerOptions": { 4 | "rootDir": ".", 5 | "outDir": "build", 6 | "lib": ["es2017"], 7 | "target": "es2017", 8 | "moduleResolution": "node" 9 | }, 10 | "include": [ 11 | "src/**/*.ts", 12 | "test/**/*.ts" 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /wasm/re2.d.ts: -------------------------------------------------------------------------------- 1 | /* Copyright 2021 Google LLC 2 | * 3 | * Licensed under the Apache License, Version 2.0 
(the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | interface InternalMatchResult { 17 | match: string; 18 | index: number; 19 | groups: (string | undefined)[]; 20 | } 21 | 22 | interface CppVector { 23 | get(index: number): V; 24 | size(): number; 25 | } 26 | 27 | interface CppMap { 28 | keys(): CppVector; 29 | get(index: K): V; 30 | } 31 | 32 | export interface WrappedRE2Interface { 33 | ok(): boolean; 34 | error(): string; 35 | pattern(): string; 36 | match(input: string, start: number, getCapturingGroups: boolean): InternalMatchResult; 37 | capturingGroupNames(): CppMap; 38 | } 39 | 40 | export interface WrappedRE2Constructor { 41 | new(pattern: string, ignoreCase: boolean, multiline: boolean, dotAll: boolean): WrappedRE2Interface; 42 | } 43 | 44 | export class WrappedRE2 implements WrappedRE2Interface { 45 | constructor(pattern: string, ignoreCase: boolean, multiline: boolean, dotAll: boolean); 46 | ok(): boolean; 47 | error(): string; 48 | pattern(): string; 49 | match(input: string, start: number, getCapturingGroups: boolean): InternalMatchResult; 50 | capturingGroupNames(): CppMap; 51 | } -------------------------------------------------------------------------------- /wrap/re2_wrap.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2021 Google LLC 2 | * 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | #include 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | using namespace emscripten; 24 | 25 | /* Copied from 26 | * https://github.com/uhop/node-re2/blob/c2179c5402f5c426e6b8b87ca46c98d2638623ac/lib/wrapped_re2.h#L147 */ 27 | inline size_t getUtf8CharSize(char ch) 28 | { 29 | return ((0xE5000000 >> ((ch >> 3) & 0x1E)) & 3) + 1; 30 | } 31 | 32 | static std::shared_ptr getOptionsFromFlags(const bool ignoreCase, const bool multiline, const bool dotAll) { 33 | std::shared_ptr options = std::make_shared(); 34 | options->set_log_errors(false); 35 | options->set_case_sensitive(!ignoreCase); 36 | options->set_one_line(!multiline); 37 | options->set_dot_nl(dotAll); 38 | return options; 39 | } 40 | 41 | class WrappedRE2 { 42 | public: 43 | WrappedRE2(const std::string& pattern, const bool ignoreCase, const bool multiline, const bool dotAll): 44 | wrapped(re2::StringPiece(pattern), *getOptionsFromFlags(ignoreCase, multiline, dotAll)) {} 45 | 46 | bool ok() const { 47 | return wrapped.ok(); 48 | } 49 | 50 | const std::string& error() const { 51 | return wrapped.error(); 52 | } 53 | 54 | const std::string& pattern() const { 55 | return wrapped.pattern(); 56 | } 57 | 58 | const val match(const std::string& input, const size_t start, const bool getCaptureGroups) { 59 | /* There is some potential additional optimization here: for the test 60 | * method, no match information is needed, so the submatchCount could be 61 | * 0. For API simplicity, we are currently not doing that. 
*/ 62 | int submatchCount = getCaptureGroups ? wrapped.NumberOfCapturingGroups() + 1 : 1; 63 | re2::StringPiece matches[submatchCount]; 64 | // Convert an index into a UTF8 string to a byte offset 65 | size_t byteStart = 0; 66 | for (size_t i = 0; i < start; i++) { 67 | byteStart += getUtf8CharSize(input[byteStart]); 68 | } 69 | bool success = wrapped.Match(re2::StringPiece(input), byteStart, input.size(), RE2::UNANCHORED, matches, submatchCount); 70 | val result = val::object(); 71 | if (success) { 72 | re2::StringPiece matchResult = matches[0]; 73 | result.set("match", static_cast(matchResult)); 74 | // Convert a byte offset to a UTF8 index 75 | size_t byteIndex = matchResult.data() - input.data(); 76 | size_t utf8Index = 0; 77 | for (size_t i = 0; i < byteIndex; i += getUtf8CharSize(input[i])) { 78 | utf8Index += 1; 79 | } 80 | result.set("index", utf8Index); 81 | val captureGroups = val::array(); 82 | if (getCaptureGroups) { 83 | for (size_t index = 1; index < submatchCount; index++) { 84 | if (matches[index].data() == NULL) { 85 | captureGroups.set(index - 1, val::undefined()); 86 | } else { 87 | captureGroups.set(index - 1, static_cast(matches[index])); 88 | } 89 | } 90 | } 91 | result.set("groups", captureGroups); 92 | } else { 93 | result.set("match", ""); 94 | result.set("index", -1); 95 | result.set("groups", val::array()); 96 | } 97 | return val(result); 98 | } 99 | 100 | const std::map& capturingGroupNames() const { 101 | return wrapped.CapturingGroupNames(); 102 | } 103 | 104 | private: 105 | re2::RE2 wrapped; 106 | }; 107 | 108 | EMSCRIPTEN_BINDINGS(re2) { 109 | class_("WrappedRE2") 110 | .smart_ptr_constructor("WrappedRE2", std::make_shared) 111 | .function("ok", &WrappedRE2::ok) 112 | .function("error", &WrappedRE2::error) 113 | .function("pattern", &WrappedRE2::pattern) 114 | .function("match", &WrappedRE2::match) 115 | .function("capturingGroupNames", &WrappedRE2::capturingGroupNames); 116 | 117 | register_vector("vector"); 118 | 
register_map("map"); 119 | } --------------------------------------------------------------------------------