├── .editorconfig ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── actions │ └── setup-env │ │ └── action.yml └── workflows │ ├── ci.yml │ └── docs-deploy.yml ├── .gitignore ├── .nvmrc ├── .size-limit.json ├── .watchmanconfig ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── GUIDELINES.md ├── LICENSE ├── README.md ├── babel.config.js ├── jest-setup.ts ├── package.json ├── pnpm-lock.yaml ├── src ├── __tests__ │ ├── builder.test.ts │ ├── example-currency.ts │ ├── example-email.ts │ ├── example-filename.ts │ ├── example-find-suffixes.ts │ ├── example-find-words.ts │ ├── example-hashtags.ts │ ├── example-hex-color.ts │ ├── example-html-tags.ts │ ├── example-ipv4.ts │ ├── example-js-number.ts │ ├── example-password.ts │ ├── example-regexp.ts │ ├── example-url-advanced.ts │ └── example-url-simple.ts ├── builders.ts ├── constructs │ ├── __tests__ │ │ ├── anchors.test.tsx │ │ ├── capture.test.tsx │ │ ├── char-class.test.ts │ │ ├── char-escape.test.ts │ │ ├── choice-of.test.ts │ │ ├── encoder.test.tsx │ │ ├── lookahead.test.ts │ │ ├── lookbehind.test.ts │ │ ├── negative-lookahead.test.ts │ │ ├── negative-lookbehind.test.ts │ │ ├── quantifiers.test.tsx │ │ ├── regex.test.tsx │ │ ├── repeat.test.tsx │ │ └── unicode.test.tsx │ ├── anchors.ts │ ├── capture.ts │ ├── char-class.ts │ ├── char-escape.ts │ ├── choice-of.ts │ ├── lookahead.ts │ ├── lookbehind.ts │ ├── negative-lookahead.ts │ ├── negative-lookbehind.ts │ ├── quantifiers.ts │ ├── regex.ts │ ├── repeat.ts │ └── unicode.ts ├── encoder.ts ├── index.ts ├── patterns │ ├── __tests__ │ │ └── hex-color.test.ts │ ├── hex-color.ts │ └── index.ts ├── types.ts └── utils.ts ├── test-utils ├── to-equal-regex.ts ├── to-match-all-groups.ts ├── to-match-all-named-groups.ts ├── to-match-groups.ts ├── to-match-named-groups.ts ├── to-match-string.ts └── utils.ts ├── tsconfig.json ├── tsconfig.release.json └── website ├── .gitignore ├── README.md ├── babel.config.js ├── docs ├── Examples.md 
├── api │ ├── assertions.md │ ├── builder.md │ ├── captures.md │ ├── character-classes.md │ ├── constructs.md │ ├── overview.md │ ├── quantifiers.md │ ├── types.md │ └── unicode.md └── getting-started.md ├── docusaurus.config.js ├── package.json ├── pnpm-lock.yaml ├── sidebars.js ├── src ├── components │ └── HomepageFeatures │ │ ├── index.js │ │ └── styles.module.css └── css │ └── custom.css └── static ├── .nojekyll └── img ├── favicon.ico ├── logo-1024.png └── logo-200.png /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig helps developers define and maintain consistent 2 | # coding styles between different editors and IDEs 3 | # editorconfig.org 4 | 5 | root = true 6 | 7 | [*] 8 | 9 | indent_style = space 10 | indent_size = 2 11 | 12 | max_line_length = 100 13 | 14 | end_of_line = lf 15 | charset = utf-8 16 | trim_trailing_whitespace = true 17 | insert_final_newline = true 18 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.pbxproj -text 2 | # specific for windows script files 3 | *.bat text eol=crlf -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '[Bug]' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | **Describe the bug** 10 | A clear and concise description of what the bug is. 11 | 12 | **To Reproduce** 13 | Steps to reproduce the behavior: 14 | 15 | 1. 16 | 2. 17 | 3. 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Screenshots** 23 | If applicable, add screenshots to help explain your problem. 
24 | 25 | **Package version** 26 | `ts-regex-builder: ` 27 | 28 | **Additional context** 29 | Add any other context about the problem here. 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '[Feature]' 5 | labels: enhancement 6 | assignees: '' 7 | --- 8 | 9 | **Is your feature request related to a problem? Please describe.** 10 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 11 | 12 | **Describe the solution you'd like** 13 | A clear and concise description of what you want to happen. 14 | 15 | **Describe alternatives you've considered** 16 | A clear and concise description of any alternative solutions or features you've considered. 17 | 18 | **Checklist** 19 | 20 | - [ ] Implementation 21 | - [ ] Tests 22 | - [ ] API docs 23 | - [ ] README docs (if relevant) 24 | - [ ] Example docs & tests (if relevant) 25 | 26 | **Additional context** 27 | Add any other context or screenshots about the feature request here. 28 | -------------------------------------------------------------------------------- /.github/actions/setup-env/action.yml: -------------------------------------------------------------------------------- 1 | name: Setup env 2 | description: Setup Node.js and install deps 3 | 4 | inputs: 5 | working-directory: 6 | description: 'Working directory' 7 | required: true 8 | default: '.' 
9 | 10 | runs: 11 | using: composite 12 | steps: 13 | - name: Setup Node.js 14 | uses: actions/setup-node@v4 15 | with: 16 | node-version-file: .nvmrc 17 | 18 | - name: Setup pnpm 19 | uses: pnpm/action-setup@v4 20 | with: 21 | version: 10.2.1+sha512.398035c7bd696d0ba0b10a688ed558285329d27ea994804a52bad9167d8e3a72bcb993f9699585d3ca25779ac64949ef422757a6c31102c12ab932e5cbe5cc92 22 | 23 | - name: Install deps 24 | shell: bash 25 | working-directory: ${{ inputs.working-directory }} 26 | run: pnpm install 27 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | branches: 8 | - main 9 | 10 | permissions: 11 | pull-requests: write 12 | 13 | jobs: 14 | lint: 15 | runs-on: ubuntu-latest 16 | name: Lint and Typecheck 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | 21 | - name: Setup 22 | uses: ./.github/actions/setup-env 23 | 24 | - name: Lint files 25 | run: pnpm lint 26 | 27 | - name: Typecheck files 28 | run: pnpm typecheck 29 | 30 | test: 31 | runs-on: ubuntu-latest 32 | name: Test 33 | steps: 34 | - name: Checkout 35 | uses: actions/checkout@v4 36 | 37 | - name: Setup 38 | uses: ./.github/actions/setup-env 39 | 40 | - name: Run unit tests 41 | run: pnpm test --coverage 42 | 43 | - name: Upload coverage reports to Codecov 44 | uses: codecov/codecov-action@v3 45 | env: 46 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 47 | 48 | build-library: 49 | runs-on: ubuntu-latest 50 | name: Build Library 51 | steps: 52 | - name: Checkout 53 | uses: actions/checkout@v4 54 | 55 | - name: Setup 56 | uses: ./.github/actions/setup-env 57 | 58 | - name: Build package 59 | run: pnpm build 60 | 61 | - name: Calculate Size 62 | if: github.ref != 'refs/heads/main' 63 | uses: andresz1/size-limit-action@v1 64 | with: 65 | github_token: ${{ secrets.GITHUB_TOKEN 
}} 66 | 67 | test-website: 68 | runs-on: ubuntu-latest 69 | name: Test Website 70 | steps: 71 | - name: Checkout 72 | uses: actions/checkout@v4 73 | 74 | - name: Setup 75 | uses: ./.github/actions/setup-env 76 | with: 77 | working-directory: ./website 78 | 79 | - name: Build website 80 | working-directory: ./website 81 | run: pnpm build 82 | -------------------------------------------------------------------------------- /.github/workflows/docs-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - 'website/**' 9 | 10 | permissions: 11 | contents: write 12 | 13 | jobs: 14 | deploy: 15 | name: Deploy to GitHub Pages 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 21 | 22 | - name: Setup 23 | uses: ./.github/actions/setup-env 24 | with: { working-directory: ./website } 25 | - name: Build website 26 | working-directory: ./website 27 | run: pnpm build 28 | 29 | - name: Deploy to GitHub Pages 30 | uses: peaceiris/actions-gh-pages@v3 31 | with: 32 | github_token: ${{ secrets.GITHUB_TOKEN }} 33 | publish_dir: ./website/build 34 | # The following lines assign commit authorship to the official 35 | # GH-Actions bot for deploys to `gh-pages` branch: 36 | # https://github.com/actions/checkout/issues/13#issuecomment-724415212 37 | # The GH actions bot is used by default if you didn't specify the two fields. 38 | # You can swap them out with your own user credentials. 
39 | user_name: github-actions[bot] 40 | user_email: 41898282+github-actions[bot]@users.noreply.github.com 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OSX 2 | # 3 | .DS_Store 4 | 5 | # VSCode 6 | .vscode/ 7 | jsconfig.json 8 | 9 | # node.js 10 | # 11 | node_modules/ 12 | npm-debug.log 13 | yarn-debug.log 14 | yarn-error.log 15 | 16 | # BUCK 17 | buck-out/ 18 | \.buckd/ 19 | 20 | # Yarn 21 | .yarn/* 22 | !.yarn/patches 23 | !.yarn/plugins 24 | !.yarn/releases 25 | !.yarn/sdks 26 | !.yarn/versions 27 | 28 | # Turborepo 29 | .turbo/ 30 | 31 | # Build process 32 | dist/ 33 | 34 | /.idea 35 | coverage/ 36 | -------------------------------------------------------------------------------- /.nvmrc: -------------------------------------------------------------------------------- 1 | v18 2 | -------------------------------------------------------------------------------- /.size-limit.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "path": "dist/esm/index.mjs", 4 | "limit": "2 kB" 5 | }, 6 | { 7 | "path": "dist/esm/patterns/index.mjs", 8 | "limit": "3 kB" 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /.watchmanconfig: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level 
of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | - Demonstrating empathy and kindness toward other people 21 | - Being respectful of differing opinions, viewpoints, and experiences 22 | - Giving and gracefully accepting constructive feedback 23 | - Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | - Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | - The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | - Trolling, insulting or derogatory comments, and personal or political attacks 33 | - Public or private harassment 34 | - Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | - Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 
50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | [INSERT CONTACT METHOD]. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series of 86 | actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or permanent 93 | ban. 94 | 95 | ### 3. 
Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within the 113 | community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.1, available at 119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 120 | 121 | Community Impact Guidelines were inspired by 122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 126 | [https://www.contributor-covenant.org/translations][translations]. 
127 | 128 | [homepage]: https://www.contributor-covenant.org 129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 130 | [Mozilla CoC]: https://github.com/mozilla/diversity 131 | [FAQ]: https://www.contributor-covenant.org/faq 132 | [translations]: https://www.contributor-covenant.org/translations 133 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are always welcome, no matter how large or small! 4 | 5 | We want this community to be friendly and respectful to each other. Before contributing, please read the [code of conduct](./CODE_OF_CONDUCT.md) and follow it in all your interactions with the project. 6 | 7 | ## Development workflow 8 | 9 | This project is a monorepo managed using [pnpm workspaces](https://pnpm.io/workspaces). It contains the following packages: 10 | 11 | - The library package in the root directory. 12 | - The documentation website in the `website/` directory. 13 | 14 | To get started with the project, run `pnpm install` in the root directory to install the required dependencies for each package: 15 | 16 | ```sh 17 | pnpm install 18 | ``` 19 | 20 | > Since the project relies on pnpm workspaces, you cannot use [`npm`](https://github.com/npm/cli) for development. 21 | 22 | Make sure your code passes TypeScript and ESLint. Run the following to verify: 23 | 24 | ```sh 25 | pnpm typecheck 26 | pnpm lint 27 | ``` 28 | 29 | To fix formatting errors, run the following: 30 | 31 | ```sh 32 | pnpm lint --fix 33 | ``` 34 | 35 | Remember to add tests for your change if possible. Run the unit tests with: 36 | 37 | ```sh 38 | pnpm test 39 | ``` 40 | 41 | ### Commit message convention 42 | 43 | We follow the [conventional commits specification](https://www.conventionalcommits.org/en) for our commit messages: 44 | 45 | - `fix`: bug fixes, e.g. 
fix crash due to deprecated method. 46 | - `feat`: new features, e.g. add new method to the module. 47 | - `refactor`: code refactor, e.g. migrate from class components to hooks. 48 | - `docs`: changes to documentation, e.g. add usage example for the module. 49 | - `test`: adding or updating tests, e.g. add integration tests using detox. 50 | - `chore`: tooling changes, e.g. change CI config. 51 | 52 | Our pre-commit hooks verify that your commit message matches this format when committing. 53 | 54 | ### Linting and tests 55 | 56 | [ESLint](https://eslint.org/), [Prettier](https://prettier.io/), [TypeScript](https://www.typescriptlang.org/) 57 | 58 | We use [TypeScript](https://www.typescriptlang.org/) for type checking, [ESLint](https://eslint.org/) with [Prettier](https://prettier.io/) for linting and formatting the code, and [Jest](https://jestjs.io/) for testing. 59 | 60 | Our pre-commit hooks verify that the linter and tests pass when committing. 61 | 62 | ### Publishing to npm 63 | 64 | We use [release-it](https://github.com/release-it/release-it) to make it easier to publish new versions. It handles common tasks like bumping version based on semver, creating tags and releases etc. 65 | 66 | To publish new versions, run the following: 67 | 68 | ```sh 69 | pnpm release 70 | ``` 71 | 72 | ### Scripts 73 | 74 | The `package.json` file contains various scripts for common tasks: 75 | 76 | - `pnpm typecheck`: type-check files with TypeScript. 77 | - `pnpm lint`: lint files with ESLint. 78 | - `pnpm test`: run unit tests with Jest. 79 | 80 | ### Sending a pull request 81 | 82 | > **Working on your first pull request?** You can learn how from this _free_ series: [How to Contribute to an Open Source Project on GitHub](https://app.egghead.io/playlists/how-to-contribute-to-an-open-source-project-on-github). 83 | 84 | When you're sending a pull request: 85 | 86 | - Prefer small pull requests focused on one change. 87 | - Verify that linters and tests are passing. 
88 | - Review the documentation to make sure it looks good. 89 | - Follow the pull request template when opening a pull request. 90 | - For pull requests that change the API or implementation, discuss with maintainers first by opening an issue. 91 | -------------------------------------------------------------------------------- /GUIDELINES.md: -------------------------------------------------------------------------------- 1 | # Project Guidelines 2 | 3 | ## Core principles 4 | 5 | 1. We strive to create an API that is easy & intuitive to use, helps users avoid making mistakes, and exposes 95%+ of the power of regular expressions. 6 | 2. We strive to balance replicating the [Swift Regex Builder API](https://developer.apple.com/documentation/regexbuilder) and creating an API native to TypeScript/JavaScript users. 7 | 3. We should adjust our API where there are discrepancies between Swift and JavaScript regex behavior. 8 | 9 | ## Implementation guidelines 10 | 11 | 1. When the user passes text to any regex construct, it should be treated as an exact string to match and not as a regex string. We might provide an escape hatch for passing a raw regex string, but the user should have to use it explicitly. 
12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Maciej Jastrzebski 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![npm version](https://badge.fury.io/js/ts-regex-builder.svg)](https://badge.fury.io/js/ts-regex-builder) 2 | ![Build](https://github.com/callstack/ts-regex-builder/actions/workflows/ci.yml/badge.svg) 3 | ![npm bundle size](https://deno.bundlejs.com/badge?q=ts-regex-builder) 4 | [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](http://makeapullrequest.com) 5 | [![Star on GitHub](https://img.shields.io/github/stars/callstack/ts-regex-builder.svg?style=social)](https://github.com/callstack/ts-regex-builder/stargazers) 6 | 7 | # TS Regex Builder 8 | 9 | Build maintainable regular expressions for TypeScript and JavaScript. 10 | 11 | [API docs](https://callstack.github.io/ts-regex-builder/api) | [Examples](https://callstack.github.io/ts-regex-builder/examples) 12 | 13 | ## Goal 14 | 15 | Regular expressions are a powerful tool for matching text patterns, yet they are notorious for their hard-to-parse syntax, especially in the case of more complex patterns. 16 | 17 | This library allows users to create regular expressions in a structured way, making them easy to write and review. It provides a domain-specific language for defining regular expressions, which are finally turned into JavaScript-native `RegExp` objects for fast execution. 
18 | 19 | ```ts 20 | // Regular JS RegExp 21 | const hexColor = /^#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3})$/; 22 | 23 | // TS Regex Builder DSL 24 | const hexDigit = /[a-fA-F0-9]/; // or: charClass(charRange('a', 'f'), charRange('A', 'F'), charRange('0', '9')); 25 | 26 | const hexColor = buildRegExp([ 27 | startOfString, 28 | optional('#'), 29 | capture( 30 | choiceOf( 31 | repeat(hexDigit, 6), // #rrggbb 32 | repeat(hexDigit, 3), // #rgb 33 | ), 34 | ), 35 | endOfString, 36 | ]); 37 | ``` 38 | 39 | ## Installation 40 | 41 | ```sh 42 | npm install ts-regex-builder 43 | ``` 44 | 45 | or 46 | 47 | ```sh 48 | yarn add ts-regex-builder 49 | ``` 50 | 51 | or 52 | 53 | ```sh 54 | pnpm add ts-regex-builder 55 | ``` 56 | 57 | ## Basic usage 58 | 59 | ```js 60 | import { buildRegExp, capture, oneOrMore, word } from 'ts-regex-builder'; 61 | 62 | // /Hello (\w+)/ 63 | const regex = buildRegExp(['Hello ', capture(oneOrMore(word))]); 64 | ``` 65 | 66 | ## Regex domain-specific language 67 | 68 | TS Regex Builder allows you to build complex regular expressions using a domain-specific language. 69 | 70 | Terminology: 71 | 72 | - regex construct (`RegexConstruct`) - common name for all regex constructs like character classes, quantifiers, and anchors. 73 | - regex element (`RegexElement`) - a fundamental building block of a regular expression, defined as either a regex construct, a string, or `RegExp` literal (`/.../`). 74 | - regex sequence (`RegexSequence`) - a sequence of regex elements forming a regular expression. For developer convenience, it also accepts a single element instead of an array. 75 | 76 | Most of the regex constructs accept a regex sequence as their argument. 
77 | 78 | Examples of sequences: 79 | 80 | - single element (construct): `capture('Hello')` 81 | - single element (string): `'Hello'` 82 | - single element (`RegExp` literal): `/Hello/` 83 | - array of elements: `['USD', oneOrMore(digit), /Hello/]` 84 | 85 | Regex constructs can be composed into a tree structure: 86 | 87 | ```ts 88 | const currencyCode = repeat(charRange('A', 'Z'), 3); 89 | const currencyAmount = buildRegExp([ 90 | choiceOf('$', '€', currencyCode), // currency 91 | capture( 92 | oneOrMore(digit), // integer part 93 | optional(['.', repeat(digit, 2)]), // fractional part 94 | ), 95 | ]); 96 | ``` 97 | 98 | See [Types API doc](https://callstack.github.io/ts-regex-builder/api/types) for more info. 99 | 100 | ### Regex Builders 101 | 102 | | Builder | Regex Syntax | Description | 103 | | ---------------------------------------- | ------------ | ----------------------------------- | 104 | | `buildRegExp(...)` | `/.../` | Create `RegExp` instance | 105 | | `buildRegExp(..., { ignoreCase: true })` | `/.../i` | Create `RegExp` instance with flags | 106 | 107 | See [Builder API doc](https://callstack.github.io/ts-regex-builder/api/builder) for more info. 108 | 109 | ### Regex Constructs 110 | 111 | | Construct | Regex Syntax | Notes | 112 | | ------------------- | ------------ | ------------------------------- | 113 | | `choiceOf(x, y, z)` | `x\|y\|z` | Match one of provided sequences | 114 | | `capture(...)` | `(...)` | Create a capture group | 115 | 116 | See [Constructs API doc](https://callstack.github.io/ts-regex-builder/api/constructs) for more info. 117 | 118 | > [!NOTE] 119 | > TS Regex Builder does not have a construct for non-capturing groups. Such groups are implicitly added when required. 
120 | 121 | ### Quantifiers 122 | 123 | | Quantifier | Regex Syntax | Description | 124 | | -------------------------------- | ------------ | ------------------------------------------------- | 125 | | `zeroOrMore(x)` | `x*` | Zero or more occurrences of a pattern | 126 | | `oneOrMore(x)` | `x+` | One or more occurrences of a pattern | 127 | | `optional(x)` | `x?` | Zero or one occurrence of a pattern | 128 | | `repeat(x, n)` | `x{n}` | Pattern repeats exact number of times | 129 | | `repeat(x, { min: n })` | `x{n,}` | Pattern repeats at least given number of times | 130 | | `repeat(x, { min: n1, max: n2 })` | `x{n1,n2}` | Pattern repeats between n1 and n2 number of times | 131 | 132 | See [Quantifiers API doc](https://callstack.github.io/ts-regex-builder/api/quantifiers) for more info. 133 | 134 | ### Assertions 135 | 136 | | Assertion | Regex Syntax | Description | 137 | | ------------------------- | ------------ | ------------------------------------------------------------------------ | 138 | | `startOfString` | `^` | Match the start of the string (or the start of a line in multiline mode) | 139 | | `endOfString` | `$` | Match the end of the string (or the end of a line in multiline mode) | 140 | | `wordBoundary` | `\b` | Match the start or end of a word without consuming characters | 141 | | `lookahead(...)` | `(?=...)` | Match subsequent text without consuming it | 142 | | `negativeLookahead(...)` | `(?!...)` | Reject subsequent text without consuming it | 143 | | `lookbehind(...)` | `(?<=...)` | Match preceding text without consuming it | 144 | | `negativeLookbehind(...)` | `(?<!...)` | Reject preceding text without consuming it | 145 | 146 | See [Assertions API doc](https://callstack.github.io/ts-regex-builder/api/assertions) for more info. 147 | 148 | ### Character classes 149 | 150 | > [!TIP] 151 | > You may also use inline regexes for specifying character classes, as they offer a concise yet readable syntax. For example, `/[a-z0-9_]/`. 
152 | 153 | | Character class | Regex Syntax | Description | 154 | | --------------------- | ------------ | ------------------------------------------------- | 155 | | `any` | `.` | Any character | 156 | | `word` | `\w` | Word character: letter, digit, underscore | 157 | | `digit` | `\d` | Digit character: 0 to 9 | 158 | | `whitespace` | `\s` | Whitespace character: space, tab, line break, ... | 159 | | `anyOf('abc')` | `[abc]` | Any of provided characters | 160 | | `charRange('a', 'z')` | `[a-z]` | Character in a range | 161 | | `charClass(...)` | `[...]` | Union of multiple character classes | 162 | | `negated(...)` | `[^...]` | Negation of a given character class | 163 | 164 | See [Character Classes API doc](https://callstack.github.io/ts-regex-builder/api/character-classes) and [Unicode API doc](https://callstack.github.io/ts-regex-builder/api/unicode) for more info. 165 | 166 | ## Examples 167 | 168 | See [Examples](https://callstack.github.io/ts-regex-builder/examples). 169 | 170 | ## Performance 171 | 172 | Regular expressions created with this library are executed at runtime, so you should avoid creating them in a context where they would need to be executed multiple times, e.g., inside loops or functions. We recommend that you create a top-level object for each required regex. 173 | 174 | ## Contributing 175 | 176 | See the [contributing guide](CONTRIBUTING.md) to learn how to contribute to the repository and the development workflow. 177 | See the [project guidelines](GUIDELINES.md) to understand our core principles. 178 | 179 | ## License 180 | 181 | MIT 182 | 183 | ## Inspiration 184 | 185 | TS Regex Builder is inspired by [Swift Regex Builder API](https://developer.apple.com/documentation/regexbuilder). 
186 | 187 | ## Reference 188 | 189 | - [ECMAScript Regular Expression BNF Grammar](https://tc39.es/ecma262/#sec-regular-expressions) 190 | - [Unicode Regular Expressions](https://www.unicode.org/reports/tr18/) 191 | - [Swift Evolution 351: Regex Builder DSL](https://github.com/apple/swift-evolution/blob/main/proposals/0351-regex-builder.md) 192 | - [Swift Regex Builder API docs](https://developer.apple.com/documentation/regexbuilder) 193 | 194 | --- 195 | 196 | Made with [create-react-native-library](https://github.com/callstack/react-native-builder-bob) 197 | -------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [['@babel/preset-env', { targets: { node: 'current' } }], '@babel/preset-typescript'], 3 | 4 | env: { 5 | cjs: { 6 | presets: [['@babel/preset-env', { targets: { node: 'current' }, modules: 'commonjs' }]], 7 | plugins: [ 8 | ['@babel/plugin-transform-modules-commonjs', { strict: true }], 9 | ['babel-plugin-add-import-extension', { extension: 'js' }], 10 | ], 11 | ignore: ['**/__tests__/*', '**/test.ts', '**/*.d.ts'], 12 | }, 13 | 14 | esm: { 15 | presets: [['@babel/preset-env', { targets: { node: 'current' }, modules: false }]], 16 | plugins: [['babel-plugin-add-import-extension', { extension: 'mjs' }]], 17 | ignore: ['**/__tests__/*', '**/test.ts', '**/*.d.ts'], 18 | }, 19 | }, 20 | }; 21 | -------------------------------------------------------------------------------- /jest-setup.ts: -------------------------------------------------------------------------------- 1 | import './test-utils/to-equal-regex'; 2 | import './test-utils/to-match-groups'; 3 | import './test-utils/to-match-all-groups'; 4 | import './test-utils/to-match-named-groups'; 5 | import './test-utils/to-match-all-named-groups'; 6 | import './test-utils/to-match-string'; 7 | 
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ts-regex-builder", 3 | "version": "1.8.2", 4 | "description": "Maintainable regular expressions for TypeScript and JavaScript.", 5 | "main": "dist/commonjs/index.js", 6 | "module": "dist/esm/index.mjs", 7 | "types": "dist/types/index.d.ts", 8 | "source": "src/index.ts", 9 | "sideEffects": false, 10 | "files": [ 11 | "dist", 12 | "!**/__tests__", 13 | "!**/__fixtures__", 14 | "!**/__mocks__", 15 | "!**/.*" 16 | ], 17 | "exports": { 18 | ".": { 19 | "types": "./dist/types/index.d.ts", 20 | "require": "./dist/commonjs/index.js", 21 | "import": "./dist/esm/index.mjs" 22 | }, 23 | "./patterns": { 24 | "types": "./dist/types/patterns/index.d.ts", 25 | "require": "./dist/commonjs/patterns/index.js", 26 | "import": "./dist/esm/patterns/index.mjs" 27 | } 28 | }, 29 | "scripts": { 30 | "test": "jest", 31 | "typecheck": "tsc --noEmit", 32 | "lint": "eslint \"**/*.{js,ts,tsx}\"", 33 | "clean": "del-cli dist", 34 | "release": "pnpm build && release-it", 35 | "release:beta": "pnpm build && release-it --preRelease=beta", 36 | "build": "pnpm clean && pnpm build:commonjs && pnpm build:esm && pnpm build:typescript", 37 | "build:commonjs": "BABEL_ENV=cjs babel src --out-dir \"dist/commonjs\" --extensions .js,.ts --out-file-extension .js --source-maps --no-comments", 38 | "build:esm": "BABEL_ENV=esm babel src --out-dir \"dist/esm\" --extensions .js,.ts --out-file-extension .mjs --source-maps --no-comments", 39 | "build:typescript": "tsc --project tsconfig.release.json --outDir \"dist/types\"", 40 | "validate": "pnpm typecheck && pnpm test && pnpm lint" 41 | }, 42 | "keywords": [ 43 | "regex", 44 | "typescript" 45 | ], 46 | "repository": "https://github.com/callstack/ts-regex-builder", 47 | "author": "Maciej Jastrzębski (https://github.com/mdjastrzebski)", 48 | "license": "MIT", 49 | "bugs": { 
50 | "url": "https://github.com/callstack/ts-regex-builder/issues" 51 | }, 52 | "homepage": "https://github.com/callstack/ts-regex-builder#readme", 53 | "publishConfig": { 54 | "registry": "https://registry.npmjs.org/" 55 | }, 56 | "devDependencies": { 57 | "@babel/cli": "^7.24.5", 58 | "@babel/core": "^7.24.5", 59 | "@babel/plugin-syntax-flow": "^7.24.1", 60 | "@babel/plugin-transform-react-jsx": "^7.23.4", 61 | "@babel/preset-env": "^7.24.5", 62 | "@babel/preset-typescript": "^7.24.1", 63 | "@callstack/eslint-config": "^14.1.1", 64 | "@release-it/conventional-changelog": "^5.1.1", 65 | "@size-limit/preset-small-lib": "^11.1.6", 66 | "@types/jest": "^29.5.12", 67 | "babel-plugin-add-import-extension": "^1.6.0", 68 | "del-cli": "^5.1.0", 69 | "eslint": "^8.57.0", 70 | "eslint-config-prettier": "^8.10.0", 71 | "eslint-plugin-flowtype": "^8.0.3", 72 | "eslint-plugin-import": "^2.29.1", 73 | "eslint-plugin-prettier": "^4.2.1", 74 | "jest": "^29.7.0", 75 | "prettier": "^2.8.8", 76 | "release-it": "^15.11.0", 77 | "size-limit": "^11.1.6", 78 | "typescript": "^5.4.5" 79 | }, 80 | "engines": { 81 | "node": ">= 18.0.0" 82 | }, 83 | "jest": { 84 | "setupFilesAfterEnv": [ 85 | "<rootDir>/jest-setup.ts" 86 | ], 87 | "collectCoverageFrom": [ 88 | "src/**/*.{js,jsx,ts,tsx}" 89 | ], 90 | "modulePathIgnorePatterns": [ 91 | "<rootDir>/dist/" 92 | ] 93 | }, 94 | "release-it": { 95 | "git": { 96 | "commitMessage": "chore: release ${version}", 97 | "tagName": "v${version}" 98 | }, 99 | "npm": { 100 | "publish": true 101 | }, 102 | "github": { 103 | "release": true 104 | }, 105 | "plugins": { 106 | "@release-it/conventional-changelog": { 107 | "preset": "angular" 108 | } 109 | } 110 | }, 111 | "eslintConfig": { 112 | "root": true, 113 | "extends": [ 114 | "@callstack/eslint-config/node", 115 | "prettier" 116 | ], 117 | "plugins": [ 118 | "import" 119 | ], 120 | "rules": { 121 | "prettier/prettier": [ 122 | "error", 123 | { 124 | "quoteProps": "consistent", 125 | "singleQuote": true, 126 | "tabWidth": 
2, 127 | "trailingComma": "all", 128 | "useTabs": false 129 | } 130 | ], 131 | "sort-imports": [ 132 | "error", 133 | { 134 | "ignoreCase": true, 135 | "ignoreDeclarationSort": true 136 | } 137 | ], 138 | "import/order": "error" 139 | } 140 | }, 141 | "eslintIgnore": [ 142 | "node_modules/", 143 | "dist/", 144 | "coverage/", 145 | "website/" 146 | ], 147 | "prettier": { 148 | "quoteProps": "consistent", 149 | "singleQuote": true, 150 | "tabWidth": 2, 151 | "trailingComma": "all", 152 | "useTabs": false 153 | }, 154 | "packageManager": "pnpm@10.2.1+sha512.398035c7bd696d0ba0b10a688ed558285329d27ea994804a52bad9167d8e3a72bcb993f9699585d3ca25779ac64949ef422757a6c31102c12ab932e5cbe5cc92" 155 | } 156 | -------------------------------------------------------------------------------- /src/__tests__/builder.test.ts: -------------------------------------------------------------------------------- 1 | import { buildRegExp, unicodeChar, unicodeProperty } from '..'; 2 | 3 | test('`regexBuilder` flags', () => { 4 | expect(buildRegExp('a').flags).toBe(''); 5 | expect(buildRegExp('a', {}).flags).toBe(''); 6 | 7 | expect(buildRegExp('a', { global: true }).flags).toBe('g'); 8 | expect(buildRegExp('a', { global: false }).flags).toBe(''); 9 | 10 | expect(buildRegExp('a', { ignoreCase: true }).flags).toBe('i'); 11 | expect(buildRegExp('a', { ignoreCase: false }).flags).toBe(''); 12 | 13 | expect(buildRegExp('a', { multiline: true }).flags).toBe('m'); 14 | expect(buildRegExp('a', { multiline: false }).flags).toBe(''); 15 | 16 | expect(buildRegExp('a', { hasIndices: true }).flags).toBe('d'); 17 | expect(buildRegExp('a', { hasIndices: false }).flags).toBe(''); 18 | 19 | expect(buildRegExp('a', { dotAll: true }).flags).toBe('s'); 20 | expect(buildRegExp('a', { dotAll: false }).flags).toBe(''); 21 | 22 | expect(buildRegExp('a', { sticky: true }).flags).toBe('y'); 23 | expect(buildRegExp('a', { sticky: false }).flags).toBe(''); 24 | 25 | expect( 26 | buildRegExp('a', { 27 | global: true, // 
ignoreCase: true, 29 | multiline: false, 30 | dotAll: true, 31 | sticky: true, 32 | }).flags, 33 | ).toBe('gisy'); 34 | }); 35 | 36 | test('`regexBuilder` throws when using unicode-aware features without `unicode` flag', () => { 37 | expect(() => buildRegExp(unicodeChar(0x1234))).not.toThrow(); 38 | expect(() => buildRegExp(unicodeChar(0x12345), { unicode: true })).not.toThrow(); 39 | expect(() => buildRegExp(unicodeProperty('Emoji_Presentation'), { unicode: true })).not.toThrow(); 40 | 41 | expect(() => buildRegExp(unicodeChar(0x123456))).toThrowErrorMatchingInlineSnapshot( 42 | `"Expected a valid unicode code point but received 1193046"`, 43 | ); 44 | expect(() => buildRegExp(unicodeChar(0x12345))).toThrowErrorMatchingInlineSnapshot( 45 | `"Pattern "\\u{12345}" requires "unicode" flag to be set."`, 46 | ); 47 | expect(() => 48 | buildRegExp(unicodeProperty('Emoji_Presentation')), 49 | ).toThrowErrorMatchingInlineSnapshot( 50 | `"Pattern "\\p{Emoji_Presentation}" requires "unicode" flag to be set."`, 51 | ); 52 | expect(() => buildRegExp(/\P{Letter}/u)).toThrowErrorMatchingInlineSnapshot( 53 | `"Pattern "\\P{Letter}" requires "unicode" flag to be set."`, 54 | ); 55 | }); 56 | 57 | test('`regexBuilder` does not throw on tricky unicode mode-like patterns', () => { 58 | expect(() => buildRegExp(/\\u{1234}/)).not.toThrow(); 59 | }); 60 | -------------------------------------------------------------------------------- /src/__tests__/example-currency.ts: -------------------------------------------------------------------------------- 1 | import { buildRegExp } from '../builders'; 2 | import { anyOf, digit, endOfString, optional, repeat, whitespace, zeroOrMore } from '../index'; 3 | import { lookbehind } from '../constructs/lookbehind'; 4 | 5 | const currencySymbol = '$€£¥R₿'; 6 | const decimalSeparator = '.'; 7 | 8 | const firstThousandsClause = repeat(digit, { min: 1, max: 3 }); 9 | const thousandsSeparator = ','; 10 | const thousands = repeat(digit, 3); 11 | 
const thousandsClause = [optional(thousandsSeparator), thousands]; 12 | const cents = repeat(digit, 2); 13 | const isCurrency = lookbehind(anyOf(currencySymbol)); 14 | 15 | test('example: extracting currency values', () => { 16 | const currencyRegex = buildRegExp([ 17 | isCurrency, 18 | optional(whitespace), 19 | firstThousandsClause, 20 | zeroOrMore(thousandsClause), 21 | optional([decimalSeparator, cents]), 22 | endOfString, 23 | ]); 24 | 25 | expect(currencyRegex).toMatchString('$10'); 26 | expect(currencyRegex).toMatchString('$ 10'); 27 | expect(currencyRegex).not.toMatchString('$ 10.'); 28 | expect(currencyRegex).toMatchString('$ 10'); 29 | expect(currencyRegex).not.toMatchString('$10.5'); 30 | expect(currencyRegex).toMatchString('$10.50'); 31 | expect(currencyRegex).not.toMatchString('$10.501'); 32 | expect(currencyRegex).toMatchString('€100'); 33 | expect(currencyRegex).toMatchString('£1,000'); 34 | expect(currencyRegex).toMatchString('$ 100000000000000000'); 35 | expect(currencyRegex).toMatchString('€ 10000'); 36 | expect(currencyRegex).toMatchString('₿ 100,000'); 37 | expect(currencyRegex).not.toMatchString('10$'); 38 | expect(currencyRegex).not.toMatchString('£A000'); 39 | 40 | expect(currencyRegex).toEqualRegex(/(?<=[$€£¥R₿])\s?\d{1,3}(?:,?\d{3})*(?:\.\d{2})?$/); 41 | }); 42 | -------------------------------------------------------------------------------- /src/__tests__/example-email.ts: -------------------------------------------------------------------------------- 1 | import { 2 | anyOf, 3 | buildRegExp, 4 | charClass, 5 | charRange, 6 | digit, 7 | endOfString, 8 | oneOrMore, 9 | repeat, 10 | startOfString, 11 | } from '..'; 12 | 13 | test('example: email validation', () => { 14 | const usernameChars = charClass(charRange('a', 'z'), digit, anyOf('._%+-')); 15 | const hostnameChars = charClass(charRange('a', 'z'), digit, anyOf('.-')); 16 | const domainChars = charRange('a', 'z'); 17 | 18 | const regex = buildRegExp( 19 | [ 20 | startOfString, 21 | 
oneOrMore(usernameChars), 22 | '@', 23 | oneOrMore(hostnameChars), 24 | '.', 25 | repeat(domainChars, { min: 2 }), 26 | endOfString, 27 | ], 28 | { ignoreCase: true }, 29 | ); 30 | 31 | expect(regex).toMatchString('aaa@gmail.co'); 32 | expect(regex).toMatchString('aaa@gmail.com'); 33 | expect(regex).toMatchString('Aaa@GMail.Com'); 34 | expect(regex).toMatchString('aaa@long.domain.example.com'); 35 | 36 | expect(regex).not.toMatchString('@'); 37 | expect(regex).not.toMatchString('aaa@'); 38 | expect(regex).not.toMatchString('a@gmail.c'); 39 | expect(regex).not.toMatchString('@gmail.com'); 40 | 41 | // eslint-disable-next-line no-useless-escape 42 | expect(regex).toEqualRegex(/^[a-z\d._%+\-]+@[a-z\d.\-]+\.[a-z]{2,}$/i); 43 | }); 44 | -------------------------------------------------------------------------------- /src/__tests__/example-filename.ts: -------------------------------------------------------------------------------- 1 | import { buildRegExp, choiceOf, endOfString, negativeLookbehind, oneOrMore } from '../index'; 2 | 3 | const isRejectedFileExtension = negativeLookbehind(choiceOf('js', 'css', 'html')); 4 | 5 | test('example: filename validator', () => { 6 | const filenameRegex = buildRegExp([ 7 | oneOrMore(/[A-Za-z0-9_]/), 8 | isRejectedFileExtension, 9 | endOfString, 10 | ]); 11 | 12 | expect(filenameRegex).toMatchString('index.ts'); 13 | expect(filenameRegex).toMatchString('index.tsx'); 14 | expect(filenameRegex).toMatchString('ind/ex.ts'); 15 | expect(filenameRegex).not.toMatchString('index.js'); 16 | expect(filenameRegex).not.toMatchString('index.html'); 17 | expect(filenameRegex).not.toMatchString('index.css'); 18 | expect(filenameRegex).not.toMatchString('./index.js'); 19 | expect(filenameRegex).not.toMatchString('./index.html'); 20 | expect(filenameRegex).not.toMatchString('./index.css'); 21 | }); 22 | -------------------------------------------------------------------------------- /src/__tests__/example-find-suffixes.ts: 
-------------------------------------------------------------------------------- 1 | import { buildRegExp, choiceOf, nonWordBoundary, wordBoundary } from '..'; 2 | 3 | test('example: find words with suffix', () => { 4 | const suffixesToFind = ['acy', 'ism']; 5 | 6 | const regex = buildRegExp([ 7 | nonWordBoundary, // match suffixes only 8 | choiceOf(...suffixesToFind), 9 | wordBoundary, 10 | ]); 11 | 12 | expect(regex).toMatchString('democracy'); 13 | expect(regex).toMatchString('Bureaucracy'); 14 | expect(regex).toMatchString('abc privacy '); 15 | expect(regex).toMatchString('abc dynamism'); 16 | expect(regex).toMatchString('realism abc'); 17 | expect(regex).toMatchString('abc modernism abc'); 18 | 19 | expect(regex).not.toMatchString('abc acy'); 20 | expect(regex).not.toMatchString('ism abc'); 21 | expect(regex).not.toMatchString('dynamisms'); 22 | 23 | expect(regex).toEqualRegex(/\B(?:acy|ism)\b/); 24 | }); 25 | -------------------------------------------------------------------------------- /src/__tests__/example-find-words.ts: -------------------------------------------------------------------------------- 1 | import { buildRegExp, choiceOf, wordBoundary } from '..'; 2 | 3 | test('example: find specific words', () => { 4 | const wordsToFind = ['word', 'date']; 5 | 6 | const regex = buildRegExp([ 7 | wordBoundary, // match whole words only 8 | choiceOf(...wordsToFind), 9 | wordBoundary, 10 | ]); 11 | 12 | expect(regex).toMatchString('word'); 13 | expect(regex).toMatchString('some date'); 14 | expect(regex).toMatchString('date and word'); 15 | 16 | expect(regex).not.toMatchString('sword'); 17 | expect(regex).not.toMatchString('keywords'); 18 | expect(regex).not.toMatchString('words'); 19 | expect(regex).not.toMatchString('update'); 20 | expect(regex).not.toMatchString('dates'); 21 | 22 | expect(regex).toEqualRegex(/\b(?:word|date)\b/); 23 | }); 24 | -------------------------------------------------------------------------------- 
/src/__tests__/example-hashtags.ts: -------------------------------------------------------------------------------- 1 | import { buildRegExp, capture, oneOrMore, word } from '..'; 2 | 3 | test('example: extracting hashtags', () => { 4 | const regex = buildRegExp( 5 | [ 6 | '#', // prettier break-line 7 | capture(oneOrMore(word)), 8 | ], 9 | { global: true }, 10 | ); 11 | 12 | expect(regex).toMatchAllGroups('Hello #world!', [['#world', 'world']]); 13 | expect(regex).toMatchAllGroups('#Hello #world!', [ 14 | ['#Hello', 'Hello'], 15 | ['#world', 'world'], 16 | ]); 17 | 18 | expect(regex).not.toMatchString('aa'); 19 | expect(regex).not.toMatchString('#'); 20 | expect(regex).not.toMatchString('a# '); 21 | 22 | expect(regex).toEqualRegex(/#(\w+)/g); 23 | }); 24 | -------------------------------------------------------------------------------- /src/__tests__/example-hex-color.ts: -------------------------------------------------------------------------------- 1 | import { 2 | buildRegExp, 3 | charClass, 4 | charRange, 5 | choiceOf, 6 | digit, 7 | endOfString, 8 | optional, 9 | repeat, 10 | startOfString, 11 | } from '..'; 12 | 13 | test('example: hex color validation', () => { 14 | const hexDigit = charClass(digit, charRange('a', 'f')); 15 | 16 | const regex = buildRegExp( 17 | [ 18 | startOfString, 19 | optional('#'), 20 | choiceOf( 21 | repeat(hexDigit, 6), // #rrggbb 22 | repeat(hexDigit, 3), // #rgb 23 | ), 24 | endOfString, 25 | ], 26 | { ignoreCase: true }, 27 | ); 28 | 29 | expect(regex).toMatchString('#ffffff'); 30 | expect(regex).toMatchString('ffffff'); 31 | expect(regex).toMatchString('#eee'); 32 | expect(regex).toMatchString('bbb'); 33 | expect(regex).toMatchString('#000'); 34 | expect(regex).toMatchString('#123456'); 35 | expect(regex).toMatchString('123456'); 36 | expect(regex).toMatchString('#123'); 37 | expect(regex).toMatchString('123'); 38 | 39 | expect(regex).not.toMatchString('#1'); 40 | expect(regex).not.toMatchString('#12'); 41 | 
expect(regex).not.toMatchString('#1234'); 42 | expect(regex).not.toMatchString('#12345'); 43 | expect(regex).not.toMatchString('#1234567'); 44 | 45 | expect(regex).toEqualRegex(/^#?(?:[\da-f]{6}|[\da-f]{3})$/i); 46 | }); 47 | -------------------------------------------------------------------------------- /src/__tests__/example-html-tags.ts: -------------------------------------------------------------------------------- 1 | import { 2 | any, 3 | buildRegExp, 4 | capture, 5 | charClass, 6 | charRange, 7 | digit, 8 | oneOrMore, 9 | ref, 10 | zeroOrMore, 11 | } from '..'; 12 | 13 | test('example: html tag matching', () => { 14 | const tagName = oneOrMore(charClass(charRange('a', 'z'), digit)); 15 | const tagContent = zeroOrMore(any, { greedy: false }); 16 | 17 | const tagMatcher = buildRegExp( 18 | [ 19 | '<', 20 | capture(tagName, { name: 'tag' }), 21 | '>', 22 | capture(tagContent, { name: 'content' }), 23 | '</', 24 | ref('tag'), 25 | '>', 26 | ], 27 | { ignoreCase: true, global: true }, 28 | ); 29 | 30 | expect(tagMatcher).toMatchAllNamedGroups('<a>abc</a>', [{ tag: 'a', content: 'abc' }]); 31 | expect(tagMatcher).toMatchAllNamedGroups('<a><b>abc</b></a>', [ 32 | { tag: 'a', content: '<b>abc</b>' }, 33 | ]); 34 | expect(tagMatcher).toMatchAllNamedGroups('<a>abc1</a><b>abc2</b>', [ 35 | { tag: 'a', content: 'abc1' }, 36 | { tag: 'b', content: 'abc2' }, 37 | ]); 38 | 39 | expect(tagMatcher).not.toMatchString('<a>abc</b>'); 40 | 41 | expect(tagMatcher).toEqualRegex('<(?<tag>[a-z\\d]+)>(?<content>.*?)<\\/\\k<tag>>'); 42 | }); 43 | -------------------------------------------------------------------------------- /src/__tests__/example-ipv4.ts: -------------------------------------------------------------------------------- 1 | import { buildRegExp, charRange, choiceOf, digit, endOfString, repeat, startOfString } from '..'; 2 | 3 | test('example: IPv4 address validator', () => { 4 | const octet = choiceOf( 5 | [digit], 6 | [charRange('1', '9'), digit], 7 | ['1', repeat(digit, 2)], 8 | ['2', charRange('0', '4'), digit], 9 | ['25', charRange('0', '5')], 10 | ); 11 | 
12 | const regex = buildRegExp([ 13 | startOfString, // prettier break-line 14 | repeat([octet, '.'], 3), 15 | octet, 16 | endOfString, 17 | ]); 18 | 19 | expect(regex).toMatchString('0.0.0.0'); 20 | expect(regex).toMatchString('192.168.0.1'); 21 | expect(regex).toMatchString('1.99.100.249'); 22 | expect(regex).toMatchString('255.255.255.255'); 23 | expect(regex).toMatchString('123.45.67.89'); 24 | 25 | expect(regex).not.toMatchString('0.0.0.'); 26 | expect(regex).not.toMatchString('0.0.0.0.'); 27 | expect(regex).not.toMatchString('0.-1.0.0'); 28 | expect(regex).not.toMatchString('0.1000.0.0'); 29 | expect(regex).not.toMatchString('0.0.300.0'); 30 | expect(regex).not.toMatchString('255.255.255.256'); 31 | 32 | expect(regex).toEqualRegex( 33 | /^(?:(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.){3}(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$/, 34 | ); 35 | }); 36 | -------------------------------------------------------------------------------- /src/__tests__/example-js-number.ts: -------------------------------------------------------------------------------- 1 | import { 2 | anyOf, 3 | buildRegExp, 4 | choiceOf, 5 | digit, 6 | endOfString, 7 | oneOrMore, 8 | optional, 9 | regex, 10 | startOfString, 11 | zeroOrMore, 12 | } from '..'; 13 | 14 | test('example: validate JavaScript number', () => { 15 | const sign = anyOf('+-'); 16 | const exponent = regex([anyOf('eE'), optional(sign), oneOrMore(digit)]); 17 | 18 | const numberValidator = buildRegExp([ 19 | startOfString, 20 | optional(sign), 21 | choiceOf( 22 | [oneOrMore(digit), optional(['.', zeroOrMore(digit)])], // leading digit 23 | ['.', oneOrMore(digit)], // leading dot 24 | ), 25 | optional(exponent), // exponent 26 | endOfString, 27 | ]); 28 | 29 | expect(numberValidator).toMatchString('0'); 30 | expect(numberValidator).toMatchString('-1'); 31 | expect(numberValidator).toMatchString('+1'); 32 | expect(numberValidator).toMatchString('1.0'); 33 | expect(numberValidator).toMatchString('1.1234'); 34 | 
expect(numberValidator).toMatchString('1.'); 35 | expect(numberValidator).toMatchString('.1'); 36 | expect(numberValidator).toMatchString('-.1234'); 37 | expect(numberValidator).toMatchString('+.5'); 38 | expect(numberValidator).toMatchString('1e21'); 39 | expect(numberValidator).toMatchString('1e-21'); 40 | expect(numberValidator).toMatchString('+1e+42'); 41 | expect(numberValidator).toMatchString('-1e-42'); 42 | 43 | expect(numberValidator).not.toMatchString(''); 44 | expect(numberValidator).not.toMatchString('a'); 45 | expect(numberValidator).not.toMatchString('1a'); 46 | expect(numberValidator).not.toMatchString('1.0.'); 47 | expect(numberValidator).not.toMatchString('.1.1'); 48 | expect(numberValidator).not.toMatchString('.'); 49 | 50 | // eslint-disable-next-line no-useless-escape 51 | expect(numberValidator).toEqualRegex(/^[+\-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+\-]?\d+)?$/); 52 | }); 53 | -------------------------------------------------------------------------------- /src/__tests__/example-password.ts: -------------------------------------------------------------------------------- 1 | import { any, buildRegExp, endOfString, lookahead, startOfString, zeroOrMore } from '../index'; 2 | 3 | //^(?=.*[A-Z])(?=.*[a-z])(?=.*\d)(?=.*[^A-Za-z0-9\s]).{8,}$ 4 | 5 | // 6 | // The password policy is as follows: 7 | // - At least one uppercase letter 8 | // - At least one lowercase letter 9 | // - At least one digit 10 | // - At least one special character 11 | // - At least 8 characters long 12 | 13 | const atLeastOneUppercase = lookahead([zeroOrMore(any), /[A-Z]/]); 14 | const atLeastOneLowercase = lookahead([zeroOrMore(any), /[a-z]/]); 15 | const atLeastOneDigit = lookahead([zeroOrMore(any), /[0-9]/]); 16 | const atLeastOneSpecialChar = lookahead([zeroOrMore(any), /[^A-Za-z0-9\s]/]); 17 | const atLeastEightChars = /.{8,}/; 18 | 19 | test('Example: Validating passwords', () => { 20 | const validPassword = buildRegExp([ 21 | startOfString, 22 | atLeastOneUppercase, 23 | 
atLeastOneLowercase, 24 | atLeastOneDigit, 25 | atLeastOneSpecialChar, 26 | atLeastEightChars, 27 | endOfString, 28 | ]); 29 | 30 | expect(validPassword).toMatchString('Aaaaa$aaaaaaa1'); 31 | expect(validPassword).not.toMatchString('aaaaaaaaaaa'); 32 | expect(validPassword).toMatchString('9aaa#aaaaA'); 33 | expect(validPassword).not.toMatchString('Aa'); 34 | expect(validPassword).toMatchString('Aa$123456'); 35 | expect(validPassword).not.toMatchString('Abba'); 36 | expect(validPassword).not.toMatchString('#password'); 37 | expect(validPassword).toMatchString('#passworD666'); 38 | expect(validPassword).not.toMatchString('Aa%1234'); 39 | 40 | expect(validPassword).toEqualRegex( 41 | /^(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[^A-Za-z0-9\s])(?:.{8,})$/, 42 | ); 43 | }); 44 | -------------------------------------------------------------------------------- /src/__tests__/example-regexp.ts: -------------------------------------------------------------------------------- 1 | import { buildRegExp, choiceOf, endOfString, repeat, startOfString } from '..'; 2 | 3 | test('example: mixing with RegExp literals (IPv4 address validator)', () => { 4 | const octet = choiceOf( 5 | /[0-9]/, // 0-9 6 | /[1-9][0-9]/, // 10-99 7 | /1[0-9][0-9]/, // 100-199 8 | /2[0-4][0-9]/, // 200-249 9 | /25[0-5]/, // 250-255 10 | ); 11 | 12 | const regex = buildRegExp([ 13 | startOfString, // prettier break-line 14 | repeat([octet, '.'], 3), 15 | octet, 16 | endOfString, 17 | ]); 18 | 19 | expect(regex).toMatchString('0.0.0.0'); 20 | expect(regex).toMatchString('192.168.0.1'); 21 | expect(regex).toMatchString('1.99.100.249'); 22 | expect(regex).toMatchString('255.255.255.255'); 23 | expect(regex).toMatchString('123.45.67.89'); 24 | 25 | expect(regex).not.toMatchString('0.0.0.'); 26 | expect(regex).not.toMatchString('0.0.0.0.'); 27 | expect(regex).not.toMatchString('0.-1.0.0'); 28 | expect(regex).not.toMatchString('0.1000.0.0'); 29 | expect(regex).not.toMatchString('0.0.300.0'); 30 | 
expect(regex).not.toMatchString('255.255.255.256'); 31 | 32 | expect(regex).toEqualRegex( 33 | /^(?:(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\.){3}(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])$/, 34 | ); 35 | }); 36 | -------------------------------------------------------------------------------- /src/__tests__/example-url-advanced.ts: -------------------------------------------------------------------------------- 1 | import { 2 | anyOf, 3 | buildRegExp, 4 | capture, 5 | charClass, 6 | charRange, 7 | digit, 8 | endOfString, 9 | negativeLookahead, 10 | oneOrMore, 11 | optional, 12 | repeat, 13 | startOfString, 14 | } from '../index'; 15 | 16 | // URL = Scheme ":"["//" Authority]Path["?" Query]["#" Fragment] 17 | // Source: https://en.wikipedia.org/wiki/URL#External_links 18 | 19 | // The building blocks of the URL regex. 20 | const lowercase = charRange('a', 'z'); 21 | const uppercase = charRange('A', 'Z'); 22 | const hyphen = anyOf('-'); 23 | const alphabetical = charClass(lowercase, uppercase); 24 | const specialChars = anyOf('._%+-'); 25 | const portSeperator = ':'; 26 | const schemeSeperator = ':'; 27 | const doubleSlash = '//'; 28 | const at = '@'; 29 | const pathSeparator = '/'; 30 | const querySeparator = '?'; 31 | const fragmentSeparator = '#'; 32 | const usernameChars = charClass(lowercase, digit, specialChars); 33 | const hostnameChars = charClass(charRange('a', 'z'), digit, anyOf('-')); 34 | const domainChars = charRange('a', 'z'); 35 | 36 | // Scheme: 37 | // The scheme is the first part of the URL and defines the protocol to be used. 38 | // Examples of popular schemes include http, https, ftp, mailto, file, data and irc. 39 | // A URL string must be a scheme, followed by a colon, followed by a scheme-specific part. 
40 | 41 | const scheme = [repeat(charClass(hyphen, alphabetical), { min: 3, max: 6 }), optional('s')]; 42 | const schemeRegex = buildRegExp([startOfString, capture(scheme), endOfString], { 43 | ignoreCase: true, 44 | }); 45 | 46 | test('Matching the Schema components.', () => { 47 | expect(schemeRegex).toMatchString('ftp'); 48 | expect(schemeRegex).not.toMatchString('ftp:'); 49 | expect(schemeRegex).not.toMatchString('h'); 50 | expect(schemeRegex).not.toMatchString('nameiswaytoolong'); 51 | expect(schemeRegex).toMatchString('HTTPS'); 52 | expect(schemeRegex).toMatchString('http'); 53 | }); 54 | 55 | // Authority: 56 | // The authority part of a URL consists of three sub-parts: 57 | // 1. An optional username, followed by an at symbol (@) 58 | // 2. A hostname (e.g. www.google.com) 59 | // 3. An optional port number, preceded by a colon (:) 60 | // Authority = [userinfo "@"] host [":" port] 61 | 62 | const userInfo = oneOrMore(usernameChars); 63 | const hostname = repeat(hostnameChars, { min: 1, max: 63 }); 64 | const hostnameEnd = capture([hostname, endOfString]); 65 | const host = capture([oneOrMore([hostname, '.'])]); 66 | const port = [portSeperator, oneOrMore(digit)]; 67 | 68 | const authority = [doubleSlash, optional([userInfo, at]), hostname, optional(port)]; 69 | const authorityRegex = buildRegExp([startOfString, ...authority, endOfString], { 70 | ignoreCase: true, 71 | }); 72 | 73 | const hostEx = buildRegExp([startOfString, host, hostnameEnd, endOfString], { ignoreCase: true }); 74 | 75 | test('match URL hostname component', () => { 76 | expect(hostEx).toMatchString('www.google.com'); 77 | expect(hostEx).not.toMatchString('www.google.com.'); 78 | }); 79 | 80 | test('match URL authority components', () => { 81 | expect(authorityRegex).toMatchString('//davidbowie@localhost:8080'); 82 | expect(authorityRegex).toMatchString('//localhost:1234'); 83 | expect(authorityRegex).not.toMatchString('davidbowie@localhost:1972'); 84 | 
expect(authorityRegex).not.toMatchString('nameiswaytoolong'); 85 | }); 86 | 87 | // Path: 88 | // The path is the part of the URL that comes after the authority and before the query. 89 | // It consists of a sequence of path segments separated by a forward slash (/). 90 | // A path string must begin with a forward slash (/). 91 | 92 | const pathSegment = [ 93 | pathSeparator, 94 | optional(oneOrMore(charClass(lowercase, uppercase, digit, anyOf(':@%._+~#=')))), 95 | ]; 96 | 97 | const path = oneOrMore(pathSegment); 98 | const pathRegex = buildRegExp([startOfString, path, endOfString], { 99 | ignoreCase: true, 100 | }); 101 | 102 | test('match URL Path components.', () => { 103 | expect(pathRegex).toMatchString('/'); 104 | expect(pathRegex).not.toMatchString(''); 105 | expect(pathRegex).toMatchString('/a'); 106 | expect(pathRegex).not.toMatchString('a'); 107 | expect(pathRegex).not.toMatchString('a/'); 108 | expect(pathRegex).toMatchString('/a/b'); 109 | expect(pathRegex).not.toMatchString('a/b'); 110 | expect(pathRegex).not.toMatchString('a/b/'); 111 | }); 112 | 113 | // Query: 114 | // The query part of a URL is optional and comes after the path. 115 | // It is separated from the path by a question mark (?). 116 | // The query string consists of a sequence of field-value pairs separated by an ampersand (&). 117 | // Each field-value pair is separated by an equals sign (=). 
118 | 119 | const queryKey = oneOrMore(charClass(lowercase, uppercase, digit, anyOf('_-'))); 120 | const queryValue = oneOrMore(charClass(lowercase, uppercase, digit, anyOf('_-'))); 121 | const queryDelimiter = anyOf('&;'); 122 | const equals = '='; 123 | 124 | const queryKeyValuePair = buildRegExp([queryKey, equals, queryValue]); 125 | 126 | const query = [querySeparator, oneOrMore([queryKeyValuePair, optional(queryDelimiter)])]; 127 | const queryRegex = buildRegExp([startOfString, ...query, endOfString], { 128 | ignoreCase: true, 129 | }); 130 | 131 | test('match URL query components', () => { 132 | expect(queryRegex).not.toMatchString(''); 133 | expect(queryRegex).not.toMatchString('??'); 134 | expect(queryRegex).not.toMatchString('?'); 135 | expect(queryRegex).not.toMatchString('?a-b'); 136 | expect(queryRegex).toMatchString('?a=b'); 137 | expect(queryRegex).toMatchString('?a=b&c=d'); 138 | expect(queryRegex).not.toMatchString('a=b&c-d'); 139 | }); 140 | 141 | // Fragment: 142 | // The fragment part of a URL is optional and comes after the query. 143 | // It is separated from the query by a hash (#). 144 | // The fragment string consists of a sequence of characters. 
145 | 146 | const fragment = [ 147 | fragmentSeparator, 148 | oneOrMore(charClass(lowercase, uppercase, digit, anyOf(':@%._+~#=&'))), 149 | ]; 150 | const fragmentRegex = buildRegExp([startOfString, ...fragment, endOfString], { 151 | ignoreCase: true, 152 | }); 153 | 154 | test('match URL fragment component', () => { 155 | expect(fragmentRegex).not.toMatchString(''); 156 | expect(fragmentRegex).toMatchString('#section1'); 157 | expect(fragmentRegex).not.toMatchString('#'); 158 | }); 159 | 160 | const urlRegex = buildRegExp( 161 | [ 162 | startOfString, 163 | capture([ 164 | optional(scheme), 165 | schemeSeperator, 166 | optional(authority), 167 | path, 168 | optional(query), 169 | optional(fragment), 170 | ]), 171 | endOfString, 172 | ], 173 | { 174 | ignoreCase: true, 175 | }, 176 | ); 177 | 178 | test('match URLs', () => { 179 | expect(urlRegex).not.toMatchString(''); 180 | expect(urlRegex).not.toMatchString('http'); 181 | expect(urlRegex).toMatchString('http://localhost:8080'); 182 | expect(urlRegex).toMatchString('http://localhost:8080/users/paul/research/data.json'); 183 | expect(urlRegex).toMatchString( 184 | 'http://localhost:8080/users/paul/research/data.json?request=regex&email=me', 185 | ); 186 | expect(urlRegex).toMatchString( 187 | 'http://localhost:8080/users/paul/research/data.json?request=regex&email=me#section1', 188 | ); 189 | }); 190 | 191 | const emailRegex = buildRegExp( 192 | [ 193 | startOfString, 194 | capture([ 195 | oneOrMore(usernameChars), 196 | '@', 197 | oneOrMore(hostnameChars), 198 | '.', 199 | repeat(domainChars, { min: 2 }), 200 | ]), 201 | endOfString, 202 | ], 203 | { 204 | ignoreCase: true, 205 | }, 206 | ); 207 | 208 | test('match email address', () => { 209 | expect(emailRegex).not.toMatchString(''); 210 | expect(emailRegex).toMatchString('stevenwilson@porcupinetree.com'); 211 | expect(emailRegex).not.toMatchString('stevenwilson@porcupinetree'); 212 | }); 213 | 214 | const urlsWithoutEmailsRegex = buildRegExp( 215 | [ 216 | 
startOfString, 217 | negativeLookahead(emailRegex), // Reject emails 218 | urlRegex, 219 | endOfString, 220 | ], 221 | { 222 | ignoreCase: true, 223 | }, 224 | ); 225 | 226 | test('match URL but not email', () => { 227 | expect(urlsWithoutEmailsRegex).toMatchString('http://localhost:8080'); 228 | expect(urlsWithoutEmailsRegex).toMatchString( 229 | 'http://paul@localhost:8080/users/paul/research/data.json?request=regex&email=me#section1', 230 | ); 231 | expect(urlsWithoutEmailsRegex).toMatchString('ftp://data/#January'); 232 | expect(urlsWithoutEmailsRegex).not.toMatchString('https:'); 233 | expect(urlsWithoutEmailsRegex).not.toMatchString('piotr@riverside.com'); 234 | expect(urlsWithoutEmailsRegex).toMatchString('http://www.google.com'); 235 | expect(urlsWithoutEmailsRegex).toMatchString('https://www.google.com?search=regex'); 236 | expect(urlsWithoutEmailsRegex).not.toMatchString('www.google.com?search=regex&email=me'); 237 | expect(urlsWithoutEmailsRegex).toMatchString('mailto://paul@thebeatles.com'); 238 | expect(urlsWithoutEmailsRegex).not.toMatchString('ftphttpmailto://neal@nealmorse'); 239 | }); 240 | -------------------------------------------------------------------------------- /src/__tests__/example-url-simple.ts: -------------------------------------------------------------------------------- 1 | import { 2 | anyOf, 3 | buildRegExp, 4 | charClass, 5 | charRange, 6 | choiceOf, 7 | digit, 8 | endOfString, 9 | oneOrMore, 10 | optional, 11 | startOfString, 12 | zeroOrMore, 13 | } from '..'; 14 | 15 | // Modified from: https://stackoverflow.com/a/2015516 16 | test('example: simple url validation', () => { 17 | const protocol = [choiceOf('http', 'https'), '://']; 18 | const domainChars = charClass(charRange('a', 'z'), digit); 19 | const domainCharsHyphen = charClass(domainChars, anyOf('-')); 20 | 21 | const domainSegment = choiceOf( 22 | domainChars, // single char 23 | [domainChars, zeroOrMore(domainCharsHyphen), domainChars], // multi char 24 | ); 25 | 26 | 
const regex = buildRegExp([ 27 | startOfString, 28 | optional(protocol), 29 | oneOrMore([domainSegment, '.']), // domain segment 30 | charRange('a', 'z'), // TLD first char 31 | oneOrMore(domainChars), // TLD remaining chars 32 | endOfString, 33 | ]); 34 | 35 | expect(regex).toMatchString('example.com'); 36 | expect(regex).toMatchString('beta.example.com'); 37 | expect(regex).toMatchString('http://beta.example.com'); 38 | expect(regex).toMatchString('https://beta.example.com'); 39 | expect(regex).toMatchString('a.co'); 40 | 41 | expect(regex).not.toMatchString('example'); 42 | expect(regex).not.toMatchString('aaa.a'); 43 | expect(regex).not.toMatchString('a.-a.com'); 44 | expect(regex).not.toMatchString('a.-a.com'); 45 | expect(regex).not.toMatchString('@gmail.com'); 46 | 47 | expect(regex).toEqualRegex( 48 | // eslint-disable-next-line no-useless-escape 49 | /^(?:(?:http|https):\/\/)?(?:(?:[a-z\d]|[a-z\d][a-z\d\-]*[a-z\d])\.)+[a-z][a-z\d]+$/, 50 | ); 51 | }); 52 | -------------------------------------------------------------------------------- /src/builders.ts: -------------------------------------------------------------------------------- 1 | import type { RegexFlags, RegexSequence } from './types'; 2 | import { encode } from './encoder'; 3 | 4 | /** 5 | * Generate RegExp object from a regex sequence with optional flags. 6 | * 7 | * @param sequence Single regex element or array of elements 8 | * @param flags RegExp flags object; treated as an empty object when omitted 9 | * @returns RegExp object 10 | */ 11 | export function buildRegExp(sequence: RegexSequence, flags?: RegexFlags): RegExp { 12 | const pattern = encode(sequence).pattern; 13 | ensureUnicodeFlagIfNeeded(pattern, flags); 14 | 15 | const flagsString = encodeFlags(flags ?? {}); 16 | return new RegExp(pattern, flagsString); 17 | } 18 | 19 | /** 20 | * Generate regex pattern from elements. 
21 | * @param sequence Single regex element or array of elements 22 | * @returns regex pattern string 23 | */ 24 | export function buildPattern(sequence: RegexSequence): string { 25 | return encode(sequence).pattern; 26 | } 27 | 28 | /** Serializes a flags object into a RegExp flags string: one letter is appended per truthy flag, in the fixed order g, i, m, d, s, y, u. */ function encodeFlags(flags: RegexFlags): string { 29 | let result = ''; 30 | 31 | if (flags.global) result += 'g'; 32 | if (flags.ignoreCase) result += 'i'; 33 | if (flags.multiline) result += 'm'; 34 | if (flags.hasIndices) result += 'd'; 35 | if (flags.dotAll) result += 's'; 36 | if (flags.sticky) result += 'y'; 37 | if (flags.unicode) result += 'u'; 38 | 39 | return result; 40 | } 41 | 42 | // Matches unicode mode patterns: \u{...}, \p{...}, \P{...}, but avoids valid \\u{...}, etc 43 | const unicodeModePatterns = /(? { 14 | expect(startOfString).toEqualRegex(/^/); 15 | expect([startOfString, 'a', 'b']).toEqualRegex(/^ab/); 16 | }); 17 | 18 | test('`startOfString` matching', () => { 19 | expect([startOfString, oneOrMore('a')]).toMatchGroups('a aa aaa', ['a']); 20 | }); 21 | 22 | test('`endOfString` pattern', () => { 23 | expect(endOfString).toEqualRegex(/$/); 24 | expect(['a', 'b', endOfString]).toEqualRegex(/ab$/); 25 | }); 26 | 27 | test('`endOfString` matching', () => { 28 | expect([oneOrMore('a'), endOfString]).toMatchGroups('a aa aaa', ['aaa']); 29 | }); 30 | 31 | test('`wordBoundary` pattern', () => { 32 | expect(wordBoundary).toEqualRegex(/\b/); 33 | expect([wordBoundary, 'a', 'b']).toEqualRegex(/\bab/); 34 | }); 35 | 36 | test('`wordBoundary` matching', () => { 37 | expect( 38 | buildRegExp([wordBoundary, 'a', zeroOrMore(nonWhitespace)], { global: true }), 39 | ).toMatchGroups('a ba ab aa', ['a', 'ab', 'aa']); 40 | 41 | expect( 42 | buildRegExp([zeroOrMore(nonWhitespace), 'a', wordBoundary], { global: true }), 43 | ).toMatchGroups('a ba ab aa', ['a', 'ba', 'aa']); 44 | }); 45 | 46 | test('`nonWordBoundary` pattern', () => { 47 | expect(nonWordBoundary).toEqualRegex(/\B/); 48 | expect([nonWordBoundary, 'a', 
'b']).toEqualRegex(/\Bab/); 49 | expect(['a', nonWordBoundary, 'b']).toEqualRegex(/a\Bb/); 50 | expect(['a', 'b', nonWordBoundary]).toEqualRegex(/ab\B/); 51 | }); 52 | 53 | test('`nonWordBoundary` matching', () => { 54 | expect(buildRegExp([nonWordBoundary, 'abc', digit], { global: true })).toMatchGroups( 55 | 'abc1 xabc2 xxabc3', 56 | ['abc2', 'abc3'], 57 | ); 58 | 59 | expect(buildRegExp([digit, 'abc', nonWordBoundary], { global: true })).toMatchGroups( 60 | '1abc 2abcx 3abcxx', 61 | ['2abc', '3abc'], 62 | ); 63 | }); 64 | -------------------------------------------------------------------------------- /src/constructs/__tests__/capture.test.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | any, 3 | anyOf, 4 | buildRegExp, 5 | capture, 6 | digit, 7 | negated, 8 | oneOrMore, 9 | ref, 10 | word, 11 | wordBoundary, 12 | } from '../..'; 13 | 14 | test('`capture` pattern', () => { 15 | expect(capture('a')).toEqualRegex(/(a)/); 16 | expect(capture('abc')).toEqualRegex(/(abc)/); 17 | expect(capture(oneOrMore('abc'))).toEqualRegex(/((?:abc)+)/); 18 | expect(oneOrMore(capture('abc'))).toEqualRegex(/(abc)+/); 19 | }); 20 | 21 | test('`capture` matching', () => { 22 | expect(capture('b')).toMatchGroups('ab', ['b', 'b']); 23 | expect(['a', capture('b')]).toMatchGroups('ab', ['ab', 'b']); 24 | expect(['a', capture('b'), capture('c')]).toMatchGroups('abc', ['abc', 'b', 'c']); 25 | }); 26 | 27 | test('named `capture` pattern', () => { 28 | expect(capture('a', { name: 'xyz' })).toEqualRegex('(?a)'); 29 | expect(capture('abc', { name: 'xyz' })).toEqualRegex('(?abc)'); 30 | expect(capture(oneOrMore('abc'), { name: 'xyz' })).toEqualRegex('(?(?:abc)+)'); 31 | expect(oneOrMore(capture('abc', { name: 'xyz' }))).toEqualRegex('(?abc)+'); 32 | }); 33 | 34 | test('named `capture` matching', () => { 35 | expect(capture('b', { name: 'x1' })).toMatchGroups('ab', ['b', 'b']); 36 | expect(capture('b', { name: 'x1' })).toMatchNamedGroups('ab', 
{ x1: 'b' }); 37 | 38 | expect(['a', capture('b', { name: 'x1' })]).toMatchGroups('ab', ['ab', 'b']); 39 | expect(['a', capture('b', { name: 'x1' })]).toMatchNamedGroups('ab', { x1: 'b' }); 40 | 41 | expect([capture('a'), capture('b', { name: 'x1' }), capture('c', { name: 'x2' })]).toMatchGroups( 42 | 'abc', 43 | ['abc', 'a', 'b', 'c'], 44 | ); 45 | expect([ 46 | capture('a'), 47 | capture('b', { name: 'x1' }), 48 | capture('c', { name: 'x2' }), 49 | ]).toMatchNamedGroups('abc', { x1: 'b', x2: 'c' }); 50 | }); 51 | 52 | test('`reference` pattern', () => { 53 | // @ts-expect-error 54 | expect([ref('ref0')]).toEqualRegex(/\k/); 55 | // @ts-expect-error 56 | expect([ref('xyz')]).toEqualRegex(/\k/); 57 | expect([capture(any, { name: 'ref0' }), ' ', ref('ref0')]).toEqualRegex('(?.) \\k'); 58 | 59 | expect(['xx', capture(any, { name: 'r123' }), ' ', ref('r123'), 'xx']).toEqualRegex( 60 | 'xx(?.) \\kxx', 61 | ); 62 | }); 63 | 64 | test('`reference` matching basic case', () => { 65 | expect([capture(word, { name: 'a' }), ref('a')]).toMatchString('aa'); 66 | expect([capture(digit, { name: 'a' }), ref('a')]).toMatchString('11'); 67 | 68 | expect([capture(any, { name: 'a' }), ref('a')]).not.toMatchString('ab'); 69 | expect([capture(digit, { name: 'a' }), ref('a')]).not.toMatchString('1a'); 70 | expect([capture(digit, { name: 'a' }), ref('a')]).not.toMatchString('a1'); 71 | }); 72 | 73 | test('`reference` matching variable case', () => { 74 | const someRef = ref('test'); 75 | expect([capture(word, { name: someRef.name }), someRef]).toMatchString('aa'); 76 | expect([capture(digit, { name: someRef.name }), someRef]).toMatchString('11'); 77 | 78 | expect([capture(any, { name: someRef.name }), someRef]).not.toMatchString('ab'); 79 | expect([capture(digit, { name: someRef.name }), someRef]).not.toMatchString('1a'); 80 | expect([capture(digit, { name: someRef.name }), someRef]).not.toMatchString('a1'); 81 | }); 82 | 83 | test('`reference` matching HTML attributes', () => { 84 | 
const quoteChars = anyOf('"\''); 85 | const htmlAttributeRegex = buildRegExp([ 86 | wordBoundary, 87 | capture(oneOrMore(word), { name: 'name' }), 88 | '=', 89 | capture(quoteChars, { name: 'quote' }), 90 | capture(oneOrMore(negated(quoteChars)), { name: 'value' }), 91 | ref('quote'), 92 | ]); 93 | 94 | expect(htmlAttributeRegex).toMatchNamedGroups('a="b"', { 95 | name: 'a', 96 | quote: '"', 97 | value: 'b', 98 | }); 99 | expect(htmlAttributeRegex).toMatchNamedGroups('aa="bbb"', { 100 | name: 'aa', 101 | quote: '"', 102 | value: 'bbb', 103 | }); 104 | expect(htmlAttributeRegex).toMatchNamedGroups(`aa='bbb'`, { 105 | name: 'aa', 106 | quote: `'`, 107 | value: 'bbb', 108 | }); 109 | expect(htmlAttributeRegex).toMatchNamedGroups('', { 110 | quote: '"', 111 | name: 'type', 112 | value: 'number', 113 | }); 114 | expect(htmlAttributeRegex).toMatchNamedGroups(``, { 115 | quote: "'", 116 | name: 'type', 117 | value: 'number', 118 | }); 119 | 120 | expect(htmlAttributeRegex).not.toMatchString(`aa="bbb'`); 121 | expect(htmlAttributeRegex).not.toMatchString(`aa='bbb"`); 122 | expect(htmlAttributeRegex).not.toMatchString(``); 124 | }); 125 | -------------------------------------------------------------------------------- /src/constructs/__tests__/char-class.test.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-useless-escape */ 2 | import { 3 | anyOf, 4 | buildRegExp, 5 | charClass, 6 | charRange, 7 | digit, 8 | negated, 9 | nonDigit, 10 | nonWhitespace, 11 | nonWord, 12 | oneOrMore, 13 | optional, 14 | type RegexSequence, 15 | whitespace, 16 | word, 17 | zeroOrMore, 18 | } from '../..'; 19 | 20 | function u(sequence: RegexSequence) { 21 | return buildRegExp(sequence, { unicode: true }); 22 | } 23 | 24 | test('`charClass` base cases', () => { 25 | expect(charClass(charRange('a', 'z'))).toEqualRegex(/[a-z]/); 26 | expect(charClass(charRange('a', 'z'), charRange('A', 'Z'))).toEqualRegex(/[a-zA-Z]/); 27 | 
expect(charClass(charRange('a', 'z'), anyOf('05'))).toEqualRegex(/[a-z05]/); 28 | expect(charClass(charRange('a', 'z'), whitespace, anyOf('05'))).toEqualRegex(/[a-z\s05]/); 29 | }); 30 | 31 | test('`charClass` joins character escapes', () => { 32 | expect(charClass(word)).toEqualRegex(/[\w]/); 33 | expect(charClass(digit)).toEqualRegex(/[\d]/); 34 | expect(charClass(whitespace)).toEqualRegex(/[\s]/); 35 | expect(charClass(nonWord)).toEqualRegex(/[\W]/); 36 | expect(charClass(nonDigit)).toEqualRegex(/[\D]/); 37 | expect(charClass(nonWhitespace)).toEqualRegex(/[\S]/); 38 | 39 | expect(charClass(whitespace, nonWhitespace)).toEqualRegex(/[\s\S]/); 40 | 41 | expect(charClass(word, whitespace)).toEqualRegex(/[\w\s]/); 42 | expect(charClass(word, digit, whitespace)).toEqualRegex(/[\w\d\s]/); 43 | expect(charClass(word, nonDigit)).toEqualRegex(/[\w\D]/); 44 | }); 45 | 46 | test('`charClass` throws on empty text', () => { 47 | expect(() => charClass()).toThrowErrorMatchingInlineSnapshot(`"Expected at least one element"`); 48 | }); 49 | 50 | test('`charRange` pattern', () => { 51 | expect(charRange('a', 'z')).toEqualRegex(/[a-z]/); 52 | expect(['x', charRange('0', '9')]).toEqualRegex(/x[0-9]/); 53 | expect([charRange('A', 'F'), 'x']).toEqualRegex(/[A-F]x/); 54 | }); 55 | 56 | test('`charRange` works both ways', () => { 57 | expect(charRange('a', 'z')).toEqualRegex(/[a-z]/); 58 | expect(charRange('z', 'a')).toEqualRegex(/[a-z]/); 59 | }); 60 | 61 | test('`charRange` throws on incorrect arguments', () => { 62 | expect(() => charRange('aa', 'z')).toThrowErrorMatchingInlineSnapshot( 63 | `"Expected single characters, but received "aa" & "z""`, 64 | ); 65 | expect(() => charRange('a', 'zz')).toThrowErrorMatchingInlineSnapshot( 66 | `"Expected single characters, but received "a" & "zz""`, 67 | ); 68 | expect(() => charRange('', 'z')).toThrowErrorMatchingInlineSnapshot( 69 | `"Expected single characters, but received "" & "z""`, 70 | ); 71 | expect(() => charRange('a', 
'')).toThrowErrorMatchingInlineSnapshot( 72 | `"Expected single characters, but received "a" & """`, 73 | ); 74 | }); 75 | 76 | test('`anyOf` handles basic cases pattern', () => { 77 | expect(anyOf('a')).toMatchString('a'); 78 | expect(anyOf('a')).toEqualRegex(/[a]/); 79 | 80 | expect(['x', anyOf('a'), 'x']).toMatchString('xax'); 81 | expect(['x', anyOf('a'), 'x']).toEqualRegex(/x[a]x/); 82 | 83 | expect(anyOf('ab')).toMatchString('a'); 84 | expect(anyOf('ab')).toMatchString('b'); 85 | expect(anyOf('ab')).not.toMatchString('c'); 86 | expect(anyOf('ab')).toEqualRegex(/[ab]/); 87 | 88 | expect(['x', anyOf('ab')]).toMatchString('xa'); 89 | expect(['x', anyOf('ab')]).toMatchString('xb'); 90 | expect(['x', anyOf('ab')]).not.toMatchString('x0'); 91 | expect(['x', anyOf('ab')]).toEqualRegex(/x[ab]/); 92 | 93 | expect(['x', anyOf('ab'), 'x']).toMatchString('xax'); 94 | expect(['x', anyOf('ab'), 'x']).toMatchString('xbx'); 95 | expect(['x', anyOf('ab'), 'x']).not.toMatchString('x0x'); 96 | expect(['x', anyOf('ab'), 'x']).toEqualRegex(/x[ab]x/); 97 | }); 98 | 99 | test('`anyOf` throws on empty text', () => { 100 | expect(() => anyOf('')).toThrowErrorMatchingInlineSnapshot(`"Expected at least one character"`); 101 | }); 102 | 103 | test('`anyOf` pattern with quantifiers', () => { 104 | expect(['x', oneOrMore(anyOf('abc')), 'x']).toEqualRegex(/x[abc]+x/); 105 | expect(['x', optional(anyOf('abc')), 'x']).toEqualRegex(/x[abc]?x/); 106 | expect(['x', zeroOrMore(anyOf('abc')), 'x']).toEqualRegex(/x[abc]*x/); 107 | }); 108 | 109 | test('`anyOf` handles hyphens', () => { 110 | expect(anyOf('^-')).toMatchString('^'); 111 | expect(anyOf('^-')).toMatchString('-'); 112 | expect(anyOf('^-')).not.toMatchString('a'); 113 | expect(anyOf('^-')).toEqualRegex(/[\^\-]/); 114 | 115 | expect(anyOf('-^')).toMatchString('^'); 116 | expect(anyOf('-^')).toMatchString('-'); 117 | expect(anyOf('-^')).not.toMatchString('a'); 118 | expect(anyOf('-^')).toEqualRegex(/[\-\^]/); 119 | 120 | 
expect(anyOf('-^a')).toMatchString('^'); 121 | expect(anyOf('-^a')).toMatchString('-'); 122 | expect(anyOf('-^a')).toMatchString('a'); 123 | expect(anyOf('-^a')).not.toMatchString('b'); 124 | expect(anyOf('-^a')).toEqualRegex(/[\-\^a]/); 125 | }); 126 | 127 | test('`anyOf` handles hyphens in unicode mode', () => { 128 | expect(u(anyOf('^-'))).toMatchString('^'); 129 | expect(u(anyOf('^-'))).toMatchString('^'); 130 | expect(u(anyOf('^-'))).toMatchString('-'); 131 | expect(u(anyOf('^-'))).not.toMatchString('a'); 132 | expect(u(anyOf('^-'))).toEqualRegex(/[\^\-]/u); 133 | 134 | expect(u(anyOf('-^'))).toMatchString('^'); 135 | expect(u(anyOf('-^'))).toMatchString('-'); 136 | expect(u(anyOf('-^'))).not.toMatchString('a'); 137 | expect(u(anyOf('-^'))).toEqualRegex(/[\-\^]/u); 138 | 139 | expect(u(anyOf('-^a'))).toMatchString('^'); 140 | expect(u(anyOf('-^a'))).toMatchString('-'); 141 | expect(u(anyOf('-^a'))).toMatchString('a'); 142 | expect(u(anyOf('-^a'))).not.toMatchString('b'); 143 | expect(u(anyOf('-^a'))).toEqualRegex(/[\-\^a]/u); 144 | }); 145 | 146 | test('`anyOf` handles special chars', () => { 147 | expect(anyOf('.')).toMatchString('.'); 148 | expect(anyOf('.')).not.toMatchString('a'); 149 | expect(anyOf('.')).toEqualRegex(/[.]/); 150 | 151 | expect(anyOf('*')).toMatchString('*'); 152 | expect(anyOf('*')).not.toMatchString('a'); 153 | expect(anyOf('*')).toEqualRegex(/[*]/); 154 | 155 | expect(anyOf('+')).toMatchString('+'); 156 | expect(anyOf('+')).not.toMatchString('a'); 157 | expect(anyOf('+')).toEqualRegex(/[+]/); 158 | 159 | expect(anyOf('?')).toMatchString('?'); 160 | expect(anyOf('?')).not.toMatchString('a'); 161 | expect(anyOf('?')).toEqualRegex(/[?]/); 162 | 163 | expect(anyOf('^')).toMatchString('^'); 164 | expect(anyOf('^')).not.toMatchString('a'); 165 | expect(anyOf('^')).toEqualRegex(/[\^]/); 166 | 167 | expect(anyOf('^0')).toMatchString('^'); 168 | expect(anyOf('^0')).not.toMatchString('a'); 169 | expect(anyOf('^0')).toEqualRegex(/[\^0]/); 170 | 
171 | expect(anyOf('0^')).toMatchString('^'); 172 | expect(anyOf('0^')).not.toMatchString('a'); 173 | expect(anyOf('0^')).toEqualRegex(/[0\^]/); 174 | 175 | expect(anyOf('$')).toMatchString('$'); 176 | expect(anyOf('$')).not.toMatchString('a'); 177 | expect(anyOf('$')).toEqualRegex(/[$]/); 178 | 179 | expect(anyOf('{')).toMatchString('{'); 180 | expect(anyOf('{')).not.toMatchString('a'); 181 | expect(anyOf('{')).toEqualRegex(/[{]/); 182 | 183 | expect(anyOf('}')).toMatchString('}'); 184 | expect(anyOf('}')).not.toMatchString('a'); 185 | expect(anyOf('}')).toEqualRegex(/[}]/); 186 | 187 | expect(anyOf('(')).toMatchString('('); 188 | expect(anyOf('(')).not.toMatchString('a'); 189 | expect(anyOf('(')).toEqualRegex(/[(]/); 190 | 191 | expect(anyOf(')')).toMatchString(')'); 192 | expect(anyOf(')')).not.toMatchString('a'); 193 | expect(anyOf(')')).toEqualRegex(/[)]/); 194 | 195 | expect(anyOf('|')).toMatchString('|'); 196 | expect(anyOf('|')).not.toMatchString('a'); 197 | expect(anyOf('|')).toEqualRegex(/[|]/); 198 | 199 | expect(anyOf('[')).toMatchString('['); 200 | expect(anyOf('[')).not.toMatchString('a'); 201 | expect(anyOf('[')).toEqualRegex(/[[]/); 202 | 203 | expect(anyOf(']')).toMatchString(']'); 204 | expect(anyOf(']')).not.toMatchString('a'); 205 | expect(anyOf(']')).toEqualRegex(/[\]]/); 206 | 207 | expect(anyOf('\\')).toMatchString('\\'); 208 | expect(anyOf('\\')).not.toMatchString('a'); 209 | expect(anyOf('\\')).toEqualRegex(/[\\]/); 210 | }); 211 | 212 | test('`anyof` matches special characters', () => { 213 | expect(anyOf('a')).toMatchString('a'); 214 | }); 215 | 216 | test('`anyof` matches special characters in unicode mode', () => { 217 | expect(u(anyOf('a'))).toMatchString('a'); 218 | 219 | expect(u(anyOf('.'))).toMatchString('.'); 220 | expect(u(anyOf('.'))).not.toMatchString('a'); 221 | expect(u(anyOf('*'))).toMatchString('*'); 222 | expect(u(anyOf('*'))).not.toMatchString('a'); 223 | expect(u(anyOf('+'))).toMatchString('+'); 224 | 
expect(u(anyOf('+'))).not.toMatchString('a'); 225 | expect(u(anyOf('?'))).toMatchString('?'); 226 | expect(u(anyOf('?'))).not.toMatchString('a'); 227 | expect(u(anyOf('^'))).toMatchString('^'); 228 | expect(u(anyOf('^'))).not.toMatchString('a'); 229 | expect(u(anyOf('^0'))).toMatchString('^'); 230 | expect(u(anyOf('^0'))).not.toMatchString('a'); 231 | expect(u(anyOf('0^'))).toMatchString('^'); 232 | expect(u(anyOf('0^'))).not.toMatchString('a'); 233 | expect(u(anyOf('$'))).toMatchString('$'); 234 | expect(u(anyOf('$'))).not.toMatchString('a'); 235 | expect(u(anyOf('{'))).toMatchString('{'); 236 | expect(u(anyOf('{'))).not.toMatchString('a'); 237 | expect(u(anyOf('}'))).toMatchString('}'); 238 | expect(u(anyOf('}'))).not.toMatchString('a'); 239 | expect(u(anyOf('('))).toMatchString('('); 240 | expect(u(anyOf('('))).not.toMatchString('a'); 241 | expect(u(anyOf(')'))).toMatchString(')'); 242 | expect(u(anyOf(')'))).not.toMatchString('a'); 243 | expect(u(anyOf('|'))).toMatchString('|'); 244 | expect(u(anyOf('|'))).not.toMatchString('a'); 245 | expect(u(anyOf('['))).toMatchString('['); 246 | expect(u(anyOf('['))).not.toMatchString('a'); 247 | expect(u(anyOf(']'))).toMatchString(']'); 248 | expect(u(anyOf(']'))).not.toMatchString('a'); 249 | expect(u(anyOf('\\'))).toMatchString('\\'); 250 | expect(u(anyOf('\\'))).not.toMatchString('a'); 251 | }); 252 | 253 | test('`negated` character class pattern', () => { 254 | expect(negated(anyOf('a'))).toEqualRegex(/[^a]/); 255 | expect(negated(anyOf('abc'))).toEqualRegex(/[^abc]/); 256 | }); 257 | 258 | test('`negated` character class matching', () => { 259 | expect(negated(anyOf('a'))).not.toMatchString('aa'); 260 | expect(negated(anyOf('a'))).toMatchGroups('aba', ['b']); 261 | }); 262 | -------------------------------------------------------------------------------- /src/constructs/__tests__/char-escape.test.ts: -------------------------------------------------------------------------------- 1 | import { any, digit, nonDigit, 
nonWhitespace, nonWord, whitespace, word } from '../..'; 2 | 3 | test('`any` character escape', () => { 4 | expect(any).toEqualRegex(/./); 5 | expect(['x', any]).toEqualRegex(/x./); 6 | expect(['x', any, 'x']).toEqualRegex(/x.x/); 7 | }); 8 | 9 | test('`digit` character escape', () => { 10 | expect(digit).toEqualRegex(/\d/); 11 | expect(['x', digit]).toEqualRegex(/x\d/); 12 | expect(['x', digit, 'x']).toEqualRegex(/x\dx/); 13 | expect(digit).toMatchString('1'); 14 | expect(digit).not.toMatchString('A'); 15 | }); 16 | 17 | test('`nonDigit` character escape', () => { 18 | expect(nonDigit).toEqualRegex(/\D/); 19 | expect(['x', nonDigit]).toEqualRegex(/x\D/); 20 | expect(['x', nonDigit, 'x']).toEqualRegex(/x\Dx/); 21 | expect(nonDigit).not.toMatchString('1'); 22 | expect(nonDigit).toMatchString('A'); 23 | }); 24 | 25 | test('`word` character escape', () => { 26 | expect(word).toEqualRegex(/\w/); 27 | expect(['x', word]).toEqualRegex(/x\w/); 28 | expect(['x', word, 'x']).toEqualRegex(/x\wx/); 29 | expect(word).toMatchString('A'); 30 | expect(word).toMatchString('1'); 31 | expect(word).not.toMatchString('$'); 32 | }); 33 | 34 | test('`nonWord` character escape', () => { 35 | expect(nonWord).toEqualRegex(/\W/); 36 | expect(['x', nonWord]).toEqualRegex(/x\W/); 37 | expect(['x', nonWord, 'x']).toEqualRegex(/x\Wx/); 38 | expect(nonWord).not.toMatchString('A'); 39 | expect(nonWord).not.toMatchString('1'); 40 | expect(nonWord).toMatchString('$'); 41 | }); 42 | 43 | test('`whitespace` character escape', () => { 44 | expect(whitespace).toEqualRegex(/\s/); 45 | expect(['x', whitespace]).toEqualRegex(/x\s/); 46 | expect(['x', whitespace, 'x']).toEqualRegex(/x\sx/); 47 | expect(whitespace).toMatchString(' '); 48 | expect(whitespace).toMatchString('\t'); 49 | expect(whitespace).not.toMatchString('A'); 50 | expect(whitespace).not.toMatchString('1'); 51 | }); 52 | 53 | test('`nonWhitespace` character escape', () => { 54 | expect(nonWhitespace).toEqualRegex(/\S/); 55 | expect(['x', 
nonWhitespace]).toEqualRegex(/x\S/); 56 | expect(['x', nonWhitespace, 'x']).toEqualRegex(/x\Sx/); 57 | expect(nonWhitespace).not.toMatchString(' '); 58 | expect(nonWhitespace).not.toMatchString('\t'); 59 | expect(nonWhitespace).toMatchString('A'); 60 | expect(nonWhitespace).toMatchString('1'); 61 | }); 62 | 63 | test('example: negated character classes', () => { 64 | expect(nonDigit).toEqualRegex(/\D/); 65 | expect(nonWord).toEqualRegex(/\W/); 66 | expect(nonWhitespace).toEqualRegex(/\S/); 67 | 68 | expect(nonDigit).toMatchString('A'); 69 | expect(nonDigit).not.toMatchString('1'); 70 | expect(nonDigit).toMatchString(' '); 71 | expect(nonDigit).toMatchString('#'); 72 | 73 | expect(nonWord).not.toMatchString('A'); 74 | expect(nonWord).not.toMatchString('1'); 75 | expect(nonWord).toMatchString(' '); 76 | expect(nonWord).toMatchString('#'); 77 | 78 | expect(nonWhitespace).toMatchString('A'); 79 | expect(nonWhitespace).toMatchString('1'); 80 | expect(nonWhitespace).not.toMatchString(' '); 81 | expect(nonWhitespace).toMatchString('#'); 82 | }); 83 | -------------------------------------------------------------------------------- /src/constructs/__tests__/choice-of.test.ts: -------------------------------------------------------------------------------- 1 | import { choiceOf, oneOrMore, repeat, zeroOrMore } from '../..'; 2 | 3 | test('`choiceOf` pattern', () => { 4 | expect(choiceOf('a')).toEqualRegex(/a/); 5 | expect(choiceOf('a', 'b')).toEqualRegex(/a|b/); 6 | expect(choiceOf('a', 'b', 'c')).toEqualRegex(/a|b|c/); 7 | expect(choiceOf('aaa', 'bbb')).toEqualRegex(/aaa|bbb/); 8 | }); 9 | 10 | test('`choiceOf` pattern in sequence', () => { 11 | expect(['x', choiceOf('a'), 'x']).toEqualRegex(/xax/); 12 | expect([choiceOf('a', 'b'), 'x']).toEqualRegex(/(?:a|b)x/); 13 | expect(['x', choiceOf('a', 'b')]).toEqualRegex(/x(?:a|b)/); 14 | 15 | expect(choiceOf('a', 'b', 'c')).toEqualRegex(/a|b|c/); 16 | expect(['x', choiceOf('a', 'b', 'c')]).toEqualRegex(/x(?:a|b|c)/); 17 | 
expect([choiceOf('a', 'b', 'c'), 'x']).toEqualRegex(/(?:a|b|c)x/); 18 | 19 | expect(choiceOf('aaa', 'bbb')).toEqualRegex(/aaa|bbb/); 20 | }); 21 | 22 | test('`choiceOf` pattern with sequence options', () => { 23 | expect([choiceOf(['a', 'b'])]).toEqualRegex(/ab/); 24 | expect([choiceOf(['a', 'b'], ['c', 'd'])]).toEqualRegex(/ab|cd/); 25 | expect([choiceOf(['a', zeroOrMore('b')], [oneOrMore('c'), 'd'])]).toEqualRegex(/ab*|c+d/); 26 | }); 27 | 28 | test('`choiceOf` pattern using nested regex', () => { 29 | expect(choiceOf(oneOrMore('a'), zeroOrMore('b'))).toEqualRegex(/a+|b*/); 30 | expect(choiceOf(repeat('a', { min: 1, max: 3 }), repeat('bx', 5))).toEqualRegex( 31 | /a{1,3}|(?:bx){5}/, 32 | ); 33 | }); 34 | 35 | test('`choiceOf` throws on empty options', () => { 36 | expect(() => choiceOf()).toThrowErrorMatchingInlineSnapshot( 37 | `"Expected at least one alternative"`, 38 | ); 39 | }); 40 | -------------------------------------------------------------------------------- /src/constructs/__tests__/encoder.test.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | buildPattern, 3 | buildRegExp, 4 | capture, 5 | choiceOf, 6 | oneOrMore, 7 | optional, 8 | repeat, 9 | zeroOrMore, 10 | } from '../..'; 11 | 12 | test('basic quantifies', () => { 13 | expect('a').toEqualRegex(/a/); 14 | expect(['a', 'b']).toEqualRegex(/ab/); 15 | 16 | expect(oneOrMore('a')).toEqualRegex(/a+/); 17 | expect(optional('a')).toEqualRegex(/a?/); 18 | 19 | expect(['a', oneOrMore('b')]).toEqualRegex(/ab+/); 20 | expect(['a', oneOrMore('bc')]).toEqualRegex(/a(?:bc)+/); 21 | expect(['a', oneOrMore('bc')]).toEqualRegex(/a(?:bc)+/); 22 | 23 | expect(['a', repeat('b', { min: 1, max: 5 })]).toEqualRegex(/ab{1,5}/); 24 | 25 | expect(['a', zeroOrMore('b')]).toEqualRegex(/ab*/); 26 | expect(['a', zeroOrMore('bc')]).toEqualRegex(/a(?:bc)*/); 27 | expect(['a', zeroOrMore('bc')]).toEqualRegex(/a(?:bc)*/); 28 | 29 | expect([optional('a'), 
'b']).toEqualRegex(/a?b/); 30 | 31 | expect([optional('a'), 'b', oneOrMore('d')]).toEqualRegex(/a?bd+/); 32 | }); 33 | 34 | test('`buildRegExp` escapes special characters', () => { 35 | expect('.').toEqualRegex(/\./); 36 | expect('*').toEqualRegex(/\*/); 37 | expect('+').toEqualRegex(/\+/); 38 | expect('?').toEqualRegex(/\?/); 39 | expect('^').toEqualRegex(/\^/); 40 | expect('$').toEqualRegex(/\$/); 41 | expect('{').toEqualRegex(/\{/); 42 | expect('}').toEqualRegex(/\}/); 43 | expect('|').toEqualRegex(/\|/); 44 | expect('[').toEqualRegex(/\[/); 45 | expect(']').toEqualRegex(/\]/); 46 | expect('\\').toEqualRegex(/\\/); 47 | 48 | expect('*.*').toEqualRegex(/\*\.\*/); 49 | 50 | expect([oneOrMore('.*'), zeroOrMore('[]{}')]).toEqualRegex(/(?:\.\*)+(?:\[\]\{\})*/); 51 | }); 52 | 53 | test('`buildRegExp` accepts RegExp object', () => { 54 | expect(buildRegExp(/abc/)).toEqual(/abc/); 55 | expect(buildRegExp(oneOrMore(/abc/))).toEqual(/(?:abc)+/); 56 | expect(buildRegExp(repeat(/abc/, 5))).toEqual(/(?:abc){5}/); 57 | expect(buildRegExp(capture(/abc/))).toEqual(/(abc)/); 58 | expect(buildRegExp(choiceOf(/a/, /b/))).toEqual(/a|b/); 59 | expect(buildRegExp(choiceOf(/a|b/, /c/))).toEqual(/a|b|c/); 60 | }); 61 | 62 | test('`buildRegExp` detects common atomic patterns', () => { 63 | expect(buildRegExp(/a/)).toEqual(/a/); 64 | expect(buildRegExp(/[a-z]/)).toEqual(/[a-z]/); 65 | expect(buildRegExp(/(abc)/)).toEqual(/(abc)/); 66 | expect(buildRegExp(oneOrMore(/a/))).toEqual(/a+/); 67 | expect(buildRegExp(oneOrMore(/[a-z]/))).toEqual(/[a-z]+/); 68 | expect(buildRegExp(oneOrMore(/(abc)/))).toEqual(/(abc)+/); 69 | expect(buildRegExp(repeat(/a/, 5))).toEqual(/a{5}/); 70 | expect(buildRegExp(oneOrMore(/(a|b|c)/))).toEqual(/(a|b|c)+/); 71 | }); 72 | 73 | test('`buildRegExp` throws error on unknown element', () => { 74 | expect(() => 75 | // @ts-expect-error intentionally passing incorrect object 76 | buildRegExp({ type: 'unknown' }), 77 | ).toThrowErrorMatchingInlineSnapshot(` 78 | 
"Unsupported element. Received: { 79 | "type": "unknown" 80 | }" 81 | `); 82 | }); 83 | 84 | test('`buildPattern` throws on empty text', () => { 85 | expect(() => buildPattern('')).toThrowErrorMatchingInlineSnapshot( 86 | `"Expected at least one character"`, 87 | ); 88 | }); 89 | -------------------------------------------------------------------------------- /src/constructs/__tests__/lookahead.test.ts: -------------------------------------------------------------------------------- 1 | import { capture, digit, lookahead, oneOrMore, word, zeroOrMore } from '../../index'; 2 | 3 | test('`lookahead` pattern', () => { 4 | expect(lookahead('a')).toEqualRegex(/(?=a)/); 5 | expect([digit, lookahead('abc')]).toEqualRegex(/\d(?=abc)/); 6 | expect(lookahead(oneOrMore('abc'))).toEqualRegex(/(?=(?:abc)+)/); 7 | expect([zeroOrMore(word), lookahead('abc')]).toEqualRegex(/\w*(?=abc)/); 8 | }); 9 | 10 | test('`lookahead` matching', () => { 11 | expect([oneOrMore(digit), lookahead('$')]).toMatchString('1 turkey costs 30$'); 12 | expect(['q', lookahead('u')]).toMatchString('queen'); 13 | expect(['a', lookahead('b'), lookahead('c')]).not.toMatchString('abc'); 14 | expect(['a', lookahead(capture('bba'))]).toMatchGroups('abba', ['a', 'bba']); 15 | }); 16 | 17 | test('`lookahead` matching with multiple elements', () => { 18 | expect(lookahead(['a', 'b', 'c'])).toEqualRegex(/(?=abc)/); 19 | }); 20 | 21 | test('`lookahead` matching with nested constructs', () => { 22 | expect(lookahead(oneOrMore(capture('abc')))).toEqualRegex(/(?=(abc)+)/); 23 | expect(lookahead([zeroOrMore(word), capture('abc')])).toEqualRegex(/(?=\w*(abc))/); 24 | }); 25 | 26 | test('`lookahead` matching with special characters', () => { 27 | expect(lookahead(['$', capture('abc')])).toEqualRegex(/(?=\$(abc))/); 28 | expect(lookahead(['q', capture('u')])).toEqualRegex(/(?=q(u))/); 29 | }); 30 | 31 | test('`lookahead` matching with capture group', () => { 32 | expect(lookahead(capture('bba'))).toEqualRegex(/(?=(bba))/); 
33 | }); 34 | 35 | test('`lookahead` matching with digit character class', () => { 36 | expect(lookahead([digit, 'abc'])).toEqualRegex(/(?=\dabc)/); 37 | }); 38 | -------------------------------------------------------------------------------- /src/constructs/__tests__/lookbehind.test.ts: -------------------------------------------------------------------------------- 1 | import { anyOf, digit, lookbehind, oneOrMore, whitespace, word, zeroOrMore } from '../../index'; 2 | 3 | test('`lookbehind` pattern', () => { 4 | expect(lookbehind('a')).toEqualRegex(/(?<=a)/); 5 | expect(lookbehind('abc')).toEqualRegex(/(?<=abc)/); 6 | expect(lookbehind(oneOrMore('abc'))).toEqualRegex(/(?<=(?:abc)+)/); 7 | expect(lookbehind('abc')).toEqualRegex(/(?<=abc)/); 8 | }); 9 | 10 | test('`lookbehind` matching', () => { 11 | expect([zeroOrMore(whitespace), word, lookbehind('s'), oneOrMore(whitespace)]).toMatchString( 12 | 'too many cats to feed.', 13 | ); 14 | 15 | expect([lookbehind('USD'), zeroOrMore(whitespace), oneOrMore(digit)]).toMatchString( 16 | 'The price is USD 30', 17 | ); 18 | 19 | expect([lookbehind('USD'), zeroOrMore(whitespace), oneOrMore(digit)]).not.toMatchString( 20 | 'The price is CDN 30', 21 | ); 22 | 23 | expect([lookbehind('a'), 'b']).toMatchString('abba'); 24 | 25 | const mjsImport = [lookbehind('.mjs')]; 26 | expect(mjsImport).toMatchString("import {Person} from './person.mjs';"); 27 | expect(mjsImport).not.toMatchString("import {Person} from './person.js';"); 28 | expect([anyOf('+-'), oneOrMore(digit), lookbehind('-')]).not.toMatchString('+123'); 29 | }); 30 | 31 | test('`lookbehind` matching with multiple elements', () => { 32 | expect(lookbehind(['abc', 'def'])).toEqualRegex(/(?<=abcdef)/); 33 | expect(lookbehind([oneOrMore('abc'), 'def'])).toEqualRegex(/(?<=(?:abc)+def)/); 34 | expect(lookbehind(['abc', oneOrMore('def')])).toEqualRegex(/(?<=abc(?:def)+)/); 35 | }); 36 | 37 | test('`lookbehind` matching with special characters', () => { 38 | 
expect(lookbehind(['$', '+'])).toEqualRegex(/(?<=\$\+)/); 39 | expect(lookbehind(['[', ']'])).toEqualRegex(/(?<=\[\])/); 40 | expect(lookbehind(['\\', '\\'])).toEqualRegex(/(?<=\\\\)/); 41 | }); 42 | 43 | test('`lookbehind` matching with quantifiers', () => { 44 | expect(lookbehind(zeroOrMore('abc'))).toEqualRegex(/(?<=(?:abc)*)/); 45 | expect(lookbehind(oneOrMore('abc'))).toEqualRegex(/(?<=(?:abc)+)/); 46 | expect(lookbehind(['abc', zeroOrMore('def')])).toEqualRegex(/(?<=abc(?:def)*)/); 47 | }); 48 | 49 | test('`lookbehind` matching with character classes', () => { 50 | expect(lookbehind(word)).toEqualRegex(/(?<=\w)/); 51 | expect(lookbehind(whitespace)).toEqualRegex(/(?<=\s)/); 52 | expect(lookbehind(digit)).toEqualRegex(/(?<=\d)/); 53 | expect(lookbehind(anyOf('abc'))).toEqualRegex(/(?<=[abc])/); 54 | }); 55 | -------------------------------------------------------------------------------- /src/constructs/__tests__/negative-lookahead.test.ts: -------------------------------------------------------------------------------- 1 | import { anyOf, capture, digit, negativeLookahead, oneOrMore, zeroOrMore } from '../../index'; 2 | 3 | test('`negativeLookahead` pattern', () => { 4 | expect(negativeLookahead('a')).toEqualRegex(/(?!a)/); 5 | expect(negativeLookahead('abc')).toEqualRegex(/(?!abc)/); 6 | expect(negativeLookahead(oneOrMore('abc'))).toEqualRegex(/(?!(?:abc)+)/); 7 | expect(oneOrMore(negativeLookahead('abc'))).toEqualRegex(/(?!abc)+/); 8 | }); 9 | 10 | test('`negativeLookahead` matching', () => { 11 | expect([negativeLookahead('$'), oneOrMore(digit)]).toMatchString('1 turkey costs 30$'); 12 | expect([negativeLookahead('a'), 'b']).toMatchString('abba'); 13 | expect(['a', negativeLookahead(capture('bba'))]).not.toMatchGroups('abba', ['a', 'bba']); 14 | expect([negativeLookahead('-'), anyOf('+-'), zeroOrMore(digit)]).not.toMatchString('-123'); 15 | expect([negativeLookahead('-'), anyOf('+-'), zeroOrMore(digit)]).toMatchString('+123'); 16 | }); 17 | 18 | 
test('`negativeLookahead` matching with multiple elements', () => { 19 | expect(negativeLookahead(['abc', 'def'])).toEqualRegex(/(?!abcdef)/); 20 | expect(negativeLookahead([oneOrMore('abc'), 'def'])).toEqualRegex(/(?!(?:abc)+def)/); 21 | expect(negativeLookahead(['abc', oneOrMore('def')])).toEqualRegex(/(?!abc(?:def)+)/); 22 | }); 23 | 24 | test('`negativeLookahead` matching with special characters', () => { 25 | expect(negativeLookahead(['$', '+'])).toEqualRegex(/(?!\$\+)/); 26 | expect(negativeLookahead(['[', ']'])).toEqualRegex(/(?!\[\])/); 27 | expect(negativeLookahead(['\\', '\\'])).toEqualRegex(/(?!\\\\)/); 28 | }); 29 | 30 | test('`negativeLookahead` matching with quantifiers', () => { 31 | expect(negativeLookahead(zeroOrMore('abc'))).toEqualRegex(/(?!(?:abc)*)/); 32 | expect(negativeLookahead(oneOrMore('abc'))).toEqualRegex(/(?!(?:abc)+)/); 33 | expect(negativeLookahead(['abc', zeroOrMore('def')])).toEqualRegex(/(?!abc(?:def)*)/); 34 | }); 35 | -------------------------------------------------------------------------------- /src/constructs/__tests__/negative-lookbehind.test.ts: -------------------------------------------------------------------------------- 1 | import { negativeLookbehind } from '../negative-lookbehind'; 2 | import { oneOrMore } from '../quantifiers'; 3 | 4 | test('`negativeLookbehind` pattern', () => { 5 | expect(negativeLookbehind('a')).toEqualRegex(/(? { 11 | expect(negativeLookbehind('abc')).toEqualRegex(/(? { 17 | expect(negativeLookbehind(oneOrMore('abc'))).toEqualRegex(/(? { 23 | expect(negativeLookbehind('-')).toEqualRegex(/(? 
{ 4 | expect(oneOrMore('a')).toEqualRegex(/a+/); 5 | expect(oneOrMore('ab')).toEqualRegex(/(?:ab)+/); 6 | }); 7 | 8 | test('`optional` quantifier pattern', () => { 9 | expect(optional('a')).toEqualRegex(/a?/); 10 | expect(optional('ab')).toEqualRegex(/(?:ab)?/); 11 | }); 12 | 13 | test('`zeroOrMore` quantifier pattern', () => { 14 | expect(zeroOrMore('a')).toEqualRegex(/a*/); 15 | expect(zeroOrMore('ab')).toEqualRegex(/(?:ab)*/); 16 | }); 17 | 18 | test('`oneOrMore` matching does not generate capture when grouping', () => { 19 | expect(oneOrMore('aa')).toMatchGroups('aa', ['aa']); 20 | }); 21 | 22 | test('`optional` matching does not generate capture when grouping', () => { 23 | expect(optional('aa')).toMatchGroups('aa', ['aa']); 24 | }); 25 | 26 | test('`zeroOrMore` matching does not generate capture when grouping', () => { 27 | expect(zeroOrMore('aa')).toMatchGroups('aa', ['aa']); 28 | }); 29 | 30 | test('base quantifiers patterns optimize grouping for atoms', () => { 31 | expect(oneOrMore(digit)).toEqualRegex(/\d+/); 32 | expect(optional(digit)).toEqualRegex(/\d?/); 33 | expect(zeroOrMore(digit)).toEqualRegex(/\d*/); 34 | 35 | expect(oneOrMore('a')).toEqualRegex(/a+/); 36 | expect(optional('a')).toEqualRegex(/a?/); 37 | expect(zeroOrMore('a')).toEqualRegex(/a*/); 38 | }); 39 | 40 | test('greedy quantifiers patterns', () => { 41 | expect(oneOrMore('a', { greedy: true })).toEqualRegex(/a+/); 42 | expect(oneOrMore('ab', { greedy: true })).toEqualRegex(/(?:ab)+/); 43 | 44 | expect(optional('a', { greedy: true })).toEqualRegex(/a?/); 45 | expect(optional('ab', { greedy: true })).toEqualRegex(/(?:ab)?/); 46 | 47 | expect(zeroOrMore('a', { greedy: true })).toEqualRegex(/a*/); 48 | expect(zeroOrMore('ab', { greedy: true })).toEqualRegex(/(?:ab)*/); 49 | }); 50 | 51 | test('non-greedy quantifiers patterns', () => { 52 | expect(oneOrMore('a', { greedy: false })).toEqualRegex(/a+?/); 53 | expect(oneOrMore('ab', { greedy: false })).toEqualRegex(/(?:ab)+?/); 54 | 55 | 
expect(optional('a', { greedy: false })).toEqualRegex(/a??/); 56 | expect(optional('ab', { greedy: false })).toEqualRegex(/(?:ab)??/); 57 | 58 | expect(zeroOrMore('a', { greedy: false })).toEqualRegex(/a*?/); 59 | expect(zeroOrMore('ab', { greedy: false })).toEqualRegex(/(?:ab)*?/); 60 | }); 61 | 62 | test('greedy quantifiers matching', () => { 63 | const html = '
Hello World!
'; 64 | 65 | const greedyTag = buildRegExp(['<', oneOrMore(any), '>'], { global: true }); 66 | expect(greedyTag).toMatchGroups(html, ['
Hello World!
']); 67 | }); 68 | 69 | test('non-greedy quantifiers matching', () => { 70 | const html = '
Hello World!
'; 71 | 72 | const nonGreedyTag = buildRegExp(['<', oneOrMore(any, { greedy: false }), '>'], { global: true }); 73 | expect(nonGreedyTag).toMatchGroups(html, ['
', '', '', '
']); 74 | }); 75 | -------------------------------------------------------------------------------- /src/constructs/__tests__/regex.test.tsx: -------------------------------------------------------------------------------- 1 | import { regex } from '../..'; 2 | 3 | test('`regex` no-op pattern', () => { 4 | expect(regex('a')).toEqualRegex(/a/); 5 | expect(regex(['a', 'b'])).toEqualRegex(/ab/); 6 | expect([regex('a'), regex(['b', 'c'])]).toEqualRegex(/abc/); 7 | }); 8 | -------------------------------------------------------------------------------- /src/constructs/__tests__/repeat.test.tsx: -------------------------------------------------------------------------------- 1 | import { digit, oneOrMore, repeat, zeroOrMore } from '../..'; 2 | 3 | test('`repeat` quantifier pattern', () => { 4 | expect(['a', repeat('b', { min: 1, max: 5 })]).toEqualRegex(/ab{1,5}/); 5 | expect(['a', repeat('b', { min: 1 })]).toEqualRegex(/ab{1,}/); 6 | expect(['a', repeat('b', 1)]).toEqualRegex(/ab{1}/); 7 | 8 | expect(['a', repeat(['a', zeroOrMore('b')], 1)]).toEqualRegex(/a(?:ab*){1}/); 9 | expect(repeat(['text', ' ', oneOrMore('d')], 5)).toEqualRegex(/(?:text d+){5}/); 10 | }); 11 | 12 | test('`repeat` pattern optimizes grouping for atoms', () => { 13 | expect(repeat(digit, 2)).toEqualRegex(/\d{2}/); 14 | expect(repeat(digit, { min: 2 })).toEqualRegex(/\d{2,}/); 15 | expect(repeat(digit, { min: 1, max: 5 })).toEqualRegex(/\d{1,5}/); 16 | }); 17 | 18 | test('`repeat` throws on no children', () => { 19 | expect(() => repeat([], 1)).toThrowErrorMatchingInlineSnapshot(`"Expected at least one element"`); 20 | }); 21 | 22 | test('greedy `repeat` quantifier pattern', () => { 23 | expect(repeat('a', { min: 1, greedy: true })).toEqualRegex(/a{1,}/); 24 | expect(repeat('a', { min: 1, max: 5, greedy: true })).toEqualRegex(/a{1,5}/); 25 | }); 26 | 27 | test('non-greedy `repeat` quantifier pattern', () => { 28 | expect(repeat('a', { min: 1, greedy: false })).toEqualRegex(/a{1,}?/); 29 | 
expect(repeat('a', { min: 1, max: 5, greedy: false })).toEqualRegex(/a{1,5}?/); 30 | }); 31 | -------------------------------------------------------------------------------- /src/constructs/__tests__/unicode.test.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | buildRegExp, 3 | char, 4 | charClass, 5 | endOfString, 6 | type RegexSequence, 7 | startOfString, 8 | unicodeChar, 9 | unicodeProperty, 10 | } from '../..'; 11 | 12 | function u(sequence: RegexSequence) { 13 | return buildRegExp(sequence, { unicode: true }); 14 | } 15 | 16 | test('`unicodeChar` pattern', () => { 17 | // eslint-disable-next-line no-control-regex 18 | expect(unicodeChar(0)).toEqualRegex(/\u0000/); 19 | // eslint-disable-next-line no-control-regex 20 | expect(unicodeChar(0x1)).toEqualRegex(/\u0001/); 21 | // eslint-disable-next-line no-control-regex 22 | expect(unicodeChar(0x12)).toEqualRegex(/\u0012/); 23 | expect(unicodeChar(0x123)).toEqualRegex(/\u0123/); 24 | expect(unicodeChar(0x1234)).toEqualRegex(/\u1234/); 25 | 26 | // eslint-disable-next-line no-control-regex 27 | expect(u(unicodeChar(0))).toEqualRegex(new RegExp('\\u0000', 'u')); 28 | // eslint-disable-next-line no-control-regex 29 | expect(u(unicodeChar(0x1))).toEqualRegex(new RegExp('\\u0001', 'u')); 30 | expect(u(unicodeChar(0x12))).toEqualRegex( 31 | // eslint-disable-next-line no-control-regex 32 | new RegExp('\\u0012', 'u'), 33 | ); 34 | expect(unicodeChar(0x0123)).toEqualRegex(/\u0123/); 35 | expect(unicodeChar(0x1234)).toEqualRegex(/\u1234/); 36 | 37 | expect(u(unicodeChar(0x0123))).toEqualRegex(/\u0123/u); 38 | expect(u(unicodeChar(0x1234))).toEqualRegex(/\u1234/u); 39 | expect(u(unicodeChar(0x12345))).toEqualRegex(new RegExp('\\u{12345}', 'u')); 40 | expect(u(unicodeChar(0x103456))).toEqualRegex(new RegExp('\\u{103456}', 'u')); 41 | }); 42 | 43 | test('`unicodeChar` matching', () => { 44 | expect(unicodeChar(0)).toMatchString('\u{0}'); 45 | 
expect(unicodeChar(0x1)).toMatchString('\u{1}'); 46 | expect(unicodeChar(0x12)).toMatchString('\u{12}}'); 47 | expect(unicodeChar(0x123)).toMatchString('\u{123}'); 48 | expect(unicodeChar(0x1234)).toMatchString('\u{1234}}'); 49 | 50 | expect(unicodeChar('a'.codePointAt(0)!)).toMatchString('a'); 51 | expect(unicodeChar('ą'.codePointAt(0)!)).toMatchString('ą'); 52 | expect(unicodeChar('©'.codePointAt(0)!)).toMatchString('©'); 53 | 54 | expect(u(unicodeChar(0))).toMatchString('\u{0}'); 55 | expect(u(unicodeChar(0))).not.toMatchString('a'); 56 | expect(u(unicodeChar(0x1))).toMatchString('\u{1}'); 57 | expect(u(unicodeChar(0x12))).toMatchString('\u{12}'); 58 | expect(u(unicodeChar(0x123))).toMatchString('\u{123}'); 59 | expect(u(unicodeChar(0x1234))).toMatchString('\u{1234}'); 60 | expect(u(unicodeChar(0x12345))).toMatchString('\u{12345}'); 61 | expect(u(unicodeChar(0x103456))).toMatchString('\u{103456}'); 62 | 63 | expect(u(unicodeChar('a'.codePointAt(0)!))).toMatchString('a'); 64 | expect(u(unicodeChar('ą'.codePointAt(0)!))).toMatchString('ą'); 65 | expect(u(unicodeChar('©'.codePointAt(0)!))).toMatchString('©'); 66 | expect(u(unicodeChar('😎'.codePointAt(0)!))).toMatchString('😎'); 67 | expect(u(unicodeChar('😎'.codePointAt(0)!))).toMatchString('\u{1f60e}'); 68 | }); 69 | 70 | test('`unicodeChar` nesting matching', () => { 71 | expect( 72 | u(charClass(unicodeChar('a'.codePointAt(0)!), unicodeChar('ą'.codePointAt(0)!))), 73 | ).toMatchString('a'); 74 | expect( 75 | u(charClass(unicodeChar('a'.codePointAt(0)!), unicodeChar('ą'.codePointAt(0)!))), 76 | ).toMatchString('ą'); 77 | expect( 78 | u(charClass(unicodeChar('a'.codePointAt(0)!), unicodeChar('ą'.codePointAt(0)!))), 79 | ).not.toMatchString('b'); 80 | }); 81 | 82 | test('`unicodeChar` edge cases handling', () => { 83 | expect(() => u(unicodeChar(NaN))).toThrowErrorMatchingInlineSnapshot( 84 | `"Expected a valid unicode code point but received NaN"`, 85 | ); 86 | expect(() => 
u(unicodeChar(1.5))).toThrowErrorMatchingInlineSnapshot( 87 | `"Expected a valid unicode code point but received 1.5"`, 88 | ); 89 | expect(() => u(unicodeChar(-1))).toThrowErrorMatchingInlineSnapshot( 90 | `"Expected a valid unicode code point but received -1"`, 91 | ); 92 | expect(() => u(unicodeChar(0x110000))).toThrowErrorMatchingInlineSnapshot( 93 | `"Expected a valid unicode code point but received 1114112"`, 94 | ); 95 | 96 | expect(u(unicodeChar(0x10ffff))).toEqualRegex(/\u{10ffff}/u); 97 | }); 98 | 99 | test('"char" alias', () => { 100 | expect(char('a'.codePointAt(0)!)).toEqualRegex(/\u0061/); 101 | }); 102 | 103 | test('`unicodeProperty` pattern', () => { 104 | expect(u(unicodeProperty('General_Category', 'Letter'))).toEqualRegex( 105 | /\p{General_Category=Letter}/u, 106 | ); 107 | expect(u(unicodeProperty('Letter'))).toEqualRegex(/\p{Letter}/u); 108 | expect(u(unicodeProperty('L'))).toEqualRegex(/\p{L}/u); 109 | expect(u(unicodeProperty('Lu'))).toEqualRegex(/\p{Lu}/u); 110 | expect(u(unicodeProperty('Ll'))).toEqualRegex(/\p{Ll}/u); 111 | expect(u(unicodeProperty('Lt'))).toEqualRegex(/\p{Lt}/u); 112 | expect(u(unicodeProperty('Lm'))).toEqualRegex(/\p{Lm}/u); 113 | expect(u(unicodeProperty('Lo'))).toEqualRegex(/\p{Lo}/u); 114 | 115 | expect(u(unicodeProperty('Script', 'Latin'))).toEqualRegex('\\p{Script=Latin}'); 116 | expect(u(unicodeProperty('Script', 'Grek'))).toEqualRegex('\\p{Script=Grek}'); 117 | expect(u(unicodeProperty('sc', 'Cyrillic'))).toEqualRegex('\\p{sc=Cyrillic}'); 118 | 119 | expect(u(unicodeProperty('Script', 'Thaana'))).toEqualRegex('\\p{Script=Thaana}'); 120 | expect(u(unicodeProperty('Script_Extensions', 'Thaana'))).toEqualRegex( 121 | '\\p{Script_Extensions=Thaana}', 122 | ); 123 | expect(u(unicodeProperty('scx', 'Thaana'))).toEqualRegex('\\p{scx=Thaana}'); 124 | 125 | expect(u(unicodeProperty('Emoji'))).toEqualRegex('\\p{Emoji}'); 126 | }); 127 | 128 | test('`unicodeProperty` matching', () => { 129 | 
expect(u(unicodeProperty('General_Category', 'Letter'))).toMatchString('A'); 130 | expect(u(unicodeProperty('Letter'))).toMatchString('A'); 131 | expect(u(unicodeProperty('L'))).toMatchString('A'); 132 | 133 | expect(u(unicodeProperty('Uppercase'))).toMatchString('A'); 134 | expect(u(unicodeProperty('Uppercase'))).not.toMatchString('a'); 135 | expect(u(unicodeProperty('Lu'))).toMatchString('A'); 136 | 137 | expect(u(unicodeProperty('Lowercase'))).toMatchString('a'); 138 | expect(u(unicodeProperty('Lowercase'))).not.toMatchString('A'); 139 | expect(u(unicodeProperty('Ll'))).toMatchString('a'); 140 | 141 | expect(u(unicodeProperty('Script', 'Latin'))).toMatchString('A'); 142 | expect(u(unicodeProperty('Script', 'Latin'))).not.toMatchString('α'); 143 | expect(u(unicodeProperty('Script', 'Grek'))).toMatchString('α'); 144 | expect(u(unicodeProperty('Script', 'Grek'))).not.toMatchString('A'); 145 | 146 | // Basic emoji 147 | expect(u([startOfString, unicodeProperty('Emoji'), endOfString])).toMatchString('😎'); 148 | expect(u([startOfString, unicodeProperty('Emoji'), endOfString])).toMatchString('🐌'); 149 | 150 | // Complex emoji with skin tone modifier 151 | expect(u(unicodeProperty('Emoji'))).toMatchString('☝🏼'); 152 | expect(u([startOfString, unicodeProperty('Emoji'), endOfString])).not.toMatchString('☝🏼'); 153 | }); 154 | 155 | test('`unicodeProperty` nesting matching', () => { 156 | expect(u(charClass(unicodeProperty('Lowercase'), unicodeProperty('White_Space')))).toMatchString( 157 | 'a', 158 | ); 159 | expect(u(charClass(unicodeProperty('Lowercase'), unicodeProperty('White_Space')))).toMatchString( 160 | ' ', 161 | ); 162 | expect( 163 | u(charClass(unicodeProperty('Lowercase'), unicodeProperty('White_Space'))), 164 | ).not.toMatchString('A'); 165 | }); 166 | -------------------------------------------------------------------------------- /src/constructs/anchors.ts: -------------------------------------------------------------------------------- 1 | import type { 
EncodedRegex } from '../types'; 2 | 3 | /** 4 | * Start of string anchor. Matches the start of a string. In `multiline` mode, also matches immediately following a newline. 5 | */ 6 | export const startOfString: EncodedRegex = { 7 | precedence: 'atom', 8 | pattern: '^', 9 | }; 10 | 11 | /** 12 | * End of string anchor. Matches the end of a string. In `multiline` mode, also matches immediately preceding a newline. 13 | */ 14 | export const endOfString: EncodedRegex = { 15 | precedence: 'atom', 16 | pattern: '$', 17 | }; 18 | 19 | /** 20 | * Word boundary anchor. Matches the position where one side is a word character (alphanumeric or underscore) and the other side is a non-word character (anything else). 21 | */ 22 | export const wordBoundary: EncodedRegex = { 23 | precedence: 'atom', 24 | pattern: '\\b', 25 | }; 26 | 27 | /** 28 | * Non-word boundary anchor. Matches the position where both sides are word characters or both sides are non-word characters. 29 | */ 30 | export const nonWordBoundary: EncodedRegex = { 31 | precedence: 'atom', 32 | pattern: '\\B', 33 | }; 34 | 35 | /** 36 | * @deprecated Renamed to `nonWordBoundary`. 37 | */ 38 | export const notWordBoundary = nonWordBoundary; 39 | -------------------------------------------------------------------------------- /src/constructs/capture.ts: -------------------------------------------------------------------------------- 1 | import { encode } from '../encoder'; 2 | import type { EncodedRegex, RegexSequence } from '../types'; 3 | 4 | export type CaptureOptions = { 5 | /** 6 | * Name to be given to the capturing group. 
7 | */ 8 | name?: string; 9 | }; 10 | 11 | export interface Reference extends EncodedRegex { 12 | name: string; 13 | } 14 | 15 | /** 16 | * Creates a capturing group which allows the matched pattern to be available: 17 | * - in the match results (`String.match`, `String.matchAll`, or `RegExp.exec`) 18 | * - in the regex itself, through {@link ref} 19 | */ 20 | export function capture(sequence: RegexSequence, options?: CaptureOptions): EncodedRegex { 21 | const name = options?.name; 22 | if (name) { 23 | return { 24 | precedence: 'atom', 25 | pattern: `(?<${name}>${encode(sequence).pattern})`, 26 | }; 27 | } 28 | 29 | return { 30 | precedence: 'atom', 31 | pattern: `(${encode(sequence).pattern})`, 32 | }; 33 | } 34 | 35 | /** 36 | * Creates a reference, also known as backreference, which allows matching 37 | * again the exact text that a capturing group previously matched. 38 | * 39 | * In order to form a valid regex, the reference must use the same name as 40 | * a capturing group earlier in the expression. 41 | * 42 | * @param name - Name of the capturing group to reference. 43 | */ 44 | export function ref(name: string): Reference { 45 | return { 46 | precedence: 'atom', 47 | pattern: `\\k<${name}>`, 48 | name, 49 | }; 50 | } 51 | -------------------------------------------------------------------------------- /src/constructs/char-class.ts: -------------------------------------------------------------------------------- 1 | import type { CharacterClass, CharacterEscape, EncodedRegex } from '../types'; 2 | import { ensureText } from '../utils'; 3 | 4 | /** 5 | * Creates a character class which matches any one of the given characters. 6 | * 7 | * @param elements - Member characters or character ranges. 8 | * @returns Character class. 
9 | */ 10 | export function charClass(...elements: Array): CharacterClass { 11 | if (!elements.length) { 12 | throw new Error('Expected at least one element'); 13 | } 14 | 15 | return { 16 | elements: elements.map((c) => c.elements).flat(), 17 | encode: encodeCharClass, 18 | }; 19 | } 20 | 21 | /** 22 | * Creates a character class which matches any one of the characters in the range. 23 | * 24 | * @param start - Start of the range (single character). 25 | * @param end - End of the range (single character). 26 | * @returns Character class. 27 | */ 28 | export function charRange(start: string, end: string): CharacterClass { 29 | if (start.length !== 1 || end.length !== 1) { 30 | throw new Error(`Expected single characters, but received "${start}" & "${end}"`); 31 | } 32 | 33 | if (start > end) { 34 | [start, end] = [end, start]; 35 | } 36 | 37 | return { 38 | elements: [`${start}-${end}`], 39 | encode: encodeCharClass, 40 | }; 41 | } 42 | 43 | /** 44 | * Creates a character class which matches any one of the given characters. 45 | * 46 | * @param chars - Characters to match. 47 | * @returns Character class. 48 | */ 49 | export function anyOf(chars: string): CharacterClass { 50 | ensureText(chars); 51 | 52 | return { 53 | elements: chars.split('').map(escapeChar), 54 | encode: encodeCharClass, 55 | }; 56 | } 57 | 58 | /** 59 | * Creates a negated character class which matches any character that is not in the given character class. 60 | * 61 | * @param element - Character class or character escape to negate. 62 | * @returns Negated character class. 63 | */ 64 | export function negated(element: CharacterClass | CharacterEscape): EncodedRegex { 65 | return encodeCharClass.call(element, true); 66 | } 67 | 68 | /** 69 | * @deprecated Renamed to `negated`. 
70 | */ 71 | export const inverted = negated; 72 | 73 | /** Escape chars for usage inside char class */ 74 | function escapeChar(text: string): string { 75 | // anyOf(']-\\^') 76 | return text.replace(/[\]\-\\^]/g, '\\$&'); // "$&" is whole matched string 77 | } 78 | 79 | function encodeCharClass( 80 | this: CharacterClass | CharacterEscape, 81 | isNegated?: boolean, 82 | ): EncodedRegex { 83 | return { 84 | precedence: 'atom', 85 | pattern: `[${isNegated ? '^' : ''}${this.elements.join('')}]`, 86 | }; 87 | } 88 | -------------------------------------------------------------------------------- /src/constructs/char-escape.ts: -------------------------------------------------------------------------------- 1 | import type { CharacterEscape, EncodedRegex } from '../types'; 2 | 3 | /** 4 | * Matches any single character. 5 | * Specifically this one is NOT a character escape. 6 | */ 7 | export const any: EncodedRegex = { 8 | precedence: 'atom', 9 | pattern: '.', 10 | }; 11 | 12 | /** 13 | * Matches any digit (0-9). 14 | */ 15 | export const digit: CharacterEscape = { 16 | precedence: 'atom', 17 | pattern: '\\d', 18 | elements: ['\\d'], 19 | }; 20 | 21 | /** 22 | * Matches any non-digit (0-9) character. 23 | */ 24 | export const nonDigit: CharacterEscape = { 25 | precedence: 'atom', 26 | pattern: '\\D', 27 | elements: ['\\D'], 28 | }; 29 | 30 | /** 31 | * Matches any word character (alphanumeric or underscore). 32 | */ 33 | export const word: CharacterEscape = { 34 | precedence: 'atom', 35 | pattern: '\\w', 36 | elements: ['\\w'], 37 | }; 38 | 39 | /** 40 | * Matches any non-word (alphanumeric or underscore) character. 41 | */ 42 | export const nonWord: CharacterEscape = { 43 | precedence: 'atom', 44 | pattern: '\\W', 45 | elements: ['\\W'], 46 | }; 47 | 48 | /** 49 | * Matches any whitespace character (space, tab, newline, etc.). 
50 | */ 51 | export const whitespace: CharacterEscape = { 52 | precedence: 'atom', 53 | pattern: '\\s', 54 | elements: ['\\s'], 55 | }; 56 | 57 | /** 58 | * Matches any non-whitespace (space, tab, newline, etc.) character. 59 | */ 60 | export const nonWhitespace: CharacterEscape = { 61 | precedence: 'atom', 62 | pattern: '\\S', 63 | elements: ['\\S'], 64 | }; 65 | 66 | /** 67 | * @deprecated Renamed to `nonDigit`. 68 | */ 69 | export const notDigit = nonDigit; 70 | 71 | /** 72 | * @deprecated Renamed to `nonWord`. 73 | */ 74 | export const notWord = nonWord; 75 | 76 | /** 77 | * @deprecated Renamed to `nonWhitespace`. 78 | */ 79 | export const notWhitespace = nonWhitespace; 80 | -------------------------------------------------------------------------------- /src/constructs/choice-of.ts: -------------------------------------------------------------------------------- 1 | import { encode } from '../encoder'; 2 | import type { EncodedRegex, RegexSequence } from '../types'; 3 | 4 | /** 5 | * Creates a disjunction (choice of) which matches any of the alternatives. 6 | * 7 | * @param alternatives - Alternatives to choose from. 8 | * @returns Choice of alternatives. 
9 | */ 10 | export function choiceOf(...alternatives: RegexSequence[]): EncodedRegex { 11 | if (alternatives.length === 0) { 12 | throw new Error('Expected at least one alternative'); 13 | } 14 | 15 | const encodedAlternatives = alternatives.map((c) => encode(c)); 16 | if (encodedAlternatives.length === 1) { 17 | return encodedAlternatives[0]!; 18 | } 19 | 20 | return { 21 | precedence: 'disjunction', 22 | pattern: encodedAlternatives.map((n) => n.pattern).join('|'), 23 | }; 24 | } 25 | -------------------------------------------------------------------------------- /src/constructs/lookahead.ts: -------------------------------------------------------------------------------- 1 | import { encode } from '../encoder'; 2 | import type { EncodedRegex, RegexSequence } from '../types'; 3 | 4 | /** 5 | * Positive lookahead assertion. 6 | * 7 | * A positive lookahead assertion is a zero-width assertion that matches a group of characters only if it is followed by a specific group of characters. 8 | * 9 | * @example 10 | * ```ts 11 | * lookahead("a"); 12 | * // /(?=a)/ 13 | * 14 | * lookahead(["a", "b", "c"]); 15 | * // /(?=abc)/ 16 | * ``` 17 | */ 18 | export function lookahead(sequence: RegexSequence): EncodedRegex { 19 | return { 20 | precedence: 'atom', 21 | pattern: `(?=${encode(sequence).pattern})`, 22 | }; 23 | } 24 | -------------------------------------------------------------------------------- /src/constructs/lookbehind.ts: -------------------------------------------------------------------------------- 1 | import { encode } from '../encoder'; 2 | import type { EncodedRegex, RegexSequence } from '../types'; 3 | 4 | /** 5 | * Positive lookbehind assertion. 6 | * 7 | * A positive lookbehind assertion is a zero-width assertion that matches a group of characters only if it is preceded by a specific group of characters. 
8 | * 9 | * @example 10 | * ```ts 11 | * lookbehind("a"); 12 | * // /(?<=a)/ 13 | * 14 | * lookbehind(["a", "b", "c"]); 15 | * // /(?<=abc)/ 16 | * ``` 17 | */ 18 | export function lookbehind(sequence: RegexSequence): EncodedRegex { 19 | return { 20 | precedence: 'atom', 21 | pattern: `(?<=${encode(sequence).pattern})`, 22 | }; 23 | } 24 | -------------------------------------------------------------------------------- /src/constructs/negative-lookahead.ts: -------------------------------------------------------------------------------- 1 | import { encode } from '../encoder'; 2 | import type { EncodedRegex, RegexSequence } from '../types'; 3 | 4 | /** 5 | * Negative lookahead assertion. 6 | * 7 | * A negative lookahead assertion is a zero-width assertion that matches a group of characters only if it is not followed by a specific group of characters. 8 | * 9 | * @example 10 | * ```ts 11 | * negativeLookahead("a"); 12 | * // /(?!a)/ 13 | * 14 | * negativeLookahead(["a", "b", "c"]); 15 | * // /(?!abc)/ 16 | * ``` 17 | */ 18 | export function negativeLookahead(sequence: RegexSequence): EncodedRegex { 19 | return { 20 | precedence: 'atom', 21 | pattern: `(?!${encode(sequence).pattern})`, 22 | }; 23 | } 24 | -------------------------------------------------------------------------------- /src/constructs/negative-lookbehind.ts: -------------------------------------------------------------------------------- 1 | import { encode } from '../encoder'; 2 | import type { EncodedRegex, RegexSequence } from '../types'; 3 | 4 | /** 5 | * Negative lookbehind assertion. 6 | * 7 | * A negative lookbehind assertion is a zero-width assertion that matches a group of characters only if it is not preceded by a specific group of characters. 8 | * 9 | * @example 10 | * ```ts 11 | * negativeLookbehind("a"); 12 | * // /(? 0x10ffff) { 17 | throw new RangeError(`Expected a valid unicode code point but received ${codePoint}`); 18 | } 19 | 20 | let escape = 21 | codePoint < 0x10000 22 | ? 
`\\u${codePoint.toString(16).padStart(4, '0')}` // 4-digit hex (works in all modes) 23 | : `\\u{${codePoint.toString(16)}}`; // 1-6 digit hex (requires unicode-aware mode) 24 | 25 | return { 26 | precedence: 'atom', 27 | pattern: escape, 28 | elements: [escape], 29 | }; 30 | } 31 | 32 | /** 33 | * Alias for `unicodeChar`. 34 | */ 35 | export const char = unicodeChar; 36 | 37 | /** 38 | * Unicode property escape matching a set of characters specified by a Unicode property. 39 | * 40 | * Regex pattern: `\p{Property}` or `\p{Property=Value}` 41 | * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Unicode_character_class_escape 42 | * 43 | * Note: the regex must be [unicode-aware](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode#unicode-aware_mode). 44 | * 45 | * @param property Unicode property name. 46 | * @param value Unicode property value (optional). 47 | * @returns A character class representing the unicode property escape. 48 | */ 49 | export function unicodeProperty(property: string, value?: string): CharacterEscape { 50 | const escape = `\\p{${property}${value ? `=${value}` : ''}}`; 51 | 52 | return { 53 | precedence: 'atom', 54 | pattern: escape, 55 | elements: [escape], 56 | }; 57 | } 58 | -------------------------------------------------------------------------------- /src/encoder.ts: -------------------------------------------------------------------------------- 1 | import type { EncodedRegex, RegexElement, RegexSequence } from './types'; 2 | import { ensureElements, ensureText } from './utils'; 3 | 4 | export function encode(sequence: RegexSequence): EncodedRegex { 5 | const elements = ensureElements(sequence); 6 | const encoded = elements.map((n) => encodeElement(n)); 7 | 8 | if (encoded.length === 1) { 9 | return encoded[0]!; 10 | } 11 | 12 | return { 13 | precedence: 'sequence', 14 | pattern: encoded 15 | .map((n) => (n.precedence === 'disjunction' ? 
encodeAtomic(n) : n.pattern)) 16 | .join(''), 17 | }; 18 | } 19 | 20 | export function encodeAtomic(sequence: RegexSequence): string { 21 | const encoded = encode(sequence); 22 | return encoded.precedence === 'atom' ? encoded.pattern : `(?:${encoded.pattern})`; 23 | } 24 | 25 | function encodeElement(element: RegexElement): EncodedRegex { 26 | if (typeof element === 'string') { 27 | return encodeText(element); 28 | } 29 | 30 | if (element instanceof RegExp) { 31 | return encodeRegExp(element); 32 | } 33 | 34 | if (typeof element === 'object') { 35 | // EncodedRegex 36 | if ('pattern' in element) { 37 | return element; 38 | } 39 | 40 | // LazyEncodableRegex 41 | if ('encode' in element) { 42 | return element.encode(); 43 | } 44 | } 45 | 46 | throw new Error(`Unsupported element. Received: ${JSON.stringify(element, null, 2)}`); 47 | } 48 | 49 | function encodeText(text: string): EncodedRegex { 50 | ensureText(text); 51 | 52 | return { 53 | // Optimize for single character case 54 | precedence: text.length === 1 ? 'atom' : 'sequence', 55 | pattern: escapeText(text), 56 | }; 57 | } 58 | 59 | function encodeRegExp(regexp: RegExp): EncodedRegex { 60 | const pattern = regexp.source; 61 | 62 | return { 63 | // Encode at safe precedence 64 | precedence: isAtomicPattern(pattern) ? 'atom' : 'disjunction', 65 | pattern, 66 | }; 67 | } 68 | 69 | // This is intended to catch only some popular atomic patterns like char classes and groups. 70 | function isAtomicPattern(pattern: string): boolean { 71 | // Simple char, char class [...] or group (...) 
72 | return pattern.length === 1 || /^\[[^[\]]*\]$/.test(pattern) || /^\([^()]*\)$/.test(pattern); 73 | } 74 | 75 | // Source: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#escaping 76 | function escapeText(text: string) { 77 | return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string 78 | } 79 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | // Types 2 | export type * from './types'; 3 | export type { CaptureOptions } from './constructs/capture'; 4 | export type { QuantifierOptions } from './constructs/quantifiers'; 5 | export type { RepeatOptions } from './constructs/repeat'; 6 | 7 | // Builders 8 | export { buildRegExp, buildPattern } from './builders'; 9 | 10 | // Constructs 11 | export { 12 | startOfString, 13 | endOfString, 14 | wordBoundary, 15 | nonWordBoundary, 16 | notWordBoundary, 17 | } from './constructs/anchors'; 18 | export { capture, ref } from './constructs/capture'; 19 | export { charClass, charRange, anyOf, negated, inverted } from './constructs/char-class'; 20 | export { 21 | any, 22 | digit, 23 | nonDigit, 24 | word, 25 | nonWord, 26 | whitespace, 27 | nonWhitespace, 28 | notDigit, 29 | notWhitespace, 30 | notWord, 31 | } from './constructs/char-escape'; 32 | export { choiceOf } from './constructs/choice-of'; 33 | export { lookahead } from './constructs/lookahead'; 34 | export { lookbehind } from './constructs/lookbehind'; 35 | export { negativeLookahead } from './constructs/negative-lookahead'; 36 | export { negativeLookbehind } from './constructs/negative-lookbehind'; 37 | export { zeroOrMore, oneOrMore, optional } from './constructs/quantifiers'; 38 | export { regex } from './constructs/regex'; 39 | export { repeat } from './constructs/repeat'; 40 | export { char, unicodeChar, unicodeProperty } from './constructs/unicode'; 41 | 
-------------------------------------------------------------------------------- /src/patterns/__tests__/hex-color.test.ts: -------------------------------------------------------------------------------- 1 | import { hexColorFinder, hexColorValidator } from '..'; 2 | 3 | test('hexColorValidator', () => { 4 | expect(hexColorValidator).toMatchString('#ffffff'); 5 | expect(hexColorValidator).toMatchString('#000'); 6 | 7 | expect(hexColorValidator).not.toMatchString('#000 '); 8 | expect(hexColorValidator).not.toMatchString(' #000'); 9 | expect(hexColorValidator).not.toMatchString('#0'); 10 | expect(hexColorValidator).not.toMatchString('#11'); 11 | expect(hexColorValidator).not.toMatchString('#4444'); 12 | expect(hexColorValidator).not.toMatchString('#55555'); 13 | expect(hexColorValidator).not.toMatchString('#7777777'); 14 | }); 15 | 16 | test('hexColorFinder', () => { 17 | expect(hexColorFinder).toMatchAllGroups('The color is #ffffff', [['#ffffff']]); 18 | expect(hexColorFinder).toMatchAllGroups('The colors are #1, #22, #333, #4444, #55555, #666666', [ 19 | ['#333'], 20 | ['#666666'], 21 | ]); 22 | }); 23 | -------------------------------------------------------------------------------- /src/patterns/hex-color.ts: -------------------------------------------------------------------------------- 1 | import { buildRegExp } from '../builders'; 2 | import { endOfString, startOfString, wordBoundary } from '../constructs/anchors'; 3 | import { choiceOf } from '../constructs/choice-of'; 4 | import { repeat } from '../constructs/repeat'; 5 | 6 | const hexDigit = /[0-9a-f]/; 7 | 8 | /** Find hex color strings in a text. */ 9 | export const hexColorFinder = buildRegExp( 10 | [ 11 | '#', 12 | choiceOf( 13 | repeat(hexDigit, 6), // #rrggbb 14 | repeat(hexDigit, 3), // #rgb 15 | ), 16 | wordBoundary, 17 | ], 18 | { ignoreCase: true, global: true }, 19 | ); 20 | 21 | /** 22 | * Check that given text is a valid hex color. 23 | * 24 | * Allows both 3 and 6 digit hex colors. 
25 | * */ 26 | export const hexColorValidator = buildRegExp( 27 | [ 28 | startOfString, // Match whole string 29 | '#', 30 | choiceOf( 31 | repeat(hexDigit, 6), // #rrggbb 32 | repeat(hexDigit, 3), // #rgb 33 | ), 34 | endOfString, 35 | ], 36 | { ignoreCase: true }, 37 | ); 38 | -------------------------------------------------------------------------------- /src/patterns/index.ts: -------------------------------------------------------------------------------- 1 | export { hexColorFinder, hexColorValidator } from './hex-color'; 2 | -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | export type ArrayOrSingle = T[] | T; 2 | 3 | /** 4 | * Sequence of regex elements forming a regular expression. 5 | * 6 | * For developer convenience it also accepts a single element instead of an array. 7 | */ 8 | export type RegexSequence = RegexElement[] | RegexElement; 9 | 10 | /** 11 | * Fundamental building block of a regular expression, defined as either a regex construct, `RegExp` object or a string. 12 | */ 13 | export type RegexElement = RegexConstruct | RegExp | string; 14 | 15 | /** 16 | * Regex construct, defined as either an already encoded regex or a lazily encodable regex (e.g. a character class). 17 | */ 18 | export type RegexConstruct = EncodedRegex | LazyEncodableRegex; 19 | 20 | /** 21 | * Encoded regex pattern with information about its precedence (atom, sequence, disjunction) 22 | */ 23 | export interface EncodedRegex { 24 | precedence: EncodePrecedence; 25 | pattern: string; 26 | } 27 | 28 | /** 29 | * Precedence of given regex pattern. 30 | */ 31 | export type EncodePrecedence = 'atom' | 'sequence' | 'disjunction'; 32 | 33 | /** 34 | * Regex pattern that can be encoded by calling the `encode` method.
35 | */ 36 | export interface LazyEncodableRegex { 37 | encode: () => EncodedRegex; 38 | } 39 | 40 | /** 41 | * Character escape: `EncodedRegex` that can also be put into `charClass`. 42 | */ 43 | export interface CharacterEscape extends EncodedRegex { 44 | elements: string[]; 45 | } 46 | 47 | /** 48 | * Character class. 49 | * Regex: `[...]` 50 | */ 51 | export interface CharacterClass extends LazyEncodableRegex { 52 | elements: string[]; 53 | } 54 | 55 | /** 56 | * Flags to be passed to RegExp constructor. 57 | * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/RegExp#flags 58 | */ 59 | export interface RegexFlags { 60 | /** 61 | * Find all matches in a string, instead of just the first one. 62 | * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/global 63 | */ 64 | global?: boolean; 65 | 66 | /** 67 | * Perform case-insensitive matching. 68 | * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/ignoreCase 69 | */ 70 | ignoreCase?: boolean; 71 | 72 | /** 73 | * Treat the start and end of each line in a string as the beginning and end of the string. 74 | * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/multiline 75 | */ 76 | multiline?: boolean; 77 | 78 | /** 79 | * Generate the start and end indices of each captured group in a match. 80 | * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/hasIndices 81 | */ 82 | hasIndices?: boolean; 83 | 84 | /** 85 | * MDN: _Allows . to match newlines._ 86 | * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/dotAll 87 | */ 88 | dotAll?: boolean; 89 | 90 | /** 91 | * MDN: _Matches only from the index indicated by the `lastIndex` property of this regular expression in the target string.
Does not attempt to match from any later indexes._ 92 | * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/sticky 93 | */ 94 | sticky?: boolean; 95 | 96 | /** 97 | * Enables [Unicode-aware mode](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode#unicode-aware_mode). 98 | * 99 | * This enables features like: 100 | * - Unicode character escapes: `\u{xxxx}` 101 | * - Unicode character property escapes:`\p{Property=Value}` 102 | * 103 | * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode 104 | */ 105 | unicode?: boolean; 106 | } 107 | -------------------------------------------------------------------------------- /src/utils.ts: -------------------------------------------------------------------------------- 1 | import type { RegexElement, RegexSequence } from './types'; 2 | 3 | export function ensureElements(sequence: RegexSequence): RegexElement[] { 4 | const elements = Array.isArray(sequence) ? sequence : [sequence]; 5 | if (elements.length === 0) { 6 | throw new Error('Expected at least one element'); 7 | } 8 | 9 | return elements; 10 | } 11 | 12 | export function ensureText(text: string): void { 13 | if (text.length === 0) { 14 | throw new Error('Expected at least one character'); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /test-utils/to-equal-regex.ts: -------------------------------------------------------------------------------- 1 | import type { RegexSequence } from '../src/types'; 2 | import { wrapRegExp } from './utils'; 3 | 4 | export function toEqualRegex( 5 | this: jest.MatcherContext, 6 | received: RegExp | RegexSequence, 7 | expected: RegExp | string, 8 | ) { 9 | received = wrapRegExp(received); 10 | 11 | const options = { 12 | isNot: this.isNot, 13 | }; 14 | 15 | const expectedSource = typeof expected === 'string' ? 
expected : expected.source; 16 | const expectedFlags = typeof expected === 'string' ? undefined : expected.flags; 17 | 18 | return { 19 | pass: 20 | expectedSource === received.source && 21 | (expectedFlags === undefined || expectedFlags === received.flags), 22 | message: () => 23 | this.utils.matcherHint('toEqualRegex', undefined, undefined, options) + 24 | '\n\n' + 25 | `Expected: ${this.isNot ? 'not ' : ''}${this.utils.printExpected(expected)}\n` + 26 | `Received: ${this.utils.printReceived(received)}`, 27 | }; 28 | } 29 | 30 | expect.extend({ toEqualRegex }); 31 | 32 | declare global { 33 | namespace jest { 34 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 35 | interface Matchers<R> { 36 | toEqualRegex(expected: RegExp | string): R; 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /test-utils/to-match-all-groups.ts: -------------------------------------------------------------------------------- 1 | import type { RegexSequence } from '../src/types'; 2 | import { wrapRegExp } from './utils'; 3 | /** Jest matcher: passes when the matches of `expectedString.matchAll(received)`, each spread into an array, equal `expectedGroups`. */ 4 | export function toMatchAllGroups( 5 | this: jest.MatcherContext, 6 | received: RegExp | RegexSequence, 7 | expectedString: string, 8 | expectedGroups: string[][], 9 | ) { 10 | const receivedRegex = wrapRegExp(received); 11 | const receivedGroups = toNestedArray(expectedString.matchAll(receivedRegex)); 12 | const options = { 13 | isNot: this.isNot, 14 | }; 15 | 16 | return { 17 | pass: this.equals(receivedGroups, expectedGroups), 18 | message: () => 19 | this.utils.matcherHint('toMatchAllGroups', undefined, undefined, options) + 20 | '\n\n' + 21 | `Expected: ${this.isNot ?
'not ' : ''}${this.utils.printExpected(expectedGroups)}\n` + 22 | `Received: ${this.utils.printReceived(receivedGroups)}`, 23 | }; 24 | } 25 | 26 | expect.extend({ toMatchAllGroups }); 27 | 28 | declare global { 29 | namespace jest { 30 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 31 | interface Matchers<R> { 32 | toMatchAllGroups(input: string, expected: string[][]): R; 33 | } 34 | } 35 | } 36 | /** Spreads every match yielded by the iterator into a plain array. */ 37 | function toNestedArray(iterator: IterableIterator<RegExpMatchArray>) { 38 | const result: string[][] = []; 39 | 40 | for (const match of iterator) { 41 | result.push([...match]); 42 | } 43 | 44 | return result; 45 | } 46 | -------------------------------------------------------------------------------- /test-utils/to-match-all-named-groups.ts: -------------------------------------------------------------------------------- 1 | import type { RegexSequence } from '../src/types'; 2 | import { wrapRegExp } from './utils'; 3 | /** Jest matcher: passes when the `groups` of every `inputText.matchAll(received)` match equal `expectedGroups`. */ 4 | export function toMatchAllNamedGroups( 5 | this: jest.MatcherContext, 6 | received: RegExp | RegexSequence, 7 | inputText: string, 8 | expectedGroups: Array<Record<string, string>>, 9 | ) { 10 | const receivedRegex = wrapRegExp(received); 11 | const matchResult = inputText.matchAll(receivedRegex); 12 | const receivedGroups = matchResult ? [...matchResult].map((r) => r.groups) : null; 13 | const options = { 14 | isNot: this.isNot, 15 | }; 16 | 17 | return { 18 | pass: this.equals(receivedGroups, expectedGroups), 19 | message: () => 20 | this.utils.matcherHint('toMatchAllNamedGroups', undefined, undefined, options) + 21 | '\n\n' + 22 | `Expected: ${this.isNot ?
'not ' : ''}${this.utils.printExpected(expectedGroups)}\n` + 23 | `Received: ${this.utils.printReceived(receivedGroups)}`, 24 | }; 25 | } 26 | 27 | expect.extend({ toMatchAllNamedGroups }); 28 | 29 | declare global { 30 | namespace jest { 31 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 32 | interface Matchers<R> { 33 | toMatchAllNamedGroups(inputText: string, expectedGroups: Array<Record<string, string>>): R; 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /test-utils/to-match-groups.ts: -------------------------------------------------------------------------------- 1 | import type { RegexSequence } from '../src/types'; 2 | import { wrapRegExp } from './utils'; 3 | /** Jest matcher: passes when `inputText.match(received)`, spread into an array (or `null` when there is no match), equals `expectedGroups`. */ 4 | export function toMatchGroups( 5 | this: jest.MatcherContext, 6 | received: RegExp | RegexSequence, 7 | inputText: string, 8 | expectedGroups: string[], 9 | ) { 10 | const receivedRegex = wrapRegExp(received); 11 | const matchResult = inputText.match(receivedRegex); 12 | const receivedGroups = matchResult ? [...matchResult] : null; 13 | const options = { 14 | isNot: this.isNot, 15 | }; 16 | 17 | return { 18 | pass: this.equals(receivedGroups, expectedGroups), 19 | message: () => 20 | this.utils.matcherHint('toMatchGroups', undefined, undefined, options) + 21 | '\n\n' + 22 | `Expected: ${this.isNot ?
'not ' : ''}${this.utils.printExpected(expectedGroups)}\n` + 23 | `Received: ${this.utils.printReceived(receivedGroups)}`, 24 | }; 25 | } 26 | 27 | expect.extend({ toMatchGroups }); 28 | 29 | declare global { 30 | namespace jest { 31 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 32 | interface Matchers<R> { 33 | toMatchGroups(inputText: string, expectedGroups: string[]): R; 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /test-utils/to-match-named-groups.ts: -------------------------------------------------------------------------------- 1 | import type { RegexSequence } from '../src/types'; 2 | import { wrapRegExp } from './utils'; 3 | /** Jest matcher: passes when the `groups` of `inputText.match(received)` (or `null` when there is no match) equal `expectedGroups`. */ 4 | export function toMatchNamedGroups( 5 | this: jest.MatcherContext, 6 | received: RegExp | RegexSequence, 7 | inputText: string, 8 | expectedGroups: Record<string, string>, 9 | ) { 10 | const receivedRegex = wrapRegExp(received); 11 | const matchResult = inputText.match(receivedRegex); 12 | const receivedGroups = matchResult ? matchResult.groups : null; 13 | const options = { 14 | isNot: this.isNot, 15 | }; 16 | 17 | return { 18 | pass: this.equals(receivedGroups, expectedGroups), 19 | message: () => 20 | this.utils.matcherHint('toMatchNamedGroups', undefined, undefined, options) + 21 | '\n\n' + 22 | `Expected: ${this.isNot ?
'not ' : ''}${this.utils.printExpected(expectedGroups)}\n` + 23 | `Received: ${this.utils.printReceived(receivedGroups)}`, 24 | }; 25 | } 26 | 27 | expect.extend({ toMatchNamedGroups }); 28 | 29 | declare global { 30 | namespace jest { 31 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 32 | interface Matchers<R> { 33 | toMatchNamedGroups(inputText: string, expectedGroups: Record<string, string>): R; 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /test-utils/to-match-string.ts: -------------------------------------------------------------------------------- 1 | import type { RegexSequence } from '../src/types'; 2 | import { wrapRegExp } from './utils'; 3 | 4 | export function toMatchString( 5 | this: jest.MatcherContext, 6 | received: RegExp | RegexSequence, 7 | expected: string, 8 | ) { 9 | const receivedRegex = wrapRegExp(received); 10 | const matchResult = expected.match(receivedRegex); 11 | const options = { 12 | isNot: this.isNot, 13 | }; 14 | 15 | return { 16 | pass: matchResult !== null, 17 | message: () => 18 | this.utils.matcherHint('toMatchString', undefined, undefined, options) + 19 | '\n\n' + 20 | `Expected: ${this.isNot ?
'not ' : ''} matching ${this.utils.printExpected(expected)}\n` + 21 | `Received pattern: ${this.utils.printReceived(receivedRegex.source)}`, 22 | }; 23 | } 24 | 25 | expect.extend({ toMatchString }); 26 | 27 | declare global { 28 | namespace jest { 29 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 30 | interface Matchers { 31 | toMatchString(expected: string): R; 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /test-utils/utils.ts: -------------------------------------------------------------------------------- 1 | import { buildRegExp } from '../src/builders'; 2 | import type { RegexSequence } from '../src/types'; 3 | 4 | export function wrapRegExp(regex: RegExp | RegexSequence) { 5 | if (regex instanceof RegExp) { 6 | return regex; 7 | } 8 | 9 | return buildRegExp(regex); 10 | } 11 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "rootDir": ".", 4 | "paths": { 5 | "ts-regex-builder": ["./src/index"] 6 | }, 7 | "allowUnreachableCode": false, 8 | "allowUnusedLabels": false, 9 | "esModuleInterop": true, 10 | "forceConsistentCasingInFileNames": true, 11 | "jsx": "react", 12 | "lib": ["esnext"], 13 | "module": "esnext", 14 | "moduleResolution": "node", 15 | "noFallthroughCasesInSwitch": true, 16 | "noImplicitReturns": true, 17 | "noImplicitUseStrict": false, 18 | "noStrictGenericChecks": false, 19 | "noUncheckedIndexedAccess": true, 20 | "noUnusedLocals": true, 21 | "noUnusedParameters": true, 22 | "resolveJsonModule": true, 23 | "skipLibCheck": true, 24 | "strict": true, 25 | "target": "esnext", 26 | "verbatimModuleSyntax": true 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /tsconfig.release.json: -------------------------------------------------------------------------------- 
1 | { 2 | "extends": "./tsconfig", 3 | "compilerOptions": { 4 | "declaration": true, 5 | "emitDeclarationOnly": true, 6 | "rootDir": "src" 7 | }, 8 | "include": ["src/**/*.ts"], 9 | "exclude": ["**/__tests__**", "**/test.ts"] 10 | } 11 | -------------------------------------------------------------------------------- /website/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | 22 | # Yarn 4 23 | .pnp.* 24 | .yarn/* 25 | !.yarn/patches 26 | !.yarn/plugins 27 | !.yarn/releases 28 | !.yarn/sdks 29 | !.yarn/versions -------------------------------------------------------------------------------- /website/README.md: -------------------------------------------------------------------------------- 1 | # Website 2 | 3 | This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator. 4 | 5 | ### Installation 6 | 7 | ``` 8 | $ pnpm install 9 | ``` 10 | 11 | ### Local Development 12 | 13 | ``` 14 | $ pnpm start 15 | ``` 16 | 17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. 18 | 19 | ### Build 20 | 21 | ``` 22 | $ pnpm build 23 | ``` 24 | 25 | This command generates static content into the `build` directory and can be served using any static contents hosting service. 
26 | 27 | ### Deployment 28 | 29 | Using SSH: 30 | 31 | ``` 32 | $ USE_SSH=true pnpm deploy 33 | ``` 34 | 35 | Not using SSH: 36 | 37 | ``` 38 | $ GIT_USER= pnpm deploy 39 | ``` 40 | 41 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch. 42 | -------------------------------------------------------------------------------- /website/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')], 3 | }; 4 | -------------------------------------------------------------------------------- /website/docs/Examples.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: examples 3 | title: Examples 4 | --- 5 | 6 | ## Match hashtags 7 | 8 | This regex matches and captures all hashtags in a given string. 9 | 10 | ```ts 11 | const hashtags = buildRegExp(['#', capture(oneOrMore(word))], { global: true }); 12 | 13 | const hashtagMatches = '#hello #world'.matchAll(hashtags); 14 | ``` 15 | 16 | Encoded regex: `/#(\w+)/g`. 17 | 18 | See tests: [example-hashtags.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-hashtags.ts). 19 | 20 | ## Hex color validation 21 | 22 | This regex validates whether a given string is a valid hex color, with 6 or 3 hex digits. 23 | 24 | ```ts 25 | const hexDigit = charClass(digit, charRange('a', 'f')); 26 | 27 | const regex = buildRegExp( 28 | [ 29 | startOfString, 30 | optional('#'), 31 | choiceOf( 32 | repeat(hexDigit, 6), // #rrggbb 33 | repeat(hexDigit, 3), // #rgb 34 | ), 35 | endOfString, 36 | ], 37 | { ignoreCase: true }, 38 | ); 39 | 40 | const isValid = regex.test('#ffffff'); 41 | ``` 42 | 43 | Encoded regex: `/^#?(?:[a-f\d]{6}|[a-f\d]{3})$/i`. 
44 | 45 | See tests: [example-hex-color.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-hex-color.ts). 46 | 47 | ## URL validation 48 | 49 | This regex validates (in a simplified way) whether a given string is a URL. 50 | 51 | ```ts 52 | const protocol = [choiceOf('http', 'https'), '://']; 53 | const domainChars = charClass(charRange('a', 'z'), digit); 54 | const domainCharsHypen = charClass(domainChars, anyOf('-')); 55 | 56 | const domainSegment = choiceOf( 57 | domainChars, // single char 58 | [domainChars, zeroOrMore(domainCharsHypen), domainChars], // multi char 59 | ); 60 | 61 | const regex = buildRegExp([ 62 | startOfString, 63 | optional(protocol), 64 | oneOrMore([domainSegment, '.']), // domain segment 65 | charRange('a', 'z'), // TLD first char 66 | oneOrMore(domainChars), // TLD remaining chars 67 | endOfString, 68 | ]); 69 | 70 | const isValid = regex.test('https://hello.github.com'); 71 | ``` 72 | 73 | Encoded regex: `/^(?:(?:http|https):\/\/)?(?:(?:[a-z\d]|[a-z\d][a-z\d-]*[a-z\d])\.)+[a-z][a-z\d]+$/`. 74 | 75 | See tests: [example-url-simple.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-url-simple.ts). 76 | 77 | For more advanced URL validation check: [example-url-advanced.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-url-advanced.ts). 78 | 79 | ## Email address validation 80 | 81 | This regex validates whether a given string is a properly formatted email address. 
82 | 83 | ```ts 84 | const hostnameChars = charClass(charRange('a', 'z'), digit, anyOf('-.')); 85 | const domainChars = charRange('a', 'z'); 86 | const usernameChars = charClass(charRange('a', 'z'), digit, anyOf('._%+-')); 87 | const regex = buildRegExp( 88 | [ 89 | startOfString, 90 | oneOrMore(usernameChars), 91 | '@', 92 | oneOrMore(hostnameChars), 93 | '.', 94 | repeat(domainChars, { min: 2 }), 95 | endOfString, 96 | ], 97 | { ignoreCase: true }, 98 | ); 99 | 100 | const isValid = regex.test('user@example.com'); 101 | ``` 102 | 103 | Encoded regex: `/^[a-z\d._%+-]+@[a-z\d.-]+\.[a-z]{2,}$/i`. 104 | 105 | See tests: [example-email.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-email.ts). 106 | 107 | ## JavaScript number validation 108 | 109 | This regex validates if a given string is a valid JavaScript number. 110 | 111 | ```ts 112 | const sign = anyOf('+-'); 113 | const exponent = [anyOf('eE'), optional(sign), oneOrMore(digit)]; 114 | 115 | const regex = buildRegExp([ 116 | startOfString, 117 | optional(sign), 118 | choiceOf( 119 | [oneOrMore(digit), optional(['.', zeroOrMore(digit)])], // leading digit 120 | ['.', oneOrMore(digit)], // leading dot 121 | ), 122 | optional(exponent), // exponent 123 | endOfString, 124 | ]); 125 | 126 | const isValid = regex.test('1.0e+27'); 127 | ``` 128 | 129 | Encoded regex: `/^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$/`. 130 | 131 | See tests: [example-js-number.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-js-number.ts).
132 | 133 | ## IPv4 address validation 134 | 135 | ```ts 136 | // Match integers from 0-255 137 | const octet = choiceOf( 138 | [digit], 139 | [charRange('1', '9'), digit], 140 | ['1', repeat(digit, 2)], 141 | ['2', charRange('0', '4'), digit], 142 | ['25', charRange('0', '5')], 143 | ); 144 | 145 | // Match 146 | const regex = buildRegExp([ 147 | startOfString, // 148 | repeat([octet, '.'], 3), 149 | octet, 150 | endOfString, 151 | ]); 152 | 153 | const isValid = regex.test('192.168.0.1'); 154 | ``` 155 | 156 | Encoded regex: `/^(?:(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.){3}(?:\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$/`. 157 | 158 | See tests: [example-ipv4.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-ipv4.ts). 159 | 160 | ## Mixing with RegExp literals 161 | 162 | ```ts 163 | // Match integers from 0-255 164 | const octet = choiceOf( 165 | /[0-9]/, // 0-9 166 | /[1-9][0-9]/, // 10-99 167 | /1[0-9][0-9]/, // 100-199 168 | /2[0-4][0-9]/, // 200-249 169 | /25[0-5]/, // 250-255 170 | ); 171 | 172 | // Match 173 | const regex = buildRegExp([ 174 | startOfString, // 175 | repeat([octet, '.'], 3), 176 | octet, 177 | endOfString, 178 | ]); 179 | 180 | const isValid = regex.test('192.168.0.1'); 181 | ``` 182 | 183 | Encoded regex: `/^(?:(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\.){3}(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])$/`. 184 | 185 | See tests: [example-regexp.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-regexp.ts).
186 | 187 | ## Simple password validation 188 | 189 | This regex corresponds to the following password policy: 190 | 191 | - at least one uppercase letter 192 | - at least one lowercase letter 193 | - at least one digit 194 | - at least one special character 195 | - at least 8 characters long 196 | 197 | ```ts 198 | const atLeastOneUppercase = lookahead([zeroOrMore(any), /[A-Z]/]); 199 | const atLeastOneLowercase = lookahead([zeroOrMore(any), /[a-z]/]); 200 | const atLeastOneDigit = lookahead([zeroOrMore(any), /[0-9]/]); 201 | const atLeastOneSpecialChar = lookahead([zeroOrMore(any), /[^A-Za-z0-9\s]/]); 202 | const atLeastEightChars = /.{8,}/; 203 | 204 | // Match 205 | const validPassword = buildRegExp([ 206 | startOfString, 207 | atLeastOneUppercase, 208 | atLeastOneLowercase, 209 | atLeastOneDigit, 210 | atLeastOneSpecialChar, 211 | atLeastEightChars, 212 | endOfString, 213 | ]); 214 | 215 | const isValid = validPassword.test('Aa$123456'); 216 | ``` 217 | 218 | Encoded regex: `/^(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[^A-Za-z0-9\s])(?:.{8,})$/`. 219 | 220 | See tests: [example-password.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-password.ts).
221 | 222 | ## Match currency values 223 | 224 | ```ts 225 | const currencySymbol = '$€£¥R₿'; 226 | const decimalSeparator = '.'; 227 | 228 | const firstThousandsClause = repeat(digit, { min: 1, max: 3 }); 229 | const thousandsSeparator = ','; 230 | const thousands = repeat(digit, 3); 231 | const thousandsClause = [optional(thousandsSeparator), thousands]; 232 | const cents = repeat(digit, 2); 233 | const isCurrency = lookbehind(anyOf(currencySymbol)); 234 | 235 | const currencyRegex = buildRegExp([ 236 | isCurrency, 237 | optional(whitespace), 238 | firstThousandsClause, 239 | zeroOrMore(thousandsClause), 240 | optional([decimalSeparator, cents]), 241 | endOfString, 242 | ]); 243 | 244 | const isValid = currencyRegex.test('£1,000'); 245 | ``` 246 | 247 | Encoded regex: `/(?<=[$€£¥R₿])\s?\d{1,3}(?:,?\d{3})*(?:\.\d{2})?$/`. 248 | 249 | See tests: [example-currency.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-currency.ts). 250 | 251 | ## Finding specific whole words 252 | 253 | Ignoring cases where a given word is part of a bigger word. 254 | 255 | ```ts 256 | const wordsToFind = ['word', 'date']; 257 | 258 | const regex = buildRegExp([ 259 | wordBoundary, // match whole words only 260 | choiceOf(...wordsToFind), 261 | wordBoundary, 262 | ]); 263 | 264 | expect(regex).toMatchString('word'); 265 | expect(regex).toMatchString('date'); 266 | 267 | expect(regex).not.toMatchString('sword'); 268 | expect(regex).not.toMatchString('update'); 269 | ``` 270 | 271 | Encoded regex: `/\b(?:word|date)\b/`. 272 | 273 | See tests: [example-find-words.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-find-words.ts). 274 | 275 | ## Finding specific suffixes 276 | 277 | Ignoring cases where a given word is part of a bigger word.
278 | 279 | ```ts 280 | const suffixesToFind = ['acy', 'ism']; 281 | 282 | const regex = buildRegExp([ 283 | nonWordBoundary, // match suffixes only 284 | choiceOf(...suffixesToFind), 285 | wordBoundary, 286 | ]); 287 | 288 | expect(regex).toMatchString('privacy '); 289 | expect(regex).toMatchString('democracy'); 290 | 291 | expect(regex).not.toMatchString('acy'); 292 | expect(regex).not.toMatchString('ism'); 293 | ``` 294 | 295 | Encoded regex: `/\B(?:acy|ism)\b/`. 296 | 297 | See tests: [example-find-suffixes.ts](https://github.com/callstack/ts-regex-builder/blob/main/src/__tests__/example-find-suffixes.ts). 298 | -------------------------------------------------------------------------------- /website/docs/api/assertions.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: assertions 3 | title: Assertions 4 | --- 5 | 6 | ## Anchors 7 | 8 | Anchors are special characters or sequences that specify positions in the input string rather than matching specific characters. 9 | 10 | ### Start and end of string 11 | 12 | ```ts 13 | const startOfString: RegexConstruct; 14 | const endOfString: RegexConstruct; 15 | ``` 16 | 17 | - `startOfString` anchor matches the start of a string (or line, if multiline mode is enabled). Regex syntax: `^`. 18 | - `endOfString` anchor matches the end of a string (or line, if multiline mode is enabled). Regex syntax: `$`. 19 | 20 | ### Word boundary 21 | 22 | _This API was added in version 1.3.0._ 23 | 24 | ```ts 25 | const wordBoundary: RegexConstruct; 26 | const nonWordBoundary: RegexConstruct; 27 | ``` 28 | 29 | - `wordBoundary` matches the positions where a word character is not followed or preceded by another word character, effectively indicating the start or end of a word. Regex syntax: `\b`. 30 | - `nonWordBoundary` matches the positions where a word character is followed or preceded by another word character, indicating that it is not at the start or end of a word. Regex syntax: `\B`. 
31 | 32 | Note: word characters are letters, digits, and underscore (`_`). Other special characters like `#`, `$`, etc. are not considered word characters. 33 | 34 | ## Lookarounds 35 | 36 | Lookarounds in regex are used for asserting that some pattern is or isn't followed or preceded by another pattern, without including the latter in the match. 37 | 38 | ### `lookahead()` 39 | 40 | _This API was added in version 1.3.0._ 41 | 42 | ```ts 43 | function lookahead(sequence: RegexSequence): RegexConstruct; 44 | ``` 45 | 46 | Regex syntax: `(?=...)`. 47 | 48 | Allows for conditional matching by checking for subsequent patterns in regexes without consuming them. 49 | 50 | ### `negativeLookahead()` 51 | 52 | _This API was added in version 1.3.0._ 53 | 54 | ```ts 55 | function negativeLookahead(sequence: RegexSequence): RegexConstruct; 56 | ``` 57 | 58 | Regex syntax: `(?!...)`. 59 | 60 | Allows for matches to be rejected if a specified subsequent pattern is present, without consuming any characters. 61 | 62 | ### `lookbehind()` 63 | 64 | _This API was added in version 1.3.0._ 65 | 66 | ```ts 67 | function lookbehind(sequence: RegexSequence): RegexConstruct; 68 | ``` 69 | 70 | Regex syntax: `(?<=...)`. 71 | 72 | Allows for conditional matching by checking for preceding patterns in regexes without consuming them. 73 | 74 | ### `negativeLookbehind()` 75 | 76 | _This API was added in version 1.3.0._ 77 | 78 | ```ts 79 | function negativeLookbehind(sequence: RegexSequence): RegexConstruct; 80 | ``` 81 | 82 | Regex syntax: `(?<!...)`. […] `(?<name>...)` for named capturing groups (`name` option) 21 | 22 | Captures, also known as capturing groups, extract and store parts of the matched string for later use. 23 | 24 | Capture results are available using array-like [`match()` result object](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match#using_match).
25 | 26 | #### Named groups 27 | 28 | When using the `name` option, the group becomes a [named capturing group](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Named_capturing_group), allowing you to refer to it by name instead of by index. 29 | 30 | Named capture results are available using the [`groups`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match#using_named_capturing_groups) property on the `match()` result. 31 | 32 | :::note 33 | 34 | TS Regex Builder does not have a construct for non-capturing groups. Such groups are implicitly added when required. E.g., `zeroOrMore("abc")` is encoded as `(?:abc)*`. 35 | 36 | ::: 37 | 38 | ### `ref()` 39 | 40 | ```ts 41 | function ref(name: string): RegexConstruct; 42 | ``` 43 | 44 | Regex syntax: `\k<...>`. 45 | 46 | Creates a reference, also known as a backreference, which allows matching again the exact text that a capturing group previously matched. The reference must use the same name as some capturing group earlier in the expression to form a valid regex. 47 | 48 | Usage with `capture()`: 49 | 50 | ```ts 51 | const regex = buildRegExp([ 52 | // Create a named capture using name from `someKey`. 53 | capture(..., { name: 'someKey' }), 54 | // ... some other elements ... 55 | 56 | // Match the same text as matched by `capture` with the same name. 57 | ref('someKey'), 58 | ]) 59 | ``` 60 | 61 | :::note 62 | 63 | TS Regex Builder doesn't support using ordinal backreferences (`\1`, `\2`, etc.) because in complex regex patterns, these references are difficult to use accurately.
64 | 65 | ::: 66 | -------------------------------------------------------------------------------- /website/docs/api/character-classes.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: character-classes 3 | title: Character Classes 4 | --- 5 | 6 | A character class is a set of characters; it matches any one of the characters in the set. 7 | 8 | ### Common character class escapes 9 | 10 | ```ts 11 | const any: RegexConstruct; 12 | const word: CharacterEscape; 13 | const nonWord: CharacterEscape; 14 | const digit: CharacterEscape; 15 | const nonDigit: CharacterEscape; 16 | const whitespace: CharacterEscape; 17 | const nonWhitespace: CharacterEscape; 18 | ``` 19 | 20 | - `any` matches any character except newline characters. Regex syntax: `.`. 21 | - `word` matches any word character (letters, digits & underscore). Regex syntax: `\w`. 22 | - `nonWord` matches any character **except** word characters (letters, digits & underscore). Regex syntax: `\W`. 23 | - `digit` matches any digit. Regex syntax: `\d`. 24 | - `nonDigit` matches any character **except** digits. Regex syntax: `\D`. 25 | - `whitespace` matches any whitespace character (spaces, tabs, line breaks). Regex syntax: `\s`. 26 | - `nonWhitespace` matches any character **except** whitespace characters (spaces, tabs, line breaks). Regex syntax: `\S`. 27 | 28 | ### `anyOf()` 29 | 30 | ```ts 31 | function anyOf(characters: string): CharacterClass; 32 | ``` 33 | 34 | Regex syntax: `[abc]`. 35 | 36 | The `anyOf` class matches any character in the `characters` string. 37 | 38 | Example: `anyOf('aeiou')` will match either `a`, `e`, `i`, `o` or `u` characters. 39 | 40 | ### `charRange()` 41 | 42 | ```ts 43 | function charRange(start: string, end: string): CharacterClass; 44 | ``` 45 | 46 | Regex syntax: `[a-z]`. 47 | 48 | The `charRange` class matches any characters in the range from `start` to `end` (inclusive).
49 | 50 | Examples: 51 | 52 | - `charRange('a', 'z')` will match all lowercase characters from `a` to `z`. 53 | - `charRange('A', 'Z')` will match all uppercase characters from `A` to `Z`. 54 | - `charRange('0', '9')` will match all digit characters from `0` to `9`. 55 | 56 | ### `charClass()` 57 | 58 | ```ts 59 | function charClass(...elements: CharacterClass[]): CharacterClass; 60 | ``` 61 | 62 | Regex syntax: `[...]`. 63 | 64 | The `charClass` construct creates a new character class that includes all passed character classes. 65 | 66 | Examples: 67 | 68 | - `charClass(charRange('a', 'f'), digit)` will match all lowercase hex digits (`0` to `9` and `a` to `f`). 69 | - `charClass(charRange('a', 'z'), digit, anyOf("._-"))` will match any digit, lowercase Latin letter from `a` to `z`, and any of the `.`, `_`, and `-` characters. 70 | 71 | ### `negated()` 72 | 73 | ```ts 74 | function negated(element: CharacterClass): RegexConstruct; 75 | ``` 76 | 77 | Regex syntax: `[^...]`. 78 | 79 | The `negated` construct creates a new character class that matches any character not present in the passed character class. 80 | 81 | Examples: 82 | 83 | - `negated(digit)` matches any character that is not a digit. 84 | - `negated(anyOf('aeiou'))` matches any character that is not a lowercase vowel. 85 | -------------------------------------------------------------------------------- /website/docs/api/constructs.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: constructs 3 | title: Constructs 4 | --- 5 | 6 | These functions and objects represent available regex constructs. 7 | 8 | ### `choiceOf()` 9 | 10 | ```ts 11 | function choiceOf( 12 | ...alternatives: RegexSequence[], 13 | ): RegexConstruct; 14 | ``` 15 | 16 | Regex syntax: `a|b|c`. 17 | 18 | The `choiceOf` (disjunction) construct matches one out of several possible sequences. It functions similarly to a logical OR operator in programming. 
It can match simple string options as well as complex patterns. 19 | 20 | Example: `choiceOf("color", "colour")` matches either `color` or `colour` pattern. 21 | 22 | ### `regex()` 23 | 24 | ```ts 25 | function regex(sequence: RegexSequence): RegexConstruct; 26 | ``` 27 | 28 | Regex syntax: the pattern remains unchanged when wrapped by this construct. 29 | 30 | This construct is a no-op operator that groups array of `RegexElements` into a single element for composition purposes. This is particularly useful for defining smaller sequence patterns as separate variables. 31 | 32 | Without `regex()`: 33 | 34 | ```ts 35 | const exponent = [anyOf('eE'), optional(anyOf('+-')), oneOrMore(digit)]; 36 | const numberWithExponent = buildRegExp([ 37 | oneOrMore(digit), 38 | ...exponent, // Need to spread "exponent" as it's an array. 39 | ]); 40 | ``` 41 | 42 | With `regex()`: 43 | 44 | ```ts 45 | const exponent = regex([anyOf('eE'), optional(anyOf('+-')), oneOrMore(digit)]); 46 | const numberWithExponent = buildRegExp([ 47 | oneOrMore(digit), 48 | exponent, // Easily compose "exponent" sequence as a single element. 49 | ]); 50 | ``` 51 | -------------------------------------------------------------------------------- /website/docs/api/overview.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: overview 3 | title: Overview 4 | slug: /api 5 | --- 6 | 7 | TS Regex Builder allows you to build complex regular expressions using domain-specific language. 8 | 9 | ### Terminology 10 | 11 | - regex construct (`RegexConstruct`) - common name for all regex constructs like character classes, quantifiers, and anchors. 12 | - regex element (`RegexElement`) - a fundamental building block of a regular expression, defined as either a regex construct, a string, or `RegExp` literal (`/.../`). 13 | - regex sequence (`RegexSequence`) - a sequence of regex elements forming a regular expression. 
For developer convenience, it also accepts a single element instead of an array. 14 | 15 | Most of the regex constructs accept a regex sequence as their argument. 16 | 17 | #### Examples of sequences 18 | 19 | - single element (construct): `capture('Hello')` 20 | - single element (string): `'Hello'` 21 | - single element (`RegExp` literal): `/Hello/` 22 | - array of elements: `['USD', oneOrMore(digit), /Hello/]` 23 | 24 | Regex constructs can be composed into a tree structure: 25 | 26 | ```ts 27 | const currencyCode = repeat(/[A-Z]/, 3); // or repeat(charRange('A', 'Z'), 3); 28 | 29 | const currencyAmount = buildRegExp([ 30 | choiceOf('$', '€', currencyCode), // currency 31 | capture( 32 | oneOrMore(digit), // integer part 33 | optional(['.', repeat(digit, 2)]), // fractional part 34 | ), 35 | ]); 36 | ``` 37 | 38 | See [Types](./api/types) for more info. 39 | 40 | ### Regex Builders 41 | 42 | | Builder | Regex Syntax | Description | 43 | | ---------------------------------------- | ------------ | ----------------------------------- | 44 | | `buildRegExp(...)` | `/.../` | Create `RegExp` instance | 45 | | `buildRegExp(..., { ignoreCase: true })` | `/.../i` | Create `RegExp` instance with flags | 46 | 47 | See [Builder](./api/builder) for more info. 48 | 49 | ### Regex Constructs 50 | 51 | | Construct | Regex Syntax | Notes | 52 | | ------------------- | ------------ | ------------------------------- | 53 | | `choiceOf(x, y, z)` | `x\|y\|z` | Match one of provided sequences | 54 | | `capture(...)` | `(...)` | Create a capture group | 55 | 56 | See [Constructs](./api/constructs) for more info. 57 | 58 | :::note 59 | 60 | TS Regex Builder does not have a construct for non-capturing groups. Such groups are implicitly added when required. 
61 | 62 | ::: 63 | 64 | ### Quantifiers 65 | 66 | | Quantifier | Regex Syntax | Description | 67 | | -------------------------------- | ------------ | ------------------------------------------------- | 68 | | `zeroOrMore(x)` | `x*` | Zero or more occurrences of a pattern | 69 | | `oneOrMore(x)` | `x+` | One or more occurrences of a pattern | 70 | | `optional(x)` | `x?` | Zero or one occurrence of a pattern | 71 | | `repeat(x, n)` | `x{n}` | Pattern repeats an exact number of times | 72 | | `repeat(x, { min: n, })` | `x{n,}` | Pattern repeats at least the given number of times | 73 | | `repeat(x, { min: n1, max: n2 })` | `x{n1,n2}` | Pattern repeats between n1 and n2 times | 74 | 75 | See [Quantifiers](./api/quantifiers) for more info. 76 | 77 | ### Assertions 78 | 79 | | Assertion | Regex Syntax | Description | 80 | | ------------------------- | ------------ | ------------------------------------------------------------------------ | 81 | | `startOfString` | `^` | Match the start of the string (or the start of a line in multiline mode) | 82 | | `endOfString` | `$` | Match the end of the string (or the end of a line in multiline mode) | 83 | | `wordBoundary` | `\b` | Match the start or end of a word without consuming characters | 84 | | `lookahead(...)` | `(?=...)` | Match subsequent text without consuming it | 85 | | `negativeLookahead(...)` | `(?!...)` | Reject subsequent text without consuming it | 86 | | `lookbehind(...)` | `(?<=...)` | Match preceding text without consuming it | 87 | | `negativeLookbehind(...)` | `(? 
39 | 40 | 41 | ```sh 42 | npm install ts-regex-builder 43 | ``` 44 | 45 | 46 | 47 | 48 | ```sh 49 | yarn add ts-regex-builder 50 | ``` 51 | 52 | 53 | 54 | 55 | ```sh 56 | pnpm add ts-regex-builder 57 | ``` 58 | 59 | 60 | 61 | 62 | ## Basic usage 63 | 64 | ```js 65 | import { buildRegExp, capture, oneOrMore, word } from 'ts-regex-builder'; 66 | 67 | // /Hello (\w+)/ 68 | const regex = buildRegExp(['Hello ', capture(oneOrMore(word))]); 69 | ``` 70 | 71 | ## Performance 72 | 73 | Regular expressions created with this library are executed at runtime, so you should avoid creating them in a context where they would need to be executed multiple times, e.g., inside loops or functions. We recommend that you create a top-level object for each required regex. 74 | 75 | ## License 76 | 77 | MIT 78 | 79 | ## Inspiration 80 | 81 | TS Regex Builder is inspired by [Swift Regex Builder API](https://developer.apple.com/documentation/regexbuilder). 82 | -------------------------------------------------------------------------------- /website/docusaurus.config.js: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | // `@type` JSDoc annotations allow editor autocompletion and type checking 3 | // (when paired with `@ts-check`). 4 | // There are various equivalent ways to declare your Docusaurus config. 5 | // See: https://docusaurus.io/docs/api/docusaurus-config 6 | 7 | import { themes as prismThemes } from 'prism-react-renderer'; 8 | 9 | /** @type {import('@docusaurus/types').Config} */ 10 | const config = { 11 | title: 'TS Regex Builder', 12 | tagline: 'Maintainable regular expressions for TypeScript and JavaScript.', 13 | favicon: 'img/favicon.ico', 14 | 15 | // Set the production url of your site here 16 | url: 'https://callstack.github.io', 17 | // Set the /<baseUrl>/ pathname under which your site is served 18 | // For GitHub pages deployment, it is often '/<projectName>/' 19 | baseUrl: '/ts-regex-builder/', 20 | 21 | // GitHub pages deployment config. 
22 | // If you aren't using GitHub pages, you don't need these. 23 | organizationName: 'callstack', // Usually your GitHub org/user name. 24 | projectName: 'ts-regex-builder', // Usually your repo name 25 | trailingSlash: false, 26 | 27 | onBrokenLinks: 'throw', 28 | onBrokenMarkdownLinks: 'warn', 29 | 30 | // Even if you don't use internationalization, you can use this field to set 31 | // useful metadata like html lang. For example, if your site is Chinese, you 32 | // may want to replace "en" with "zh-Hans". 33 | i18n: { 34 | defaultLocale: 'en', 35 | locales: ['en'], 36 | }, 37 | 38 | presets: [ 39 | [ 40 | 'classic', 41 | /** @type {import('@docusaurus/preset-classic').Options} */ 42 | ({ 43 | docs: { 44 | routeBasePath: '/', 45 | sidebarPath: './sidebars.js', 46 | sidebarCollapsible: false, 47 | }, 48 | theme: { 49 | customCss: './src/css/custom.css', 50 | }, 51 | }), 52 | ], 53 | ], 54 | 55 | themeConfig: 56 | /** @type {import('@docusaurus/preset-classic').ThemeConfig} */ 57 | ({ 58 | // Replace with your project's social card 59 | image: 'img/docusaurus-social-card.jpg', 60 | navbar: { 61 | title: 'TS Regex Builder', 62 | logo: { 63 | alt: 'TS Regex Builder', 64 | src: 'img/logo-200.png', 65 | }, 66 | items: [ 67 | { 68 | href: 'https://github.com/callstack/ts-regex-builder', 69 | label: 'GitHub', 70 | position: 'right', 71 | }, 72 | ], 73 | }, 74 | footer: { 75 | style: 'dark', 76 | links: [ 77 | { 78 | title: 'Docs', 79 | items: [ 80 | { 81 | label: 'Getting Started', 82 | to: '/', 83 | }, 84 | { 85 | label: 'API', 86 | to: '/api', 87 | }, 88 | { 89 | label: 'Examples', 90 | to: '/examples', 91 | }, 92 | ], 93 | }, 94 | ], 95 | copyright: `Copyright © ${new Date().getFullYear()} Maciej Jastrzębski. 
Built with Docusaurus.`, 96 | }, 97 | prism: { 98 | theme: prismThemes.github, 99 | darkTheme: prismThemes.dracula, 100 | additionalLanguages: ['bash'], 101 | }, 102 | }), 103 | }; 104 | 105 | export default config; 106 | -------------------------------------------------------------------------------- /website/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "website", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "docusaurus deploy", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids" 15 | }, 16 | "dependencies": { 17 | "@docusaurus/core": "3.3.2", 18 | "@docusaurus/preset-classic": "3.3.2", 19 | "@mdx-js/react": "^3.0.1", 20 | "clsx": "^2.1.1", 21 | "prism-react-renderer": "^2.3.1", 22 | "react": "^18.0.0", 23 | "react-dom": "^18.0.0" 24 | }, 25 | "devDependencies": { 26 | "@docusaurus/module-type-aliases": "3.3.2", 27 | "@docusaurus/types": "3.3.2" 28 | }, 29 | "browserslist": { 30 | "production": [ 31 | ">0.5%", 32 | "not dead", 33 | "not op_mini all" 34 | ], 35 | "development": [ 36 | "last 3 chrome version", 37 | "last 3 firefox version", 38 | "last 5 safari version" 39 | ] 40 | }, 41 | "engines": { 42 | "node": ">=18.0" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /website/sidebars.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Creating a sidebar enables you to: 3 | - create an ordered group of docs 4 | - render a sidebar for each doc of that group 5 | - provide next/previous navigation 6 | 7 | The sidebars can be generated from the filesystem, or explicitly defined here. 
8 | 9 | Create as many sidebars as you want. 10 | */ 11 | 12 | // @ts-check 13 | 14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */ 15 | export default { 16 | docs: [ 17 | { 18 | type: 'category', 19 | label: 'Introduction', 20 | collapsible: false, 21 | items: ['getting-started'], 22 | }, 23 | { 24 | type: 'category', 25 | label: 'API Reference', 26 | collapsible: false, 27 | items: [ 28 | 'api/overview', 29 | 'api/types', 30 | 'api/builder', 31 | 'api/constructs', 32 | 'api/captures', 33 | 'api/quantifiers', 34 | 'api/assertions', 35 | 'api/character-classes', 36 | 'api/unicode', 37 | ], 38 | }, 39 | { 40 | type: 'doc', 41 | label: 'Examples', 42 | id: 'examples', 43 | }, 44 | ], 45 | }; 46 | -------------------------------------------------------------------------------- /website/src/components/HomepageFeatures/index.js: -------------------------------------------------------------------------------- 1 | import clsx from 'clsx'; 2 | import Heading from '@theme/Heading'; 3 | import styles from './styles.module.css'; 4 | 5 | const FeatureList = [ 6 | { 7 | title: 'Structured Regexes', 8 | Svg: require('@site/static/img/undraw_docusaurus_react.svg').default, 9 | description: ( 10 | <> 11 | TS Regex Builder lets you build maintainable regular expressions even for complex use cases. 12 | 13 | ), 14 | }, 15 | { 16 | title: 'Easy to Use', 17 | Svg: require('@site/static/img/undraw_docusaurus_tree.svg').default, 18 | description: <>It's easy to learn, easy to read, and we have comprehensive docs too., 19 | }, 20 | 21 | { 22 | title: 'Minimal Footprint', 23 | Svg: require('@site/static/img/undraw_docusaurus_mountain.svg').default, 24 | description: <>It weights only 1.8 kB when minified and gzipped. Supports tree shaking., 25 | }, 26 | ]; 27 | 28 | function Feature({ Svg, title, description }) { 29 | return ( 30 |
31 |
32 | 33 |
34 |
35 | {title} 36 |

{description}

37 |
38 |
39 | ); 40 | } 41 | 42 | export default function HomepageFeatures() { 43 | return ( 44 |
45 |
46 |
47 | {FeatureList.map((props, idx) => ( 48 | 49 | ))} 50 |
51 |
52 |
53 | ); 54 | } 55 | -------------------------------------------------------------------------------- /website/src/components/HomepageFeatures/styles.module.css: -------------------------------------------------------------------------------- 1 | .features { 2 | display: flex; 3 | align-items: center; 4 | padding: 2rem 0; 5 | width: 100%; 6 | } 7 | 8 | .featureSvg { 9 | height: 200px; 10 | width: 200px; 11 | } 12 | -------------------------------------------------------------------------------- /website/src/css/custom.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Any CSS included here will be global. The classic template 3 | * bundles Infima by default. Infima is a CSS framework designed to 4 | * work well for content-centric websites. 5 | */ 6 | 7 | /* You can override the default Infima variables here. */ 8 | :root { 9 | --ifm-color-primary: #2e8555; 10 | --ifm-color-primary-dark: #29784c; 11 | --ifm-color-primary-darker: #277148; 12 | --ifm-color-primary-darkest: #205d3b; 13 | --ifm-color-primary-light: #33925d; 14 | --ifm-color-primary-lighter: #359962; 15 | --ifm-color-primary-lightest: #3cad6e; 16 | --ifm-code-font-size: 95%; 17 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1); 18 | } 19 | 20 | /* For readability concerns, you should choose a lighter palette in dark mode. 
*/ 21 | [data-theme='dark'] { 22 | --ifm-color-primary: #25c2a0; 23 | --ifm-color-primary-dark: #21af90; 24 | --ifm-color-primary-darker: #1fa588; 25 | --ifm-color-primary-darkest: #1a8870; 26 | --ifm-color-primary-light: #29d5b0; 27 | --ifm-color-primary-lighter: #32d8b4; 28 | --ifm-color-primary-lightest: #4fddbf; 29 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3); 30 | } 31 | -------------------------------------------------------------------------------- /website/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callstack/ts-regex-builder/8918ec1d6dd812d30c620617ceaf675ebc4bcf1b/website/static/.nojekyll -------------------------------------------------------------------------------- /website/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callstack/ts-regex-builder/8918ec1d6dd812d30c620617ceaf675ebc4bcf1b/website/static/img/favicon.ico -------------------------------------------------------------------------------- /website/static/img/logo-1024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callstack/ts-regex-builder/8918ec1d6dd812d30c620617ceaf675ebc4bcf1b/website/static/img/logo-1024.png -------------------------------------------------------------------------------- /website/static/img/logo-200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callstack/ts-regex-builder/8918ec1d6dd812d30c620617ceaf675ebc4bcf1b/website/static/img/logo-200.png --------------------------------------------------------------------------------