├── .travis.yml ├── .gitignore ├── regex-source.ts ├── tsconfig.json ├── tsconfig.build.json ├── is-atomic-test.ts ├── package.json ├── is-atomic.ts ├── index.ts ├── index-test.ts └── readme.md /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "8" 4 | - "10" 5 | - "12" 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .eslintcache 3 | 4 | # Build artifacts 5 | *.js 6 | *.d.ts 7 | -------------------------------------------------------------------------------- /regex-source.ts: -------------------------------------------------------------------------------- 1 | export default (regex: RegExp | string): string => regex instanceof RegExp ? regex.source : regex 2 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "NodeNext", 5 | "moduleResolution": "NodeNext", 6 | "strict": true, 7 | "noEmit": true, 8 | "skipLibCheck": true, 9 | "allowImportingTsExtensions": true 10 | }, 11 | "include": ["*.ts"] 12 | } 13 | -------------------------------------------------------------------------------- /tsconfig.build.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "noEmit": false, 5 | "declaration": true, 6 | "allowImportingTsExtensions": false, 7 | "rewriteRelativeImportExtensions": true 8 | }, 9 | "include": ["index.ts", "is-atomic.ts", "regex-source.ts"] 10 | } 11 | -------------------------------------------------------------------------------- /is-atomic-test.ts: -------------------------------------------------------------------------------- 1 | 
import { strict as assert } from 'node:assert' 2 | 3 | import isAtomic from './is-atomic.ts' 4 | 5 | const assertAtomic = (regex: RegExp): void => { 6 | assert.equal(isAtomic(regex), true, `${regex.source} should be atomic`) 7 | } 8 | const assertNotAtomic = (regex: RegExp): void => { 9 | assert.equal(isAtomic(regex), false, `${regex.source} should not be atomic`) 10 | } 11 | 12 | const test = (description: string, fn: () => void): void => { 13 | fn() 14 | console.log(`"${description}" passed`) 15 | } 16 | 17 | test(`atomic regexes`, () => { 18 | assertAtomic(/(wat)/) 19 | assertAtomic(/[wat]/) 20 | assertAtomic(/(oh(what)now)/) 21 | assertAtomic(/([wat])/) 22 | assertAtomic(/[(wat)]/) 23 | 24 | assertAtomic(/a/) 25 | }) 26 | 27 | test(`non-atomic regex`, () => { 28 | assertNotAtomic(/(wat)*/) 29 | assertNotAtomic(/[wat][oh]/) 30 | assertNotAtomic(/now(oh(what))/) 31 | assertNotAtomic(/(ok)([wat])/) 32 | 33 | assertNotAtomic(/aa/) 34 | }) 35 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "regex-fun", 3 | "version": "3.1.0", 4 | "description": "Build regular expressions with functions", 5 | "main": "index.js", 6 | "types": "index.d.ts", 7 | "type": "module", 8 | "files": [ 9 | "*.js", 10 | "*.d.ts" 11 | ], 12 | "scripts": { 13 | "build": "tsc -p tsconfig.build.json", 14 | "test": "node index-test.ts && node is-atomic-test.ts", 15 | "test:types": "tsc --noEmit", 16 | "test-readme": "jsmd readme.md # I would like to get this working again" 17 | }, 18 | "repository": { 19 | "type": "git", 20 | "url": "git+https://github.com/TehShrike/regex-fun.git" 21 | }, 22 | "keywords": [ 23 | "regex", 24 | "compose", 25 | "functions", 26 | "regular", 27 | "expressions" 28 | ], 29 | "author": "TehShrike", 30 | "license": "WTFPL", 31 | "bugs": { 32 | "url": "https://github.com/TehShrike/regex-fun/issues" 33 | }, 34 | "homepage": 
"https://github.com/TehShrike/regex-fun#readme", 35 | "devDependencies": { 36 | "@types/node": "^24.10.1", 37 | "jsmd": "^2.0.0", 38 | "typescript": "^5.7.2" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /is-atomic.ts: -------------------------------------------------------------------------------- 1 | import regexSource from './regex-source.ts' 2 | 3 | const closingCharacters: Record = { 4 | '(': `)`, 5 | '[': `]`, 6 | } 7 | 8 | export default function isAtomic(regex: RegExp | string): boolean { 9 | const string = regexSource(regex) 10 | 11 | return /^\w$/.test(string) || enclosedByTopLevelCharacters(string) 12 | } 13 | 14 | function enclosedByTopLevelCharacters(string: string): boolean { 15 | const openingCharacter = string[0] 16 | const closingCharacter = closingCharacters[openingCharacter] 17 | 18 | 19 | const closedByAppropriateCharacter = closingCharacter !== undefined 20 | && string[string.length - 1] === closingCharacter 21 | 22 | 23 | if (!closedByAppropriateCharacter) { 24 | return false 25 | } 26 | 27 | return !isClosedBeforeEndOfString(string, openingCharacter, closingCharacter) 28 | } 29 | 30 | 31 | function isClosedBeforeEndOfString(string: string, openingCharacter: string, closingCharacter: string): boolean { 32 | let depth = 0 33 | 34 | for (let characterIndex = 0; characterIndex < string.length - 1; ++characterIndex) { 35 | depth = calculateNewDepth(depth, openingCharacter, closingCharacter, string[characterIndex]) 36 | if (depth === 0) { 37 | return true 38 | } 39 | } 40 | 41 | return false 42 | } 43 | 44 | function calculateNewDepth(previousDepth: number, openingCharacter: string, closingCharacter: string, character: string): number { 45 | if (character === openingCharacter) { 46 | return previousDepth + 1 47 | } else if (character === closingCharacter) { 48 | return previousDepth - 1 49 | } else { 50 | return previousDepth 51 | } 52 | } 53 | 
import isAtomic from './is-atomic.ts'
import regexSource from './regex-source.ts'

/**
 * A pattern fragment accepted by every builder. A `RegExp` contributes its
 * `source` unchanged (its flags are ignored); a string is matched literally,
 * with regex metacharacters escaped.
 */
type RegexInput = RegExp | string

/**
 * Concatenates the inputs, in order, into a single `RegExp` without flags.
 * Inputs containing a `|` are wrapped in a non-capturing group first so the
 * alternation cannot swallow neighbouring fragments.
 */
export const combine = returnsRegex((...args: RegexInput[]) => escapeInputForCombining(...args).join(``))

/**
 * Returns regex source that is safe to quantify: the input's source as-is when
 * it is already atomic, otherwise wrapped in `(?:...)`.
 * A string argument is treated as regex source here, NOT as a literal.
 */
const guaranteeAtomic = (regex: RegexInput): string => {
	const source = regexSource(regex)
	return isAtomic(regex) ? source : `(?:${source})`
}

/** Backslash-escapes every regex metacharacter in a literal string. */
const escapeRegex = (str: string): string => str.replace(/[.?*+^$[\]\\(){}|-]/g, `\\$&`)

/**
 * Dispatches on the runtime kind of `input`: calls `ifCase` for a `RegExp`,
 * `elseIfCase` for a string, and returns whichever result was produced.
 */
const ifRegex = <T>(input: RegexInput, ifCase: (r: RegExp) => T, elseIfCase: (s: string) => T): T => {
	if (input instanceof RegExp) {
		return ifCase(input)
	}
	return elseIfCase(input)
}

/** Turns a user-supplied input into regex source: `source` for a `RegExp`, escaped text for a string. */
const escapeInputAndReturnString = (regex: RegexInput): string => ifRegex(regex, r => r.source, escapeRegex)

/**
 * Strips one outer `(?:` ... `)` wrapper if the string starts and ends with one.
 * Callers must first make sure the wrapper really encloses the whole string.
 */
function removeNonCapturingGroupIfExists(regexString: string): string {
	const match = /^\(\?:(.+)\)$/.exec(regexString)
	return match ? match[1] : regexString
}

/**
 * Makes a fragment safe to concatenate: if it contains a `|` anywhere it is
 * made atomic. The check is a plain substring test, so nested or escaped
 * pipes also trigger it.
 */
function guaranteeNoTopLevelOrs(regexString: string): string {
	return regexString.includes(`|`) ? guaranteeAtomic(regexString) : regexString
}

/** Converts each input to regex source and protects any that contain alternation. */
function escapeInputForCombining(...args: RegexInput[]): string[] {
	return args.map(input => guaranteeNoTopLevelOrs(escapeInputAndReturnString(input)))
}

/**
 * Wraps a builder so that it always yields a `RegExp`: a `RegExp` result is
 * passed through, a string result is compiled with `new RegExp` (no flags).
 */
function returnsRegex(fn: (...args: RegexInput[]) => RegexInput): (...args: RegexInput[]) => RegExp {
	return (...args: RegexInput[]) => ifRegex(fn(...args), regex => regex, input => new RegExp(input))
}

/**
 * Creates a builder that joins its inputs with `joinCharacter` and surrounds
 * the result with `openingCharacter` / `closingCharacter`.
 * If the joined body is itself a single non-capturing group, that redundant
 * group is removed before the new delimiters are applied.
 */
function makeJoiningFunction(openingCharacter: string, joinCharacter: string, closingCharacter: string): (...args: RegexInput[]) => RegExp {
	return returnsRegex((...args: RegexInput[]) => {
		const naiveBody = escapeInputForCombining(...args).join(joinCharacter)
		const body = isAtomic(naiveBody) ? removeNonCapturingGroupIfExists(naiveBody) : naiveBody

		return concat(openingCharacter, body, closingCharacter)
	})
}

/**
 * Creates a builder that combines its inputs, makes the result atomic, and
 * appends `appendCharacter` (a quantifier such as `*` or `{2,3}?`).
 */
function suffix(appendCharacter: string): (...args: RegexInput[]) => RegExp {
	return returnsRegex((...args: RegexInput[]) => concat(guaranteeAtomic(combine(...args)), appendCharacter))
}

/** Joins regex sources verbatim; strings are treated as source and are NOT escaped. */
function concat(...regexes: RegexInput[]): string {
	return regexes.map(regexSource).join(``)
}

/** Combines the inputs and compiles the result with the given flag string. */
export const flags = (flags: string, ...args: RegexInput[]): RegExp => new RegExp(combine(...args).source, flags)
/** Matches any one of the inputs: `(?:a|b|c)`. */
export const either = makeJoiningFunction(`(?:`, `|`, `)`)
/** Concatenates the inputs inside a capturing group: `(abc)`. */
export const capture = makeJoiningFunction(`(`, ``, `)`)
/** Zero or more repetitions, greedy: `*`. */
export const anyNumber = suffix(`*`)
/** One or more repetitions, greedy: `+`. */
export const oneOrMore = suffix(`+`)
/** Zero or one occurrence, greedy: `?`. */
export const optional = suffix(`?`)
/** Exactly `n` repetitions: `{n}`. */
export const exactly = (n: number, ...regexes: RegexInput[]): RegExp => suffix(`{${n}}`)(...regexes)
/** At least `n` repetitions, greedy: `{n,}`. */
export const atLeast = (n: number, ...regexes: RegexInput[]): RegExp => suffix(`{${n},}`)(...regexes)
/** Between `n` and `m` repetitions, greedy: `{n,m}`. */
export const between = (n: number, m: number, ...regexes: RegexInput[]): RegExp => suffix(`{${n},${m}}`)(...regexes)
/** Zero or more repetitions, non-greedy: `*?`. */
export const anyNumberNonGreedy = suffix(`*?`)
/** One or more repetitions, non-greedy: `+?`. */
export const oneOrMoreNonGreedy = suffix(`+?`)
/** Zero or one occurrence, non-greedy: `??`. */
export const optionalNonGreedy = suffix(`??`)
/** Exactly `n` repetitions with the non-greedy marker: `{n}?`. */
export const exactlyNonGreedy = (n: number, ...regexes: RegexInput[]): RegExp => suffix(`{${n}}?`)(...regexes)
/** At least `n` repetitions, non-greedy: `{n,}?`. */
export const atLeastNonGreedy = (n: number, ...regexes: RegexInput[]): RegExp => suffix(`{${n},}?`)(...regexes)
/** Between `n` and `m` repetitions, non-greedy: `{n,m}?`. */
export const betweenNonGreedy = (n: number, m: number, ...regexes: RegexInput[]): RegExp => suffix(`{${n},${m}}?`)(...regexes)
anyNumber, 10 | oneOrMore, 11 | optional, 12 | exactly, 13 | atLeast, 14 | between, 15 | 16 | anyNumberNonGreedy, 17 | oneOrMoreNonGreedy, 18 | optionalNonGreedy, 19 | exactlyNonGreedy, 20 | atLeastNonGreedy, 21 | betweenNonGreedy, 22 | } from './index.ts' 23 | 24 | const compareRegex = () => (actual: RegExp, expected: RegExp, description = `Matching ${expected.toString()}`): void => { 25 | assert.equal(actual.toString(), expected.toString(), description) 26 | } 27 | 28 | const test = (description: string, fn: (assertEquals: (actual: RegExp, expected: RegExp, description?: string) => void) => void): void => { 29 | const assertEquals = compareRegex() 30 | fn(assertEquals) 31 | console.log(`"${description}" passed`) 32 | } 33 | 34 | test(`combine`, assertEquals => { 35 | assertEquals(combine(/a/, /b/, /c/), /abc/) 36 | assertEquals(combine(/ab/, /c/), /abc/) 37 | }) 38 | 39 | test(`suffix`, assertEquals => { 40 | assertEquals(anyNumber(/wat/), /(?:wat)*/) 41 | assertEquals(anyNumber(`wat*`), /(?:wat\*)*/) 42 | assertEquals(anyNumber(`wat*`, /yarp/), /(?:wat\*yarp)*/) 43 | 44 | assertEquals(oneOrMore(/wat/), /(?:wat)+/) 45 | assertEquals(oneOrMore(`wat*`), /(?:wat\*)+/) 46 | assertEquals(oneOrMore(`wat*`, /yarp/), /(?:wat\*yarp)+/) 47 | 48 | assertEquals(optional(/wat/), /(?:wat)?/) 49 | assertEquals(optional(`wat*`), /(?:wat\*)?/) 50 | assertEquals(optional(`wat*`, /yarp/), /(?:wat\*yarp)?/) 51 | 52 | assertEquals(exactly(3, /wat/), /(?:wat){3}/) 53 | assertEquals(exactly(2, `wat*`), /(?:wat\*){2}/) 54 | assertEquals(exactly(2, `wat*`, /yarp/), /(?:wat\*yarp){2}/) 55 | 56 | assertEquals(atLeast(3, /wat/), /(?:wat){3,}/) 57 | assertEquals(atLeast(2, `wat*`), /(?:wat\*){2,}/) 58 | assertEquals(atLeast(2, `wat*`, /yarp/), /(?:wat\*yarp){2,}/) 59 | 60 | assertEquals(between(2, 3, /wat/), /(?:wat){2,3}/) 61 | assertEquals(between(3, 4, `wat*`), /(?:wat\*){3,4}/) 62 | assertEquals(between(3, 4, `wat*`, /yarp/), /(?:wat\*yarp){3,4}/) 63 | 64 | 
assertEquals(anyNumberNonGreedy(/wat/), /(?:wat)*?/) 65 | assertEquals(anyNumberNonGreedy(`wat*`), /(?:wat\*)*?/) 66 | assertEquals(anyNumberNonGreedy(`wat*`, /yarp/), /(?:wat\*yarp)*?/) 67 | 68 | assertEquals(oneOrMoreNonGreedy(/wat/), /(?:wat)+?/) 69 | assertEquals(oneOrMoreNonGreedy(`wat*`), /(?:wat\*)+?/) 70 | assertEquals(oneOrMoreNonGreedy(`wat*`, /yarp/), /(?:wat\*yarp)+?/) 71 | 72 | assertEquals(optionalNonGreedy(/wat/), /(?:wat)??/) 73 | assertEquals(optionalNonGreedy(`wat*`), /(?:wat\*)??/) 74 | assertEquals(optionalNonGreedy(`wat*`, /yarp/), /(?:wat\*yarp)??/) 75 | 76 | assertEquals(exactlyNonGreedy(3, /wat/), /(?:wat){3}?/) 77 | assertEquals(exactlyNonGreedy(2, `wat*`), /(?:wat\*){2}?/) 78 | assertEquals(exactlyNonGreedy(2, `wat*`, /yarp/), /(?:wat\*yarp){2}?/) 79 | 80 | assertEquals(atLeastNonGreedy(3, /wat/), /(?:wat){3,}?/) 81 | assertEquals(atLeastNonGreedy(2, `wat*`), /(?:wat\*){2,}?/) 82 | assertEquals(atLeastNonGreedy(2, `wat*`, /yarp/), /(?:wat\*yarp){2,}?/) 83 | 84 | assertEquals(betweenNonGreedy(2, 3, /wat/), /(?:wat){2,3}?/) 85 | assertEquals(betweenNonGreedy(3, 4, `wat*`), /(?:wat\*){3,4}?/) 86 | assertEquals(betweenNonGreedy(3, 4, `wat*`, /yarp/), /(?:wat\*yarp){3,4}?/) 87 | }) 88 | 89 | test(`flags`, assertEquals => { 90 | assertEquals(flags(`g`, `butts`), /butts/g) 91 | assertEquals(flags(`g`, `yarp`, /butts/), /yarpbutts/g) 92 | }) 93 | 94 | test(`capture`, assertEquals => { 95 | assertEquals(capture(/whatever/), /(whatever)/) 96 | assertEquals(capture(/(wh)/, /[at]/, /ever/), /((wh)[at]ever)/) 97 | }) 98 | 99 | test(`either`, assertEquals => { 100 | assertEquals(either(`*`, /a*/, `a`), /(?:\*|a*|a)/) 101 | }) 102 | 103 | test(`some composition`, assertEquals => { 104 | assertEquals(optional(combine(/a/, /bc/)), /(?:abc)?/) 105 | assertEquals(optional(either(/a/, /bc/)), /(?:a|bc)?/) 106 | 107 | assertEquals(capture(either(/a/, /bc/)), /(a|bc)/) 108 | 109 | assertEquals(either(`sup+`, either(/a/, /bc/)), /(?:sup\+|(?:a|bc))/) 110 | 111 | 
assertEquals(capture(either(/butts/, /lol|buttocks/)), /(butts|(?:lol|buttocks))/) 112 | }) 113 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | Build regular expressions with functions. 2 | 3 | # Quick example 4 | 5 | 28 | 29 | ```js 30 | const anyGreeting = either('howdy', 'hi', 'hey') 31 | const regex = combine(anyGreeting, optional(','), ' ', capture(/\w+/)) 32 | regex // => /(?:howdy|hi|hey)(?:,)? (\w+)/ 33 | 'hey bub'.match(regex)[1] // => 'bub' 34 | ``` 35 | 36 | # API 37 | 38 | Functions return a regular expression without flags. If you want any flags, call the `flags` function last. 39 | 40 | Regular expression `input` may be either a `RegExp` or a string. If it is a string, regex characters will be escaped - `anyNumber('a+')` will match any number of occurrences of the literal text `a+` in a string (`/(?:a\+)*/`). 41 | 42 | ``` 43 | import { 44 | combine, 45 | flags, 46 | capture, 47 | either, 48 | 49 | anyNumber, 50 | oneOrMore, 51 | optional, 52 | exactly, 53 | atLeast, 54 | between, 55 | 56 | anyNumberNonGreedy, 57 | oneOrMoreNonGreedy, 58 | optionalNonGreedy, 59 | exactlyNonGreedy, 60 | atLeastNonGreedy, 61 | betweenNonGreedy, 62 | 63 | } from 'regex-fun' 64 | 65 | // or 66 | 67 | import * as r from 'regex-fun' 68 | ``` 69 | ## `combine(...input)` 70 | 71 | ```js 72 | combine(/sup/, 'd*g') // => /supd\*g/ 73 | ``` 74 | 75 | ## `either(...input)` 76 | 77 | ```js 78 | either(/this/, /that/, 'other thing') // => /(?:this|that|other thing)/ 79 | ``` 80 | 81 | ## `capture(...input)` 82 | 83 | ```js 84 | capture(/\w+/, either('this', 'that')) // => /(\w+(?:this|that))/ 85 | ``` 86 | 87 | ## `flags(flags, ...input)` 88 | 89 | ```js 90 | flags('gm', /HOWDY/i) // => /HOWDY/gm 91 | ``` 92 | 93 | ## Greedy matching 94 | 95 | ### `anyNumber(...input)` 96 | 97 | ```js 98 | anyNumber('wat') // => /(?:wat)*/ 99 | ``` 100 | 101 | ### `oneOrMore(...input)` 102 
| 103 | ```js 104 | oneOrMore('wat') // => /(?:wat)+/ 105 | ``` 106 | 107 | ### `optional(...input)` 108 | 109 | ```js 110 | optional('wat') // => /(?:wat)?/ 111 | ``` 112 | 113 | ### `exactly(n, ...input)` 114 | 115 | ```js 116 | exactly(2, 'wat') // => /(?:wat){2}/ 117 | ``` 118 | 119 | ### `atLeast(n, ...input)` 120 | 121 | ```js 122 | atLeast(3, 'wat') // => /(?:wat){3,}/ 123 | ``` 124 | 125 | ### `between(n, m, ...input)` 126 | 127 | ```js 128 | between(4, 5, 'wat') // => /(?:wat){4,5}/ 129 | ``` 130 | 131 | ## Non-greedy matching 132 | 133 | ### `anyNumberNonGreedy(...input)` 134 | 135 | ```js 136 | anyNumberNonGreedy('wat') // => /(?:wat)*?/ 137 | ``` 138 | 139 | ### `oneOrMoreNonGreedy(...input)` 140 | 141 | ```js 142 | oneOrMoreNonGreedy('wat') // => /(?:wat)+?/ 143 | ``` 144 | 145 | ### `optionalNonGreedy(...input)` 146 | 147 | ```js 148 | optionalNonGreedy('wat') // => /(?:wat)??/ 149 | ``` 150 | 151 | ### `exactlyNonGreedy(n, ...input)` 152 | 153 | ```js 154 | exactlyNonGreedy(2, 'wat') // => /(?:wat){2}?/ 155 | ``` 156 | 157 | ### `atLeastNonGreedy(n, ...input)` 158 | 159 | ```js 160 | atLeastNonGreedy(3, 'wat') // => /(?:wat){3,}?/ 161 | ``` 162 | 163 | ### `betweenNonGreedy(n, m, ...input)` 164 | 165 | ```js 166 | betweenNonGreedy(4, 5, 'wat') // => /(?:wat){4,5}?/ 167 | ``` 168 | 169 | # Put it all together and you can do some cool stuff 170 | 171 | This example is from [verse-reference-regex](https://github.com/tehshrike/verse-reference-regex), which finds and parses Bible verse ranges like "Revelation 13:5-6": 172 | 173 | 177 | 178 | ```js 179 | const requireVerse = true 180 | 181 | const number = /(\d+)/ 182 | const numberAndOptionalLetter = /(\d+)([a-z])?/ 183 | const colonVerse = combine(':', numberAndOptionalLetter) 184 | const chapterAndVerse = combine(number, requireVerse ? 
colonVerse : optional(colonVerse)) 185 | 186 | const secondHalfOfRange = combine( 187 | '-', 188 | either( 189 | /([a-z])/, 190 | /(\d+)([a-z])/, 191 | chapterAndVerse, 192 | numberAndOptionalLetter 193 | ) 194 | ) 195 | const range = combine(chapterAndVerse, optional(secondHalfOfRange)) 196 | 197 | const regexThatMatchesVerses = combine( 198 | capture(either(...bookNames, ...abbreviations)), 199 | ' ', 200 | range 201 | ) 202 | ``` 203 | 204 | If you see a function missing, open a pull request; otherwise I'll add new functions as I need them. 205 | 206 | # License 207 | 208 | [WTFPL](http://wtfpl2.com) 209 | --------------------------------------------------------------------------------