├── package.json ├── README.process.js ├── LICENSE ├── js-xre.js ├── README.source.md └── README.md /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "js-xre", 3 | "version": "0.1.2", 4 | "description": "Extended (and genuinely-multi-line) Regular Expressions in JavaScript using ES2015+ tagged template strings. ", 5 | "main": "js-xre.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1", 8 | "update-docs": "NODE_PATH=$NODE_PATH:. ./README.process.js" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "git+https://github.com/jawj/js-xre.git" 13 | }, 14 | "keywords": [ 15 | "regular", 16 | "expressions", 17 | "extended", 18 | "multiline" 19 | ], 20 | "author": "George MacKerron", 21 | "license": "MIT", 22 | "bugs": { 23 | "url": "https://github.com/jawj/js-xre/issues" 24 | }, 25 | "homepage": "https://github.com/jawj/js-xre", 26 | "devDependencies": { 27 | "intercept-stdout": "^0.1.2" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /README.process.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | // npm run update-docs 4 | 5 | // script that clumsily processes README to insert logged values as comments 6 | // at marked locations 7 | 8 | const [fs, intercept] = ['fs', 'intercept-stdout'].map(lib => require(lib)); 9 | 10 | const marker = /\/\*\*\//g; 11 | const jsSections = /```\s*js\b([\s\S]+?)```/gi; 12 | const separator = '--- 39e8e56c-1e57-4bb8-aee0-3df80b1a9fc6 ---'; 13 | const source = fs.readFileSync('README.source.md', 'utf-8'); 14 | const linePrefix = '// '; 15 | 16 | const target = source.replace(jsSections, (match, js) => { 17 | let sections = ['']; 18 | let instrumentedJs = js.replace(marker, `; console.log("${separator}");`); 19 | 20 | const unintercept = intercept(line => { 21 | if (line == separator + '\n') sections.push(''); 22 | else sections[sections.length - 1] += line.replace(/^(?!$)/gm, linePrefix); 23 | }); 24 | eval(instrumentedJs); 25 | unintercept(); 26 | 27 | let substitutedJS = js.replace(marker, () => sections.shift().trimRight()); 28 | return '```js' + substitutedJS + '```'; 29 | }); 30 | 31 | fs.writeFileSync('README.md', target); -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 – 2017 George MacKerron 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /js-xre.js: -------------------------------------------------------------------------------- 1 | // https://github.com/jawj/js-xre 2 | // Copyright (C) George MacKerron 2010 - 2017 3 | // MIT licenced 4 | 5 | const xRE = (function () { 6 | 7 | function xRE(literals, ...values) { // tag function that returns a tag function 8 | return (flagLiterals, ...flagValues) => { 9 | const flags = reassembleTemplate(flagLiterals, flagValues); 10 | const x = flags.indexOf('x') > -1; 11 | const mm = flags.indexOf('mm') > -1; 12 | const escapeValues = flags.indexOf('b') > -1; 13 | const valueTransform = escapeValues ? xRE.escape : undefined; 14 | const extendedSource = reassembleTemplate( 15 | literals, values, true, undefined, valueTransform); 16 | const nativeSource = transpile(extendedSource, x, mm); 17 | const nativeFlags = flags.replace(/x|b/g, '').replace('mm', 'm'); 18 | return new RegExp(nativeSource, nativeFlags); 19 | } 20 | } 21 | 22 | xRE.escape = (source) => 23 | String(source).replace(/[-\/\\^$.*+?()[\]{}|]/g, '\\$&'); 24 | 25 | reassembleTemplate = (literals, values, raw = false, 26 | literalTransform = String, valueTransform = String) => { 27 | if (typeof literals === 'string') return literals; 28 | if (raw) literals = literals.raw; 29 | let s = literalTransform(literals[0]); 30 | for (let i = 1, len = literals.length; i < len; i++) s += 31 | valueTransform(values[i - 1]) + literalTransform(literals[i]); 32 | return s; 33 | } 34 | 35 | transpile = (source, x, m) => { 36 | if (!x && !m) return source; 37 | const len = source.length; 38 | let convertedSource = '', inCharClass = false, inComment = false, justBackslashed = false; 39 | for (let i = 0; i < len; i++) { 40 | let c = source.charAt(i); 41 | if (justBackslashed) { 42 | if (!inComment) convertedSource += c; 43 | justBackslashed = false; 44 | continue; 45 | } 46 | if (c == '\\') { 47 | if (!inComment) convertedSource += c; 48 | justBackslashed = true; 49 | continue; 50 | } 51 | if (inCharClass) { 52 | convertedSource += c; 53 | if (c == ']') inCharClass = false; 54 | continue; 55 | } 56 | if (inComment) { 57 | if (c == "\n" || c == "\r") inComment = false; 58 | continue; 59 | } 60 | if (c == '[') { 61 | convertedSource += c; 62 | inCharClass = true; 63 | continue; 64 | } 65 | if (x && c == '#') { 66 | inComment = true; 67 | continue; 68 | } 69 | if (m && c == '.') { 70 | convertedSource += '[\\s\\S]'; 71 | continue; 72 | } 73 | if (!x || !c.match(/\s/)) convertedSource += c; 74 | } 75 | return convertedSource; 76 | } 77 | 78 | return xRE; 79 | })(); 80 | 81 | if (typeof module !== 'undefined' && typeof module.exports !== 'undefined') module.exports = xRE; 82 | -------------------------------------------------------------------------------- /README.source.md: -------------------------------------------------------------------------------- 1 | # xRE: extended RegExps for JavaScript ES2015+ 2 | 3 | Extended Regular Expressions in JavaScript using, ES2015+ tagged template literals. 4 | 5 | Small: < 1 KB gzipped. Focused: it doesn't do a lot else (disclosure: it does do properly multiline expressions too). And forward-looking (which is a nice way of saying you'll need a recent node version or modern browser to enjoy it). 6 | 7 | ## Installation and use 8 | 9 | ### Browser 10 | 11 | ```html 12 | 13 | 16 | ``` 17 | 18 | ### Node 19 | 20 | `npm install js-xre` 21 | 22 | then 23 | 24 | ```javascript 25 | const xRE = require('js-xre'); 26 | const myRegExp = xRE `^\d$ # just one digit` `x`; 27 | ``` 28 | 29 | ## What's an extended RegExp? 30 | 31 | Perl, Ruby, and some other languages support a readable _extended_ regular expression syntax, in which literal whitespace is ignored and comments (starting with `#`) are available. This is triggered with the `x` flag. 32 | 33 | (Don't confuse this with the 'extended' expressions of `egrep`, which are just modern regular expressions. The sort of extended expressions I am talking about might perhaps be better be described as _commented_ or even _literate_). 34 | 35 | For example, as far as Ruby is concerned, 36 | 37 | ```regexp 38 | /\d(?=(\d{3})+\b)/ 39 | ``` 40 | 41 | and 42 | 43 | ```regexp 44 | /(?x) 45 | \d # a digit 46 | (?= # followed by (look-ahead match) 47 | (\d{3})+ # one or more sets of three digits 48 | \b # and then a word boundary 49 | ) 50 | / 51 | ``` 52 | 53 | are equivalent. For humans, however, the extended second version is obviously much easier to get to grips with. 54 | 55 | These languages also support a properly multi-line match mode, where the `.` character really does match anything, including `\n`. 56 | 57 | ## JS: no dice — 58 | 59 | JavaScript traditionally offers neither of these options. 60 | 61 | It doesn’t recognise the extended syntax, and its multi-line support consists only in permitting the `^` and `$` characters to match the beginnings and ends of lines within a string. It will never allow the `.` to match `\n`. 62 | 63 | I first wrote a function to convert extended and fully-multi-line RegExp source strings to standard syntax [in 2010](http://blog.mackerron.com/2010/08/08/extended-multi-line-js-regexps/). But it was tricky and error-prone to use it, because a standard JS string can't span multiple lines and you would have to backslash-escape all the backslashes. 64 | 65 | ## — until now 66 | 67 | ES2015's pleasingly flexible [tagged template literals](https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Template_literals) now make this a genuinely usable and useful capability. 68 | 69 | As implemented here, the syntax is: 70 | 71 | ```javascript 72 | xRE `myregexp` `flags` 73 | ``` 74 | 75 | (Note: the `flags` argument is required — to specify no flags, use an empty literal, ``` `` ```). 76 | 77 | In addition to the standard flags (`i`, `g`, `m`, `y`, `u`), which are passed straight through to the native `RegExp`, three additional flags are provided: 78 | 79 | * `x` activates extended mode, stripping out whitespace and comments 80 | * `mm` activates genuinely-multi-line mode, where `.` matches anything, including newlines (achieved by replacing `.` with `[\s\S]`) 81 | * `b` is for backslashes, and automatically escapes all template expressions so they are treated as literal text (alternatively, an `xRE.escape` method is provided so this can be done case-by-case). 82 | 83 | ## Alternatives 84 | 85 | You should also check out [XRegExp](http://xregexp.com/), an impressive library that takes a rather more and-the-kitchen-sink approach. The complete version of XRegExp is 62 KB gzipped, against this library's few hundred bytes. 86 | 87 | 88 | ## Examples 89 | 90 | ### `x` for extended 91 | 92 | An simple example with the extended flag `x`: 93 | 94 | ```js 95 | const xRE = require('js-xre'); 96 | 97 | const digitsThatNeedSeparators = xRE ` 98 | \d # a digit 99 | (?= # followed by (look-ahead match) 100 | (\d{3})+ # one or more sets of three digits 101 | \b # and then a word boundary 102 | ) 103 | ` `xg`; 104 | 105 | console.log(digitsThatNeedSeparators); 106 | /**/ 107 | 108 | const separate000s = (n, sep = '\u202f') => 109 | String(n).replace(digitsThatNeedSeparators, '$&' + sep); 110 | 111 | console.log(separate000s(1234567)); 112 | /**/ 113 | ``` 114 | 115 | And a monstrously complex example: [Daring Fireball's URL RegExp](http://daringfireball.net/2010/07/improved_regex_for_matching_urls): 116 | 117 | ```js 118 | const xRE = require('js-xre'); 119 | 120 | const url = xRE ` 121 | \b 122 | (?: 123 | [a-z][\w-]+: # URL protocol and colon 124 | (?: 125 | /{1,3} # 1-3 slashes 126 | | # or 127 | [a-z0-9%] # single letter or digit or '%' 128 | # (trying not to match e.g. "URI::Escape") 129 | ) 130 | | # or 131 | www\d{0,3}[.] # "www.", "www1.", "www2." … "www999." 132 | | # or 133 | [a-z0-9.\-]+[.][a-z]{2,4}/ # looks like domain name followed by a slash 134 | ) 135 | (?: # one or more: 136 | [^\s()<>]+ # run of non-space, non-()<> 137 | | # or 138 | \(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels 139 | )+ 140 | (?: # end with: 141 | \(([^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels 142 | | # or 143 | [^\s\`!()\[\]{};:'".,<>?«»“”‘’] # not a space or one of these punct chars 144 | ) 145 | ` `xig`; 146 | 147 | console.log(url); 148 | /**/ 149 | 150 | console.log('Please visit http://mackerron.com.'.replace(url, '$&')); 151 | /**/ 152 | ``` 153 | 154 | ### `mm` for massively multiline 155 | 156 | Serious HTML wrangling should be done with XPath or similar, of course. But: 157 | 158 | ```js 159 | const xRE = require('js-xre'); 160 | 161 | const html = ` 162 |
A paragraph on one line.
163 |A paragraph which, by contrast, 164 | spans multiple lines.
165 | `; 166 | 167 | const mPara = xRE `` `mg`; 168 | console.log(mPara); 169 | /**/ 170 | 171 | console.log(html.match(mPara)); 172 | /**/ 173 | 174 | const mmPara = xRE `
` `mmg`; // note: mm 175 | console.log(mmPara); 176 | /**/ 177 | 178 | console.log(html.match(mmPara)); 179 | /**/ 180 | ``` 181 | 182 | ### `b` for backslashes 183 | 184 | Since our syntax for extended regular expressions uses template strings, you can interpolate any `${value}` in there. The `b` flag causes all values to be automatically escaped, so that they're treated as literal text rather then metacharacters. 185 | 186 | For example, say you're allowing users to type in something to find all matches: 187 | 188 | ```js 189 | const xRE = require('js-xre'); 190 | 191 | const searchText = '12.6'; // this might come from an field 192 | const search = xRE `^${searchText}$` `bg`; 193 | 194 | console.log(search); 195 | /**/ 196 | ``` 197 | 198 | The alternative (useful if you want to mix-and-match your escaping for any reason) is to use the `escape` method of the main function: 199 | 200 | ```js 201 | const xRE = require('js-xre'); 202 | 203 | const searchText = '12.6'; // might come from an 204 | const anchorStart = true; // might come from an 205 | const anchorEnd = false; // might come from an 206 | 207 | const search = xRE ` 208 | ${anchorStart ? '^' : ''} 209 | ${xRE.escape(searchText)} 210 | ${anchorEnd ? '$' : ''} 211 | ` `gx`; 212 | 213 | console.log(search); 214 | /**/ 215 | ``` 216 | 217 | ## Usage as a regular function 218 | 219 | `xRE` can also be called as a regular (non-tagged-template) function. This could be useful if you wanted to create an extended regular expression based on user input in a `