├── screen.png ├── .travis.yml ├── package.json ├── LICENSE.txt ├── lib ├── xregexp-xescape.js └── xregexp-lookbehind.js ├── README.md ├── test.js └── index.js /screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcoe/onigurumajs/HEAD/screen.png -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "0.10" 4 | - "0.12" 5 | - "4.1" 6 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "onigurumajs", 3 | "version": "1.0.0", 4 | "description": "a pure JavaScript port of the oniguruma regex engine", 5 | "main": "index.js", 6 | "scripts": { 7 | "pretest": "standard", 8 | "test": "tap --coverage test.js" 9 | }, 10 | "keywords": [ 11 | "regex", 12 | "oniguruma", 13 | "javascript" 14 | ], 15 | "author": "Ben Coe ", 16 | "license": "ISC", 17 | "dependencies": { 18 | "lodash": "^3.10.1", 19 | "xregexp": "^3.0.0" 20 | }, 21 | "devDependencies": { 22 | "standard": "^5.3.1", 23 | "tap": "^2.2.0" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Contributors 2 | 3 | Permission to use, copy, modify, and/or distribute this software 4 | for any purpose with or without fee is hereby granted, provided 5 | that the above copyright notice and this permission notice 6 | appear in all copies. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES 10 | OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE 11 | LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES 12 | OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 13 | WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 14 | ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | -------------------------------------------------------------------------------- /lib/xregexp-xescape.js: -------------------------------------------------------------------------------- 1 | // Simulating infinite-length leading lookbehind in JavaScript. Uses XRegExp 2 | // and XRegExp.matchRecursive. Any regex pattern can be used within lookbehind, 3 | // including nested groups. Captures within lookbehind are not included in 4 | // match results. Lazy repetition in lookbehind may lead to unexpected results. 5 | module.exports = function (XRegExp) { 6 | XRegExp.addToken( 7 | /\\x{([\dA-Fa-f]+)}/, 8 | function (match, scope, flags) { 9 | var code = dec(match[1]) 10 | if (code > 0x10FFFF) { 11 | throw new SyntaxError('Invalid Unicode code point ' + match[0]) 12 | } 13 | if (code <= 0xFFFF) { 14 | // Converting to \uNNNN avoids needing to escape the literal character and keep it 15 | // separate from preceding tokens 16 | return '\\u' + pad4(hex(code)) 17 | } 18 | // If `code` is between 0xFFFF and 0x10FFFF, require and defer to native handling 19 | if (flags.indexOf('x') > -1) { 20 | return match[0] 21 | } 22 | throw new SyntaxError('Cannot use Unicode code point above \\u{FFFF} without flag u') 23 | }, 24 | { 25 | scope: 'all', 26 | leadChar: '\\' 27 | } 28 | ) 29 | } 30 | 31 | function dec (hex) { 32 | return parseInt(hex, 16) 33 | } 34 | 35 | function pad4 (str) { 36 | while (str.length < 4) { 37 | str = '0' + str 38 | } 39 | return str 40 | } 41 | 42 | function hex (dec) { 43 | return parseInt(dec, 10).toString(16) 44 | } 45 | -------------------------------------------------------------------------------- 
// lib/xregexp-lookbehind.js
//
// Emulates a leading lookbehind of unbounded length on top of XRegExp and
// XRegExp.matchRecursive. The lookbehind may contain any regex syntax,
// nested groups included. Groups captured inside the lookbehind do not show
// up in the match result. Lazy quantifiers inside the lookbehind may give
// surprising results.
module.exports = function (XRegExp) {
  // Splits `pattern` into its leading lookbehind and the remainder and
  // compiles both.
  //
  // Returns { lb, lbType, main }:
  //   lb     - regex that tests whether the text left of a candidate match
  //            ends with the lookbehind's content
  //   lbType - true for `(?<=...)`, false for `(?<!...)`
  //   main   - regex for everything after the lookbehind
  //
  // Throws an Error when the pattern (after an optional leading mode
  // modifier) does not begin with a lookbehind.
  function preparePattern (pattern, flags) {
    var allFlags = flags || ''

    // A leading mode modifier such as `(?im)` is stripped and its letters are
    // added to the flags.
    var body = pattern.replace(/^\(\?([\w$]+)\)/, function (whole, modeFlags) {
      allFlags += modeFlags
      return ''
    })

    var opener = /^\(\?<([=!])/.exec(body)
    if (!opener) {
      throw new Error('lookbehind not at start of pattern')
    }

    // Find where the lookbehind group closes. The left delimiter tolerates
    // nested groups, escaped parens, and unescaped parens inside classes.
    var closers = XRegExp.matchRecursive(body, /\((?:[^()[\\]|\\.|\[(?:[^\\\]]|\\.)*])*/.source, '\\)', 's', {
      valueNames: [null, null, null, 'right'],
      escapeChar: '\\'
    })
    var lookbehindEnd = closers[0].end
    var inner = body.slice('(?<='.length, lookbehindEnd - 1)
    var remainder = body.slice(('(?<=)' + inner).length)

    return {
      // `$(?!\s)` keeps the end anchor usable together with flag m.
      lb: XRegExp('(?:' + inner + ')$(?!\\s)', allFlags.replace(/[gy]/g, '')),
      // true: positive lookbehind, false: negative lookbehind
      lbType: opener[1] === '=',
      main: XRegExp(remainder, allFlags)
    }
  }

  // Executes `pattern` (a string starting with a lookbehind) against `str`,
  // beginning at index `pos` (0 when omitted). Returns the first match of the
  // main pattern whose left context satisfies the lookbehind, or null.
  XRegExp.execLb = function (str, pattern, pos) {
    var prepared = preparePattern(pattern)
    var from = pos || 0
    var candidate

    while ((candidate = XRegExp.exec(str, prepared.main, from))) {
      var before = str.slice(0, candidate.index)
      if (prepared.lb.test(before) === prepared.lbType) {
        return candidate
      }
      // Lookbehind condition not met here; retry one position further on.
      from = candidate.index + 1
    }

    return null
  }
  // The wrapper function opened above is closed on the following line.
53 | } 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ⚠️ This project is deprecated in favor of [oniguruma-to-es](https://github.com/slevithan/oniguruma-to-es). Please migrate over. 2 | 3 | # onigurumajs 4 | 5 | [![Build Status](https://travis-ci.org/bcoe/onigurumajs.svg)](https://travis-ci.org/bcoe/onigurumajs) 6 | [![Coverage Status](https://coveralls.io/repos/bcoe/onigurumajs/badge.svg?branch=master)](https://coveralls.io/r/bcoe/onigurumajs?branch=master) 7 | [![NPM version](https://img.shields.io/npm/v/onigurumajs.svg)](https://www.npmjs.com/package/onigurumajs) 8 | 9 | implementation of the [node-oniguruma API](https://github.com/atom/node-oniguruma/) using 10 | [xregexp](https://github.com/slevithan/xregexp), various shims, replacements, and elbow grease. 11 | 12 | ## Why does this exist? 13 | 14 | 1. It would be nice to be able to parse [TextMate grammars](https://manual.macromates.com/en/language_grammars) (the basis for syntax highlighting in Atom) in pure JS: 15 | 16 | 17 | 18 | 2. JavaScript's regex parser lacks some useful features, such as [lookbehinds](http://www.regular-expressions.info/lookaround.html). onigurumajs adds them. 19 | 20 | ## Usage 21 | 22 | See [node-oniguruma](https://github.com/atom/node-oniguruma/). 23 | 24 | ## Adds support to JavaScript for 25 | 26 | * extended xregexp syntax described here: http://xregexp.com/syntax/ 27 | * leading lookbehind zero-length assertions: 28 | 29 | ```js 30 | var scanner = new OnigScanner(['(?<][=!]?/.exec(pattern.original)) { 130 | // a leading lookbehind regex. 131 | results = xregexp.execLb(text, pattern.original, start) 132 | } else if (/\(\?[><][=!]?/.exec(pattern.original)) { 133 | // allow for an alternation chracter followed by 134 | // a lookbehind regex. 
135 | var splitPattern = pattern.original.split(/\|(\(\?[><][=!][^|]*)/g) 136 | if (splitPattern.length > 1) results = alternationPrefixedLookbehinds(text, splitPattern, start) 137 | } 138 | 139 | return results 140 | } 141 | 142 | function alternationPrefixedLookbehinds (text, splitPattern, start) { 143 | var patterns = [] 144 | var currentPattern = '' 145 | var result = null 146 | 147 | // rebuild valid regex from splitting on (foo|(?<=foo)). 148 | for (var i = 0, pattern; (pattern = splitPattern[i]) !== undefined; i++) { 149 | if (/\(\?[><][=!]?/.exec(pattern)) { 150 | patterns.push(currentPattern) 151 | currentPattern = '' 152 | } 153 | currentPattern += pattern 154 | } 155 | patterns.push(currentPattern) 156 | 157 | // now apply each pattern. 158 | for (i = 0, pattern; (pattern = patterns[i]) !== undefined; i++) { 159 | try { 160 | if (/\(\?[><][=!]?/.exec(pattern)) { 161 | result = xregexp.execLb(text, pattern, start) 162 | } else { 163 | result = xregexp.exec(text, xregexp(pattern), start) 164 | } 165 | if (result) return result 166 | } catch (e) { 167 | // we're officially in uncharted territory. 168 | return null 169 | } 170 | } 171 | 172 | return null 173 | } 174 | 175 | function applyReplacements (pattern) { 176 | // TODO: write tests and/or find better generic 177 | // solutions for each of these replacements. 178 | pattern = pattern.replace(/\\h/g, '[\t\p{Zs}]') // any whitespace character. 179 | pattern = pattern.replace(/\\A/g, '^') // \A matches start of string, rather than line. 180 | pattern = pattern.replace(/\\G/, '') // start of match group. 181 | pattern = pattern.replace(/\$$/, '[\r\n]?$') // match \n or end of string. 
182 | return pattern 183 | } 184 | 185 | OnigScanner.prototype.findNextMatchSync = function (text, start) { 186 | var result = null 187 | this.findNextMatch(text, start, function (err, _result) { 188 | if (err) throw err 189 | result = _result 190 | }) 191 | return result 192 | } 193 | 194 | function transformMatches (results, text, start) { 195 | var matchIndex = 0 196 | var slicedText = text.slice(start) 197 | var difference = text.length - slicedText.length 198 | var transform = [] 199 | 200 | results.forEach(function (result, i) { 201 | matchIndex = difference + slicedText.indexOf(result) 202 | var start = result ? matchIndex : results.index 203 | var end = result ? matchIndex + result.length : results.index 204 | if (typeof result === 'undefined') start = end = 0 205 | transform.push({ 206 | index: i, 207 | start: start, 208 | end: end, 209 | match: result, 210 | length: result ? result.length : 0 211 | }) 212 | }) 213 | 214 | return transform 215 | } 216 | 217 | exports.OnigRegExp = OnigRegExp 218 | exports.OnigScanner = OnigScanner 219 | --------------------------------------------------------------------------------