├── screen.png
├── .travis.yml
├── package.json
├── LICENSE.txt
├── lib
    ├── xregexp-xescape.js
    └── xregexp-lookbehind.js
├── README.md
├── test.js
└── index.js


/screen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bcoe/onigurumajs/HEAD/screen.png


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | node_js:
3 |   - "0.10"
4 |   - "0.12"
5 |   - "4.1"
6 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "onigurumajs",
 3 |   "version": "1.0.0",
 4 |   "description": "a pure JavaScript port of the oniguruma regex engine",
 5 |   "main": "index.js",
 6 |   "scripts": {
 7 |     "pretest": "standard",
 8 |     "test": "tap --coverage test.js"
 9 |   },
10 |   "keywords": [
11 |     "regex",
12 |     "oniguruma",
13 |     "javascript"
14 |   ],
15 |   "author": "Ben Coe <ben@npmjs.com>",
16 |   "license": "ISC",
17 |   "dependencies": {
18 |     "lodash": "^3.10.1",
19 |     "xregexp": "^3.0.0"
20 |   },
21 |   "devDependencies": {
22 |     "standard": "^5.3.1",
23 |     "tap": "^2.2.0"
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015, Contributors
 2 | 
 3 | Permission to use, copy, modify, and/or distribute this software
 4 | for any purpose with or without fee is hereby granted, provided
 5 | that the above copyright notice and this permission notice
 6 | appear in all copies.
 7 | 
 8 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 9 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
10 | OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE
11 | LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
12 | OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
13 | WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
14 | ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | 


--------------------------------------------------------------------------------
/lib/xregexp-xescape.js:
--------------------------------------------------------------------------------
 1 | // Adds support for Oniguruma-style `\x{h..}` code point escapes to XRegExp by
 2 | // registering a syntax token. Code points up to 0xFFFF are rewritten to the
 3 | // equivalent `\uNNNN` escape; larger code points (up to 0x10FFFF) are only
 4 | // accepted when the pattern's flags allow it, otherwise a SyntaxError is thrown.
 5 | module.exports = function (XRegExp) {
 6 |   XRegExp.addToken(
 7 |     /\\x{([\dA-Fa-f]+)}/,
 8 |     function (match, scope, flags) {
 9 |       var code = dec(match[1])
10 |       if (code > 0x10FFFF) {
11 |         throw new SyntaxError('Invalid Unicode code point ' + match[0])
12 |       }
13 |       if (code <= 0xFFFF) {
14 |         // Converting to \uNNNN avoids needing to escape the literal character and keep it
15 |         // separate from preceding tokens
16 |         return '\\u' + pad4(hex(code))
17 |       }
18 |       // If `code` is between 0xFFFF and 0x10FFFF, require and defer to native handling
19 |       if (flags.indexOf('x') > -1) {
20 |         return match[0]
21 |       }
22 |       throw new SyntaxError('Cannot use Unicode code point above \\u{FFFF} without flag u')
23 |     },
24 |     {
25 |       scope: 'all',
26 |       leadChar: '\\'
27 |     }
28 |   )
29 | }
30 | 
31 | function dec (hex) {
32 |   return parseInt(hex, 16)
33 | }
34 | 
35 | function pad4 (str) {
36 |   while (str.length < 4) {
37 |     str = '0' + str
38 |   }
39 |   return str
40 | }
41 | 
42 | function hex (dec) {
43 |   return parseInt(dec, 10).toString(16)
44 | }
45 | 


--------------------------------------------------------------------------------
/lib/xregexp-lookbehind.js:
--------------------------------------------------------------------------------
 1 | // Simulating infinite-length leading lookbehind in JavaScript. Uses XRegExp
 2 | // and XRegExp.matchRecursive. Any regex pattern can be used within lookbehind,
 3 | // including nested groups. Captures within lookbehind are not included in
 4 | // match results. Lazy repetition in lookbehind may lead to unexpected results.
 5 | module.exports = function (XRegExp) { // installs XRegExp.execLb on the XRegExp object passed in
 6 |   function preparePattern (pattern, flags) { // splits a lookbehind-led pattern into { lb, lbType, main }
 7 |     var lbOpen
 8 |     var lbEndPos
 9 |     var lbInner
10 | 
11 |     flags = flags || ''
12 |     // Extract flags from a leading mode modifier, if present
13 |     pattern = pattern.replace(/^\(\?([\w$]+)\)/, function ($0, $1) {
14 |       flags += $1
15 |       return ''
16 |     })
17 | 
18 |     lbOpen = /^\(\?<([=!])/.exec(pattern) // group 1 is `=` (positive) or `!` (negative)
19 | 
20 |     if (lbOpen) {
21 |       // Extract the lookbehind pattern. Allows nested groups, escaped parens, and unescaped parens within classes
22 |       lbEndPos = XRegExp.matchRecursive(pattern, /\((?:[^()[\\]|\\.|\[(?:[^\\\]]|\\.)*])*/.source, '\\)', 's', {
23 |         valueNames: [null, null, null, 'right'],
24 |         escapeChar: '\\'
25 |       })[0].end
26 |       lbInner = pattern.slice('(?<='.length, lbEndPos - 1) // `(?<=` and `(?<!` have the same length
27 |     } else {
28 |       throw new Error('lookbehind not at start of pattern')
29 |     }
30 |     return {
31 |       lb: XRegExp('(?:' + lbInner + ')$(?!\\s)', flags.replace(/[gy]/g, '')), // $(?!\s) allows use of flag m
32 |       lbType: lbOpen[1] === '=', // Positive or negative lookbehind
33 |       main: XRegExp(pattern.slice(('(?<=)' + lbInner).length), flags)
34 |     }
35 |   }
36 | 
37 |   XRegExp.execLb = function (str, pattern, pos) { // returns the first match of `pattern` at or after `pos`, or null
38 |     pos = pos || 0
39 |     var match, leftContext
40 |     pattern = preparePattern(pattern) // re-prepared on every call; throws if there is no leading lookbehind
41 |     while (true) {
42 |       match = XRegExp.exec(str, pattern.main, pos)
43 |       if (!match) break
44 | 
45 |       leftContext = str.slice(0, match.index) // everything before the candidate match
46 |       if (pattern.lbType === pattern.lb.test(leftContext)) {
47 |         return match
48 |       }
49 |       pos = match.index + 1 // lookbehind condition not met: retry one character further on
50 |     }
51 |     return null
52 |   }
53 | }
54 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ⚠️ This project is deprecated in favor of [oniguruma-to-es](https://github.com/slevithan/oniguruma-to-es). Please migrate over.
 2 | 
 3 | # onigurumajs
 4 | 
 5 | [![Build Status](https://travis-ci.org/bcoe/onigurumajs.svg)](https://travis-ci.org/bcoe/onigurumajs)
 6 | [![Coverage Status](https://coveralls.io/repos/bcoe/onigurumajs/badge.svg?branch=master)](https://coveralls.io/r/bcoe/onigurumajs?branch=master)
 7 | [![NPM version](https://img.shields.io/npm/v/onigurumajs.svg)](https://www.npmjs.com/package/onigurumajs)
 8 | 
 9 | An implementation of the [node-oniguruma API](https://github.com/atom/node-oniguruma/) built on
10 | [xregexp](https://github.com/slevithan/xregexp), various shims, replacements, and elbow grease.
11 | 
12 | ## Why does this exist?
13 | 
14 | 1. It would be nice to be able to parse [TextMate grammars](https://manual.macromates.com/en/language_grammars) (the basis for syntax highlighting in Atom) in pure JS:
15 | 
16 | <img width="500" src="screen.png">
17 | 
18 | 2. JavaScript's regex parser lacks some useful features, such as [lookbehinds](http://www.regular-expressions.info/lookaround.html). onigurumajs adds them.
19 | 
20 | ## Usage
21 | 
22 | See [node-oniguruma](https://github.com/atom/node-oniguruma/).
23 | 
24 | ## Adds support to JavaScript for
25 | 
26 | * extended xregexp syntax described here: http://xregexp.com/syntax/
27 | * leading lookbehind zero-length assertions:
28 | 
29 | ```js
30 | var scanner = new OnigScanner(['(?<!a)b'])
31 | scanner.findNextMatchSync('bb') // match.
32 | scanner.findNextMatchSync('ab') // fails to match (returns null).
33 | ```
34 | 
35 | * lookbehind assertions following alternation characters:
36 | 
37 | ```js
38 | var scanner = new OnigScanner(['cat|(?<!a)b'])
39 | scanner.findNextMatchSync('cat') // match.
40 | scanner.findNextMatchSync('bb') // match.
41 | scanner.findNextMatchSync('ab') // fails to match (returns null).
42 | ```
43 | 
44 | * `\x{xxxx}` format unicode escape codes
45 | 
46 | ```js
47 | var scanner = new OnigScanner(['\\x{2603}'])
48 | scanner.findNextMatchSync('☃') // match.
49 | ```
50 | 
51 | ## Contribute
52 | 
53 | This is a work in progress. Please join in: [open some issues](https://github.com/bcoe/onigurumajs/issues/new), submit pull requests, and help build a crazy full-featured regex parser for JavaScript.
54 | 
55 | ## License
56 | 
57 | ISC
58 | 


--------------------------------------------------------------------------------
/test.js:
--------------------------------------------------------------------------------
  1 | var NOnigRegExp = require('./').OnigRegExp // the pure-JS OnigRegExp exported by index.js
  2 | var NOnigScanner = require('./').OnigScanner // the pure-JS OnigScanner exported by index.js
  3 | // var OnigScanner = require('oniguruma').OnigScanner
  4 | var tap = require('tap')
  5 | 
  6 | tap.test('OnigRegExp.searchSync begins search at index 0', function (t) {
  7 |   var nregex = new NOnigRegExp('a([b-d])c')
  8 | 
  9 |   t.deepEqual(
 10 |     [ { index: 0, start: 1, end: 4, length: 3, match: 'abc' },
 11 |       { index: 1, start: 2, end: 3, length: 1, match: 'b' }],
 12 |     nregex.searchSync('!abcdef!abcdef')
 13 |   )
 14 |   t.done()
 15 | })
 16 | 
 17 | tap.test('OnigRegExp.search begins search at index 0', function (t) {
 18 |   var nregex = new NOnigRegExp('a([b-d])c')
 19 | 
 20 |   nregex.search('!abcdef!abcdef', function (e, nmatch) {
 21 |     t.deepEqual(
 22 |       [ { index: 0, start: 1, end: 4, length: 3, match: 'abc' },
 23 |         { index: 1, start: 2, end: 3, length: 1, match: 'b' } ],
 24 |       nmatch
 25 |     )
 26 |     t.done()
 27 |   })
 28 | })
 29 | 
 30 | tap.test('OnigRegExp.search allows offset to be provided', function (t) {
 31 |   var nregex = new NOnigRegExp('a([b-d])c')
 32 | 
 33 |   nregex.search('abcdef!abcdef', 4, function (e, nmatch) { // offset 4 skips the first 'abc'
 34 |     t.deepEqual(
 35 |       [ { index: 0, start: 7, end: 10, length: 3, match: 'abc' },
 36 |         { index: 1, start: 8, end: 9, length: 1, match: 'b' } ],
 37 |       nmatch
 38 |     )
 39 |     t.done()
 40 |   })
 41 | })
 42 | 
 43 | tap.test('OnigRegExp.search handles no match existing', function (t) {
 44 |   var nregex = new NOnigRegExp('a([b-d])c')
 45 | 
 46 |   nregex.search('banana', function (e, nmatch) {
 47 |     t.equal(nmatch, null)
 48 |     t.done()
 49 |   })
 50 | })
 51 | 
 52 | tap.test('OnigScanner.findNextMatch finds best match', function (t) {
 53 |   var nscanner = new NOnigScanner(['c', 'a(b)?'])
 54 | 
 55 |   nscanner.findNextMatch('abc', function (e, nmatch) {
 56 |     t.deepEqual({ index: 1, // index of the winning pattern in the scanner's list
 57 |       captureIndices:
 58 |         [ { index: 0, start: 0, end: 2, length: 2 },
 59 |           { index: 1, start: 1, end: 2, length: 1 } ],
 60 |       scanner: {} },
 61 |       nmatch
 62 |     )
 63 |     t.done()
 64 |   })
 65 | })
 66 | 
 67 | tap.test('OnigScanner.findNextMatch allows offset to be provided', function (t) {
 68 |   var nscanner = new NOnigScanner(['c', 'a(b)?'])
 69 | 
 70 |   nscanner.findNextMatch('abcabc', 2, function (e, nmatch) {
 71 |     t.deepEqual({ index: 0,
 72 |       captureIndices: [ { index: 0, start: 2, end: 3, length: 1 } ],
 73 |       scanner: {} },
 74 |       nmatch
 75 |     )
 76 |     t.done()
 77 |   })
 78 | })
 79 | 
 80 | tap.test('OnigScanner.findNextMatch handles no match existing', function (t) {
 81 |   var nscanner = new NOnigScanner(['c', 'a(b)?'])
 82 | 
 83 |   nscanner.findNextMatch('banana', function (e, nmatch) { // NOTE(review): 'a(b)?' does match here; only group 1 is unmatched
 84 |     t.deepEqual({ index: 1,
 85 |       captureIndices:
 86 |       [ { index: 0, start: 1, end: 2, length: 1 },
 87 |         { index: 1, start: 0, end: 0, length: 0 } ],
 88 |       scanner: {} },
 89 |       nmatch
 90 |     )
 91 |     t.done()
 92 |   })
 93 | })
 94 | 
 95 | tap.test('OnigScanner.findNextMatchSync finds best match', function (t) {
 96 |   var nscanner = new NOnigScanner(['c', 'a(b)?'])
 97 | 
 98 |   t.deepEqual(
 99 |     { index: 1,
100 |       captureIndices:
101 |       [ { index: 0, start: 0, end: 2, length: 2 },
102 |         { index: 1, start: 1, end: 2, length: 1 } ],
103 |       scanner: {} },
104 |     nscanner.findNextMatchSync('abc')
105 |   )
106 |   t.done()
107 | })
108 | 
109 | // the following tests exercise various shims, replacements,
110 | // duct-tape, etc, to make xregexp behave like oniguruma:
111 | 
112 | tap.test('handles leading lookbehind', function (t) {
113 |   var nscanner = new NOnigScanner(['(?<!a)b'])
114 | 
115 |   t.deepEqual(
116 |     { index: 0,
117 |       captureIndices: [ { index: 0, start: 0, end: 1, length: 1 } ],
118 |       scanner: {} },
119 |     nscanner.findNextMatchSync('bb')
120 |   )
121 |   t.equal(
122 |     nscanner.findNextMatchSync('ab'),
123 |     null
124 |   )
125 |   t.done()
126 | })
127 | 
128 | tap.test('handles lookbehind immediately following an alternation', function (t) {
129 |   var nscanner = new NOnigScanner(['cat|(?<!a)b|(?<=a)qwerty|banana'])
130 | 
131 |   t.deepEqual(
132 |     { index: 0,
133 |       captureIndices: [ { index: 0, start: 0, end: 1, length: 1 } ],
134 |       scanner: {} },
135 |     nscanner.findNextMatchSync('bb')
136 |   )
137 |   t.equal(
138 |     nscanner.findNextMatchSync('ab'),
139 |     null
140 |   )
141 |   t.done()
142 | })
143 | 
144 | tap.test('$ character should match both newline and end of string', function (t) {
145 |   var nscanner = new NOnigScanner(['ab$'])
146 | 
147 |   t.notEqual(
148 |     nscanner.findNextMatchSync('ab\n'),
149 |     null
150 |   )
151 | 
152 |   t.done()
153 | })
154 | 
155 | tap.test('\\x should work as an alias for \\u', function (t) {
156 |   var nscanner = new NOnigScanner(['\\x{2603}'])
157 | 
158 |   t.notEqual(
159 |     nscanner.findNextMatchSync('☃'),
160 |     null
161 |   )
162 | 
163 |   t.done()
164 | })
165 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
  1 | var _ = require('lodash')
  2 | var xregexp = require('xregexp')
  3 | require('./lib/xregexp-lookbehind')(xregexp)
  4 | require('./lib/xregexp-xescape')(xregexp)
  5 | 
  6 | function OnigRegExp (pattern) {
  7 |   pattern = applyReplacements(pattern)
  8 |   try {
  9 |     this.pattern = xregexp(pattern)
 10 |     this.pattern.original = pattern
 11 |   } catch (e) {
 12 |     // we'll do it live!
 13 |     this.pattern = {
 14 |       original: pattern
 15 |     }
 16 |   }
 17 | }
 18 | 
 19 | OnigRegExp.prototype.search = function (text, start, cb) {
 20 |   var results = null
 21 | 
 22 |   if (typeof start === 'function') {
 23 |     cb = start
 24 |     start = 0
 25 |   }
 26 | 
 27 |   start = start || 0
 28 | 
 29 |   try {
 30 |     results = execRegex(text, this.pattern, start)
 31 |   } catch (e) {
 32 |     cb(null, null)
 33 |   }
 34 | 
 35 |   if (results) cb(null, transformMatches(results, text, start))
 36 |   else cb(null, null)
 37 | }
 38 | 
 39 | OnigRegExp.prototype.searchSync = function (text, start) {
 40 |   var result = null
 41 |   this.search(text, start, function (err, _result) {
 42 |     if (err) throw err
 43 |     result = _result
 44 |   })
 45 |   return result
 46 | }
 47 | 
 48 | OnigRegExp.prototype.test = function (text, cb) {
 49 |   this.search(text, function (err, result) {
 50 |     return cb(err, !!result)
 51 |   })
 52 | }
 53 | 
 54 | OnigRegExp.prototype.testSync = function (text) {
 55 |   var result = false
 56 |   this.test(text, function (err, _result) {
 57 |     if (err) throw err
 58 |     result = _result
 59 |   })
 60 |   return result
 61 | }
 62 | 
 63 | function OnigScanner (patterns) {
 64 |   this.patterns = []
 65 |   for (var i = 0, pattern, xpattern; (pattern = patterns[i]) !== undefined; i++) {
 66 |     try {
 67 |       pattern = applyReplacements(pattern)
 68 |       xpattern = xregexp(pattern)
 69 |       xpattern.original = pattern
 70 |       this.patterns.push(xpattern)
 71 |     } catch (e) {
 72 |       // we'll do it live!
 73 |       this.patterns.push({
 74 |         original: pattern
 75 |       })
 76 |     }
 77 |   }
 78 | }
 79 | 
 80 | OnigScanner.prototype.findNextMatch = function (text, start, cb) {
 81 |   var bestMatch = null
 82 |   var bestIndex = 1
 83 |   var results = null
 84 | 
 85 |   if (typeof start === 'function') {
 86 |     cb = start
 87 |     start = 0
 88 |   }
 89 | 
 90 |   start = start || 0
 91 | 
 92 |   // https://github.com/atom/node-oniguruma/blob/master/src/onig-searcher.cc
 93 |   for (var i = 0, pattern; (pattern = this.patterns[i]) !== undefined; i++) {
 94 |     try {
 95 |       results = execRegex(text, pattern, start)
 96 | 
 97 |       if (!results) continue
 98 | 
 99 |       if (!bestMatch || results.index < bestMatch.index) {
100 |         bestMatch = results
101 |         bestIndex = i
102 |       }
103 |     } catch (e) {
104 |       // ignore failing patterns until we can add
105 |       // shims for more with tests!
106 |       // console.log('cannot match: ', e.message)
107 |     }
108 |   }
109 | 
110 |   if (bestMatch) {
111 |     cb(null, {
112 |       captureIndices: transformMatches(bestMatch, text, start).map(function (match) {
113 |         return _.omit(match, 'match')
114 |       }),
115 |       index: bestIndex,
116 |       scanner: {}
117 |     })
118 |   } else {
119 |     cb(null, null)
120 |   }
121 | }
122 | 
123 | function execRegex (text, pattern, start) {
124 |   var results = null
125 | 
126 |   if (pattern.xregexp) {
127 |     // a regex that xregexp can handle right out of the gate.
128 |     results = xregexp.exec(text, pattern, start)
129 |   } else if (/^\(\?[><][=!]?/.exec(pattern.original)) {
130 |     // a leading lookbehind regex.
131 |     results = xregexp.execLb(text, pattern.original, start)
132 |   } else if (/\(\?[><][=!]?/.exec(pattern.original)) {
133 |     // allow for an alternation chracter followed by
134 |     // a lookbehind regex.
135 |     var splitPattern = pattern.original.split(/\|(\(\?[><][=!][^|]*)/g)
136 |     if (splitPattern.length > 1) results = alternationPrefixedLookbehinds(text, splitPattern, start)
137 |   }
138 | 
139 |   return results
140 | }
141 | 
142 | function alternationPrefixedLookbehinds (text, splitPattern, start) {
143 |   var patterns = []
144 |   var currentPattern = ''
145 |   var result = null
146 | 
147 |   // rebuild valid regex from splitting on (foo|(?<=foo)).
148 |   for (var i = 0, pattern; (pattern = splitPattern[i]) !== undefined; i++) {
149 |     if (/\(\?[><][=!]?/.exec(pattern)) {
150 |       patterns.push(currentPattern)
151 |       currentPattern = ''
152 |     }
153 |     currentPattern += pattern
154 |   }
155 |   patterns.push(currentPattern)
156 | 
157 |   // now apply each pattern.
158 |   for (i = 0, pattern; (pattern = patterns[i]) !== undefined; i++) {
159 |     try {
160 |       if (/\(\?[><][=!]?/.exec(pattern)) {
161 |         result = xregexp.execLb(text, pattern, start)
162 |       } else {
163 |         result = xregexp.exec(text, xregexp(pattern), start)
164 |       }
165 |       if (result) return result
166 |     } catch (e) {
167 |       // we're officially in uncharted territory.
168 |       return null
169 |     }
170 |   }
171 | 
172 |   return null
173 | }
174 | 
175 | function applyReplacements (pattern) {
176 |   // TODO: write tests and/or find better generic
177 |   // solutions for each of these replacements.
178 |   pattern = pattern.replace(/\\h/g, '[\t\p{Zs}]') // any whitespace character.
179 |   pattern = pattern.replace(/\\A/g, '^') // \A matches start of string, rather than line.
180 |   pattern = pattern.replace(/\\G/, '') // start of match group.
181 |   pattern = pattern.replace(/\$$/, '[\r\n]?$') // match \n or end of string.
182 |   return pattern
183 | }
184 | 
185 | OnigScanner.prototype.findNextMatchSync = function (text, start) {
186 |   var result = null
187 |   this.findNextMatch(text, start, function (err, _result) {
188 |     if (err) throw err
189 |     result = _result
190 |   })
191 |   return result
192 | }
193 | 
194 | function transformMatches (results, text, start) {
195 |   var matchIndex = 0
196 |   var slicedText = text.slice(start)
197 |   var difference = text.length - slicedText.length
198 |   var transform = []
199 | 
200 |   results.forEach(function (result, i) {
201 |     matchIndex = difference + slicedText.indexOf(result)
202 |     var start = result ? matchIndex : results.index
203 |     var end = result ? matchIndex + result.length : results.index
204 |     if (typeof result === 'undefined') start = end = 0
205 |     transform.push({
206 |       index: i,
207 |       start: start,
208 |       end: end,
209 |       match: result,
210 |       length: result ? result.length : 0
211 |     })
212 |   })
213 | 
214 |   return transform
215 | }
216 | 
217 | exports.OnigRegExp = OnigRegExp
218 | exports.OnigScanner = OnigScanner
219 | 


--------------------------------------------------------------------------------