├── Cakefile ├── LICENSE ├── README.md ├── doc └── strscan.html ├── lib └── strscan.js ├── package.json ├── src └── strscan.coffee └── test └── test_strscan.coffee /Cakefile: -------------------------------------------------------------------------------- 1 | # Much of this Cakefile shamelessly ripped from coffee-script 2 | 3 | fs = require 'fs' 4 | CoffeeScript = require 'coffee-script' 5 | {spawn, exec} = require 'child_process' 6 | 7 | # ANSI Terminal Colors. 8 | red = '\033[0;31m' 9 | green = '\033[0;32m' 10 | reset = '\033[0m' 11 | 12 | # Log a message with a color. 13 | log = (message, color, explanation) -> 14 | puts color + message + reset + ' ' + (explanation or '') 15 | 16 | extend = (object, properties) -> 17 | (object[key] = val) for all key, val of properties 18 | 19 | run = (args) -> 20 | proc = spawn 'coffee', args 21 | proc.stderr.on 'data', (buffer) -> puts buffer.toString() 22 | proc.on 'exit', (status) -> process.exit(1) if status != 0 23 | 24 | task 'build', -> 25 | src = fs.readFileSync 'src/strscan.coffee' 26 | js = CoffeeScript.compile src.toString() 27 | fs.writeFileSync 'lib/strscan.js', js 28 | 29 | task 'test', -> 30 | extend global, require 'assert' 31 | passedTests = failedTests = 0 32 | startTime = new Date 33 | originalOk = ok 34 | helpers.extend global, 35 | ok: (args...) -> passedTests += 1; originalOk(args...) 
36 | process.on 'exit', -> 37 | time = ((new Date - startTime) / 1000).toFixed(2) 38 | message = "passed #{passedTests} tests in #{time} seconds#{reset}" 39 | if failedTests 40 | log "failed #{failedTests} and #{message}", red 41 | else 42 | log message, green 43 | fs.readdir 'test', (err, files) -> 44 | files.forEach (file) -> 45 | return unless file.match(/\.coffee$/i) 46 | fileName = path.join 'test', file 47 | fs.readFile fileName, (err, code) -> 48 | try 49 | CoffeeScript.run code.toString(), {fileName} 50 | catch err 51 | failedTests += 1 52 | log "failed #{fileName}", red, '\n' + err.stack.toString() 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010 Sam Stephenson 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## StringScanner 2 | 3 | **StringScanner** is a simple string tokenizer that provides for lexical 4 | scanning operations on a string. It's a JavaScript port of the [Ruby 5 | library with the same name](http://ruby-doc.org/core/classes/StringScanner.html). 6 | 7 | Scanning a string means keeping track of and advancing a position (a 8 | zero-based index into the source string) and matching regular expressions 9 | against the portion of the source string after the position. 10 | 11 | StringScanner is written in [CoffeeScript](http://coffeescript.org/) and 12 | distributed via [npm](http://npm.mape.me/) as a [CommonJS 13 | module](http://www.commonjs.org/). 14 | 15 | ### Quick start 16 | 17 | $ npm install strscan 18 | $ node-repl 19 | > var StringScanner = require("strscan").StringScanner 20 | > var s = new StringScanner("This is a test") 21 | > s.scan(/\w+/) # => "This" 22 | > s.scan(/\w+/) # => null 23 | > s.scan(/\s+/) # => " " 24 | > s.scan(/\s+/) # => null 25 | > s.scan(/\w+/) # => "is" 26 | > s.hasTerminated() # => false 27 | > s.scan(/\s+/) # => " " 28 | > s.scan(/(\w+)\s+(\w+)/) # => "a test" 29 | > s.getMatch() # => "a test" 30 | > s.getCapture(0) # => "a" 31 | > s.getCapture(1) # => "test" 32 | > s.hasTerminated() # => true 33 | 34 | ### More 35 | 36 | [Clone, fork, or file bugs at GitHub](http://github.com/sstephenson/strscan-js) 37 | 38 | [Read the full documentation/annotated source code](http://sstephenson.github.com/strscan-js/) 39 | 40 | ### Copyright 41 | 42 | Copyright (c) 2010 Sam Stephenson. Distributed under the terms of an 43 | MIT-style license. See LICENSE for details. 
44 | -------------------------------------------------------------------------------- /doc/strscan.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | strscan.coffee 6 | 7 | 8 | 9 |
10 |
11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 60 | 63 | 64 | 65 | 73 | 76 | 77 | 78 | 84 | 90 | 91 | 92 | 105 | 108 | 109 | 110 | 118 | 127 | 128 | 129 | 138 | 148 | 149 | 150 | 156 | 160 | 161 | 162 | 170 | 174 | 175 | 176 | 185 | 189 | 190 | 191 | 202 | 205 | 206 | 207 | 215 | 222 | 223 | 224 | 233 | 241 | 242 | 243 | 251 | 255 | 256 | 257 | 269 | 272 | 273 | 274 | 280 | 284 | 285 | 286 | 293 | 297 | 298 | 299 | 307 | 311 | 312 | 313 | 319 | 323 | 324 | 325 | 336 | 339 | 340 | 341 | 349 | 353 | 354 | 355 | 362 | 366 | 367 | 368 | 375 | 379 | 380 | 381 | 388 | 392 | 393 | 394 | 405 | 408 | 409 | 410 | 417 | 421 | 422 | 423 | 430 | 434 | 435 | 436 | 443 | 447 | 448 | 449 | 457 | 464 | 465 | 466 | 472 | 475 | 476 | 477 | 484 | 491 | 492 |

strscan.coffee

21 |
22 | # 23 |
24 |

StringScanner is a simple string tokenizer that provides for lexical 25 | scanning operations on a string. It’s a JavaScript port of the Ruby 26 | library with the same name.

27 | 28 |

Scanning a string means keeping track of and advancing a position (a 29 | zero-based index into the source string) and matching regular expressions 30 | against the portion of the source string after the position.

31 | 32 |

StringScanner is written in CoffeeScript and 33 | distributed via npm as a CommonJS 34 | module.

35 | 36 |

Clone, fork, or file bugs at GitHub.

37 | 38 |

Quick start

39 | 40 |
41 | 42 |
 $ npm install strscan
 43 |  $ node-repl
 44 |  > var StringScanner = require("strscan").StringScanner
 45 |  > var s = new StringScanner("This is a test")
 46 |  > s.scan(/\w+/)             # => "This"
 47 |  > s.scan(/\w+/)             # => null
 48 |  > s.scan(/\s+/)             # => " "
 49 |  > s.scan(/\s+/)             # => null
 50 |  > s.scan(/\w+/)             # => "is"
 51 |  > s.hasTerminated()         # => false
 52 |  > s.scan(/\s+/)             # => " "
 53 |  > s.scan(/(\w+)\s+(\w+)/)   # => "a test"
 54 |  > s.getMatch()              # => "a test"
 55 |  > s.getCapture(0)           # => "a"
 56 |  > s.getCapture(1)           # => "test"
 57 |  > s.hasTerminated()         # => true
 58 | 
59 |
61 |
62 |
66 |
67 | # 68 |
69 |

Creating a scanner

70 | 71 |
72 |
74 |
75 |
79 |
80 | # 81 |
82 |

Create a new StringScanner with a source string.

83 |
85 |
(exports ? this).StringScanner = class StringScanner
 86 |   constructor: (source) ->
 87 |     @source = source.toString()
 88 |     @reset()
89 |
93 |
94 | # 95 |
96 |

Scanning for matches

97 | 98 |

The scan, scanUntil, scanChar, skip, and skipUntil methods look 99 | for matching strings and advance the scanner’s position. The scan 100 | methods return the matched string; the skip methods return the number 101 | of characters by which the scan position advanced.

102 | 103 |
104 |
106 |
107 |
111 |
112 | # 113 |
114 |

Matches regexp at the current position. Returns the matched string 115 | and advances the scanner’s position, or returns null if there is no 116 | match.

117 |
119 |
  scan: (regexp) ->
120 |     if (matches = regexp.exec @getRemainder()) and matches.index is 0
121 |       @setState matches,
122 |         head: @head + matches[0].length
123 |         last: @head
124 |     else
125 |       @setState []
126 |
130 |
131 | # 132 |
133 |

Matches regexp at or after the current position. Returns the 134 | portion of the source string after the scanner’s position up to and 135 | including the end of the match and advances the scanner’s position, 136 | or returns null if there is no match.

137 |
139 |
  scanUntil: (regexp) ->
140 |     if matches = regexp.exec @getRemainder()
141 |       @setState matches,
142 |         head: @head + matches.index + matches[0].length
143 |         last: @head
144 |       @source.slice @last, @head
145 |     else
146 |       @setState []
147 |
151 |
152 | # 153 |
154 |

Scans one character, returns it, and advances the scanner’s position.

155 |
157 |
  scanChar: ->
158 |     @scan /[\s\S]/
159 |
163 |
164 | # 165 |
166 |

Skips over the given regexp at the current position. Returns the 167 | length of the matched string and advances the scanner’s position, or 168 | returns null if there is no match.

169 |
171 |
  skip: (regexp) ->
172 |     @match.length if @scan regexp
173 |
177 |
178 | # 179 |
180 |

Skips over the given regexp at or after the current position. 181 | Returns the length of the string up to and including the end of the 182 | match and advances the scanner’s position, or returns null if there 183 | is no match.

184 |
186 |
  skipUntil: (regexp) ->
187 |     @head - @last if @scanUntil regexp
188 |
192 |
193 | # 194 |
195 |

Looking ahead

196 | 197 |

The check, checkUntil and peek methods look for matching strings 198 | without advancing the scanner’s position.

199 | 200 |
201 |
203 |
204 |
208 |
209 | # 210 |
211 |

Checks to see if regexp can be matched at the current position and 212 | returns the matched string without advancing the scanner’s position, or 213 | returns null if there is no match.

214 |
216 |
  check: (regexp) ->
217 |     if (matches = regexp.exec @getRemainder()) and matches.index is 0
218 |       @setState matches
219 |     else
220 |       @setState []
221 |
225 |
226 | # 227 |
228 |

Checks to see if regexp can be matched at or after the current 229 | position. Returns the portion of the source string after the current 230 | position up to and including the end of the match without advancing the 231 | scanner’s position, or returns null if there is no match.

232 |
234 |
  checkUntil: (regexp) ->
235 |     if matches = regexp.exec @getRemainder()
236 |       @setState matches
237 |       @source.slice @head, @head + matches.index + matches[0].length
238 |     else
239 |       @setState []
240 |
244 |
245 | # 246 |
247 |

Returns the next length characters after the current position. If 248 | called without a length, returns the next character. The scanner’s 249 | position is not advanced.

250 |
252 |
  peek: (length) ->
253 |     @source.substr @head, length ? 1
254 |
258 |
259 | # 260 |
261 |

Accessing scanner data

262 | 263 |

The getSource, getRemainder, getPosition and hasTerminated 264 | methods provide information about the scanner’s source string and 265 | position.

266 | 267 |
268 |
270 |
271 |
275 |
276 | # 277 |
278 |

Returns the scanner’s source string.

279 |
281 |
  getSource: ->
282 |     @source
283 |
287 |
288 | # 289 |
290 |

Returns the portion of the source string from the scanner’s position 291 | onward.

292 |
294 |
  getRemainder: ->
295 |     @source.slice @head
296 |
300 |
301 | # 302 |
303 |

Returns the scanner’s position. In the reset position, this value is 304 | zero. In the terminated position, this value is the length of the 305 | source string.

306 |
308 |
  getPosition: ->
309 |     @head
310 |
314 |
315 | # 316 |
317 |

Checks to see if the scanner has reached the end of the string.

318 |
320 |
  hasTerminated: ->
321 |     @head is @source.length
322 |
326 |
327 | # 328 |
329 |

Accessing match data

330 | 331 |

The getPreMatch, getMatch, getPostMatch and getCapture methods 332 | provide information about the most recent match.

333 | 334 |
335 |
337 |
338 |
342 |
343 | # 344 |
345 |

Returns the portion of the source string leading up to, but not 346 | including, the most recent match. (Returns null if there is no recent 347 | match.)

348 |
350 |
  getPreMatch: ->
351 |     @source.slice 0, @head - @match.length if @match
352 |
356 |
357 | # 358 |
359 |

Returns the most recently matched portion of the source string (or 360 | null if there is no recent match).

361 |
363 |
  getMatch: ->
364 |     @match
365 |
369 |
370 | # 371 |
372 |

Returns the portion of the source string immediately following the most 373 | recent match. (Returns null if there is no recent match.)

374 |
376 |
  getPostMatch: ->
377 |     @source.slice @head if @match
378 |
382 |
383 | # 384 |
385 |

Returns the indexth capture from the most recent match (or null if 386 | there is no recent match).

387 |
389 |
  getCapture: (index) ->
390 |     @captures[index]
391 |
395 |
396 | # 397 |
398 |

Modifying the scanner’s state

399 | 400 |

The reset, terminate, concat and unscan methods let you change 401 | the state of the scanner.

402 | 403 |
404 |
406 |
407 |
411 |
412 | # 413 |
414 |

Resets the scanner back to its original position and clears its match 415 | data.

416 |
418 |
  reset: ->
419 |     @setState [], head: 0, last: 0
420 |
424 |
425 | # 426 |
427 |

Advances the scanner position to the end of the string and clears its 428 | match data.

429 |
431 |
  terminate: ->
432 |     @setState [], head: @source.length, last: @head
433 |
437 |
438 | # 439 |
440 |

Appends string to the scanner’s source string. The scanner’s position 441 | is not affected.

442 |
444 |
  concat: (string) ->
445 |     @source += string
446 |
450 |
451 | # 452 |
453 |

Sets the scanner’s position to its previous position and clears its 454 | match data. Only one previous position is stored. Throws an exception 455 | if there is no previous position.

456 |
458 |
  unscan: ->
459 |     if @match
460 |       @setState [], head: @last, last: 0
461 |     else
462 |       throw "nothing to unscan"
463 |
467 |
468 | # 469 |
470 |

Private methods

471 |
473 |
474 |
478 |
479 | # 480 |
481 |

Sets the state of the scanner (for internal use only).

482 | 483 |
485 |
  setState: (matches, values) ->
486 |     @head     = values?.head ? @head
487 |     @last     = values?.last ? @last
488 |     @captures = matches.slice 1
489 |     @match    = matches[0]
490 |
493 |
// 494 | 495 | -------------------------------------------------------------------------------- /lib/strscan.js: --------------------------------------------------------------------------------

/**
 * StringScanner: a simple string tokenizer for lexical scanning, modelled on
 * Ruby's StringScanner. It tracks a position (`head`) in a source string and
 * matches regular expressions against the text from that position onward.
 *
 * Written in ES5 style (function + prototype, `var`) to stay loadable on the
 * engines this package has always targeted.
 *
 * @constructor
 * @param {*} source  Value to scan; converted with `source.toString()`.
 */
function StringScanner(source) {
  this.source = source.toString();
  this.reset();
}

/**
 * Runs `regexp` against the unscanned remainder of `scanner`.
 *
 * `lastIndex` is cleared first: `exec` on a /g or /y regex resumes from the
 * offset of its previous match, which is meaningless here because every call
 * sees a different remainder string.
 *
 * @param {StringScanner} scanner
 * @param {RegExp} regexp
 * @returns {Array|null} The `exec` result (index is relative to the remainder).
 */
function execOnRemainder(scanner, regexp) {
  regexp.lastIndex = 0;
  return regexp.exec(scanner.getRemainder());
}

/**
 * Matches `regexp` exactly at the current position and advances past it.
 *
 * @param {RegExp} regexp
 * @returns {string|undefined} The matched text, or undefined when there is no
 *   match at the current position (match data is cleared in that case).
 */
StringScanner.prototype.scan = function (regexp) {
  var matches = execOnRemainder(this, regexp);
  if (matches && matches.index === 0) {
    // The literal is built before setState runs, so `last` records the old head.
    return this.setState(matches, {
      head: this.head + matches[0].length,
      last: this.head
    });
  }
  return this.setState([]);
};

/**
 * Matches `regexp` at or after the current position and advances to the end
 * of the match.
 *
 * @param {RegExp} regexp
 * @returns {string|undefined} The text from the old position through the end
 *   of the match, or undefined when nothing matches.
 */
StringScanner.prototype.scanUntil = function (regexp) {
  var matches = execOnRemainder(this, regexp);
  if (!matches) {
    return this.setState([]);
  }
  this.setState(matches, {
    head: this.head + matches.index + matches[0].length,
    last: this.head
  });
  return this.source.slice(this.last, this.head);
};

/**
 * Scans exactly one character (one UTF-16 code unit), line terminators
 * included, and advances past it.
 *
 * `/[\s\S]/` is used rather than `/./` because `.` does not match "\n", "\r",
 * U+2028 or U+2029.
 *
 * @returns {string|undefined} The character, or undefined at end of input.
 */
StringScanner.prototype.scanChar = function () {
  return this.scan(/[\s\S]/);
};

/**
 * Like `scan`, but reports how far the scanner advanced.
 *
 * @param {RegExp} regexp
 * @returns {number|undefined} Length of the matched text, or undefined when
 *   there is no match (or the match is empty).
 */
StringScanner.prototype.skip = function (regexp) {
  if (this.scan(regexp)) {
    return this.match.length;
  }
};

/**
 * Like `scanUntil`, but reports how far the scanner advanced.
 *
 * @param {RegExp} regexp
 * @returns {number|undefined} Number of characters advanced, or undefined
 *   when nothing matches.
 */
StringScanner.prototype.skipUntil = function (regexp) {
  if (this.scanUntil(regexp)) {
    return this.head - this.last;
  }
};

/**
 * Tests whether `regexp` matches at the current position without advancing.
 * Match data (getMatch, getCapture, getPreMatch, getPostMatch) is updated.
 *
 * @param {RegExp} regexp
 * @returns {string|undefined} The matched text, or undefined when there is no
 *   match at the current position.
 */
StringScanner.prototype.check = function (regexp) {
  var matches = execOnRemainder(this, regexp);
  if (matches && matches.index === 0) {
    // head stays put, so record where the match ends for pre/post-match.
    return this.setState(matches, {
      matchEnd: this.head + matches[0].length
    });
  }
  return this.setState([]);
};

/**
 * Tests whether `regexp` matches at or after the current position without
 * advancing. Match data is updated.
 *
 * @param {RegExp} regexp
 * @returns {string|undefined} The text from the current position through the
 *   end of the match, or undefined when nothing matches.
 */
StringScanner.prototype.checkUntil = function (regexp) {
  var matches = execOnRemainder(this, regexp);
  if (!matches) {
    return this.setState([]);
  }
  var end = this.head + matches.index + matches[0].length;
  this.setState(matches, { matchEnd: end });
  return this.source.slice(this.head, end);
};

/**
 * Returns upcoming characters without advancing.
 *
 * @param {number} [length=1] Number of characters to return.
 * @returns {string}
 */
StringScanner.prototype.peek = function (length) {
  return this.source.substr(this.head, length != null ? length : 1);
};

/** @returns {string} The full source string. */
StringScanner.prototype.getSource = function () {
  return this.source;
};

/** @returns {string} The source string from the current position onward. */
StringScanner.prototype.getRemainder = function () {
  return this.source.slice(this.head);
};

/** @returns {number} The current position (0 after reset, source length after terminate). */
StringScanner.prototype.getPosition = function () {
  return this.head;
};

/** @returns {boolean} True when the position is at the end of the source string. */
StringScanner.prototype.hasTerminated = function () {
  return this.head === this.source.length;
};

/**
 * @returns {string|undefined} The source text before the most recent match,
 *   or undefined when there is no (non-empty) recent match.
 */
StringScanner.prototype.getPreMatch = function () {
  if (this.match) {
    // Measured from the end of the match, not from head: after check and
    // checkUntil the head has not moved past the match.
    return this.source.slice(0, this.matchEnd - this.match.length);
  }
};

/** @returns {string|undefined} The most recently matched text, if any. */
StringScanner.prototype.getMatch = function () {
  return this.match;
};

/**
 * @returns {string|undefined} The source text after the most recent match,
 *   or undefined when there is no (non-empty) recent match.
 */
StringScanner.prototype.getPostMatch = function () {
  if (this.match) {
    return this.source.slice(this.matchEnd);
  }
};

/**
 * @param {number} index Zero-based capture group index.
 * @returns {string|undefined} That capture from the most recent match.
 */
StringScanner.prototype.getCapture = function (index) {
  return this.captures[index];
};

/** Moves back to position 0 and clears match data. */
StringScanner.prototype.reset = function () {
  return this.setState([], {
    head: 0,
    last: 0
  });
};

/** Moves to the end of the source string and clears match data. */
StringScanner.prototype.terminate = function () {
  return this.setState([], {
    head: this.source.length,
    last: this.head
  });
};

/**
 * Appends `string` to the source; the position is unchanged.
 *
 * @param {string} string
 * @returns {string} The new source string.
 */
StringScanner.prototype.concat = function (string) {
  return (this.source += string);
};

/**
 * Restores the position saved by the most recent scan and clears match data.
 * Only one previous position is kept.
 *
 * @throws {string} "nothing to unscan" when there is no recent match. A bare
 *   string is thrown, not an Error, because existing callers may compare the
 *   thrown value.
 */
StringScanner.prototype.unscan = function () {
  if (this.match) {
    return this.setState([], {
      head: this.last,
      last: 0
    });
  } else {
    throw "nothing to unscan";
  }
};

/**
 * Updates the scanner state (internal use only).
 *
 * @param {Array} matches  An `exec` result, or [] to clear match data.
 * @param {Object} [values]  Optional overrides: `head`, `last`, and `matchEnd`
 *   (absolute end offset of the match; defaults to the resulting head).
 * @returns {string|undefined} The new value of `match`.
 */
StringScanner.prototype.setState = function (matches, values) {
  if (values != null) {
    if (values.head != null) {
      this.head = values.head;
    }
    if (values.last != null) {
      this.last = values.last;
    }
  }
  this.matchEnd = values != null && values.matchEnd != null ? values.matchEnd : this.head;
  this.captures = matches.slice(1);
  return (this.match = matches[0]);
};

// Publish on `exports` under CommonJS, otherwise on the global object.
// `this` is only a fallback for engines without globalThis; it is undefined
// in strict/module code, hence the guard.
(function (root) {
  if (typeof exports !== "undefined" && exports !== null) {
    exports.StringScanner = StringScanner;
  } else if (root) {
    root.StringScanner = StringScanner;
  }
})(typeof globalThis !== "undefined" ? globalThis : this);

// -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { "name": "strscan" 2 | , "description": "Simple string tokenizer for lexical scanning operations" 3 | , "author": "Sam Stephenson" 4 | , "version": "1.0.1" 5 | , "licenses": [ 6 | { "type": "MIT" 7 | , "url": "http://github.com/sstephenson/strscan-js/raw/master/LICENSE" 8 | }] 9 | , "directories": { "lib": "./lib" } 10 | , "main": "./lib/strscan" 11 | , "repository": 12 | { "type": "git" 13 | , "url": "http://github.com/sstephenson/strscan-js.git" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/strscan.coffee: -------------------------------------------------------------------------------- 1 | # **StringScanner** is a simple string tokenizer that provides for lexical 2 | # scanning operations on a string. It's a JavaScript port of the [Ruby 3 | # library with the same name](http://ruby-doc.org/core/classes/StringScanner.html). 4 | # 5 | # Scanning a string means keeping track of and advancing a position (a 6 | # zero-based index into the source string) and matching regular expressions 7 | # against the portion of the source string after the position.
8 | # 9 | # StringScanner is written in [CoffeeScript](http://coffeescript.org/) and 10 | # distributed via [npm](http://npm.mape.me/) as a [CommonJS 11 | # module](http://www.commonjs.org/). 12 | # 13 | # [Clone, fork, or file bugs at GitHub](http://github.com/sstephenson/strscan-js). 14 | # 15 | #### Quick start 16 | # ------------------------------------------------------------------------- 17 | # 18 | # $ npm install strscan 19 | # $ node-repl 20 | # > var StringScanner = require("strscan").StringScanner 21 | # > var s = new StringScanner("This is a test") 22 | # > s.scan(/\w+/) # => "This" 23 | # > s.scan(/\w+/) # => null 24 | # > s.scan(/\s+/) # => " " 25 | # > s.scan(/\s+/) # => null 26 | # > s.scan(/\w+/) # => "is" 27 | # > s.hasTerminated() # => false 28 | # > s.scan(/\s+/) # => " " 29 | # > s.scan(/(\w+)\s+(\w+)/) # => "a test" 30 | # > s.getMatch() # => "a test" 31 | # > s.getCapture(0) # => "a" 32 | # > s.getCapture(1) # => "test" 33 | # > s.hasTerminated() # => true 34 | 35 | #### Creating a scanner 36 | # ------------------------------------------------------------------------- 37 | 38 | # Create a new `StringScanner` with a source string. 39 | (exports ? this).StringScanner = class StringScanner 40 | constructor: (source) -> 41 | @source = source.toString() 42 | @reset() 43 | 44 | 45 | #### Scanning for matches 46 | # The `scan`, `scanUntil`, `scanChar`, `skip`, and `skipUntil` methods look 47 | # for matching strings and advance the scanner's position. The _scan_ 48 | # methods return the matched string; the _skip_ methods return the number 49 | # of characters by which the scan position advanced. 50 | # ------------------------------------------------------------------------- 51 | 52 | # Matches `regexp` at the current position. Returns the matched string 53 | # and advances the scanner's position, or returns `null` if there is no 54 | # match. 
55 | scan: (regexp) -> 56 | if (matches = regexp.exec @getRemainder()) and matches.index is 0 57 | @setState matches, 58 | head: @head + matches[0].length 59 | last: @head 60 | else 61 | @setState [] 62 | 63 | # Matches `regexp` at _or after_ the current position. Returns the 64 | # portion of the source string after the scanner's position up to and 65 | # including the end of the match and advances the scanner's position, 66 | # or returns `null` if there is no match. 67 | scanUntil: (regexp) -> 68 | if matches = regexp.exec @getRemainder() 69 | @setState matches, 70 | head: @head + matches.index + matches[0].length 71 | last: @head 72 | @source.slice @last, @head 73 | else 74 | @setState [] 75 | 76 | # Scans one character, returns it, and advances the scanner's position. 77 | scanChar: -> 78 | @scan /[\s\S]/ 79 | 80 | # Skips over the given `regexp` at the current position. Returns the 81 | # length of the matched string and advances the scanner's position, or 82 | # returns `null` if there is no match. 83 | skip: (regexp) -> 84 | @match.length if @scan regexp 85 | 86 | # Skips over the given `regexp` at _or after_ the current position. 87 | # Returns the length of the string up to and including the end of the 88 | # match and advances the scanner's position, or returns `null` if there 89 | # is no match. 90 | skipUntil: (regexp) -> 91 | @head - @last if @scanUntil regexp 92 | 93 | 94 | #### Looking ahead 95 | # The `check`, `checkUntil` and `peek` methods look for matching strings 96 | # without advancing the scanner's position. 97 | # ------------------------------------------------------------------------- 98 | 99 | # Checks to see if `regexp` can be matched at the current position and 100 | # returns the matched string without advancing the scanner's position, or 101 | # returns `null` if there is no match. 
102 | check: (regexp) -> 103 | if (matches = regexp.exec @getRemainder()) and matches.index is 0 104 | @setState matches 105 | else 106 | @setState [] 107 | 108 | # Checks to see if `regexp` can be matched at _or after_ the current 109 | # position. Returns the portion of the source string after the current 110 | # position up to and including the end of the match without advancing the 111 | # scanner's position, or returns `null` if there is no match. 112 | checkUntil: (regexp) -> 113 | if matches = regexp.exec @getRemainder() 114 | @setState matches 115 | @source.slice @head, @head + matches.index + matches[0].length 116 | else 117 | @setState [] 118 | 119 | # Returns the next `length` characters after the current position. If 120 | # called without a `length`, returns the next character. The scanner's 121 | # position is not advanced. 122 | peek: (length) -> 123 | @source.substr @head, length ? 1 124 | 125 | 126 | #### Accessing scanner data 127 | # The `getSource`, `getRemainder`, `getPosition` and `hasTerminated` 128 | # methods provide information about the scanner's source string and 129 | # position. 130 | # ------------------------------------------------------------------------- 131 | 132 | # Returns the scanner's source string. 133 | getSource: -> 134 | @source 135 | 136 | # Returns the portion of the source string from the scanner's position 137 | # onward. 138 | getRemainder: -> 139 | @source.slice @head 140 | 141 | # Returns the scanner's position. In the _reset_ position, this value is 142 | # zero. In the _terminated_ position, this value is the length of the 143 | # source string. 144 | getPosition: -> 145 | @head 146 | 147 | # Checks to see if the scanner has reached the end of the string. 148 | hasTerminated: -> 149 | @head is @source.length 150 | 151 | 152 | #### Accessing match data 153 | # The `getPreMatch`, `getMatch`, `getPostMatch` and `getCapture` methods 154 | # provide information about the most recent match. 
155 | # ------------------------------------------------------------------------- 156 | 157 | # Returns the portion of the source string leading up to, but not 158 | # including, the most recent match. (Returns `null` if there is no recent 159 | # match.) 160 | getPreMatch: -> 161 | @source.slice 0, @head - @match.length if @match 162 | 163 | # Returns the most recently matched portion of the source string (or 164 | # `null` if there is no recent match). 165 | getMatch: -> 166 | @match 167 | 168 | # Returns the portion of the source string immediately following the most 169 | # recent match. (Returns `null` if there is no recent match.) 170 | getPostMatch: -> 171 | @source.slice @head if @match 172 | 173 | # Returns the `index`th capture from the most recent match (or `null` if 174 | # there is no recent match). 175 | getCapture: (index) -> 176 | @captures[index] 177 | 178 | 179 | #### Modifying the scanner's state 180 | # The `reset`, `terminate`, `concat` and `unscan` methods let you change 181 | # the state of the scanner. 182 | # ------------------------------------------------------------------------- 183 | 184 | # Resets the scanner back to its original position and clears its match 185 | # data. 186 | reset: -> 187 | @setState [], head: 0, last: 0 188 | 189 | # Advances the scanner position to the end of the string and clears its 190 | # match data. 191 | terminate: -> 192 | @setState [], head: @source.length, last: @head 193 | 194 | # Appends `string` to the scanner's source string. The scanner's position 195 | # is not affected. 196 | concat: (string) -> 197 | @source += string 198 | 199 | # Sets the scanner's position to its previous position and clears its 200 | # match data. Only one previous position is stored. Throws an exception 201 | # if there is no previous position. 
202 | unscan: -> 203 | if @match 204 | @setState [], head: @last, last: 0 205 | else 206 | throw "nothing to unscan" 207 | 208 | 209 | ##### Private methods 210 | 211 | # Sets the state of the scanner (for internal use only). 212 | setState: (matches, values) -> 213 | @head = values?.head ? @head 214 | @last = values?.last ? @last 215 | @captures = matches.slice 1 216 | @match = matches[0] 217 | 218 | -------------------------------------------------------------------------------- /test/test_strscan.coffee: -------------------------------------------------------------------------------- 1 | {StringScanner} = require "../src/strscan" 2 | 3 | s = new StringScanner "Fri Dec 12 1975 14:39" 4 | s.scan /Fri / 5 | s.concat " +1000 GMT" 6 | ok s.getSource() is "Fri Dec 12 1975 14:39 +1000 GMT" 7 | ok s.scan(/Dec/) is "Dec" 8 | 9 | s.reset() 10 | ok s.getPosition() is 0 11 | ok not s.getPreMatch() 12 | ok not s.getMatch() 13 | ok not s.getPostMatch() 14 | ok s.getRemainder() is s.getSource() 15 | ok s.scan(/Fri /) 16 | 17 | s = new StringScanner "Fri Dec 12 1975 14:39" 18 | ok s.scan(/(\w+) (\w+) (\d+) /) is "Fri Dec 12 " 19 | ok s.getMatch() is "Fri Dec 12 " 20 | ok s.getCapture(0) is "Fri" 21 | ok s.getCapture(1) is "Dec" 22 | ok s.getCapture(2) is "12" 23 | ok s.getPostMatch() is "1975 14:39" 24 | ok s.getPreMatch() is "" 25 | 26 | s = new StringScanner "test string" 27 | ok not s.hasTerminated() 28 | s.scan /test/ 29 | ok not s.hasTerminated() 30 | s.terminate() 31 | ok s.hasTerminated() 32 | 33 | ok s.getPosition() is 11 34 | s.concat "123" 35 | ok not s.hasTerminated() 36 | ok s.getRemainder() is "123" 37 | ok s.scan /123/ 38 | ok s.getPosition() is 14 39 | 40 | s = new StringScanner "ab" 41 | ok s.scanChar() is "a" 42 | ok s.scanChar() is "b" 43 | ok not s.scanChar() 44 | 45 | s = new StringScanner "☃\n1" 46 | ok s.scanChar() is "☃" 47 | ok s.scanChar() is "\n" 48 | ok s.scanChar() is "1" 49 | ok not s.scanChar() 50 | 51 | s = new StringScanner "test string" 52 | ok 
s.peek(7) is "test st" 53 | ok s.peek(7) is "test st" 54 | 55 | s = new StringScanner "test string" 56 | ok s.scan(/\w+/) is "test" 57 | ok not s.scan(/\w+/) 58 | ok s.scan(/\s+/) is " " 59 | ok s.scan(/\w+/) is "string" 60 | ok not s.scan(/\w+/) 61 | 62 | s = new StringScanner "test string" 63 | ok s.scan(/\w+/) is "test" 64 | ok s.scan(/\s+/) is " " 65 | ok s.getPreMatch() is "test" 66 | ok s.getPostMatch() is "string" 67 | 68 | s = new StringScanner "Fri Dec 12 1975 14:39" 69 | ok s.scanUntil(/1/) is "Fri Dec 1" 70 | ok s.getPreMatch() is "Fri Dec " 71 | ok not s.scanUntil(/XYZ/) 72 | 73 | s = new StringScanner "abaabaaab" 74 | ok s.scanUntil(/b/) is "ab" 75 | ok s.scanUntil(/b/) is "aab" 76 | ok s.scanUntil(/b/) is "aaab" 77 | 78 | s = new StringScanner "test string" 79 | ok s.skip(/\w+/) is 4 80 | ok not s.skip(/\w+/) 81 | ok s.skip(/\s+/) is 1 82 | ok s.skip(/\w+/) is 6 83 | ok not s.skip(/./) 84 | 85 | s = new StringScanner "Fri Dec 12 1975 14:39" 86 | ok s.skipUntil(/12/) is 10 87 | ok s.peek() is " " 88 | ok s.peek(3) is " 19" 89 | 90 | s = new StringScanner "test string" 91 | ok s.scan(/\w+/) is "test" 92 | s.unscan() 93 | ok s.scan(/../) is "te" 94 | ok not s.scan(/\d/) 95 | 96 | raised = true 97 | try 98 | s.unscan() 99 | raised = false 100 | catch e 101 | ok raised 102 | 103 | s = new StringScanner "Fri Dec 12 1975 14:39" 104 | ok s.check(/Fri/) is "Fri" 105 | ok s.getPosition() is 0 106 | ok s.getMatch() is "Fri" 107 | ok not s.check(/12/) 108 | ok not s.getMatch() 109 | 110 | s = new StringScanner "Fri Dec 12 1975 14:39" 111 | ok s.checkUntil(/12/) is "Fri Dec 12" 112 | ok s.getPosition() is 0 113 | ok s.getMatch() is "12" 114 | --------------------------------------------------------------------------------