├── Cakefile ├── LICENSE ├── README.md ├── doc └── strscan.html ├── lib └── strscan.js ├── package.json ├── src └── strscan.coffee └── test └── test_strscan.coffee /Cakefile: -------------------------------------------------------------------------------- 1 | # Much of this Cakefile shamelessly ripped from coffee-script 2 | 3 | fs = require 'fs' 4 | CoffeeScript = require 'coffee-script' 5 | {spawn, exec} = require 'child_process' 6 | 7 | # ANSI Terminal Colors. 8 | red = '\033[0;31m' 9 | green = '\033[0;32m' 10 | reset = '\033[0m' 11 | 12 | # Log a message with a color. 13 | log = (message, color, explanation) -> 14 | puts color + message + reset + ' ' + (explanation or '') 15 | 16 | extend = (object, properties) -> 17 | (object[key] = val) for all key, val of properties 18 | 19 | run = (args) -> 20 | proc = spawn 'coffee', args 21 | proc.stderr.on 'data', (buffer) -> puts buffer.toString() 22 | proc.on 'exit', (status) -> process.exit(1) if status != 0 23 | 24 | task 'build', -> 25 | src = fs.readFileSync 'src/strscan.coffee' 26 | js = CoffeeScript.compile src.toString() 27 | fs.writeFileSync 'lib/strscan.js', js 28 | 29 | task 'test', -> 30 | extend global, require 'assert' 31 | passedTests = failedTests = 0 32 | startTime = new Date 33 | originalOk = ok 34 | helpers.extend global, 35 | ok: (args...) -> passedTests += 1; originalOk(args...) 
36 | process.on 'exit', -> 37 | time = ((new Date - startTime) / 1000).toFixed(2) 38 | message = "passed #{passedTests} tests in #{time} seconds#{reset}" 39 | if failedTests 40 | log "failed #{failedTests} and #{message}", red 41 | else 42 | log message, green 43 | fs.readdir 'test', (err, files) -> 44 | files.forEach (file) -> 45 | return unless file.match(/\.coffee$/i) 46 | fileName = path.join 'test', file 47 | fs.readFile fileName, (err, code) -> 48 | try 49 | CoffeeScript.run code.toString(), {fileName} 50 | catch err 51 | failedTests += 1 52 | log "failed #{fileName}", red, '\n' + err.stack.toString() 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010 Sam Stephenson 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## StringScanner 2 | 3 | **StringScanner** is a simple string tokenizer that provides for lexical 4 | scanning operations on a string. It's a JavaScript port of the [Ruby 5 | library with the same name](http://ruby-doc.org/core/classes/StringScanner.html). 6 | 7 | Scanning a string means keeping track of and advancing a position (a 8 | zero-based index into the source string) and matching regular expressions 9 | against the portion of the source string after the position. 10 | 11 | StringScanner is written in [CoffeeScript](http://coffeescript.org/) and 12 | distributed via [npm](http://npm.mape.me/) as a [CommonJS 13 | module](http://www.commonjs.org/). 14 | 15 | ### Quick start 16 | 17 | $ npm install strscan 18 | $ node-repl 19 | > var StringScanner = require("strscan").StringScanner 20 | > var s = new StringScanner("This is a test") 21 | > s.scan(/\w+/) # => "This" 22 | > s.scan(/\w+/) # => null 23 | > s.scan(/\s+/) # => " " 24 | > s.scan(/\s+/) # => null 25 | > s.scan(/\w+/) # => "is" 26 | > s.hasTerminated() # => false 27 | > s.scan(/\s+/) # => " " 28 | > s.scan(/(\w+)\s+(\w+)/) # => "a test" 29 | > s.getMatch() # => "a test" 30 | > s.getCapture(0) # => "a" 31 | > s.getCapture(1) # => "test" 32 | > s.hasTerminated() # => true 33 | 34 | ### More 35 | 36 | [Clone, fork, or file bugs at GitHub](http://github.com/sstephenson/strscan-js) 37 | 38 | [Read the full documentation/annotated source code](http://sstephenson.github.com/strscan-js/) 39 | 40 | ### Copyright 41 | 42 | Copyright (c) 2010 Sam Stephenson. Distributed under the terms of an 43 | MIT-style license. See LICENSE for details. 44 | -------------------------------------------------------------------------------- /doc/strscan.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 |strscan.coffee |
15 | 16 | |
---|---|
21 |
22 | #
23 |
24 | StringScanner is a simple string tokenizer that provides for lexical 25 | scanning operations on a string. It’s a JavaScript port of the Ruby 26 | library with the same name. 27 | 28 |Scanning a string means keeping track of and advancing a position (a 29 | zero-based index into the source string) and matching regular expressions 30 | against the portion of the source string after the position. 31 | 32 |StringScanner is written in CoffeeScript and 33 | distributed via npm as a CommonJS 34 | module. 35 | 36 |Clone, fork, or file bugs at GitHub. 37 | 38 |Quick start39 | 40 |41 | 42 |
59 | |
60 |
61 |
62 | |
63 |
66 |
67 | #
68 |
69 | Creating a scanner70 | 71 |72 | |
73 |
74 |
75 | |
76 |
79 |
80 | #
81 |
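82 | Create a new `StringScanner` for the given `source`, which is converted to a string with its `toString()` method. |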
84 |
# The scanner class, exported as `StringScanner` on `exports` when that
# exists and on `this` otherwise.
(exports ? this).StringScanner = class StringScanner
  # Build a scanner over `source`. The argument is converted with its own
  # `toString()` method, then the scanner is put into the reset state.
  constructor: (source) ->
    text = source.toString()
    @source = text
    @reset()
90 |
93 |
94 | #
95 |
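96 | Scanning for matches 97 | 98 | The `scan`, `scanUntil`, `scanChar`, `skip`, and `skipUntil` methods look for matching strings and advance the scanner's position. `scan`, `scanUntil`, and `scanChar` return the text that was consumed; `skip` and `skipUntil` return the number of characters that were consumed. 104 |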
105 |
106 |
107 | |
108 |
111 |
112 | #
113 |
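114 | Matches `regexp` at the current position. Returns the matched string and advances the scanner's position, or returns `undefined` if there is no match. |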
118 |
# Match `regexp` against the text at the scanner's current position.
#
# regexp - the RegExp to try. Its `lastIndex` is reset to 0 before matching.
#
# When the match starts exactly at the current position, the match data is
# recorded, the position moves past the match (the old position is kept as
# `last`), and the matched string is returned. Otherwise the match data is
# cleared and `undefined` is returned.
scan: (regexp) ->
  # A global or sticky regexp resumes from its own `lastIndex`. Every call
  # searches a freshly sliced remainder string, so a stale `lastIndex` would
  # make a reused regexp skip or miss text. Always start at the beginning.
  regexp.lastIndex = 0
  matches = regexp.exec @getRemainder()
  if matches and matches.index is 0
    @setState matches,
      head: @head + matches[0].length
      last: @head
  else
    @setState []
127 |
130 |
131 | #
132 |
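133 | Matches `regexp` at or after the current position. Returns the portion of the source string from the old position through the end of the match and advances the scanner's position to the end of the match, or returns `undefined` if there is no match. |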
138 |
# Match `regexp` anywhere at or after the scanner's current position.
#
# regexp - the RegExp to search for. Its `lastIndex` is reset to 0 first.
#
# On success the match data is recorded, the position moves to the end of
# the match (the old position is kept as `last`), and the source text between
# the old and the new position is returned. On failure the match data is
# cleared and `undefined` is returned.
scanUntil: (regexp) ->
  # Global/sticky regexps carry `lastIndex` across calls. The remainder string
  # is new on every call, so the search must restart from index 0.
  regexp.lastIndex = 0
  if matches = regexp.exec @getRemainder()
    @setState matches,
      head: @head + matches.index + matches[0].length
      last: @head
    @source.slice @last, @head
  else
    @setState []
148 |
151 |
152 | #
153 |
154 | Scans one character, returns it, and advances the scanner’s position. 155 | |
156 |
# Scan a single character at the current position, return it, and advance
# the scanner past it. Returns `undefined` at the end of the source.
scanChar: ->
  # `.` does not match line terminators, so `/./` would fail at a newline.
  # `[\s\S]` matches any single character.
  @scan /[\s\S]/
160 |
163 |
164 | #
165 |
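166 | Skips over the given `regexp` at the current position. Returns the length of the matched string and advances the scanner's position, or returns `undefined` if there is no match. |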
170 |
# Skip over `regexp` at the current position.
#
# Returns the length of the matched string when `scan` succeeds, or
# `undefined` when there is no match.
skip: (regexp) ->
  # `scan` returns "" for a successful empty match. Test for existence rather
  # than truthiness so that case reports a length of 0 instead of a failure.
  @match.length if @scan(regexp)?
174 |
177 |
178 | #
179 |
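180 | Skips over the given `regexp` at or after the current position. Returns the number of characters skipped, up to and including the match, and advances the scanner's position, or returns `undefined` if there is no match. |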
185 |
# Skip ahead through the next occurrence of `regexp`.
#
# Returns the distance between the previous and the new position when
# `scanUntil` succeeds, or `undefined` when there is no match.
skipUntil: (regexp) ->
  # `scanUntil` returns "" when an empty match is found right at the current
  # position. An existence test reports that as 0 characters skipped.
  @head - @last if @scanUntil(regexp)?
189 |
192 |
193 | #
194 |
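195 | Looking ahead 196 | 197 | The `check`, `checkUntil`, and `peek` methods look at the text after the scanner's position without advancing the position. 201 |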
202 |
203 |
204 | |
205 |
208 |
209 | #
210 |
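211 | Checks to see if `regexp` can be matched at the current position. Returns the matched string without advancing the scanner's position, or returns `undefined` if there is no match. |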
215 |
# Test whether `regexp` matches at the scanner's current position, without
# moving the position.
#
# regexp - the RegExp to try. Its `lastIndex` is reset to 0 before matching.
#
# Records the match data and returns the matched string when the match starts
# exactly at the current position. Otherwise clears the match data and
# returns `undefined`.
check: (regexp) ->
  # Restart global/sticky regexps from index 0, because the remainder string
  # searched here is rebuilt on every call.
  regexp.lastIndex = 0
  matches = regexp.exec @getRemainder()
  if matches and matches.index is 0
    @setState matches
  else
    @setState []
222 |
225 |
226 | #
227 |
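228 | Checks to see if `regexp` can be matched at or after the current position. Returns the portion of the source string from the current position through the end of the match without advancing the scanner's position, or returns `undefined` if there is no match. |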
233 |
# Test whether `regexp` matches anywhere at or after the current position,
# without moving the position.
#
# regexp - the RegExp to search for. Its `lastIndex` is reset to 0 first.
#
# On success records the match data and returns the source text from the
# current position through the end of the match. On failure clears the match
# data and returns `undefined`.
checkUntil: (regexp) ->
  # Restart global/sticky regexps from index 0, because the remainder string
  # searched here is rebuilt on every call.
  regexp.lastIndex = 0
  if matches = regexp.exec @getRemainder()
    @setState matches
    @source.slice @head, @head + matches.index + matches[0].length
  else
    @setState []
241 |
244 |
245 | #
246 |
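247 | Returns the next `length` characters after the scanner's position without advancing it. If `length` is omitted, one character is returned. |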
251 |
# Return up to `length` characters of the source starting at the current
# position, without moving the position. A missing (null/undefined) `length`
# means one character.
peek: (length) ->
  count = if length? then length else 1
  @source.substr @head, count
255 |
258 |
259 | #
260 |
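261 | Accessing scanner data 262 | 263 | The `getSource`, `getRemainder`, `getPosition`, and `hasTerminated` methods provide information about the scanner's source string and position. 268 |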
269 |
270 |
271 | |
272 |
275 |
276 | #
277 |
278 | Returns the scanner’s source string. 279 | |
280 |
# Return the string this scanner was given (including anything appended to
# `@source` since).
getSource: ->
  return @source
284 |
287 |
288 | #
289 |
290 | Returns the portion of the source string from the scanner’s position 291 | onward. 292 | |
293 |
# Return the part of the source string that starts at the current position.
getRemainder: ->
  {source, head} = this
  source.slice head
297 |
300 |
301 | #
302 |
303 | Returns the scanner’s position. In the reset position, this value is 304 | zero. In the terminated position, this value is the length of the 305 | source string. 306 | |
307 |
# Return the scanner's current position (`@head`), an index into the source.
getPosition: ->
  return @head
311 |
314 |
315 | #
316 |
317 | Checks to see if the scanner has reached the end of the string. 318 | |
319 |
# Return true when the current position equals the length of the source
# string, i.e. nothing is left to scan.
hasTerminated: ->
  sourceLength = @source.length
  sourceLength is @head
323 |
326 |
327 | #
328 |
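329 | Accessing match data 330 | 331 | The `getPreMatch`, `getMatch`, `getPostMatch`, and `getCapture` methods provide information about the most recent match. 335 |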
336 |
337 |
338 | |
339 |
342 |
343 | #
344 |
345 | Returns the portion of the source string leading up to, but not
346 | including, the most recent match. (Returns `undefined` if there is no recent match.) |
349 |
# Return the source text that precedes the most recent match, or `undefined`
# when there is no recent match.
#
# NOTE(review): the start of the match is computed as `@head - @match.length`,
# which assumes the position was advanced past the match. `check` and
# `checkUntil` record a match without moving `@head`, so the result looks
# wrong after those calls. Fixing that needs the match offset stored in
# `setState`; confirm the intended behavior.
getPreMatch: ->
  # An empty-string match is still a match, so test for existence instead of
  # truthiness.
  @source.slice 0, @head - @match.length if @match?
353 |
356 |
357 | #
358 |
359 | Returns the most recently matched portion of the source string (or
360 | `undefined` if there is no recent match). |
362 |
# Return the text recorded by the most recent match (`@match`), which is
# `undefined` when the last operation did not match.
getMatch: ->
  return @match
366 |
369 |
370 | #
371 |
372 | Returns the portion of the source string immediately following the most
373 | recent match. (Returns `undefined` if there is no recent match.) |
375 |
# Return the source text that follows the most recent match, or `undefined`
# when there is no recent match.
#
# NOTE(review): this slices from `@head`, which is only the end of the match
# when the position was advanced past it. `check` and `checkUntil` leave
# `@head` untouched, so after those calls the result appears to include the
# match itself; confirm the intended behavior.
getPostMatch: ->
  # An empty-string match is still a match, so test for existence instead of
  # truthiness.
  @source.slice @head if @match?
379 |
382 |
383 | #
384 |
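385 | Returns the capture at zero-based position `index` from the most recent match (or `undefined` if there is no such capture). |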
388 |
# Return the capture group stored at `index` in `@captures`. Index 0 is the
# first parenthesised group, because the full match is kept separately.
getCapture: (index) ->
  {captures} = this
  captures[index]
392 |
395 |
396 | #
397 |
398 | Modifying the scanner’s state 399 | 400 | The `reset`, `terminate`, `concat`, and `unscan` methods let you change the state of the scanner. 404 |
405 |
406 |
407 | |
408 |
411 |
412 | #
413 |
414 | Resets the scanner back to its original position and clears its match 415 | data. 416 | |
417 |
# Move the scanner back to the start of the source (position and previous
# position both 0) and clear the match data.
reset: ->
  origin = head: 0, last: 0
  @setState [], origin
421 |
424 |
425 | #
426 |
427 | Advances the scanner position to the end of the string and clears its 428 | match data. 429 | |
430 |
# Move the scanner to the end of the source, remember the position it came
# from as `last`, and clear the match data.
terminate: ->
  previous = @head
  @setState [], head: @source.length, last: previous
434 |
437 |
438 | #
439 |
440 | Appends `string` to the scanner's source string. The scanner's position is not affected. |
443 |
# Append `string` to the end of the source. Position and match data are left
# alone. Returns the extended source string.
concat: (string) ->
  @source = @source + string
447 |
450 |
451 | #
452 |
453 | Sets the scanner’s position to its previous position and clears its 454 | match data. Only one previous position is stored. Throws an exception 455 | if there is no previous position. 456 | |
457 |
# Rewind the scanner to its previous position (`@last`) and clear the match
# data. Only one previous position is remembered.
#
# Throws the string "nothing to unscan" when there is no recent match.
unscan: ->
  # A successful match of the empty string is still a match, so test for
  # existence rather than truthiness before refusing to rewind.
  throw "nothing to unscan" unless @match?
  @setState [], head: @last, last: 0
464 |
467 |
468 | #
469 |
470 | Private methods471 | |
472 |
473 |
474 | |
475 |
478 |
479 | #
480 |
481 | Sets the state of the scanner (for internal use only). 482 | 483 | |
484 |
# Store new scanner state (internal use only).
#
# matches - array whose first element becomes the current match and whose
#           remaining elements become the captures; pass [] to clear both.
# values  - optional object that may carry a new `head` and/or `last`. A
#           missing or null entry leaves the current value in place.
#
# Returns the new value of `@match`.
setState: (matches, values) ->
  @head = if values?.head? then values.head else @head
  @last = if values?.last? then values.last else @last
  @captures = matches[1..]
  @match = matches[0]
491 |