├── var ├── name ├── title ├── version ├── created ├── requirements ├── summary ├── repositories ├── authors ├── description └── resources ├── lib ├── regex.yml ├── regex │ ├── command.rb │ ├── string.rb │ ├── templates.rb │ ├── replacer.rb │ └── extractor.rb └── regex.rb ├── Gemfile ├── bin └── regex ├── .yardopts ├── .travis.yml ├── .gitignore ├── work ├── ANN ├── defunct │ └── webme │ │ └── advert.html ├── trash │ └── command.rb ├── ruby-matches.rb ├── test_extractor.rb ├── xact-ruby.rb └── xact.xml ├── demo ├── cli │ ├── applique │ │ └── env.rb │ ├── 02_replace.md │ └── 01_search.md └── api │ ├── 01_regex.md │ └── 02_replacer.md ├── MANIFEST ├── Assembly ├── HISTORY.md ├── .index ├── LICENSE.txt ├── man └── man1 │ └── regex.1.ronn ├── README.md └── .gemspec /var/name: -------------------------------------------------------------------------------- 1 | regex 2 | -------------------------------------------------------------------------------- /lib/regex.yml: -------------------------------------------------------------------------------- 1 | ../.ruby -------------------------------------------------------------------------------- /var/title: -------------------------------------------------------------------------------- 1 | Regex 2 | -------------------------------------------------------------------------------- /var/version: -------------------------------------------------------------------------------- 1 | 1.1.1 2 | -------------------------------------------------------------------------------- /var/created: -------------------------------------------------------------------------------- 1 | 2006-05-09 2 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source :rubygems 2 | gemspec 3 | -------------------------------------------------------------------------------- /var/requirements: 
-------------------------------------------------------------------------------- 1 | --- 2 | - detroit (build) 3 | - qed (test) 4 | 5 | -------------------------------------------------------------------------------- /var/summary: -------------------------------------------------------------------------------- 1 | Regex is a simple commmand-line Regular Expression tool. 2 | -------------------------------------------------------------------------------- /bin/regex: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require 'regex' 3 | Regex.cli(*ARGV) 4 | -------------------------------------------------------------------------------- /var/repositories: -------------------------------------------------------------------------------- 1 | --- 2 | upstream: git://github.com/proutils/regex.git 3 | -------------------------------------------------------------------------------- /var/authors: -------------------------------------------------------------------------------- 1 | --- 2 | - Thomas Sawyer 3 | - Tyler Rick 4 | 5 | -------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --title "RegEx" 2 | --readme README.rdoc 3 | --protected 4 | --private 5 | lib/**/*.rb 6 | - 7 | [A-Z]*.* 8 | 9 | -------------------------------------------------------------------------------- /var/description: -------------------------------------------------------------------------------- 1 | Regex is a simple commmand-line Regular Expression tool 2 | that makes it easy to search documents for content matches. 
3 | 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | --- 2 | rvm: 3 | - 1.8.7 4 | - 1.9.2 5 | - 1.9.3 6 | - rbx-2.0 7 | - jruby 8 | - ree 9 | script: "bundle exec qed" 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .fire/digest 2 | .yardoc 3 | doc 4 | log 5 | man/man1/*.1 6 | man/man1/*.1.html 7 | pkg 8 | tmp 9 | web 10 | work/sandbox 11 | DEMO.rdoc 12 | -------------------------------------------------------------------------------- /var/resources: -------------------------------------------------------------------------------- 1 | --- 2 | home: http://rubyworks.github.com/regex 3 | wiki: http://wiki.github.com/rubyworks/regex 4 | code: http://github.com/rubyworks/regex 5 | mail: http://groups.google.com/group/rubyworks-mailinglist 6 | 7 | -------------------------------------------------------------------------------- /work/ANN: -------------------------------------------------------------------------------- 1 | ______ __ 2 | / ____/ __ ____ _____/ /_ ___ 3 | / __/ \ / / __ `/ ___/ __/ __ \ 4 | / /__ > 8 | 9 | 12 | 13 | -------------------------------------------------------------------------------- /demo/cli/applique/env.rb: -------------------------------------------------------------------------------- 1 | require 'regex' 2 | 3 | When 'Given a file (((\S+))) containing' do |file, text| 4 | File.open(file, 'w'){ |f| f << text } 5 | end 6 | 7 | When 'invoking the command' do |text| 8 | text = text.sub(/^\$\s+/, '') 9 | @out = `#{text}` 10 | end 11 | 12 | When 'Should produce' do |text| 13 | @out.strip.assert == text.strip 14 | end 15 | 16 | When 'result in a new file (((\S+))) containing' do |file, text| 17 | File.read(file).strip.assert == text.strip 18 | end 19 | 20 | 
-------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | #!mast .index .yardopts bin demo lib man spec test *.md *.txt 2 | .index 3 | .yardopts 4 | bin/regex 5 | demo/api/01_regex.md 6 | demo/api/02_replacer.md 7 | demo/cli/01_search.md 8 | demo/cli/02_replace.md 9 | demo/cli/applique/env.rb 10 | lib/regex/command.rb 11 | lib/regex/extractor.rb 12 | lib/regex/replacer.rb 13 | lib/regex/string.rb 14 | lib/regex/templates.rb 15 | lib/regex.rb 16 | lib/regex.yml 17 | man/man1/regex.1 18 | man/man1/regex.1.html 19 | man/man1/regex.1.ronn 20 | HISTORY.md 21 | README.md 22 | LICENSE.txt 23 | -------------------------------------------------------------------------------- /Assembly: -------------------------------------------------------------------------------- 1 | --- 2 | email: 3 | mailto: 4 | - ruby-talk@ruby-lang.org 5 | - rubyworks-mailinglist@googlegroups.com 6 | 7 | gem: 8 | active: true 9 | 10 | github: 11 | gh_pages: web 12 | 13 | qed: 14 | files: demo 15 | 16 | qedoc: 17 | files: demo 18 | title: RegEx Denonstration 19 | output: 20 | - DEMO.rdoc 21 | 22 | yard: 23 | priority: 2 24 | 25 | dnote: 26 | title: Developer's Notes 27 | output: log/notes.html 28 | 29 | vclog: 30 | output: 31 | - log/history.html 32 | - log/changes.html 33 | 34 | #ronn: 35 | # active: true 36 | 37 | -------------------------------------------------------------------------------- /lib/regex/command.rb: -------------------------------------------------------------------------------- 1 | require 'regex/extractor' 2 | require 'regex/replacer' 3 | 4 | module Regex 5 | 6 | # Commandline interface. 
7 | def self.cli(*argv) 8 | if argv.include?('-r') or argv.include?('--replace') 9 | controller = Replacer 10 | else 11 | controller = Extractor 12 | end 13 | 14 | begin 15 | controller.cli(argv) 16 | rescue => error 17 | if $DEBUG 18 | raise error 19 | #puts error.backtrace.join("\n ") 20 | else 21 | abort error.to_s 22 | end 23 | end 24 | end 25 | 26 | end 27 | 28 | -------------------------------------------------------------------------------- /demo/cli/02_replace.md: -------------------------------------------------------------------------------- 1 | ## Search and Replace on Files 2 | 3 | Given a file a.txt containing: 4 | 5 | This is file a.txt. 6 | This is an example. 7 | 8 | And given a file b.txt containing: 9 | 10 | This is file b.txt. 11 | This is another example. 12 | 13 | Then invoking the command: 14 | 15 | $ regex -s example -r EXAMPLE a.txt b.txt 16 | 17 | Should result in a new file a.txt containing: 18 | 19 | This is file a.txt. 20 | This is an EXAMPLE. 21 | 22 | And should result in a new file b.txt containing: 23 | 24 | This is file b.txt. 25 | This is another EXAMPLE. 26 | 27 | -------------------------------------------------------------------------------- /demo/cli/01_search.md: -------------------------------------------------------------------------------- 1 | ## Searching Files 2 | 3 | Given a file a.txt containing: 4 | 5 | This is file a.txt. 6 | This is an example. 7 | 8 | And given a file b.txt containing: 9 | 10 | This is file b.txt. 11 | This is another example. 12 | 13 | Then invoking the command: 14 | 15 | $ regex -s example a.txt b.txt 16 | 17 | Should produce: 18 | 19 | example 20 | 21 | In this case it found the first match and returned it. 22 | To handle a global search we add the `-g` flag. 23 | 24 | Invoking the command: 25 | 26 | $ regex -g -s example a.txt b.txt 27 | 28 | Will give a more complex result. 
29 | 30 | @out.assert == "example\036\nexample\n" 31 | 32 | -------------------------------------------------------------------------------- /lib/regex.rb: -------------------------------------------------------------------------------- 1 | module Regex 2 | # Access to PACAKGE metadata. 3 | def self.metadata 4 | @metadata ||= ( 5 | require 'yaml' 6 | YAML.load(File.new(File.dirname(__FILE__) + '/regex.yml')) 7 | ) 8 | end 9 | 10 | # Need VRESION? You got it. 11 | def self.const_missing(name) 12 | metadata[name.to_s.downcase] || super(name) 13 | end 14 | 15 | # TODO: This is only here to support broken Ruby 1.8.x. 16 | VERSION = metadata['version'] 17 | 18 | # Shortcut to create a new Regex::Extractor instance. 19 | def self.new(*io) 20 | Extractor.new(*io) 21 | end 22 | end 23 | 24 | require 'regex/templates' 25 | require 'regex/extractor' 26 | require 'regex/replacer' 27 | require 'regex/command' 28 | 29 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | # RELEASE HISTORY 2 | 3 | ## 1.1.1 / 2011-10-24 4 | 5 | Maintenance release updates build configuration. This release 6 | also adds a man-page and fixes one bug with single search output. 7 | 8 | Changes: 9 | 10 | * Modernize build configuration. 11 | * Fix return value when no single match is found. 12 | * Add man-page for help. 13 | 14 | 15 | ## 1.1.0 / 2010-10-12 16 | 17 | This release adds a detailed output option, and corrects 18 | a bug when using `--escape` with search and replace. It also 19 | entails a pretty extensive under-the-hood overhaul of the 20 | Extractor class. One consequence of this overhaul is that the 21 | `--unxml` option has been deprecated until such time that it can 22 | be reimplemented correctly. 23 | 24 | Changes: 25 | 26 | * Add --detail/-d output option. 27 | * Fix isssue using escape with search and replace. 28 | * Reimplement Extractor class. 
29 | * Deprecate `--unxml` option until implementation can be worked out. 30 | 31 | 32 | ## 1.0.0 / 2010-02-10 33 | 34 | Initial release of Regex. Regex is a simple 35 | commandline Regular Expression tool. 36 | 37 | Changes: 38 | 39 | * Happy Birthday 40 | 41 | -------------------------------------------------------------------------------- /.index: -------------------------------------------------------------------------------- 1 | --- 2 | revision: 2013 3 | type: ruby 4 | sources: 5 | - var 6 | authors: 7 | - name: Thomas Sawyer 8 | email: transfire@gmail.com 9 | - name: Tyler Rick 10 | organizations: [] 11 | requirements: 12 | - groups: 13 | - build 14 | development: true 15 | name: detroit 16 | - groups: 17 | - test 18 | development: true 19 | name: qed 20 | conflicts: [] 21 | alternatives: [] 22 | resources: 23 | - type: home 24 | uri: http://rubyworks.github.com/regex 25 | label: Homepage 26 | - type: wiki 27 | uri: http://wiki.github.com/rubyworks/regex 28 | label: User Guide 29 | - type: code 30 | uri: http://github.com/rubyworks/regex 31 | label: Source Code 32 | - type: mail 33 | uri: http://groups.google.com/group/rubyworks-mailinglist 34 | label: Mailing List 35 | repositories: 36 | - name: upstream 37 | scm: git 38 | uri: git://github.com/proutils/regex.git 39 | categories: [] 40 | copyrights: [] 41 | customs: [] 42 | paths: 43 | lib: 44 | - lib 45 | created: '2006-05-09' 46 | summary: Regex is a simple commmand-line Regular Expression tool. 47 | title: Regex 48 | version: 1.1.1 49 | name: regex 50 | description: ! 'Regex is a simple commmand-line Regular Expression tool 51 | 52 | that makes it easy to search documents for content matches.' 
53 | date: '2013-01-30' 54 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD-2-Clause License 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 14 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 15 | AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 16 | COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 17 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 18 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 19 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 20 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 21 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 22 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | -------------------------------------------------------------------------------- /demo/api/01_regex.md: -------------------------------------------------------------------------------- 1 | # Regex class 2 | 3 | Regex is really meant to be used on the command-line since 4 | it is really nothing more than a front end to Ruby's regular 5 | expression engine. But we will demonstrate it's use here in 6 | code just the same, and to help ensure code quality. 
7 | 8 | First we need to require the Regex library. 9 | 10 | require 'regex' 11 | 12 | Now let's create some material to work with. 13 | 14 | text = "We will match against this string." 15 | 16 | Now we can create a Regex object using the text. 17 | We will also supply a matching pattern, as none of 18 | the matching functions will work without providing 19 | a pattern or the name of a built-in pattern template. 20 | 21 | rx = Regex.new(text, :pattern=>'\w+') 22 | 23 | We can see that the Regex object has converted the pattern 24 | into the expected regular expression via the #regex method. 25 | 26 | rx.regex.assert == /\w+/ 27 | 28 | Under the hood, Regex has split the process of matching, 29 | organizing and formatting the results into separate methods. 30 | We can use the #structure method to see the match results 31 | organized into uniform arrays. 32 | 33 | rx.structure.assert == %w{We} 34 | 35 | Whereas the last use only returns a single match, if we turn 36 | on repeat mode we can see every word. 37 | 38 | rx.repeat = true 39 | 40 | rx.structure.assert == %w{We will match against this string}.map{ |e| [e] } 41 | 42 | Notice that repeat mode creates an array in an array. 43 | 44 | -------------------------------------------------------------------------------- /lib/regex/string.rb: -------------------------------------------------------------------------------- 1 | module Regex 2 | 3 | # Extensions for String class. 4 | # These methods are taken directly from Ruby Facets. 5 | # 6 | module String 7 | 8 | # Provides a margin controlled string. 9 | # 10 | # x = %Q{ 11 | # | This 12 | # | is 13 | # | margin controlled! 14 | # }.margin 15 | # 16 | # 17 | # NOTE: This may still need a bit of tweaking. 
18 | # 19 | # CREDIT: Trans 20 | 21 | def margin(n=0) 22 | #d = /\A.*\n\s*(.)/.match( self )[1] 23 | #d = /\A\s*(.)/.match( self)[1] unless d 24 | d = ((/\A.*\n\s*(.)/.match(self)) || 25 | (/\A\s*(.)/.match(self)))[1] 26 | return '' unless d 27 | if n == 0 28 | gsub(/\n\s*\Z/,'').gsub(/^\s*[#{d}]/, '') 29 | else 30 | gsub(/\n\s*\Z/,'').gsub(/^\s*[#{d}]/, ' ' * n) 31 | end 32 | end 33 | 34 | # Preserves relative tabbing. 35 | # The first non-empty line ends up with n spaces before nonspace. 36 | # 37 | # CREDIT: Gavin Sinclair 38 | 39 | def tabto(n) 40 | if self =~ /^( *)\S/ 41 | indent(n - $1.length) 42 | else 43 | self 44 | end 45 | end 46 | 47 | # Indent left or right by n spaces. 48 | # (This used to be called #tab and aliased as #indent.) 49 | # 50 | # CREDIT: Gavin Sinclair 51 | # CREDIT: Trans 52 | 53 | def indent(n) 54 | if n >= 0 55 | gsub(/^/, ' ' * n) 56 | else 57 | gsub(/^ {0,#{-n}}/, "") 58 | end 59 | end 60 | 61 | end 62 | 63 | class ::String #:nodoc: 64 | include Regex::String 65 | end 66 | 67 | end 68 | 69 | -------------------------------------------------------------------------------- /demo/api/02_replacer.md: -------------------------------------------------------------------------------- 1 | # Regex::Replacer 2 | 3 | Regex can also be used to do search and replace across multiple 4 | strings or IO objects, includeing files. 5 | 6 | require 'regex' 7 | 8 | To perform search and replace procedure we create a Regex::Replacer object. 9 | The constructor method takes a Hash of options which set universal parameters 10 | to apply to all search and replace rules. Usually, each individual rule 11 | will specify it's own options, so for this example we provide none. 12 | 13 | replacer = Regex::Replacer.new 14 | 15 | Rules are added via the #rule method. 16 | 17 | replacer.rule('World', 'Planet Earth') 18 | replacer.rule('!', '!!!') 19 | 20 | Rules are applied in the order they were defined. 
If the rules overlap 21 | in their effects this can be significant. 22 | 23 | Now, let's say we have that famous String, 24 | 25 | string = "Hello, World!" 26 | 27 | We use the #apply method to actually perform the substitutions. 28 | 29 | replacer.apply(string) 30 | 31 | The replacements occur in place. Since in this case we are performing 32 | the search and replace on a String object, we can see the change 33 | has taken place. 34 | 35 | string.assert == "Hello, Planet Earth!!!" 36 | 37 | As we mentioned at the beginning, substitutions can be applied to IO 38 | objects in general, so long as they can be reopened for writing. 39 | 40 | require 'stringio' 41 | 42 | io = StringIO.new("Hello, World!") 43 | 44 | replacer.apply(io) 45 | 46 | io.read.assert == "Hello, Planet Earth!!!" 47 | 48 | If +io+ were a File object, rather than a StringIO, the file would 49 | be changed on disk. As a precaution a backup file can be written 50 | with the name of the file plus a '.bak' extension in the same directory as 51 | the file. To turn on the backup option, either supply it as an option 52 | to the constructor, or set it via the writer method. 53 | 54 | replacer.backup = true 55 | 56 | (TODO: Example of a file search and replace.) 57 | 58 | -------------------------------------------------------------------------------- /work/trash/command.rb: -------------------------------------------------------------------------------- 1 | require 'regex/extrator' 2 | require 'regex/replacer' 3 | 4 | class Regex 5 | 6 | # Commandline interface. 7 | # 8 | class Command 9 | 10 | # 11 | attr :file 12 | 13 | # 14 | attr :format 15 | 16 | # 17 | attr :options 18 | 19 | # 20 | def self.main(*argv) 21 | new(*argv).main 22 | end 23 | 24 | # New Command. 
def initialize(*argv)
  @file    = nil
  @format  = nil
  @options = {}
  parse(*argv)
end

# Parse the command-line arguments.
#
# Switches are consumed by the OptionParser built in #parser. Of the
# arguments that remain, the first becomes the search pattern (unless a
# built-in template was selected with --template) and the next one, if
# any, is the file to read.
#
# Aborts with a message on stderr and exit status 1 when a file argument
# is given but does not name a regular file.
def parse(*argv)
  parser.parse!(argv)

  # A template replaces the positional pattern argument.
  @options[:pattern] = argv.shift unless @options[:template]

  @file = argv.shift
  return unless @file
  return if File.file?(@file)

  # Diagnostics belong on stderr; abort also exits with status 1,
  # the same status the previous `puts` + `exit 1` produced.
  abort "No such file -- '#{@file}'."
end

# Build the OptionParser instance.
#
# Resets @options to an empty Hash on every call; each switch handler
# then records its setting in @options, or in @format for the output
# format switches.
def parser
  require 'optparse'
  @options = {}
  OptionParser.new do |opt|
    opt.on('--template', '-t NAME', "select a built-in regular expression") do |name|
      @options[:template] = name
    end

    opt.on('--index', '-n INT', "return a specific match index") do |int|
      @options[:index] = int.to_i
    end

    opt.on('--insensitive', '-i', "case insensitive matching") do
      @options[:insensitive] = true
    end

    opt.on('--unxml', '-x', "ignore XML/HTML tags") do
      @options[:unxml] = true
    end

    opt.on('--repeat', '-r', "find all matching occurrences") do
      @options[:repeat] = true
    end

    opt.on('--yaml', '-y', "output in YAML format") do
      @format = :yaml
    end

    opt.on('--json', '-j', "output in JSON format") do
      @format = :json
    end

    opt.on_tail('--help', '-h', "display this lovely help message") do
      puts opt
      exit 0
    end
  end
end

# Create the Regex object for the selected input: the named file when
# one was given, otherwise ARGF.
def extraction
  target = file ? File.new(file) : ARGF
  Regex.new(target, options)
end

# Extract and display.
94 | def main 95 | begin 96 | puts extraction.to_s(@format) 97 | rescue => error 98 | if $DEBUG 99 | raise error 100 | else 101 | abort error.to_s 102 | end 103 | end 104 | end 105 | 106 | end 107 | 108 | end 109 | 110 | -------------------------------------------------------------------------------- /work/ruby-matches.rb: -------------------------------------------------------------------------------- 1 | ################# 2 | # Ruby Specific # 3 | ################# 4 | 5 | # Returns a Ruby comment block with a given handle. 6 | 7 | def extract_ruby_block_comment(handle) 8 | b = Regexp.escape(handle) 9 | if b == '' 10 | pattern = /^=begin.*?\n(.*?)\n=end/mi 11 | else 12 | pattern = /^=begin[ \t]+#{b}.*?\n(.*?)\n=end/mi 13 | end 14 | extract_pattern(pattern) 15 | end 16 | 17 | # Returns a Ruby method comment. 18 | 19 | def extract_ruby_method_comment(meth) #=nil ) 20 | #if meth 21 | regexp = Regexp.escape(meth) 22 | pattern = /(\A\s*\#.*?^\s*def #{regexp}/mi 23 | extract_pattern(pattern) 24 | #else 25 | # prog.scan /^\s*\#/mi 26 | # md = pattern_inline_all.match( prog ) 27 | #end 28 | end 29 | 30 | # # Extract the matching comment block. 31 | # 32 | # def extract_block( handle='test' ) 33 | # text = File.read(file) 34 | # md = pattern_block(handle).match(text) 35 | # code = md ? md[1] : nil 36 | # unless code 37 | # puts "Code block not found -- #{handle}" 38 | # exit 0 #return nil 39 | # end 40 | # offset = text[0...md.begin(1)].count("\n") 41 | # return code, offset 42 | # end 43 | # 44 | # # Returns the comment inline regexp to match against. 45 | # 46 | # def pattern_inline( mark ) 47 | # m = Regexp.escape(mark) 48 | # /(\A\s*\#.*?^\s*def #{m}/mi 49 | # end 50 | # 51 | # def extract_inline( fname, mark=nil ) 52 | # prog = File.read( file ) 53 | # if mark 54 | # md = pattern_inline(mark).match( prog ) 55 | # else 56 | # prog.scan /^\s*\#/mi 57 | # md = pattern_inline_all.match( prog ) 58 | # end 59 | # end 60 | 61 | =begin 62 | # Extract Block. 
63 | def extract_block(start, stop) 64 | start = Regexp.new(start) 65 | stop = Regexp.new(stop) 66 | 67 | md_start = start.match(text) 68 | if md_start 69 | md_stop = stop.match(text[md_start.end(0)..-1]) 70 | if md_stop 71 | clip = text[md_start.end(0)...(md_stop.begin(0)+md_start.end(0))] 72 | else 73 | raise "Pattern not found -- #{stop}" 74 | return nil, nil 75 | end 76 | offset = text[0...md_start.begin(0)].count("\n") #? 77 | return clip, offset 78 | else 79 | raise "Pattern not found -- #{start}" 80 | return nil, nil 81 | end 82 | end 83 | =end 84 | 85 | =begin 86 | #def extract_pattern(pattern) 87 | #if clip = md ? md[0] : nil 88 | # offset = text[0...md.begin(0)].count("\n") 89 | # return clip, offset 90 | #else 91 | # raise "Pattern not found -- #{pattern}" 92 | # return nil, nil 93 | #end 94 | #end 95 | =end 96 | 97 | -------------------------------------------------------------------------------- /work/test_extractor.rb: -------------------------------------------------------------------------------- 1 | require 'test/unit' 2 | 3 | class ExtractorTest < Test::Unit::TestCase 4 | 5 | def exacto_knife 6 | @knife ||= Extractor.new('/dev/null') 7 | end 8 | 9 | def build_pattern_block(block, code) 10 | exacto_knife.pattern_block(block).match(code) 11 | end 12 | 13 | # Usual case. 14 | 15 | def test_pattern_block 16 | assert_equal "require 'foo'\nfoo", build_pattern_block('test', "=begin test\nrequire 'foo'\nfoo\n=end")[1] 17 | end 18 | 19 | # Some tests for when the block is empty ('') -- should it act as a wildcard and match *any* block, 20 | # or should Extractor::Command#initialize complain about that. 
21 | 22 | def test_pattern_block_no_handle 23 | assert_equal "require 'foo'\nfoo", build_pattern_block('', "=begin\nrequire 'foo'\nfoo\n=end")[1] 24 | end 25 | 26 | def test_pattern_block_no_handle_given 27 | assert_equal "require 'foo'\nfoo", build_pattern_block('', "=begin test\nrequire 'foo'\nfoo\n=end")[1] 28 | end 29 | 30 | # Yes, I know, as a side-effect of this regexp change, it will also match some invalid "blocks", like =beginblah. But that 31 | # seems like a nonissue, given that the Ruby parser would reject that syntax anyway. 32 | 33 | def test_pattern_block_side_effects 34 | assert_equal "require 'foo'\nfoo", build_pattern_block('', "=beginblah\nrequire 'foo'\nfoo\n=end")[1] 35 | end 36 | 37 | end 38 | 39 | require 'proutils/xact/extractor.rb' 40 | require 'test/unit' 41 | 42 | class ExtractorTest < Test::Unit::TestCase 43 | 44 | def knife 45 | @knife ||= ProUtils::Extractor.new <<-HERE 46 | This is a "test". 47 | =begin 48 | Word 49 | =end 50 | HERE 51 | end 52 | 53 | def test_extract_block 54 | text, line = *knife.extract_block('=begin', '=end') 55 | assert_equal('Word', text.strip) 56 | assert_equal(1, line) 57 | end 58 | 59 | def test_extract_pattern 60 | text, line = *knife.extract_pattern(/\"(.*?)\"/) 61 | assert_equal('test', text) 62 | assert_equal(0, line) 63 | end 64 | 65 | =begin 66 | def build_pattern_block(block, code) 67 | knife.pattern_block(block).match(code) 68 | end 69 | 70 | # Usual case. 71 | 72 | def test_pattern_block 73 | assert_equal "require 'foo'\nfoo", build_pattern_block('test', "=begin test\nrequire 'foo'\nfoo\n=end")[1] 74 | end 75 | 76 | # Some tests for when the block is empty ('') -- should it act as a wildcard and match *any* block, 77 | # or should Exacto::Command#initialize complain about that. 
78 | 79 | def test_pattern_block_no_handle 80 | assert_equal "require 'foo'\nfoo", build_pattern_block('', "=begin\nrequire 'foo'\nfoo\n=end")[1] 81 | end 82 | 83 | def test_pattern_block_no_handle_given 84 | assert_equal "require 'foo'\nfoo", build_pattern_block('', "=begin test\nrequire 'foo'\nfoo\n=end")[1] 85 | end 86 | 87 | # Yes, I know, as a side-effect of this regexp change, it will also match some invalid "blocks", like =beginblah. But that 88 | # seems like a nonissue, given that the Ruby parser would reject that syntax anyway. 89 | 90 | def test_pattern_block_side_effects 91 | assert_equal "require 'foo'\nfoo", build_pattern_block('', "=beginblah\nrequire 'foo'\nfoo\n=end")[1] 92 | end 93 | =end 94 | 95 | end 96 | 97 | -------------------------------------------------------------------------------- /man/man1/regex.1.ronn: -------------------------------------------------------------------------------- 1 | regex(1) - regular expression tool 2 | ================================== 3 | 4 | ## DESCRIPTION 5 | 6 | Regex is a simple command-line Regular Expression tool, that makes it easy 7 | to search documents for content matches. 8 | 9 | Yea, I know what you are going to say. "I can do that with ____" Fill in the blank 10 | with +grep+, +awk+, +sed+, +perl+, etc. But honestly, none of these tools are 11 | as straightforward and capable as one might want. What is needed is a simple 12 | command-line tool that gives quick access to a Regular Expression engine. 13 | No more, no less. 14 | 15 | Now this could have been written in Perl. No doubt, it would be just as good, if 16 | not better since Perl's Regular Expression engine rocks (or so it is said). 17 | But Ruby's is pretty damn good too, and getting better (with 1.9+). And since 18 | your humble author knows Ruby very well.... Well that's what you get. 19 | 20 | 21 | ## OPTIONS 22 | 23 | The `regex` command line has the following options. 
24 | 25 | ### Search Options 26 | 27 | * `-s`, `--search PATTERN` - Search for this pattern. 28 | 29 | * `-t`, `--template NAME` - Use a built-in regular expression (instead of `-s`). 30 | 31 | * `-i`, `--insensitive` - Case insensitive matching. 32 | 33 | * `-m`, `--multiline` - Multiline matching. 34 | 35 | * `-g`, `--global` - Global search. By default regex only searches for the 36 | first match. Use the global option to search for all matches. 37 | 38 | * `-e`, `--escape` - Make all patterns verbatim string matchers. 39 | 40 | * `-n`, `--index INT` - Return a specific match index. 41 | 42 | * `-R`, `--recursive` - Search through subdirectories recursively. 43 | 44 | * `-y`, `--yaml` - Output in YAML format. 45 | 46 | * `-j`, `--json` - Output in JSON format. 47 | 48 | * `-d`, `--detail` - Provide match details. 49 | 50 | ### Replace Options 51 | 52 | * `-r`, `--replace TEXT` - Replace matching pattern with the given text. 53 | 54 | * `-b`, `--backup` - Back up any files that are changed. 55 | 56 | ### Special Options 57 | 58 | * `--[no-]ansi` - Toggle ansi color. 59 | 60 | * `--debug` - Run in debug mode. 61 | 62 | * `-h`, `--help` - Display this lovely help message. 63 | 64 | 65 | ## OUTPUT 66 | 67 | Regex has three output modes: YAML, JSON and standard text. The standard 68 | text output is unique in that it utilizes special ASCII characters 69 | to separate matches and regex groups. ASCII 30, called the *record separator*, 70 | is used to separate repeat matches. ASCII 29, called the *group separator*, 71 | is used to separate regular expression groups. 72 | 73 | 74 | ## EXAMPLES 75 | 76 | The following example returns the content between the first `=begin ... =end` 77 | clause it comes across. 78 | 79 | $ regex '/=begin.*?\n(.*)\n=end/' sample.rb 80 | 81 | Instead of the first argument being the regular expression, we can use the `-s` 82 | option. This example finds the first line starting with a Q. 
83 | 84 | $ regex -s '^Q' sample.txt 85 | 86 | This example would replace all words starting with an X with an A in all .txt 87 | files in the current directory. 88 | 89 | $ regex -g -s '\bX' -r 'A' *.txt 90 | 91 | 92 | ## COPYRIGHTS 93 | 94 | Copyright (c) 2009 Thomas Sawyer, Rubyworks 95 | 96 | Regex is distributable in accordance with the terms of the BSD-2-Clause license. 97 | 98 | -------------------------------------------------------------------------------- /lib/regex/templates.rb: -------------------------------------------------------------------------------- 1 | module Regex 2 | 3 | # = Templates 4 | # 5 | # TODO: What about regular expressions with variable content? 6 | # But then how would we handle named substituions? 7 | # 8 | # TODO: Should these be methods rather than constants? 9 | module Templates 10 | 11 | # Empty line. 12 | EMPTY = /^$/ 13 | 14 | # Blank line. 15 | BLANK = /^\s*$/ 16 | 17 | # 18 | NUMBER = /[-+]?[0-9]*\.?[0-9]+/ 19 | 20 | # Markup language tag, e.g \stuff. 21 | MLTAG = /<([A-Z][A-Z0-9]*)\b[^>]*>(.*?)<\/\1>/i 22 | 23 | # IPv4 Address 24 | IPV4 = /\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/ 25 | 26 | # Dni (spanish ID card) 27 | DNI = /^\d{8}[A-Za-z]{1}$/ 28 | 29 | # Email Address 30 | EMAIL = /([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)/i 31 | 32 | # United States phone number. 33 | USPHONE = /(\d\d\d[-]|\(\d\d\d\))?(\d\d\d)[-](\d\d\d\d)/ 34 | 35 | # United States zip code. 36 | USZIP = /^[0-9]{5}(-[0-9]{4})?$/ 37 | 38 | # United States social secuirty number. 39 | SSN = /[0-9]\{3\}-[0-9]\{2\}-[0-9]\{4\}/ 40 | 41 | # United States dollar amount. 
# The decimal point is escaped so that only a literal `.` may separate
# dollars from cents (an unescaped `.` would accept any character there).
DOLLARS = /\$[0-9]*\.[0-9][0-9]/

# Bank Identification Code
BIC = /([a-zA-Z]{4}[a-zA-Z]{2}[a-zA-Z0-9]{2}([a-zA-Z0-9]{3})?)/

# International Bank Account Number
IBAN = /[a-zA-Z]{2}[0-9]{2}[a-zA-Z0-9]{4}[0-9]{7}([a-zA-Z0-9]?){0,16}/

# Hexadecimal value: `#` followed by three or six hex digits.
HEX = /(#([0-9A-Fa-f]{3}|[0-9A-Fa-f]{6})\b)/

# HTTP URL Address
HTTP = /^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \?=.-]*)*\/?$/

# Validates Credit Card numbers, contains 16 numbers in groups of 4 separated
# by `-`, space or nothing.
CREDITCARD = /^(\d{4}-){3}\d{4}$|^(\d{4}\s){3}\d{4}$|^\d{16}$/

# MasterCard credit card
MASTERCARD = /^5[1-5]\d{14}$/

# Visa credit card.
VISA = /^4\d{15}$/

# TODO: Better name?
UNIXWORD = /^[a-zA-Z0-9_]*$/

# Username, at least 3 characters and no more than 16.
USERNAME = /^[a-zA-Z0-9_]{3,16}$/

# Twitter username
TWITTER_USERNAME = /^([a-z0-9\_])+$/ix

# Misspelled original name of TWITTER_USERNAME, kept so existing
# lookups by the old name keep working.
TWITTER_USERNMAE = TWITTER_USERNAME

# Github username
GITHUB_USERNAME = /^([a-z0-9\_\-])+$/ix

# Slideshare username
SLIDESHARE_USERNAME = /^([a-z0-9])+$/ix

# Del.icio.us username
DELICIOUS_USERNAME = /^([a-z0-9\_\-])+$/ix

# Misspelled original name of DELICIOUS_USERNAME, kept so existing
# lookups by the old name keep working.
DELICIOUS_USERNMAME = DELICIOUS_USERNAME

# Ruby comment block.
RUBYBLOCK = /^=begin\s*(.*?)\n(.*?)\n=end/m

# Ruby method definition.
# TODO: Not quite right.
RUBYMETHOD_WITH_COMMENT = /(^\ *\#.*?)^\s*def\s*(.*?)$/m

# Ruby method definition.
RUBYMETHOD = /^\ *def\s*(.*?)$/

# By the legendary abigail. Fails to match if and only if it is matched against
# a prime number of 1's. That is, '11' fails, but '1111' does not.
# I once heard him talk why this works, but I forgot most of it.
PRIMEONES = /^1?$|^(11+?)\1+$/

# Names of all template constants, downcased.
#
# Returns one entry per constant defined on the receiver, so the
# misspelled legacy aliases above are listed as well.
def self.list
  constants.map { |c| c.downcase }
end

# Lookup a template by name.
105 | def self.[](name) 106 | Templates.const_get(name.upcase) 107 | end 108 | 109 | end 110 | 111 | # Add templates to Regex module. 112 | include Templates 113 | 114 | end 115 | 116 | -------------------------------------------------------------------------------- /work/xact-ruby.rb: -------------------------------------------------------------------------------- 1 | module Rivets 2 | module CLI 3 | 4 | # 5 | 6 | class Ginsu 7 | 8 | def self.start ; new.start ; end 9 | 10 | def initialize(argv=ARGV) 11 | @args, @keys = Console::Arguments.new(argv).parameters 12 | end 13 | 14 | def start 15 | files = @args[0] 16 | ExtractAndSave.test_extract(files) 17 | end 18 | end 19 | 20 | # Runs extracted code via a pipe. 21 | # The binary for this is called exrb. 22 | 23 | class XactRuby #Excerb 24 | 25 | # Shortcut for typical usage. 26 | 27 | def self.run 28 | new.run 29 | end 30 | 31 | attr_reader :exacto, :file, :handle, :argv 32 | 33 | def initialize( argv=ARGV ) 34 | argv = argv.dup 35 | 36 | if argv.delete('--help') 37 | help 38 | exit 0 39 | end 40 | 41 | if i = argv.index('-h') 42 | handle = argv[i+1].strip 43 | argv[i+1,1] = nil 44 | argv.delete('-h') 45 | else 46 | handle = 'test' 47 | end 48 | 49 | if i = argv.index('-P') 50 | argv.delete('-P') 51 | file = argv.pop 52 | puts exact(handle) 53 | exit 0 54 | end 55 | 56 | file = argv.pop 57 | 58 | @argv = argv 59 | @handle = handle 60 | @file = File.expand_path(file) 61 | @exacto = Extractor.new(file) 62 | end 63 | 64 | # Extract the code. 65 | 66 | def exact 67 | return *@exacto.extract_block(handle) 68 | end 69 | 70 | # This runs the commented code block via a pipe. 71 | # This has an advantage in that all the parameters 72 | # that can be passed to ruby can also be passed to exrb. 
def run
  excode, offset = exact

  # Build a small driver script: load the target file, then evaluate the
  # extracted block under the file's own name and line offset so that
  # backtraces point at the original source.
  code = "\n"
  # code << special_requirements
  code << "require '#{file}'\n"
  code << "eval(<<'_____#{handle}_____', TOPLEVEL_BINDING, '#{file}', #{offset})\n"
  code << excode
  code << "\n_____#{handle}_____\n\n"

  # Pass the remaining arguments to ruby as an argv array rather than one
  # joined string, so no shell re-splits or expands them.
  cmd = ['ruby', *argv]

  # Feed the script on stdin and echo whatever the child prints.
  IO.popen(cmd, "w+") do |ruby|
    ruby.puts code
    ruby.close_write
    puts ruby.read
  end
end

# # Any special requirements based on handle?
#
# def special_requirements
#   case handle
#   when 'test/unit'
#     "require 'test/unit'"
#   when 'rspec'
#     "require 'rspec'"
#   else
#     ''
#   end + "\n"
# end

# Show help.
#
# Prints the output of `ruby --help` with the first occurrence of "ruby"
# replaced by "exrb", followed by the two options handled by this tool.

def help
  helpstr = `ruby --help`
  helpstr.sub!('ruby', 'exrb')
  puts helpstr
  puts
  puts " -h handle of comment block to run"
  puts " -P display the code block to be run"
end


# OLD CODE
#
# # This runs the commented code block directly.
# # This has an advantage in that the line numbers
# # can be maintained.
#
# def run_eval( fname, block='test' )
#   code, offset = extract_block( fname )
#
#   require 'test/unit' if block == 'test'
#   require fname
#
#   eval code, TOPLEVEL_BINDING, File.basename(fname), offset
# end
#
# # This runs the commented code block via a pipe.
# # This has an advantage in that all the parameters
# # that can be passed to ruby can be passed to rubyinline.
136 | # 137 | # def run_pipe( fname, block='test' ) 138 | # code, offset = extract_block( fname, block ) 139 | # 140 | # code = "require 'test/unit'\n\n" + code if block == 'test' 141 | # code = "require '#{fname}'\n\n" + code 142 | # 143 | # cmd = ['ruby', *ARGV].join(' ') 144 | # 145 | # result = IO.popen(cmd,"w+") do |ruby| 146 | # ruby.puts code 147 | # ruby.close_write 148 | # puts ruby.read 149 | # end 150 | # end 151 | 152 | end 153 | 154 | end 155 | end 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Regex ("Like a Knife") 2 | 3 | [Website](http://rubyworks.github.com/regex) / 4 | [Report Issue](http://github.com/rubyworks/regex/issues) / 5 | [Source Code](http://github.com/rubyworks/regex) / 6 | [Chat Room](irc://irc.freenode.net/rubyworks) / 7 | [![Build Status](https://secure.travis-ci.org/rubyworks/regex.png)](http://travis-ci.org/rubyworks/regex) / 8 | [![Gem Version](https://badge.fury.io/rb/regex.png)](http://badge.fury.io/rb/regex) 9 | 10 | 11 | ## About 12 | 13 | Yea, I know what you are going to say, "You just reimplemented `sed`!" And 14 | you would be absolutely correct. But! There is a little bit more to this story. 15 | Sed is not a "Language 2.0" tool (i.e. "post-Ruby"). And what I wanted was a 16 | command-line tool that is both a bit easier to use and a bit more flexible 17 | as well. 18 | 19 | Now I could have written this in Perl. I'm sure it would be just as good, if not 20 | better since Perl's Regular Expression engine rocks, or so I hear. But Ruby's is 21 | pretty damn good too, and getting better (with 1.9+). And since I know Ruby very 22 | well. Well that's what you get.
23 | 24 | 25 | ## Usage 26 | 27 | For detailed explication and examples of usage refer to the 28 | [User Docs](http://wiki.github.com/rubyworks/regex), the 29 | [QED Docs](http://github.com/rubyworks/regex/docs/qed) and the 30 | [API Docs](http://github.com/rubyworks/regex/docs/api). 31 | 32 | In brief, usage simply entails supplying a regular expression and a list of files 33 | to be searched to the `regex` command. 34 | 35 | $ regex '/=begin.*?\n(.*)\n=end/' sample.rb 36 | 37 | This example does exactly what you would expect. It returns the content between 38 | the first `=begin ... =end` clause it comes across. To see all such 39 | block comments, as you would expect, you can add the `g` regular 40 | expression mode flag. 41 | 42 | $ regex '/=begin.*?\n(.*)\n=end/g' sample.rb 43 | 44 | Alternatively you can use the `-g/--global/--repeat` option. 45 | 46 | $ regex -g '/=begin.*?\n(.*)\n=end/' sample.rb 47 | 48 | Notice that in all these examples we have used single quotes to wrap the 49 | regular expression. This is to prevent the shell from expanding `*` 50 | and `?` marks. 51 | 52 | By default regex produces string output. Regular expression groups are delimited 53 | by ASCII 29 (035 1D) END OF GROUP, and repeat matches are delimited by 54 | ASCII character 30 (036 1E) END OF RECORD. 55 | 56 | Instead of string output, regex also supports YAML and JSON formats using the 57 | `--yaml/-y` and `--json/-j` flags. 58 | 59 | $ regex -y -g '/=begin.*?\n(.*)\n=end/' sample.rb 60 | 61 | In this case the matches are returned as an array of arrays. 62 | 63 | To get more information than just the match results use the `--detail/-d` 64 | option. 65 | 66 | Also, we can do without the `/ /` delimiters on the regular 67 | expression if we use the `--search/-s` option instead. Going back to 68 | our first example: 69 | 70 | $ regex -s '=begin.*?\n(.*)\n=end' sample.rb 71 | 72 | To replace text, use the `--replace/-r` option.
73 | 74 | $ regex --yaml --repeat -s 'Tom' -r 'Bob' sample.rb 75 | 76 | This will replace every occurrence of "Tom" with "Bob" in the `sample.rb` 77 | file. By default `regex` will back up any file it changes by adding a 78 | `.bak` extension to the original copy. 79 | 80 | Check out the `--help` and I am sure the rest will be smooth sailing. 81 | But if you want more information, then do us the good favor of jumping over 82 | to the [wiki](http://wiki.github.com/rubyworks/regex). Feel free to add 83 | additional information there to help others. 84 | 85 | 86 | ## Output 87 | 88 | As mentioned above, regex has three output modes: YAML, JSON and standard text. 89 | The standard text output is unique in that it utilizes special ASCII characters 90 | to separate matches and regex groups. ASCII 30, called the *record separator*, 91 | is used to separate repeat matches. ASCII 29, called the *group separator*, 92 | is used to separate regular expression groups. 93 | 94 | 95 | ## Status 96 | 97 | The project is maturing but still a touch wet behind the ears. So don't be too 98 | surprised if it doesn't have every feature under the sun just yet, or if not every 99 | detail works absolutely peachy. But hey, if something needs fixing or 100 | a feature needs adding, well then get in there and send us a patch. Open source 101 | software is built on *TEAM WORK*, right? 102 | 103 | 104 | ## Copyrights 105 | 106 | Copyright © 2010 Rubyworks 107 | 108 | Regex is licensed under the terms of the *FreeBSD* license. 109 | 110 | See LICENSE.txt file for details. 111 | 112 | -------------------------------------------------------------------------------- /work/xact.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |

Xact

5 | 6 |

IMPORTANT Xact is still in early stages of development. Because of this 7 | the following documentation might not reflect the actual state of functionality.

8 | 9 |

The xact command is a simple tool that allows the extraction of content from 10 | a text file given a set of textual coordinates. Textual coordinates are just a fancy way 11 | of saying start and end line numbers or matching regular expressions. Besides extraction, 12 | xact also allows iteration through a whole set of matches.

13 | 14 |

Let's try a simple example file godel.txt. The file's contents read like this:

15 | 16 |
 17 |     Hello World!
 18 |     Today we are leaning to use Xact.
 19 |     Lets try a simple example file:
 20 |         $ xact /^Lets.*?$/ /^Er/ godel.txt
 21 |     Which outputs:
 22 |         $ xact /^Lets.*?$/ /^Er/ godel.txt
 23 |         ...
 24 |     Er, well. there's no beating Godel!
 25 |   
26 | 27 |

Don't be alarmed. If we simply pull a Bertrand, and actually did something possible

28 | 29 |
 30 |     $ xact /^Lets.*?$/ /^Which/ godel.txt
 31 |   
32 | 33 |

Which outputs

34 | 35 |
 36 |     $ xact /^Lets.*?$/ /^Er/ godel.txt
 37 |   
38 | 39 |

Certainly, it makes sense! ;)

40 | 41 | 160 | 161 | -------------------------------------------------------------------------------- /lib/regex/replacer.rb: -------------------------------------------------------------------------------- 1 | require 'stringio' 2 | require 'optparse' 3 | 4 | module Regex 5 | 6 | # 7 | class Replacer 8 | 9 | # Array of [search, replace] rules. 10 | attr_reader :rules 11 | 12 | # Is this a recursive search? 13 | attr_accessor :recursive 14 | 15 | # Make all patterns exact string matchers. 16 | attr_accessor :escape 17 | 18 | # Make all patterns global matchers. 19 | attr_accessor :global 20 | 21 | # Make all patterns case-insenstive matchers. 22 | attr_accessor :insensitive 23 | 24 | # Make all patterns multi-line matchers. 25 | attr_accessor :multiline 26 | 27 | # Make backups of files when they change. 28 | attr_accessor :backup 29 | 30 | # Interactive replacement. 31 | attr_accessor :interactive 32 | 33 | # 34 | def initialize(options={}) 35 | @rules = [] 36 | options.each do |k,v| 37 | __send__("#{k}=", v) 38 | end 39 | end 40 | 41 | # 42 | def rule(pattern, replacement) 43 | @rules << [re(pattern), replacement] 44 | end 45 | 46 | # 47 | def apply(*ios) 48 | ios.each do |io| 49 | original = (IO === io || StringIO === io ? io.read : io.to_s) 50 | generate = original.to_s 51 | rules.each do |(pattern, replacement)| 52 | begin 53 | if pattern.global 54 | generate = generate.gsub(pattern.to_re, replacement) 55 | else 56 | generate = generate.sub(pattern.to_re, replacement) 57 | end 58 | rescue => err 59 | warn(io.inspect + ' ' + err.to_s) if $VERBOSE 60 | end 61 | end 62 | if original != generate 63 | write(io, generate) 64 | end 65 | end 66 | end 67 | 68 | # 69 | # TODO: interactive mode needs to handle \1 style substitutions. 70 | def interactive_gsub(string, pattern, replacement) 71 | copy = string.dup 72 | string.scan(pattern) do |match| 73 | print "#{match} ? 
(Y/n)" 74 | case ask 75 | when 'y', 'Y', '' 76 | copy[$~.begin(0)..$~.end(0)] = replacement 77 | else 78 | end 79 | end 80 | end 81 | 82 | private 83 | 84 | # Parse pattern matcher. 85 | def re(pattern) 86 | Matcher.new( 87 | pattern, 88 | :global=>global, 89 | :escape=>escape, 90 | :multiline=>multiline, 91 | :insensitive=>insensitive 92 | ) 93 | end 94 | 95 | # 96 | def write(io, text) 97 | case io 98 | when File 99 | if backup 100 | backup_file = io.path + '.bak' 101 | File.open(backup_file, 'w'){ |f| f << File.read(io.path) } 102 | end 103 | File.open(io.path, 'w'){ |w| w << text } 104 | when StringIO 105 | io.string = text 106 | when IO 107 | # TODO: How to handle general IO object? 108 | io.write(text) 109 | else 110 | io.replace(text) 111 | end 112 | end 113 | 114 | # 115 | def self.cli(argv=ARGV) 116 | searches = [] 117 | replaces = [] 118 | options = {} 119 | parser = OptionParser.new do |opt| 120 | opt.on('--search', '-s PATTERN', 'search portion of substitution') do |search| 121 | searches << search 122 | end 123 | opt.on('--template', '-t NAME', 'search for built-in regular expression') do |name| 124 | searches << "$#{name}" 125 | end 126 | opt.on('--replace', '-r STRING', 'replacement string of substitution') do |replace| 127 | replaces << replace 128 | end 129 | opt.on('--recursive', '-R', 'search recursively though subdirectories') do 130 | options[:recursive] = true 131 | end 132 | opt.on('--escape', '-e', 'make all patterns verbatim string matchers') do 133 | options[:escape] = true 134 | end 135 | opt.on('--insensitive', '-i', 'make all patterns case-insensitive matchers') do 136 | options[:insensitive] = true 137 | end 138 | #opt.on('--unxml', '-x', 'ignore XML/HTML tags') do 139 | # options[:unxml] = true 140 | #end 141 | opt.on('--global', '-g', 'make all patterns global matchers') do 142 | options[:global] = true 143 | end 144 | opt.on('--multiline', '-m', 'make all patterns multi-line matchers') do 145 | options[:multiline] = true 146 | end 
147 | opt.on('-b', '--backup', 'backup any files that are changed') do 148 | options[:backup] = true 149 | end 150 | opt.on('-i', '--interactive', 'interactive mode') do 151 | options[:interactive] = true 152 | end 153 | opt.on_tail('--debug', 'run in debug mode') do 154 | $DEBUG = true 155 | end 156 | opt.on_tail('--help', '-h', 'display this lovely help message') do 157 | puts opt 158 | exit 0 159 | end 160 | end 161 | parser.parse!(argv) 162 | 163 | files = [] 164 | 165 | argv.each{ |file| 166 | raise "file does not exist -- #{file}" unless File.exist?(file) 167 | if File.directory?(file) 168 | if options[:recursive] 169 | files.concat Dir[File.join(file, '**')].reject{ |d| File.directory?(d) } 170 | end 171 | else 172 | files << file 173 | end 174 | } 175 | 176 | targets = files.empty? ? [ARGF] : files.map{ |f| File.new(f) } 177 | 178 | unless searches.size == replaces.size 179 | raise "search replace mismatch -- #{searches.size} to #{replaces.size}" 180 | end 181 | rules = searches.zip(replaces) 182 | 183 | replacer = new(options) 184 | rules.each do |search, replace| 185 | replacer.rule(search, replace) 186 | end 187 | replacer.apply(*targets) 188 | end 189 | 190 | # Basically a Regex but handles a couple extra options. 191 | class Matcher 192 | 193 | # 194 | attr_accessor :global 195 | 196 | # 197 | attr_accessor :escape 198 | 199 | # 200 | attr_accessor :multiline 201 | 202 | # 203 | attr_accessor :insensitive 204 | 205 | # 206 | def initialize(pattern, options={}) 207 | options.each do |k,v| 208 | __send__("#{k}=", v) if respond_to?("#{k}=") 209 | end 210 | @regexp = parse(pattern) 211 | end 212 | 213 | # 214 | def =~(string) 215 | @regexp =~ string 216 | end 217 | 218 | # 219 | def match(string) 220 | @regexp.match(string) 221 | end 222 | 223 | # 224 | def to_re 225 | @regexp 226 | end 227 | 228 | # Parse pattern matcher. 
229 | def parse(pattern) 230 | case pattern 231 | when Regexp 232 | pattern 233 | when /^\$/ 234 | Templates.const_get($'.upcase) 235 | when /^\/(.*?)\/(\w+)$/ 236 | flags = [] 237 | @global = true if $2.index('g') 238 | flags << Regexp::MULTILINE if $2.index('m') or multiline 239 | flags << Regexp::IGNORECASE if $2.index('i') or insensitive 240 | if $2.index('e') or escape 241 | Regexp.new(Regexp.escape($1), *flags) 242 | else 243 | Regexp.new($1, *flags) 244 | end 245 | else 246 | flags = [] 247 | flags << Regexp::MULTILINE if multiline 248 | flags << Regexp::IGNORECASE if insensitive 249 | if escape 250 | Regexp.new(Regexp.escape(pattern), *flags) 251 | else 252 | Regexp.new(pattern, *flags) 253 | end 254 | end 255 | end 256 | 257 | end 258 | 259 | end 260 | 261 | end 262 | -------------------------------------------------------------------------------- /.gemspec: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | require 'yaml' 4 | require 'pathname' 5 | 6 | module Indexer 7 | 8 | # Convert index data into a gemspec. 9 | # 10 | # Notes: 11 | # * Assumes all executables are in bin/. 12 | # * Does not yet handle default_executable setting. 13 | # * Does not yet handle platform setting. 14 | # * Does not yet handle required_ruby_version. 15 | # * Support for rdoc entries is weak. 16 | # 17 | class GemspecExporter 18 | 19 | # File globs to include in package --unless a manifest file exists. 20 | FILES = ".index .yardopts alt bin data demo ext features lib man spec test try* [A-Z]*.*" unless defined?(FILES) 21 | 22 | # File globs to omit from FILES. 23 | OMIT = "Config.rb" unless defined?(OMIT) 24 | 25 | # Standard file patterns. 
PATTERNS = {
  :root => '{.index,Gemfile}',
  :bin  => 'bin/*',
  :lib  => 'lib/{**/}*', #.rb',
  :ext  => 'ext/{**/}extconf.rb',
  :doc  => '*.{txt,rdoc,md,markdown,tt,textile}',
  :test => '{test,spec}/{**/}*.rb'
} unless defined?(PATTERNS)

# For which revision of indexer spec is this converter intended?
REVISION = 2013 unless defined?(REVISION)

# Build a Gem::Specification from the project's index file.
def self.gemspec
  new.to_gemspec
end

# The index metadata, as loaded from `.index` (or as passed in).
attr :metadata

# Create an exporter.
#
# metadata - optional Hash of index data. A `:root` entry, if present,
#            is removed from the hash and used as the project root.
#            When the hash is nil or otherwise empty, the metadata is
#            read from the `.index` file in the project root.
def initialize(metadata=nil)
  @root_check = false

  if metadata
    root_dir = metadata.delete(:root)
    if root_dir
      # Coerce to Pathname: the rest of the class joins paths with `+`,
      # which on a plain String would concatenate without a separator.
      @root = Pathname.new(root_dir)
      @root_check = true
    end
    metadata = nil if metadata.empty?
  end

  @metadata = metadata || YAML.load_file(root + '.index')

  if @metadata['revision'].to_i != REVISION
    warn "This gemspec exporter was not designed for this revision of index metadata."
  end
end

# Is a project root directory known?
def has_root?
  root ? true : false
end

# Project root directory. Located once via #find_root and then
# remembered, even when the lookup came back empty.
def root
  return @root if @root || @root_check
  @root_check = true
  @root = find_root
end

# Path of the manifest file (`manifest` or `manifest.txt`, any case)
# in the project root, or nil if there is none.
def manifest
  return nil unless root
  @manifest ||= Dir.glob((root + 'manifest{,.txt}').to_s, File::FNM_CASEFOLD).first
end

# Source control system in use, `:git` or `:hg`, or nil when the root
# has neither a `.git` nor a `.hg` directory.
def scm
  return nil unless root
  @scm ||= begin
    found = %w{git hg}.find{ |m| (root + ".#{m}").directory? }
    found && found.to_sym
  end
end

# Files to package, relative to the project root. Taken from the
# manifest when one exists, otherwise from the FILES globs less the
# OMIT globs. Only regular files are kept.
def files
  return [] unless root
  @files ||= \
    if manifest
      File.readlines(manifest).
        map{ |line| line.strip }.
        reject{ |line| line.empty? || line[0,1] == '#' }
    else
      list = []
      Dir.chdir(root) do
        FILES.split(/\s+/).each do |pattern|
          list.concat(glob(pattern))
        end
        OMIT.split(/\s+/).each do |pattern|
          list = list - glob(pattern)
        end
      end
      list
    end.select{ |path|
      # Entries are relative to the root, which need not be the
      # current working directory.
      File.file?(File.expand_path(path, root.to_s))
    }.uniq
end

# Glob +pattern+ from the project root, keeping only regular files
# that are also part of #files.
def glob_files(pattern)
  return [] unless root
  Dir.chdir(root) do
    Dir.glob(pattern).select do |path|
      File.file?(path) && files.include?(path)
    end
  end
end

# Standard file patterns.
def patterns
  PATTERNS
end

# Names of the executables found under bin/.
def executables
  @executables ||= \
    glob_files(patterns[:bin]).map do |path|
      File.basename(path)
    end
end

# Paths of the extension build scripts (extconf.rb), relative to the
# project root. The directory part is kept because it is what tells
# one extension from another.
def extensions
  @extensions ||= glob_files(patterns[:ext])
end

# Gem name: the `name` entry, else the title downcased with runs of
# non-word characters turned into underscores.
def name
  metadata['name'] || metadata['title'].downcase.gsub(/\W+/,'_')
end

# URI of the first resource whose type starts with "home", or whose
# name starts with "home" or "web". Returns false if there is none.
def homepage
  resources = metadata['resources'] || []
  page = (
    resources.find{ |r| r['type'] =~ /^home/i } ||
    resources.find{ |r| r['name'] =~ /^home/i } ||
    resources.find{ |r| r['name'] =~ /^web/i }
  )
  page ? page['uri'] : false
end

# Licenses named in the copyright entries.
def licenses
  (metadata['copyrights'] || []).map{ |c| c['license'] }.compact
end

# Load paths, defaulting to `lib`.
def require_paths
  paths = metadata['paths'] || {}
  paths['load'] || ['lib']
end

#
# Convert to gemspec.
#
def to_gemspec
  Gem::Specification.new do |gemspec|
    to_gemspec_data(gemspec)
    to_gemspec_paths(gemspec)
  end
end

#
# Convert pure data settings.
181 | # 182 | def to_gemspec_data(gemspec) 183 | gemspec.name = name 184 | gemspec.version = metadata['version'] 185 | gemspec.summary = metadata['summary'] 186 | gemspec.description = metadata['description'] 187 | 188 | metadata['authors'].each do |author| 189 | gemspec.authors << author['name'] 190 | 191 | if author.has_key?('email') 192 | if gemspec.email 193 | gemspec.email << author['email'] 194 | else 195 | gemspec.email = [author['email']] 196 | end 197 | end 198 | end 199 | 200 | gemspec.licenses = licenses 201 | 202 | requirements = metadata['requirements'] || [] 203 | requirements.each do |req| 204 | next if req['optional'] 205 | next if req['external'] 206 | 207 | name = req['name'] 208 | groups = req['groups'] || [] 209 | 210 | version = gemify_version(req['version']) 211 | 212 | if groups.empty? or groups.include?('runtime') 213 | # populate runtime dependencies 214 | if gemspec.respond_to?(:add_runtime_dependency) 215 | gemspec.add_runtime_dependency(name,*version) 216 | else 217 | gemspec.add_dependency(name,*version) 218 | end 219 | else 220 | # populate development dependencies 221 | if gemspec.respond_to?(:add_development_dependency) 222 | gemspec.add_development_dependency(name,*version) 223 | else 224 | gemspec.add_dependency(name,*version) 225 | end 226 | end 227 | end 228 | 229 | # convert external dependencies into gemspec requirements 230 | requirements.each do |req| 231 | next unless req['external'] 232 | gemspec.requirements << ("%s-%s" % req.values_at('name', 'version')) 233 | end 234 | 235 | gemspec.homepage = homepage 236 | gemspec.require_paths = require_paths 237 | gemspec.post_install_message = metadata['install_message'] 238 | end 239 | 240 | # 241 | # Set gemspec settings that require a root directory path. 242 | # 243 | def to_gemspec_paths(gemspec) 244 | gemspec.files = files 245 | gemspec.extensions = extensions 246 | gemspec.executables = executables 247 | 248 | if Gem::VERSION < '1.7.' 
249 | gemspec.default_executable = gemspec.executables.first 250 | end 251 | 252 | gemspec.test_files = glob_files(patterns[:test]) 253 | 254 | unless gemspec.files.include?('.document') 255 | gemspec.extra_rdoc_files = glob_files(patterns[:doc]) 256 | end 257 | end 258 | 259 | # 260 | # Return a copy of this file. This is used to generate a local 261 | # .gemspec file that can automatically read the index file. 262 | # 263 | def self.source_code 264 | File.read(__FILE__) 265 | end 266 | 267 | private 268 | 269 | def find_root 270 | root_files = patterns[:root] 271 | if Dir.glob(root_files).first 272 | Pathname.new(Dir.pwd) 273 | elsif Dir.glob("../#{root_files}").first 274 | Pathname.new(Dir.pwd).parent 275 | else 276 | #raise "Can't find root of project containing `#{root_files}'." 277 | warn "Can't find root of project containing `#{root_files}'." 278 | nil 279 | end 280 | end 281 | 282 | def glob(pattern) 283 | if File.directory?(pattern) 284 | Dir.glob(File.join(pattern, '**', '*')) 285 | else 286 | Dir.glob(pattern) 287 | end 288 | end 289 | 290 | def gemify_version(version) 291 | case version 292 | when /^(.*?)\+$/ 293 | ">= #{$1}" 294 | when /^(.*?)\-$/ 295 | "< #{$1}" 296 | when /^(.*?)\~$/ 297 | "~> #{$1}" 298 | else 299 | version 300 | end 301 | end 302 | 303 | end 304 | 305 | end 306 | 307 | Indexer::GemspecExporter.gemspec -------------------------------------------------------------------------------- /lib/regex/extractor.rb: -------------------------------------------------------------------------------- 1 | require 'fileutils' 2 | require 'open-uri' 3 | require 'regex/string' 4 | 5 | module Regex 6 | 7 | # Supports [:name:] notation for subsitution of built-in templates. 8 | class Extractor 9 | 10 | # When the regular expression return multiple groups, 11 | # each is divided by the group deliminator. 12 | # This is the default value. 
DELIMINATOR_GROUP  = 29.chr + "\n"

# When using repeat mode, each match is divided by
# the record deliminator. This is the default value.
DELIMINATOR_RECORD = 30.chr + "\n"

# TODO: Separate by file ?
# DELIMINATOR_FILE = 28.chr +" \n"

# Text of +input+, read once and then remembered per input object, so
# an IO can be consulted again after it has been consumed. A String is
# its own text.
def self.input_cache(input)
  @input_cache ||= {}
  @input_cache[input] ||= (
    case input
    when String
      input
    else
      input.read
    end
  )
end

# List of IO objects or Strings to search.
attr_accessor :io

# Remove XML tags from search. (NOT CURRENTLY SUPPORTED)
attr_accessor :unxml

# Regular expression.
attr_accessor :pattern

# Select built-in regular expression by name.
attr_accessor :template

# Is a recursive search?
attr_accessor :recursive

# Index of expression return.
attr_accessor :index

# Multiline match.
attr_accessor :multiline

# Ignore case.
attr_accessor :insensitive

# Escape expression.
attr_accessor :escape

# Repeat Match (global).
attr_accessor :repeat

# Output format.
attr_accessor :format

# Provide detailed output.
attr_accessor :detail

# Use ANSI codes in output?
attr_accessor :ansi

# Use ANSI codes in output?
def ansi? ; @ansi ; end

# New extractor.
#
# io - any number of IO objects or Strings to search; a trailing Hash
#      is taken as options, each key being assigned through the
#      matching attribute writer.
def initialize(*io)
  options = Hash === io.last ? io.pop : {}

  @io   = io
  @ansi = true

  options.each do |k,v|
    __send__("#{k}=", v)
  end
end

#
def inspect
  "#{self.class.name}"
end

#--
# TODO: unxml is too primitive, use real xml parser like nokogiri
#++
#def text
#  @text ||= (
#    if unxml
#      raw.gsub!(/\<(.*?)\>/, '')
#    else
#      @raw
#    end
#  )
#end

# The regular expression to search with, built once.
#
# A template name wins over a pattern. A String pattern is compiled
# with the multiline/insensitive options, either escaped verbatim or
# with `[:name:]` template references expanded.
def regex
  @regex ||= (
    if template
      Templates.const_get(template.upcase)
    else
      case pattern
      when Regexp
        pattern
      when String
        flags = 0
        # Accumulate with |= so the options are actually kept.
        flags |= Regexp::MULTILINE  if multiline
        flags |= Regexp::IGNORECASE if insensitive
        if escape
          Regexp.new(Regexp.escape(pattern), flags)
        else
          pat = substitute_templates(pattern)
          Regexp.new(pat, flags)
        end
      end
    end
  )
end

# Replace every `[:name:]` reference in +pattern+ with the source of
# the built-in template of that name.
def substitute_templates(pattern)
  pat = pattern
  Templates.list.each do |name|
    if pat.include?("[:#{name}:]")
      # The replacement is given as a block: in a replacement *string*
      # gsub would interpret backslash sequences, mangling templates
      # that contain back-references such as `\1`.
      # NOTE(review): `(?!:\\)` is a lookahead that can never fail here;
      # it was presumably meant as a lookbehind for an escaping
      # backslash -- confirm the intent before changing it.
      pat = pat.gsub(/(?!:\\)\[\:#{name}\:\]/){ Templates[name].to_s }
    end
  end
  pat
end

# Render the matches as text in the given format (:yaml, :json, or
# plain/detailed text otherwise).
def to_s(format=nil)
  case format
  when :yaml
    to_s_yaml
  when :json
    to_s_json
  else
    if detail
      output_detailed_text
    else
      output_text
    end
  end
end

#
def to_s_yaml
  require 'yaml'
  if detail
    matches_by_path.to_yaml
  else
    structure.to_yaml
  end
end

#
def to_s_json
  begin
    require 'json'
  rescue LoadError
    require 'json_pure'
  end
  if detail
    matches_by_path.to_json
  else
    structure.to_json
  end
end

# Plain text output: groups joined by the group deliminator and, in
# repeat mode, matches joined by the record deliminator.
def output_text
  out = structure
  if repeat
    out = out.map{ |m| m.join(deliminator_group) }
    out = out.join(deliminator_record) #.chomp("\n") + "\n"
  else
    out = out.join(deliminator_group) #.chomp("\n") + "\n"
  end
  out
end

# Detailed text output. Always returns a String.
def output_detailed_text
  if repeat
    count  = 0
    string = []
    mapping.each do |input, matches|
      path = (File === input ? input.path : "(io #{input.object_id})")
      string << ""
      string << bold(path)
      matches.each do |match|
        string << formatted_match(input, match)
        count += 1
      end
    end
    string << "\n(#{count} matches)"
    string.join("\n")
  else
    string = []
    match  = scan.first
    return '' unless match  # nothing matched, nothing to report
    input  = match.input
    path   = (File === input ? input.path : "(io #{input.object_id})")
    string << ""
    string << bold(path)
    string << formatted_match(input, match)
    string << "" #"\n1 match"
    string.join("\n") + "\n"
  end
end

# Text lines describing one match: line, character offset and matched
# text, for the selected index only or for the whole match followed by
# each of its groups.
def formatted_match(input, match)
  string = []
  part, char, line = match.info(0)
  if index
    part, char, line = match.info(index)
    string << "%s %s %s" % [line, char, part.inspect]
  else
    string << bold("%s %s %s" % [line, char, part.inspect])
    if match.size > 0
      (1...match.size).each do |i|
        part, char, line = match.info(i)
        string << "#{i}. %s %s %s" % [line, char, part.inspect]
      end
    end
  end
  string.join("\n")
end

# Match breakdowns keyed by input path (or an "(io ...)" label for
# inputs that are not Files).
def matches_by_path
  r = Hash.new{ |h,k| h[k] = [] }
  h = Hash.new{ |h,k| h[k] = [] }
  scan.each do |match|
    h[match.input] << match
  end
  h.each do |input, matches|
    path = (File === input ? input.path : "(io #{input.object_id})")
    if index
      matches.each do |match|
        r[path] << match.breakdown[index]
      end
    else
      matches.each do |match|
        r[path] << match.breakdown
      end
    end
  end
  r
end

# Structure the matchdata according to specified options.
def structure
  repeat ? structure_repeat : structure_single
end

# Structure the matchdata for single match.
def structure_single
  structure_repeat.first || []
end

# Structure the matchdata for repeat matches.
def structure_repeat
  if index
    scan.map{ |match| [match[index]] }
  else
    scan.map{ |match| match.size > 1 ? match[1..-1] : [match[0]] }
  end
end

# Scan inputs for matches.
#
# Returns an Array of Match objects, each built from an input and the
# match data found in it. An input that raises while being read or
# scanned is skipped (with a warning when $VERBOSE is set).
def scan
  list = []
  io.each do |input|
    # TODO: limit to text files, how?
    begin
      text = read(input)
      text.scan(regex) do
        list << Match.new(input, $~)
      end
    rescue => err
      warn(input.inspect + ' ' + err.to_s) if $VERBOSE
    end
  end
  list
end

# Matches grouped by the input they were found in.
def mapping
  hash = Hash.new{ |h,k| h[k]=[] }
  scan.each do |match|
    hash[match.input] << match
  end
  hash
end

# TODO: unxml won't give correct char counts.
def read(input)
  Extractor.input_cache(input)
  # if unxml
  #   txt.gsub(/\<(.*?)\>/, '')
  # else
  #   txt
  # end
end

# Return the line number of the +char+ position within +text+.
def line_at(io, char)
  read(io)[0..char].count("\n") + 1
end

def deliminator_group
  DELIMINATOR_GROUP
end

def deliminator_record
  DELIMINATOR_RECORD
end

# Commandline Interface to Extractor.
336 | def self.cli(argv=ARGV) 337 | require 'optparse' 338 | format = nil 339 | options = {} 340 | parser = OptionParser.new do |opt| 341 | opt.on('--template', '-t NAME', "select a built-in regular expression") do |name| 342 | options[:template] = name 343 | end 344 | opt.on('--search', '-s PATTERN', "search for regular expression") do |re| 345 | options[:pattern] = re 346 | end 347 | opt.on('--recursive', '-R', 'search recursively though subdirectories') do 348 | options[:recursive] = true 349 | end 350 | opt.on('--escape', '-e', 'make all patterns verbatim string matchers') do 351 | options[:escape] = true 352 | end 353 | opt.on('--index', '-n INT', "return a specific match index") do |int| 354 | options[:index] = int.to_i 355 | end 356 | opt.on('--insensitive', '-i', "case insensitive matching") do 357 | options[:insensitive] = true 358 | end 359 | opt.on('--multiline', '-m', "multiline matching") do 360 | options[:multiline] = true 361 | end 362 | #opt.on('--unxml', '-x', "ignore XML/HTML tags") do 363 | # options[:unxml] = true 364 | #end 365 | opt.on('--global', '-g', "find all matching occurances") do 366 | options[:repeat] = true 367 | end 368 | opt.on('--yaml', '-y', "output in YAML format") do 369 | format = :yaml 370 | end 371 | opt.on('--json', '-j', "output in JSON format") do 372 | format = :json 373 | end 374 | opt.on('--detail', '-d', "provide match details") do 375 | options[:detail] = :json 376 | end 377 | opt.on('--[no-]ansi', "toggle ansi color") do |val| 378 | options[:ansi] = val 379 | end 380 | opt.on_tail('--debug', 'run in debug mode') do 381 | $DEBUG = true 382 | end 383 | opt.on_tail('--help', '-h', "display this lovely help message") do 384 | puts opt 385 | exit 0 386 | end 387 | end 388 | parser.parse!(argv) 389 | 390 | unless options[:pattern] or options[:template] 391 | re = argv.shift 392 | case re 393 | when /^\/(.*?)\/(\w*?)$/ 394 | options[:pattern] = $1 395 | $2.split(//).each do |c| 396 | case c 397 | when 'e' then 
options[:escape] = true 398 | when 'g' then options[:repeat] = true 399 | when 'i' then options[:insensitive] = true 400 | end 401 | end 402 | else 403 | options[:template] = re 404 | end 405 | end 406 | 407 | files = [] 408 | argv.each do |file| 409 | if File.directory?(file) 410 | if options[:recursive] 411 | rec_files = Dir[File.join(file, '**')].reject{ |d| File.directory?(d) } 412 | files.concat(rec_files) 413 | end 414 | elsif File.file?(file) 415 | files << file 416 | else 417 | $stderr.puts "Not a file -- '#{file}'." 418 | exit 1 419 | end 420 | end 421 | 422 | if files.empty? 423 | args = [ARGF] 424 | else 425 | args = files.map{ |f| open(f) } #File.new(f) } 426 | end 427 | 428 | args << options 429 | 430 | extract = new(*args) 431 | 432 | puts extract.to_s(format) 433 | end 434 | 435 | # 436 | def bold(str) 437 | if ansi? 438 | "\e[1m" + str + "\e[0m" 439 | else 440 | string 441 | end 442 | end 443 | 444 | 445 | # 446 | class Match 447 | attr :input 448 | attr :match 449 | 450 | # match - Instance of MatchData 451 | # 452 | def initialize(input, match) 453 | @input = input 454 | @match = match 455 | end 456 | 457 | # 458 | def [](i) 459 | @match[i] 460 | end 461 | 462 | # 463 | def size 464 | @match.size 465 | end 466 | 467 | # 468 | def breakdown 469 | m = [] 470 | range = (0...match.size) 471 | range.each do |i| 472 | char = match.offset(i)[0] 473 | line = line_at(char) 474 | part = match[i] 475 | m << {'index'=>i, 'line'=>line, 'char'=>char, 'text'=>part} 476 | end 477 | m 478 | end 479 | 480 | # 481 | def info(index) 482 | text = match[index] 483 | char = match.offset(index)[0] 484 | line = line_at(char) 485 | return text, char, line 486 | end 487 | 488 | # Return the line number of the +char+ position within +text+. 
489 | def line_at(char) 490 | return nil unless char 491 | text[0..char].count("\n") + 1 492 | end 493 | 494 | # 495 | def text 496 | Extractor.input_cache(input) 497 | end 498 | 499 | end 500 | 501 | 502 | end 503 | 504 | end 505 | 506 | --------------------------------------------------------------------------------