├── .gitignore ├── Gemfile ├── lib ├── rrrex │ ├── core_ext.rb │ ├── unescaped_string_match.rb │ ├── not_match.rb │ ├── or_match.rb │ ├── concat_match.rb │ ├── core_ext │ │ ├── range.rb │ │ ├── fixnum.rb │ │ └── string.rb │ ├── string_match.rb │ ├── regexp.rb │ ├── range_match.rb │ ├── composite_match.rb │ ├── group_match.rb │ ├── single_atom_match.rb │ ├── number_match.rb │ ├── match_data.rb │ ├── match.rb │ └── dsl_context.rb ├── rrrex.rb └── method_missing_conversion.rb ├── Gemfile.lock ├── bin └── rake ├── examples ├── groups.rb └── phone_numbers.rb ├── LICENSE ├── rrrex.gemspec ├── Rakefile ├── README.md └── test └── match_test.rb /.gitignore: -------------------------------------------------------------------------------- 1 | pkg 2 | .bundle 3 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'http://rubygems.org' 2 | 3 | group :test do 4 | gem "rake" 5 | gem "mocha" 6 | end 7 | -------------------------------------------------------------------------------- /lib/rrrex/core_ext.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/core_ext/fixnum' 2 | require 'rrrex/core_ext/range' 3 | require 'rrrex/core_ext/string' 4 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: http://rubygems.org/ 3 | specs: 4 | mocha (0.9.12) 5 | rake (0.9.2) 6 | 7 | PLATFORMS 8 | ruby 9 | 10 | DEPENDENCIES 11 | mocha 12 | rake 13 | -------------------------------------------------------------------------------- /lib/rrrex/unescaped_string_match.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/string_match' 2 | module Rrrex 3 | class UnescapedStringMatch < StringMatch 4 | def to_regexp_string 5 | wrap 
atom 6 | end 7 | end 8 | end 9 | -------------------------------------------------------------------------------- /lib/rrrex.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/core_ext' 2 | module Rrrex 3 | def self.to_s &block 4 | self.build( &block ).to_regexp_string 5 | end 6 | 7 | def self.build &block 8 | Match.convert Rrrex::DslContext.module_exec &block 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /lib/rrrex/not_match.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/match' 2 | require 'rrrex/single_atom_match' 3 | module Rrrex 4 | class NotMatch < Match 5 | include SingleAtomMatch 6 | def to_regexp_string 7 | "(?!#{atom.to_regexp_string})" 8 | end 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /lib/rrrex/or_match.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/match' 2 | require 'rrrex/composite_match' 3 | module Rrrex 4 | class OrMatch < Match 5 | include CompositeMatch 6 | def to_regexp_string 7 | wrap @atoms.map {|p| p.to_regexp_string }.join "|" 8 | end 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /lib/rrrex/concat_match.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/match' 2 | require 'rrrex/composite_match' 3 | module Rrrex 4 | class ConcatMatch < Match 5 | include CompositeMatch 6 | def to_regexp_string 7 | wrap @atoms.map {|p| p.to_regexp_string }.join "" 8 | end 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /lib/rrrex/core_ext/range.rb: -------------------------------------------------------------------------------- 1 | require 'method_missing_conversion' 2 | require 'rrrex/range_match' 3 | class Range 4 | 
include MethodMissingConversion 5 | sends_methods_to [ :or, :+ ], Rrrex::RangeMatch 6 | 7 | def of atom, opts={} 8 | Rrrex::NumberMatch.new atom, self.begin, self.end, opts 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /lib/rrrex/string_match.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/match' 2 | require 'rrrex/single_atom_match' 3 | module Rrrex 4 | class StringMatch < Match 5 | include SingleAtomMatch 6 | def initialize( a ) 7 | @atom = a 8 | end 9 | 10 | def to_regexp_string 11 | wrap Regexp.escape atom 12 | end 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /lib/rrrex/core_ext/fixnum.rb: -------------------------------------------------------------------------------- 1 | class Fixnum 2 | def exactly( atom ) 3 | Rrrex::NumberMatch.new atom, self, self 4 | end 5 | 6 | def or_more( atom, opts={} ) 7 | Rrrex::NumberMatch.new atom, self, nil, opts 8 | end 9 | 10 | def or_less( atom, opts={} ) 11 | Rrrex::NumberMatch.new atom, nil, self, opts 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /lib/rrrex/regexp.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/match_data' 2 | module Rrrex 3 | class Regexp < ::Regexp 4 | def initialize( r ) 5 | super r.to_regexp_string 6 | @atom = r 7 | end 8 | 9 | def match( str ) 10 | result = super( str ) 11 | MatchData.new @atom, result unless result.nil? 
12 | end 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /lib/rrrex/range_match.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/match' 2 | require 'rrrex/single_atom_match' 3 | module Rrrex 4 | class RangeMatch < Match 5 | include SingleAtomMatch 6 | def initialize( range ) 7 | @range = range 8 | end 9 | 10 | def to_regexp_string 11 | wrap "[#{@range.first}-#{@range.last}]" 12 | end 13 | end 14 | end 15 | -------------------------------------------------------------------------------- /lib/rrrex/composite_match.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/composite_match' 2 | module Rrrex 3 | module CompositeMatch 4 | def initialize(*args) 5 | @atoms = args.collect do |a| 6 | input a 7 | end 8 | end 9 | 10 | def group_names 11 | @atoms.inject( [] ) do |memo,a| 12 | memo + a.group_names 13 | end 14 | end 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /bin/rake: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # 3 | # This file was generated by Bundler. 4 | # 5 | # The application 'rake' is installed as part of a gem, and 6 | # this file is here to facilitate running it. 
7 | # 8 | 9 | require 'pathname' 10 | ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile", 11 | Pathname.new(__FILE__).realpath) 12 | 13 | require 'rubygems' 14 | require 'bundler/setup' 15 | 16 | load Gem.bin_path('rake', 'rake') 17 | -------------------------------------------------------------------------------- /lib/rrrex/group_match.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/match' 2 | require 'rrrex/single_atom_match' 3 | module Rrrex 4 | class GroupMatch < Match 5 | include SingleAtomMatch 6 | def initialize atom, name 7 | @name = name 8 | super atom 9 | end 10 | 11 | def to_regexp_string 12 | "(#{atom.to_regexp_string})" 13 | end 14 | 15 | def group_names 16 | names = @atom.group_names || [] 17 | names.unshift @name 18 | end 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /lib/rrrex/single_atom_match.rb: -------------------------------------------------------------------------------- 1 | module Rrrex 2 | module SingleAtomMatch 3 | 4 | attr_reader :atom 5 | 6 | def initialize( a ) 7 | self.atom = a 8 | end 9 | 10 | def atom=( a ) 11 | @atom = input a 12 | end 13 | 14 | def to_regexp_string 15 | wrap atom.to_regexp_string 16 | end 17 | 18 | def group_names 19 | if @atom.respond_to? :group_names 20 | @atom.group_names if @atom.respond_to? :group_names 21 | else 22 | [] 23 | end 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /examples/groups.rb: -------------------------------------------------------------------------------- 1 | $: << File.join(File.expand_path(File.dirname(__FILE__)), "..", "lib/") 2 | require 'rrrex' 3 | 4 | #named groups! 5 | p "powertiger".rmatch? { 6 | "power" + group(:animal_name, (1.or_more letter)) 7 | }[:animal_name] # => tiger 8 | 9 | #named groups are also accessible via their number, for your convenience 10 | p "powersnail".rmatch?
{ 11 | "power" + group(:animal_name, (1.or_more letter)) 12 | }[1] # => snail 13 | 14 | p "aaabc".rmatch? { group(1.or_more "a") }[1] 15 | p "aaabc".rmatch? { group(:alpha, (1.or_more "a")) }[:alpha] 16 | -------------------------------------------------------------------------------- /lib/rrrex/number_match.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/match' 2 | require 'rrrex/single_atom_match' 3 | module Rrrex 4 | class NumberMatch < Match 5 | include SingleAtomMatch 6 | def initialize( a, min, max, opts={} ) 7 | super a 8 | @opts = { :greedy => true }.merge opts 9 | @min = min 10 | @max = max 11 | end 12 | 13 | def to_regexp_string 14 | # Subtle: when nil, we want min to convert to 0, but max to convert to "" 15 | re_str = atom.to_regexp_string + "{#{@min.to_i},#{@max}}" 16 | re_str += "?" unless @opts[ :greedy ] 17 | wrap re_str 18 | end 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /lib/rrrex/core_ext/string.rb: -------------------------------------------------------------------------------- 1 | require 'method_missing_conversion' 2 | require 'rrrex/string_match' 3 | require 'rrrex/match' 4 | require 'rrrex/dsl_context' 5 | class String 6 | include MethodMissingConversion 7 | sends_methods_to [ :or ], Rrrex::StringMatch 8 | 9 | def plus_with_regexp( str2 ) 10 | if str2.kind_of? 
Rrrex::Match 11 | Rrrex::StringMatch.new( self ) + str2 12 | else 13 | self.plus_without_regexp str2 14 | end 15 | end 16 | alias_method :plus_without_regexp, :+ 17 | alias_method :+, :plus_with_regexp 18 | 19 | def rmatch?( &block ) 20 | Rrrex.build( &block ).match self 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /examples/phone_numbers.rb: -------------------------------------------------------------------------------- 1 | $: << File.join(File.expand_path(File.dirname(__FILE__)), "..", "lib/") 2 | require 'rrrex' 3 | 4 | #something that looks like telephone numbers 5 | phonenumbers = [ 6 | "(123) 456-7890", 7 | "(123)456-7890", 8 | "123-456-7890", 9 | "(123) 456 7890", 10 | "123.456.7890", 11 | "1234567890", 12 | "123456-7890", 13 | "123 456 7890" 14 | ] 15 | 16 | phonenumbers.each {|number| 17 | 18 | number = number.rmatch?{ 19 | group(:area_code, (3.exactly digit)) + 20 | (0.or_more any_char) + 21 | group(:prefix, (3.exactly digit)) + 22 | (0.or_more any_char) + 23 | group(:line_number, (4.exactly digit)) 24 | 25 | } 26 | p "#{number[:area_code]}.#{number[:prefix]}.#{number[:line_number]}" if number 27 | } 28 | 29 | -------------------------------------------------------------------------------- /lib/method_missing_conversion.rb: -------------------------------------------------------------------------------- 1 | module MethodMissingConversion 2 | def self.included receiver 3 | receiver.extend ClassMethods 4 | end 5 | 6 | module ClassMethods 7 | def sends_methods_to methods, convert_to 8 | define_method :method_missing_helper do |name, args, block| 9 | if methods.include? 
name 10 | convert_to.new( self ).send name, *args 11 | else 12 | method_missing_without_regexp name, *args, &block 13 | end 14 | end 15 | alias_method :method_missing_without_regexp, :method_missing 16 | alias_method :method_missing, :method_missing_with_regexp 17 | end 18 | end 19 | 20 | def method_missing_with_regexp( name, *args, &block ) 21 | method_missing_helper name, args, block 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /lib/rrrex/match_data.rb: -------------------------------------------------------------------------------- 1 | module Rrrex 2 | class MatchData 3 | def initialize( atom, match_data ) 4 | @atom = atom 5 | @match_data = match_data 6 | end 7 | 8 | def []( i ) 9 | if i.is_a? Symbol 10 | named_groups[ i ] 11 | else 12 | @match_data[ i ] 13 | end 14 | end 15 | 16 | def named_groups 17 | @named_groups ||= 18 | begin 19 | result = {} 20 | names = @atom.group_names 21 | names.each_index do |i| 22 | result[ names[ i ] ] = @match_data[ i + 1 ] 23 | end 24 | result 25 | end 26 | end 27 | 28 | def to_a 29 | @match_data.to_a 30 | end 31 | 32 | def method_missing( name, *args ) 33 | @match_data.send name, *args 34 | end 35 | end 36 | end 37 | -------------------------------------------------------------------------------- /lib/rrrex/match.rb: -------------------------------------------------------------------------------- 1 | module Rrrex 2 | class Match 3 | end 4 | end 5 | require 'rrrex/regexp' 6 | require 'rrrex/string_match' 7 | require 'rrrex/range_match' 8 | require 'rrrex/or_match' 9 | require 'rrrex/concat_match' 10 | require 'rrrex/not_match' 11 | module Rrrex 12 | class Match 13 | def self.convert( atom ) 14 | if atom.kind_of? Match 15 | atom 16 | elsif atom.kind_of? 
Range 17 | RangeMatch.new atom 18 | else 19 | StringMatch.new atom 20 | end 21 | end 22 | 23 | def wrap( s ) 24 | "(?:#{s})" 25 | end 26 | 27 | def match(str) 28 | Regexp.new( self ).match str 29 | end 30 | 31 | def or(atom) 32 | OrMatch.new self, atom 33 | end 34 | 35 | def +(p) 36 | ConcatMatch.new self, p 37 | end 38 | 39 | def not(atom) 40 | ConcatMatch.new NotMatch.new( atom ), self 41 | end 42 | 43 | def group_names 44 | [] 45 | end 46 | 47 | protected 48 | def input( atom ) 49 | self.class.convert atom 50 | end 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2011 Ian Young 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /lib/rrrex/dsl_context.rb: -------------------------------------------------------------------------------- 1 | require 'rrrex/unescaped_string_match' 2 | require 'rrrex/group_match' 3 | require 'rrrex/number_match' 4 | module Rrrex 5 | module DslContext 6 | 7 | WORD_CHAR = '\w' 8 | DIGIT = '\d' 9 | WHITESPACE = '\s' 10 | LETTER = '[[:alpha:]]' 11 | ANY_CHAR = '.' 12 | 13 | constants.each do |const| 14 | ( class << self; self; end ).instance_eval do 15 | define_method const.downcase.to_sym do 16 | UnescapedStringMatch.new const_get( const ) 17 | end 18 | end 19 | end 20 | 21 | def self.any r, opts={} 22 | NumberMatch.new r, 0, nil, opts 23 | end 24 | 25 | def self.some r, opts={} 26 | NumberMatch.new r, 1, nil, opts 27 | end 28 | 29 | def self._not r 30 | any_char.not r 31 | end 32 | 33 | def self.group( name_or_atom=nil, atom=nil, &block ) 34 | if name_or_atom.kind_of? Symbol 35 | name = name_or_atom 36 | atom = atom || DslContext.module_exec( &block ) 37 | elsif name_or_atom.kind_of? Hash 38 | name = name_or_atom.keys.first 39 | atom = name_or_atom[ name ] 40 | else 41 | name = nil 42 | atom = name_or_atom 43 | end 44 | GroupMatch.new atom, name 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /rrrex.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | Gem::Specification.new do |s| 4 | s.name = %q{rrrex} 5 | s.version = "0.1.0" 6 | 7 | s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= 8 | s.authors = ["Ian Young"] 9 | s.date = %q{2011-05-27} 10 | s.description = %q{ Rrrex is a new syntax for regular expressions. 11 | Less compact, but human-readable. By regular humans. 
12 | } 13 | s.email = %q{ian.greenleaf+github@gmail.com} 14 | s.extra_rdoc_files = ["README.md"] 15 | s.files = ["lib/method_missing_conversion.rb", "lib/rrrex/composite_match.rb", "lib/rrrex/concat_match.rb", "lib/rrrex/core_ext/fixnum.rb", "lib/rrrex/core_ext/range.rb", "lib/rrrex/core_ext/string.rb", "lib/rrrex/core_ext.rb", "lib/rrrex/dsl_context.rb", "lib/rrrex/group_match.rb", "lib/rrrex/match.rb", "lib/rrrex/match_data.rb", "lib/rrrex/not_match.rb", "lib/rrrex/number_match.rb", "lib/rrrex/or_match.rb", "lib/rrrex/range_match.rb", "lib/rrrex/regexp.rb", "lib/rrrex/single_atom_match.rb", "lib/rrrex/string_match.rb", "lib/rrrex/unescaped_string_match.rb", "lib/rrrex.rb", "test/match_test.rb", "LICENSE", "Rakefile", "README.md", "TODO"] 16 | s.homepage = %q{https://github.com/iangreenleaf/rrrex} 17 | s.rdoc_options = ["--main", "README.md"] 18 | s.require_paths = ["lib"] 19 | s.rubygems_version = %q{1.6.2} 20 | s.summary = %q{Really Readable Regexps} 21 | 22 | if s.respond_to? :specification_version then 23 | s.specification_version = 3 24 | 25 | if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then 26 | s.add_development_dependency(%q<mocha>, [">= 0"]) 27 | else 28 | s.add_dependency(%q<mocha>, [">= 0"]) 29 | end 30 | else 31 | s.add_dependency(%q<mocha>, [">= 0"]) 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "rubygems" 2 | require "rake/gempackagetask" 3 | require "rake/rdoctask" 4 | 5 | require "rake/testtask" 6 | Rake::TestTask.new do |t| 7 | t.libs << "test" 8 | t.test_files = FileList["test/**/*_test.rb"] 9 | t.verbose = true 10 | end 11 | 12 | 13 | task :default => ["test"] 14 | 15 | spec = Gem::Specification.new do |s| 16 | 17 | s.name = "rrrex" 18 | s.version = "0.1.0" 19 | s.author = "Ian Young" 20 | s.email = "ian.greenleaf+github@gmail.com" 21 | 22 | s.summary = "Really Readable Regexps" 23 |
s.description = <<-EOF 24 | Rrrex is a new syntax for regular expressions. 25 | Less compact, but human-readable. By regular humans. 26 | EOF 27 | 28 | s.homepage = "https://github.com/iangreenleaf/rrrex" 29 | 30 | s.files = Dir[ 'lib/**/*.rb', 'test/**/*', '[A-Z]*' ] 31 | 32 | s.has_rdoc = true 33 | s.extra_rdoc_files = %w(README.md) 34 | s.rdoc_options = %w(--main README.md) 35 | 36 | s.add_development_dependency("mocha") 37 | end 38 | 39 | Rake::GemPackageTask.new(spec) do |pkg| 40 | pkg.gem_spec = spec 41 | end 42 | 43 | desc "Build the gemspec file #{spec.name}.gemspec" 44 | task :gemspec do 45 | file = File.dirname(__FILE__) + "/#{spec.name}.gemspec" 46 | File.open(file, "w") {|f| f << spec.to_ruby } 47 | end 48 | 49 | task :package => :gemspec 50 | 51 | Rake::RDocTask.new do |rd| 52 | rd.main = "README.md" 53 | rd.rdoc_files.include("README.md", "lib/**/*.rb") 54 | rd.rdoc_dir = "rdoc" 55 | end 56 | 57 | desc 'Clear out RDoc and generated packages' 58 | task :clean => [:clobber_rdoc, :clobber_package] do 59 | rm "#{spec.name}.gemspec" 60 | end 61 | 62 | desc 'Tag the repository in git with gem version number' 63 | task :tag => [:gemspec, :package] do 64 | if `git diff --cached`.empty? 65 | if `git tag`.split("\n").include?("v#{spec.version}") 66 | raise "Version #{spec.version} has already been released" 67 | end 68 | `git add #{File.expand_path("../#{spec.name}.gemspec", __FILE__)}` 69 | `git commit -m "Released version #{spec.version}"` 70 | `git tag v#{spec.version}` 71 | `git push --tags` 72 | `git push` 73 | else 74 | raise "Unstaged changes still waiting to be committed" 75 | end 76 | end 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Really Readable Regular Expressions 2 | ================ 3 | 4 | rrrex is a new syntax for regular expressions. 
It trades compactness for readability by real humans, and picks up a couple nice perks 5 | along the way. 6 | 7 | [![Build Status]](http://travis-ci.org/iangreenleaf/rrrex) 8 | 9 | Crash Course 10 | ============ 11 | 12 | "The string you'd like to search".rmatch? { "string" } 13 | 14 | "abc".rmatch? { "ab" + "c" } 15 | 16 | "abc".rmatch? { "xyz".or "abc" } 17 | 18 | You don't have to worry about escaping special characters in your strings any more: 19 | 20 | "{symbols} .*&+ [galore]".rmatch? { "{symbols} .*&+ [galore]" } 21 | 22 | You can combine operations and get the expected precedence: 23 | 24 | "abc".rmatch? { "ab" + ( "z".or "c" ) } 25 | 26 | You can use `Rrrex.build` to make reusable, composable objects: 27 | 28 | names = Rrrex.build { "foo".or "bar" } 29 | nums = Rrrex.build { "1".or "2" } 30 | names.match "foo" 31 | "foo112".rmatch? { names + ( some nums )} 32 | 33 | Repetition: 34 | 35 | "aaabc".rmatch? { 1.or_more "a" } 36 | "aaabc".rmatch? { 5.or_less "a" } 37 | "aaabc".rmatch? { 3.exactly "a" } 38 | "aaabc".rmatch? { (1..5).of "a" } 39 | 40 | These are equivalent: 41 | 42 | "aaabc".rmatch? { 0.or_more "a" } 43 | "aaabc".rmatch? { any "a" } 44 | 45 | And these are equivalent: 46 | 47 | "aaabc".rmatch? { 1.or_more "a" } 48 | "aaabc".rmatch? { some "a" } 49 | 50 | Repetition can be nongreedy: 51 | 52 | "aaabc".rmatch? { 2.or_more "a", :greedy => false } # matches "aa" 53 | 54 | Special character sets: 55 | 56 | "abc1234.&*".rmatch? { 10.exactly any_char } 57 | "abc1234".rmatch? { 3.exactly letter } 58 | "abc1234".rmatch? { 4.exactly digit } 59 | "abc_123".rmatch? { 7.exactly word_char } 60 | " ".rmatch? { whitespace } 61 | 62 | Or create your own: 63 | 64 | "abc".rmatch? { 3.exactly "a".."c" } 65 | 66 | Two types of negation: 67 | 68 | "x".rmatch? { word_char.not "x" } # => nil 69 | "y".rmatch? { word_char.not "x" } 70 | "x".rmatch? { _not "x" } # => nil 71 | "y".rmatch? { _not "x" } 72 | 73 | 74 | Groups: 75 | 76 | "aaabc".rmatch? 
{ group(1.or_more "a") }[1] 77 | "aaabc".rmatch? { group(:alpha, (1.or_more "a")) }[:alpha] 78 | 79 | match = "1234567890 Central Processing".rmatch? do 80 | group :serial do 81 | some digit 82 | end + some whitespace + group :source do 83 | any any_char 84 | end 85 | end 86 | match 87 | 88 | See the [examples](https://github.com/iangreenleaf/rrrex/tree/master/examples/) for more ideas. 89 | 90 | [Build Status]: https://travis-ci.org/iangreenleaf/rrrex.png?branch=master 91 | -------------------------------------------------------------------------------- /test/match_test.rb: -------------------------------------------------------------------------------- 1 | require 'test/unit' 2 | require 'mocha' 3 | require 'rrrex' 4 | 5 | class MatchTest < Test::Unit::TestCase 6 | 7 | def test_match_simple_string 8 | [ ["a", "a"], ["bc", "babb bc"], ["úñícode", "i like úñícode"] ].each do |pattern,string| 9 | m = Rrrex::StringMatch.new pattern 10 | assert m.match string 11 | end 12 | end 13 | 14 | def test_dont_match_simple_string 15 | [ ["a", "b"], ["bc", "bac def"], ["úñícode", "i like unicode"] ].each do |pattern,string| 16 | m = Rrrex::StringMatch.new pattern 17 | assert_nil m.match string 18 | end 19 | end 20 | 21 | def test_inline_match_triggers_module 22 | rxp_stub = stub "Rrrex::Match", { :match => true } 23 | Rrrex::StringMatch.expects(:new).with("oo").returns(rxp_stub) 24 | "foobar".rmatch? 
do "oo" end 25 | end 26 | 27 | def test_match_simple_string_inline 28 | assert_match "oo", "foobar" do "oo" end 29 | end 30 | 31 | def test_dont_match_simple_string_inline 32 | assert_no_match "foobar" do "xy" end 33 | end 34 | 35 | def test_special_characters_escaped_in_string 36 | assert_no_match "foobar" do "o+" end 37 | assert_match "o+", "+hello+" do "o+" end 38 | end 39 | 40 | def test_match_or 41 | assert_match "foo", "foobar" do "xy".or "foo" end 42 | end 43 | 44 | def test_match_any 45 | assert_match "x", "x" do any "x" end 46 | assert_match "aaaaa", "aaaaab" do any "a" end 47 | assert_match "abab", "ababaab" do any "ab" end 48 | assert_match "", "xxxx" do any "y" end 49 | end 50 | 51 | def test_match_any_nongreedy 52 | assert_match "", "x" do any "x", :greedy => false end 53 | # WTF Ruby 1.8 bug 54 | #assert_match "abbbbc", "abbbbc" do "a" + any( "a", :greedy => false ) + "c" end 55 | end 56 | 57 | def test_match_some 58 | assert_match "x", "x" do some "x" end 59 | assert_match "aaaaa", "aaaaab" do some "a" end 60 | assert_match "abab", "ababaab" do some "ab" end 61 | assert_no_match "xxxx" do some "y" end 62 | end 63 | 64 | def test_match_some_nongreedy 65 | assert_match "a", "aaaaab" do some "a", :greedy => false end 66 | end 67 | 68 | def test_match_not 69 | assert_match "x", "x" do _not "y" end 70 | assert_no_match "x" do _not "x" end 71 | assert_match "cdef", "abcdefab" do some _not( "a".or "b" ) end 72 | assert_match "defa", "abcdefaab" do some _not( "ab".or "b".or "c" ) end 73 | end 74 | 75 | def test_match_lookahead_not 76 | assert_match "y", "xy" do letter.not "x" end 77 | assert_match "1234", "123456789" do some( digit.not "5" ) end 78 | assert_match "1234", "1234abc" do some( digit.not "5" ) end 79 | assert_match "abb", "abcabb" do ( "ab" + letter ).not "abc" end 80 | assert_no_match "abbbc" do "a" + ( (1..6).of "a" ).not( "aaa" ) + "c" end 81 | assert_match "21", "123321" do 2.or_more digit.not( "12".or "3" ) end 82 | end 83 | 84 | def 
test_match_concat 85 | assert_match "foobar", "foobar" do "foo" + "bar" end 86 | assert_match "foobarbaz", "foobarbazbar" do "foo" + "bar" + "baz" end 87 | end 88 | 89 | def test_match_num_exactly 90 | assert_match "oo", "foobar" do 2.exactly "o" end 91 | assert_match "oo", "foooobar" do 2.exactly "o" end 92 | assert_no_match "foobar" do 3.exactly "o" end 93 | assert_match "foobar", "foobar" do "f" + 2.exactly( "o" ) + "bar" end 94 | assert_no_match "foobar" do "f" + 1.exactly( "o" ) + "bar" end 95 | end 96 | 97 | def test_match_num_or_more 98 | assert_match "oo", "foobar" do 2.or_more "o" end 99 | assert_match "foooo", "foooobar" do "f" + 2.or_more( "o" ) end 100 | assert_no_match "foobar" do 3.or_more "o" end 101 | end 102 | 103 | def test_match_num_or_more_nongreedy 104 | assert_match "foo", "foooobar" do "f" + 2.or_more( "o", :greedy => false ) end 105 | assert_no_match "foobar" do 3.or_more "o", :greedy => false end 106 | end 107 | 108 | def test_match_num_or_less 109 | assert_match "xx", "xx" do 2.or_less "x" end 110 | assert_match "xx", "xxxxxxxx" do 2.or_less "x" end 111 | assert_match "xx", "xx" do 100.or_less "x" end 112 | assert_match "foobar", "foobar" do "f" + 2.or_less( "o" ) + "bar" end 113 | assert_match "fbar", "fbar" do "f" + 2.or_less( "o" ) + "bar" end 114 | assert_no_match "foooobar" do "f" + 3.or_less( "o" ) + "bar" end 115 | assert_match "", "xxxxx" do 3.or_less "y" end 116 | end 117 | 118 | def test_match_num_or_less_nongreedy 119 | assert_match "", "xx" do 2.or_less "x", :greedy => false end 120 | # WTF Ruby 1.8 bug 121 | #assert_match "foobar", "foobar" do "f" + 2.or_less( "o", :greedy => false ) + "bar" end 122 | end 123 | 124 | def test_range_of_matches 125 | assert_match "xx", "xx" do (2..4).of "x" end 126 | assert_match "xxx", "xxx" do (2..100).of "x" end 127 | assert_match "xxxx", "xxxxxxxx" do (2..4).of "x" end 128 | assert_no_match "foobar" do "f" + (3..4).of( "o" ) + "bar" end 129 | assert_no_match "foooooooobar" do "f" + 
(3..4).of( "o" ) + "bar" end 130 | assert_no_match "xxxx" do (1..100).of( "y" ) + "bar" end 131 | end 132 | 133 | def test_range_nongereedy 134 | assert_match "xx", "xxxx" do (2..4).of "x", :greedy => false end 135 | end 136 | 137 | def test_char_range 138 | assert_match "b", "b" do "a".."c" end 139 | assert_no_match "b" do "A".."C" end 140 | assert_match "abc", "abcdefg" do 1.or_more "a".."c" end 141 | assert_match "123", "123456789" do (1..4).of 1..3 end 142 | assert_match "x8", "ax87" do 1.or_more( ("q".."z").or(8..9) ) end 143 | assert_match "az", "az" do ("a".."c") + ("w".."z") end 144 | end 145 | 146 | def test_precedence 147 | assert_match "foobar", "foobar" do "foo" + ( "xyz".or "bar" ) end 148 | assert_match "bar", "foobar" do ( "xyz" + "foo" ).or "bar" end 149 | assert_match "fo", "foobar" do ( "xyz" + "foo" ).or( "xyz".or "fo" ).or( "foo" + "xyz" ) end 150 | end 151 | 152 | def test_dont_add_extra_backreferences 153 | mdata = "foobar".rmatch? do "foo" + ( "xyz".or "bar" ) end 154 | assert_equal 1, mdata.length 155 | end 156 | 157 | def test_any_characters 158 | assert_match "f", "foobar" do any_char end 159 | chars = "#\t?/\<>.,;:\"'!@\#$%^&*()[]{} bar" 160 | assert_match chars, chars do some any_char end 161 | assert_no_match "\n" do any_char end 162 | end 163 | 164 | def test_word_characters 165 | assert_match "f", "foobar" do word_char end 166 | assert_match "foo_bar2", "### foo_bar2 baz bar" do some word_char end 167 | assert_no_match '?/\<>.,;:"\'!@#$%^&*()[]{}' do word_char end 168 | assert_no_match 'a,b,c,d' do 2.or_more word_char end 169 | end 170 | 171 | def test_digit_characters 172 | assert_match "1", "12345" do digit end 173 | assert_match "654321", "### abc654321baz123" do some digit end 174 | assert_no_match 'abc_DEF *&".' 
do digit end 175 | assert_no_match '1,2,3' do 2.or_more digit end 176 | end 177 | 178 | def test_letter_characters 179 | assert_match "f", "foobar" do letter end 180 | assert_match "foo", "### foo_bar2 baz bar" do some letter end 181 | assert_no_match '?/."()123456_' do letter end 182 | assert_no_match 'a1b2c3' do 2.or_more letter end 183 | end 184 | 185 | def test_whitespace_characters 186 | assert_match " ", " " do whitespace end 187 | assert_match " \t ", "### \t baz bar" do some whitespace end 188 | assert_no_match 'abc_123-+=().?!' do whitespace end 189 | assert_no_match 'a b c d' do 2.or_more whitespace end 190 | end 191 | 192 | def test_numeric_group 193 | assert_match_backreferences ["a", "a"], "abc" do group "a" end 194 | assert_match_backreferences ["ab", "a", "b"], "abc" do group( "a" ) + group( "b" ) end 195 | assert_match_backreferences ["ab", "ab", "b"], "abc" do group( "a" + group( "b" ) ) end 196 | assert_match_backreferences ["abcde", "abcde"], "abcde" do group( any word_char ) end 197 | assert_match_backreferences ["abcde", "e"], "abcde" do any group( word_char ) end 198 | assert_match_backreferences ["a", "a", nil], "abc" do group( "a" ).or group( "b" ) end 199 | end 200 | 201 | def test_named_groups 202 | assert_match_named_groups( { :my_a => "a" }, "abc" ) do group :my_a, "a" end 203 | assert_match_named_groups( { :my_b => "b" }, "abc" ) do "a" + group( :my_b, "b" ) + "c" end 204 | assert_match_named_groups( { :a => "a", :b => nil }, "abc" ) do group( :a, "a" ).or group( :b, "b" ) end 205 | assert_match_named_groups( { :a => "ab", :b => "ab" }, "abc" ) do group( :a, group( :b, "ab" ) ) end 206 | assert_match_named_groups( { :a => "ab", :b => "b" }, "abc" ) do group( :a, "a" + group( :b, "b" ) ) end 207 | assert_match_named_groups( { :word => "abcde" }, "abcde" ) do group( :word, any( word_char ) ) end 208 | assert_match_named_groups( { :letter => "e" }, "abcde" ) do any group( :letter, word_char ) end 209 | end 210 | 211 | def 
test_named_groups_block_syntax 212 | assert_match_named_groups( { :my_a => "a" }, "abc" ) do group :my_a do "a" end end 213 | assert_match_named_groups( { :my_a => "a" }, "abc" ) do group( :my_a ) { "a" } end 214 | assert_match_named_groups( { :full_match => "abc", :last_part => "bc" }, "abc" ) do 215 | group :full_match do 216 | "a" + group( :last_part ) do 217 | "bz".or "bc".or "b" 218 | end 219 | end 220 | end 221 | end 222 | 223 | def test_named_groups_cached 224 | assert( matches = "a".rmatch? do group :a, "a" end ) 225 | Rrrex::GroupMatch.any_instance.expects( :group_names ).times( 1 ).returns( [] ) 226 | matches[ :a ] 227 | matches[ :a ] 228 | matches.named_groups 229 | matches.named_groups 230 | end 231 | 232 | def test_to_string 233 | assert_equal( "(?:(?:a)|(?:b))", Rrrex.to_s do "a".or "b" end ) 234 | end 235 | 236 | def test_build_standalone 237 | expr = Rrrex.build do "a".or "b" end 238 | assert expr.match( "a" ) 239 | end 240 | 241 | def test_concat_standalone 242 | e1 = Rrrex.build do "a".or "x" end 243 | e2 = Rrrex.build do "b".or "y" end 244 | expr = e1 + e2 245 | assert expr.match( "ab" ) 246 | assert expr.match( "xy" ) 247 | assert expr.match( "ay" ) 248 | end 249 | 250 | def test_other_operators_accept_standalones 251 | expr = Rrrex.build do "x".or "y" end 252 | 253 | assert_match "x", "xyz" do "foo".or expr end 254 | 255 | assert_match "xy", "xy" do any expr end 256 | assert_match "xy", "xy" do some expr end 257 | 258 | assert_match "z", "xyz" do _not expr end 259 | assert_match "z", "xyz" do letter.not expr end 260 | 261 | assert_match "xx", "xxx" do 2.exactly expr end 262 | assert_match "xxx", "xxx" do 2.or_more expr end 263 | assert_match "xx", "xxx" do 2.or_less expr end 264 | assert_match "xxx", "xxx" do (2..4).of expr end 265 | 266 | assert_match_backreferences ["x", "x"], "xyz" do group expr end 267 | assert_match_named_groups( { :my_group => "x" }, "xyz" ) do group :my_group, expr end 268 | assert_match_named_groups( { :my_group => "x" 
}, "xyz" ) do group :my_group do expr end end 269 | end 270 | 271 | def test_standalone_provides_other_operators 272 | expr = Rrrex.build do "x".or "y" end 273 | assert_match "x", "xyz" do expr.or "foo" end 274 | assert_match "y", "xy" do expr.not "x" end 275 | end 276 | 277 | def assert_no_match( string, &block ) 278 | assert_nil( string.rmatch?( &block ) ) 279 | end 280 | 281 | def assert_match( expected, string, &block ) 282 | assert( matches = string.rmatch?( &block ) ) 283 | assert_equal expected, matches[0] 284 | end 285 | 286 | def assert_match_backreferences( expected, string, &block ) 287 | assert( matches = string.rmatch?( &block ) ) 288 | assert_equal expected, matches.to_a 289 | end 290 | 291 | def assert_match_named_groups( expected, string, &block ) 292 | assert( matches = string.rmatch?( &block ) ) 293 | assert_equal expected, matches.named_groups 294 | expected.each do |k,v| 295 | assert_equal v, matches[ k ] 296 | end 297 | end 298 | 299 | end 300 | --------------------------------------------------------------------------------