├── .gitignore ├── CHANGELOG ├── Gemfile ├── History.txt ├── LICENSE ├── README.rdoc ├── Rakefile ├── lib ├── to_regexp.rb └── to_regexp │ └── version.rb ├── test ├── helper.rb └── test_to_regexp.rb └── to_regexp.gemspec /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | .bundle 3 | Gemfile.lock 4 | pkg/* 5 | doc/ 6 | .yardoc/ 7 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | 0.2.1 / 2013-05-21 2 | 3 | * Bug fixes 4 | 5 | * Make sure (for example) '/foo/imn'.to_regexp(detect: true) is detected as a case-insensitive, multiline, no-encoding regexp 6 | 7 | 0.2.0 / 2013-02-13 8 | 9 | * Breaking changes 10 | 11 | * Strings are no longer stripped before conversion is attempted 12 | * :detect will make '' into nil and '//' into // 13 | 14 | 0.1.2 / 2013-02-13 15 | 16 | * Enhancements 17 | 18 | * Start keeping CHANGELOG! 19 | * add :detect option 20 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in to_regexp.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /History.txt: -------------------------------------------------------------------------------- 1 | == 0.1.1 / 2012-02-22 2 | 3 | * Bug fixes 4 | 5 | * Fix edge case with Regexp.union(*ESCAPE_HTML.keys) seen in the wild with rack-1.2.5/lib/rack/utils.rb 6 | 7 | == 0.1.0 / yanked! 8 | 9 | * Enhancements 10 | 11 | * New :literal option. For example, 'foo'.to_regexp(:literal => true, :ignore_case => true) 12 | * Allow setting :ignore_case, :multiline, :extended as options passed to #to_regexp. 13 | 14 | == 0.0.3 / 2011-04-27 15 | 16 | * first production ready version! 
17 | 18 | == 0.0.2 / yanked! 19 | 20 | == 0.0.1 / yanked! 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Seamus Abshere 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.rdoc: -------------------------------------------------------------------------------- 1 | =to_regexp 2 | 3 | Basically a safe way to convert strings to regexps (with options). 4 | 5 | str = "/finalis(é)/im" 6 | old_way = eval(str) # not safe 7 | new_way = str.to_regexp # provided by this gem 8 | old_way == new_way # true 9 | 10 | You can also treat strings as literal regexps. 
These two are equivalent: 11 | 12 | '/foo/'.to_regexp #=> /foo/ 13 | 'foo'.to_regexp(:literal => true) #=> /foo/ 14 | 15 | If you need case insensitivity and you're using :literal, pass options like :ignore_case. These two are equivalent: 16 | 17 | '/foo/i'.to_regexp #=> /foo/i 18 | 'foo'.to_regexp(:literal => true, :ignore_case => true) #=> /foo/i 19 | 20 | You can get the arguments to pass to Regexp.new with #as_regexp (it returns an array, so splat it): 21 | 22 | '/foo/'.to_regexp == Regexp.new(*'/foo/'.as_regexp) # true 23 | 24 | Finally, you can be lazier using :detect: 25 | 26 | 'foo'.to_regexp(detect: true) #=> /foo/ 27 | 'foo\b'.to_regexp(detect: true) #=> %r{foo\\b} 28 | '/foo\b/'.to_regexp(detect: true) #=> %r{foo\b} 29 | 'foo\b/'.to_regexp(detect: true) #=> %r{foo\\b/} 30 | 31 | Copyright 2012 Seamus Abshere 32 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler' 2 | Bundler::GemHelper.install_tasks 3 | 4 | require 'rake' 5 | require 'rake/testtask' 6 | Rake::TestTask.new(:test) do |test| 7 | test.libs << 'lib' << 'test' 8 | test.pattern = 'test/**/test_*.rb' 9 | test.verbose = true 10 | end 11 | 12 | task :default => :test 13 | 14 | require 'yard' 15 | YARD::Rake::YardocTask.new do |y| 16 | y.options << '--no-private' 17 | end 18 | -------------------------------------------------------------------------------- /lib/to_regexp.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | module ToRegexp 3 | module Regexp 4 | def to_regexp 5 | self 6 | end 7 | end 8 | 9 | module String 10 | class << self 11 | def literal?(str) 12 | REGEXP_DELIMITERS.none? 
{ |s, e| str.start_with?(s) and str =~ /#{e}#{INLINE_OPTIONS}\z/ } 13 | end 14 | end 15 | 16 | INLINE_OPTIONS = /[imxnesu]*/ 17 | REGEXP_DELIMITERS = { 18 | '%r{' => '}', 19 | '/' => '/', 20 | } 21 | 22 | # Get a regexp back 23 | # 24 | # Without :literal or :detect, `"foo".to_regexp` will return nil. 25 | # 26 | # @param [optional, Hash] options 27 | # @option options [true,false] :literal Treat meta characters and other regexp codes as just text; always return a regexp 28 | # @option options [true,false] :detect If string starts and ends with valid regexp delimiters, treat it as a regexp; otherwise, interpret it literally 29 | # @option options [true,false] :ignore_case /foo/i 30 | # @option options [true,false] :multiline /foo/m 31 | # @option options [true,false] :extended /foo/x 32 | # @option options [true,false] :lang /foo/[nesu] 33 | def to_regexp(options = {}) 34 | if args = as_regexp(options) 35 | ::Regexp.new(*args) 36 | end 37 | end 38 | 39 | # Return arguments that can be passed to `Regexp.new` 40 | # @see to_regexp 41 | def as_regexp(options = {}) 42 | unless options.is_a?(::Hash) 43 | raise ::ArgumentError, "[to_regexp] Options must be a Hash" 44 | end 45 | str = self 46 | 47 | return if options[:detect] and str == '' 48 | 49 | if options[:literal] or (options[:detect] and ToRegexp::String.literal?(str)) 50 | content = ::Regexp.escape str 51 | elsif delim_set = REGEXP_DELIMITERS.detect { |k, _| str.start_with?(k) } 52 | delim_start, delim_end = delim_set 53 | /\A#{delim_start}(.*)#{delim_end}(#{INLINE_OPTIONS})\z/u =~ str 54 | content = $1 55 | inline_options = $2 56 | return unless content.is_a?(::String) 57 | content.gsub! 
'\\/', '/' 58 | if inline_options 59 | options[:ignore_case] = true if inline_options.include?('i') 60 | options[:multiline] = true if inline_options.include?('m') 61 | options[:extended] = true if inline_options.include?('x') 62 | # 'n', 'N' = none, 'e', 'E' = EUC, 's', 'S' = SJIS, 'u', 'U' = UTF-8 63 | options[:lang] = inline_options.scan(/[nesu]/i).join.downcase 64 | end 65 | else 66 | return 67 | end 68 | 69 | ignore_case = options[:ignore_case] ? ::Regexp::IGNORECASE : 0 70 | multiline = options[:multiline] ? ::Regexp::MULTILINE : 0 71 | extended = options[:extended] ? ::Regexp::EXTENDED : 0 72 | lang = options[:lang] || '' 73 | if ::RUBY_VERSION > '1.9' and lang.include?('u') 74 | lang = lang.delete 'u' 75 | end 76 | 77 | if lang.empty? 78 | [ content, (ignore_case|multiline|extended) ] 79 | else 80 | [ content, (ignore_case|multiline|extended), lang ] 81 | end 82 | end 83 | end 84 | 85 | end 86 | 87 | ::String.send :include, ::ToRegexp::String 88 | ::Regexp.send :include, ::ToRegexp::Regexp 89 | -------------------------------------------------------------------------------- /lib/to_regexp/version.rb: -------------------------------------------------------------------------------- 1 | module ToRegexp 2 | VERSION = '0.2.1' 3 | end 4 | -------------------------------------------------------------------------------- /test/helper.rb: -------------------------------------------------------------------------------- 1 | unless RUBY_VERSION >= '1.9' 2 | require 'rubygems' 3 | end 4 | require 'bundler' 5 | Bundler.setup 6 | require 'test/unit' 7 | $LOAD_PATH.unshift(File.dirname(__FILE__)) 8 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 9 | require 'to_regexp' 10 | class Test::Unit::TestCase 11 | end 12 | -------------------------------------------------------------------------------- /test/test_to_regexp.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | require 'helper' 3 | 4 | class 
TestToRegexp < Test::Unit::TestCase 5 | def test_000_versus_eval_ascii 6 | str = "/finalis(e)/im" 7 | old_way = eval(str) 8 | new_way = str.to_regexp 9 | assert_equal old_way, new_way 10 | end 11 | 12 | def test_000a_versus_eval_utf8 13 | str = "/finalis(é)/im" 14 | old_way = eval(str) 15 | new_way = str.to_regexp 16 | assert_equal old_way, new_way 17 | end 18 | 19 | def test_001_utf8 20 | assert_equal 'ë', '/(ë)/'.to_regexp.match('Citroën').captures[0] 21 | end 22 | 23 | def test_002_multiline 24 | assert_equal nil, '/foo.*(bar)/'.to_regexp.match("foo\n\nbar") 25 | assert_equal 'bar', '/foo.*(bar)/m'.to_regexp.match("foo\n\nbar").captures[0] 26 | end 27 | 28 | def test_003_ignore_case 29 | assert_equal nil, '/(FOO)/'.to_regexp.match('foo') 30 | assert_equal 'foo', '/(FOO)/i'.to_regexp.match('foo').captures[0] 31 | end 32 | 33 | def test_004_percentage_r_notation 34 | assert_equal '/', '%r{(/)}'.to_regexp.match('/').captures[0] 35 | end 36 | 37 | def test_005_multiline_and_ignore_case 38 | assert_equal 'bar', '/FOO.*(BAR)/mi'.to_regexp.match("foo\n\nbar").captures[0] 39 | end 40 | 41 | def test_006_cant_fix_garbled_input 42 | if RUBY_VERSION >= '1.9' 43 | garbled = 'finalisé'.force_encoding('ASCII-8BIT') # like if it was misinterpreted 44 | assert_raises(Encoding::CompatibilityError) do 45 | '/finalis(é)/'.to_regexp.match(garbled) 46 | end 47 | else # not applicable to ruby 1.8 48 | garbled = 'finalisé' 49 | assert_nothing_raised do 50 | '/finalis(é)/'.to_regexp.match(garbled) 51 | end 52 | end 53 | end 54 | 55 | def test_007_possible_garbled_input_fix_using_manfreds_gem 56 | if RUBY_VERSION >= '1.9' 57 | require 'ensure/encoding' 58 | garbled = 'finalisé'.force_encoding('ASCII-8BIT') # like if it was misinterpreted 59 | assert_equal 'é', '/finalis(é)/'.to_regexp.match(garbled.ensure_encoding('UTF-8')).captures[0] 60 | else # not applicable to ruby 1.8 61 | garbled = 'finalisé' 62 | assert_equal 'é', '/finalis(é)/'.to_regexp.match(garbled).captures[0] 63 | end 64 | 
end 65 | 66 | def test_008_as_regexp 67 | str = '/finalis(é)/in' 68 | assert_equal ['finalis(é)', ::Regexp::IGNORECASE, 'n'], str.as_regexp 69 | assert_equal Regexp.new(*str.as_regexp), str.to_regexp 70 | end 71 | 72 | def test_009_ruby_19_splat 73 | assert_equal nil, 'hi'.to_regexp 74 | end 75 | 76 | def test_010_regexp_to_regexp 77 | a = /foo/ 78 | assert_equal a, a.to_regexp 79 | end 80 | 81 | def test_011_ignore_case_option 82 | assert_equal nil, '/(FOO)/'.to_regexp(:ignore_case => false).match('foo') 83 | assert_equal nil, '/(FOO)/'.to_regexp(:ignore_case => false).match('foo') 84 | assert_equal 'foo', '/(FOO)/'.to_regexp(:ignore_case => true).match('foo').captures[0] 85 | assert_equal 'foo', '/(FOO)/i'.to_regexp(:ignore_case => true).match('foo').captures[0] 86 | end 87 | 88 | def test_012_literal_option 89 | assert '/(FOO)/'.to_regexp(:literal => true).match('hello/(FOO)/there') 90 | end 91 | 92 | def test_013_combine_literal_and_ignore_case 93 | assert '/(FOO)/'.to_regexp(:literal => true, :ignore_case => true).match('hello/(foo)/there') 94 | 95 | # can't use inline options obviously 96 | assert_equal nil, '/(FOO)/i'.to_regexp(:literal => true).match('hello/(foo)/there') 97 | assert '/(FOO)/i'.to_regexp(:literal => true).match('hello/(FOO)/ithere') 98 | end 99 | 100 | def test_014_try_convert 101 | if RUBY_VERSION >= '1.9' 102 | assert_equal /foo/i, Regexp.try_convert('/foo/i') 103 | assert_equal //, Regexp.try_convert('//') 104 | end 105 | end 106 | 107 | # seen in the wild - from rack-1.2.5/lib/rack/utils.rb - converted to array to preserve order in 1.8.7 108 | ESCAPE_HTML_KEYS = [ 109 | "&", 110 | "<", 111 | ">", 112 | "'", 113 | '"', 114 | "/" 115 | ] 116 | def test_015_union 117 | assert_equal /penzance/, Regexp.union('penzance') 118 | assert_equal /skiing|sledding/, Regexp.union('skiing', 'sledding') 119 | assert_equal /skiing|sledding/, Regexp.union(['skiing', 'sledding']) 120 | assert_equal /(?-mix:dogs)|(?i-mx:cats)/, Regexp.union(/dogs/, /cats/i) 
121 | assert_equal /(?-mix:dogs)|(?i-mx:cats)/, Regexp.union('/dogs/', /cats/i) 122 | assert_equal /(?-mix:dogs)|(?i-mx:cats)/, Regexp.union(/dogs/, '/cats/i') 123 | assert_equal %r{&|<|>|'|"|\/}.inspect, Regexp.union(*ESCAPE_HTML_KEYS).inspect 124 | end 125 | 126 | def test_016_detect 127 | assert_equal nil, ''.to_regexp(:detect => true) 128 | assert_equal //, '//'.to_regexp(:detect => true) 129 | assert_equal /foo/, 'foo'.to_regexp(:detect => true) 130 | assert_equal %r{foo\\b}, 'foo\b'.to_regexp(:detect => true) 131 | assert_equal %r{foo\b}, '/foo\b/'.to_regexp(:detect => true) 132 | assert_equal %r{foo\\b/}, 'foo\b/'.to_regexp(:detect => true) 133 | assert_equal %r{foo\b}i, '/foo\b/i'.to_regexp(:detect => true) 134 | assert_equal %r{foo\\b/i}, 'foo\b/i'.to_regexp(:detect => true) 135 | assert_equal /FOO.*(BAR)/mi, '/FOO.*(BAR)/mi'.to_regexp(:detect => true) 136 | end 137 | 138 | # https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L474 "test_union2" 139 | def test_mri_union2 140 | assert_equal(/(?!)/, Regexp.union) 141 | assert_equal(/foo/, Regexp.union(/foo/)) 142 | assert_equal(/foo/, Regexp.union([/foo/])) 143 | assert_equal(/\t/, Regexp.union("\t")) 144 | assert_equal(/(?-mix:\u3042)|(?-mix:\u3042)/, Regexp.union(/\u3042/, /\u3042/)) 145 | assert_equal("\u3041", "\u3041"[Regexp.union(/\u3042/, "\u3041")]) 146 | end 147 | 148 | # https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L464 "test_try_convert" 149 | def test_mri_try_convert 150 | assert_equal(/re/, Regexp.try_convert(/re/)) 151 | assert_nil(Regexp.try_convert("re")) 152 | 153 | o = Object.new 154 | assert_nil(Regexp.try_convert(o)) 155 | def o.to_regexp() /foo/ end 156 | assert_equal(/foo/, Regexp.try_convert(o)) 157 | end 158 | 159 | # https://github.com/jruby/jruby/blob/master/spec/ruby/core/regexp/try_convert_spec.rb#L5 160 | def test_jruby_returns_argument_if_given_regexp 161 | assert_equal /foo/s, Regexp.try_convert(/foo/s) 162 | end 163 | 164 | # 
https://github.com/jruby/jruby/blob/master/spec/ruby/core/regexp/try_convert_spec.rb#L9 165 | def test_jruby_returns_nil_if_given_arg_cant_be_converted 166 | ['', 'glark', [], Object.new, :pat].each do |arg| 167 | assert_equal nil, Regexp.try_convert(arg) 168 | end 169 | end 170 | 171 | # https://github.com/jruby/jruby/blob/master/test/externals/ruby1.9/uri/test_common.rb#L32 172 | def test_jruby_uri_common_regexp 173 | assert_instance_of Regexp, URI.regexp 174 | assert_instance_of Regexp, URI.regexp(['http']) 175 | assert_equal URI.regexp, URI.regexp 176 | assert_equal 'http://', 'x http:// x'.slice(URI.regexp) 177 | assert_equal 'http://', 'x http:// x'.slice(URI.regexp(['http'])) 178 | assert_equal 'http://', 'x http:// x ftp://'.slice(URI.regexp(['http'])) 179 | assert_equal nil, 'http://'.slice(URI.regexp([])) 180 | assert_equal nil, ''.slice(URI.regexp) 181 | assert_equal nil, 'xxxx'.slice(URI.regexp) 182 | assert_equal nil, ':'.slice(URI.regexp) 183 | assert_equal 'From:', 'From:'.slice(URI.regexp) 184 | end 185 | 186 | # https://github.com/jruby/jruby/blob/master/spec/ruby/core/regexp/union_spec.rb#L14 187 | def test_jruby_quotes_string_arguments 188 | assert_equal /n|\./, Regexp.union("n", ".") 189 | end 190 | 191 | end 192 | -------------------------------------------------------------------------------- /to_regexp.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | require File.expand_path("../lib/to_regexp/version", __FILE__) 3 | 4 | Gem::Specification.new do |s| 5 | s.name = "to_regexp" 6 | s.version = ToRegexp::VERSION 7 | s.platform = Gem::Platform::RUBY 8 | s.authors = ["Seamus Abshere"] 9 | s.email = ["seamus@abshere.net"] 10 | s.homepage = "https://github.com/seamusabshere/to_regexp" 11 | s.summary = %q{Provides String#to_regexp} 12 | s.description = %q{Provides String#to_regexp, for example if you want to make regexps out of a CSV you just imported.} 13 | 14 | 
s.rubyforge_project = "to_regexp" 15 | 16 | s.files = `git ls-files`.split("\n") 17 | s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") 18 | s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } 19 | s.require_paths = ["lib"] 20 | 21 | s.add_development_dependency 'ensure-encoding' 22 | s.add_development_dependency 'yard' 23 | end 24 | --------------------------------------------------------------------------------