├── .gitignore
├── CHANGELOG
├── Gemfile
├── History.txt
├── LICENSE
├── README.rdoc
├── Rakefile
├── lib
├── to_regexp.rb
└── to_regexp
│ └── version.rb
├── test
├── helper.rb
└── test_to_regexp.rb
└── to_regexp.gemspec
/.gitignore:
--------------------------------------------------------------------------------
1 | *.gem
2 | .bundle
3 | Gemfile.lock
4 | pkg/*
5 | doc/
6 | .yardoc/
7 |
--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
1 | 0.2.1 / 2013-05-21
2 |
3 | * Bug fixes
4 |
5 | * Make sure (for example) '/foo/imn'.to_regexp(detect: true) is detected as a case-insensitive, multiline, no-encoding regexp
6 |
7 | 0.2.0 / 2013-02-13
8 |
9 | * Breaking changes
10 |
11 | * Strings are no longer stripped before conversion is attempted
12 | * :detect will make '' into nil and '//' into //
13 |
14 | 0.1.2 / 2013-02-13
15 |
16 | * Enhancements
17 |
18 | * Start keeping CHANGELOG!
19 | * add :detect option
20 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 |
3 | # Specify your gem's dependencies in to_regexp.gemspec
4 | gemspec
5 |
--------------------------------------------------------------------------------
/History.txt:
--------------------------------------------------------------------------------
1 | == 0.1.1 / 2012-02-22
2 |
3 | * Bug fixes
4 |
5 | * Fix edge case with Regexp.union(*ESCAPE_HTML.keys) seen in the wild with rack-1.2.5/lib/rack/utils.rb
6 |
7 | == 0.1.0 / yanked!
8 |
9 | * Enhancements
10 |
11 | * New :literal option. For example, 'foo'.to_regexp(:literal => true, :ignore_case => true)
12 | * Allow setting :ignore_case, :multiline, :extended as options passed to #to_regexp.
13 |
14 | == 0.0.3 / 2011-04-27
15 |
16 | * first production ready version!
17 |
18 | == 0.0.2 / yanked!
19 |
20 | == 0.0.1 / yanked!
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013 Seamus Abshere
2 |
3 | MIT License
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining
6 | a copy of this software and associated documentation files (the
7 | "Software"), to deal in the Software without restriction, including
8 | without limitation the rights to use, copy, modify, merge, publish,
9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 |
--------------------------------------------------------------------------------
/README.rdoc:
--------------------------------------------------------------------------------
1 | =to_regexp
2 |
3 | Basically a safe way to convert strings to regexps (with options).
4 |
5 | str = "/finalis(é)/im"
6 | old_way = eval(str) # not safe
7 | new_way = str.to_regexp # provided by this gem
8 | old_way == new_way # true
9 |
10 | You can also treat strings as literal regexps. These two are equivalent:
11 |
12 | '/foo/'.to_regexp #=> /foo/
13 | 'foo'.to_regexp(:literal => true) #=> /foo/
14 |
15 | If you need case insensitivity and you're using :literal, pass options like :ignore_case. These two are equivalent:
16 |
17 | '/foo/i'.to_regexp #=> /foo/i
18 | 'foo'.to_regexp(:literal => true, :ignore_case => true) #=> /foo/i
19 |
20 | You can get the arguments that would be passed to Regexp.new with #as_regexp. It returns an array, so splat it:
21 |
22 | '/foo/'.to_regexp == Regexp.new(*'/foo/'.as_regexp) # true
23 |
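24 | Finally, you can be lazier with :detect, which treats a string as a regexp only when it is wrapped in regexp delimiters and escapes it as literal text otherwise: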
25 |
26 | 'foo'.to_regexp(detect: true) #=> /foo/
27 | 'foo\b'.to_regexp(detect: true) #=> %r{foo\\b}
28 | '/foo\b/'.to_regexp(detect: true) #=> %r{foo\b}
29 | 'foo\b/'.to_regexp(detect: true) #=> %r{foo\\b/}
30 |
31 | Copyright 2012 Seamus Abshere
32 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
# Build/release tasks (build, install, release) provided by Bundler.
require 'bundler'
Bundler::GemHelper.install_tasks

require 'rake'
require 'rake/testtask'

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

task :default => :test

# yard is only a development dependency, so keep the other tasks (notably
# :test) usable when it is not installed; the doc task is then just absent.
begin
  require 'yard'
  YARD::Rake::YardocTask.new do |y|
    y.options << '--no-private'
  end
rescue LoadError
  warn '[to_regexp] yard is not available, skipping the documentation task'
end
18 |
--------------------------------------------------------------------------------
/lib/to_regexp.rb:
--------------------------------------------------------------------------------
# encoding: utf-8
module ToRegexp
  module Regexp
    # A Regexp is already a regexp, so it converts to itself.
    #
    # @return [::Regexp] the receiver, unchanged
    def to_regexp
      self
    end
  end

  module String
    # Option letters that may follow the closing delimiter, e.g. the "im" in "/foo/im".
    INLINE_OPTIONS = /[imxnesu]*/

    # Supported opening delimiter => matching closing delimiter.
    REGEXP_DELIMITERS = {
      '%r{' => '}',
      '/' => '/',
    }.freeze

    class << self
      # Whether +str+ has to be taken literally, i.e. it is NOT a complete
      # delimited regexp such as "/foo/i" or "%r{foo}".
      #
      # Uses the same parser as #as_regexp, so a string like "/" (which both
      # starts and ends with a delimiter but has no body) counts as literal.
      #
      # @param [::String] str
      # @return [true,false]
      def literal?(str)
        split_delimited(str).nil?
      end

      # Break a delimited regexp string into its body and inline option letters.
      #
      # @param [::String] str
      # @return [Array<::String>, nil] +[body, inline_options]+, or nil when
      #   +str+ is not wrapped in one of REGEXP_DELIMITERS
      def split_delimited(str)
        opening, closing = REGEXP_DELIMITERS.detect { |start, _| str.start_with?(start) }
        return unless opening

        # Delimiters are escaped so "{" and "}" are never read as quantifier
        # syntax; the m flag lets the body span several lines (common with /x).
        pattern = /\A#{::Regexp.escape(opening)}(.*)#{::Regexp.escape(closing)}(#{INLINE_OPTIONS})\z/m
        match = pattern.match(str)
        match && [match[1], match[2]]
      end
    end

    # Get a regexp back
    #
    # Without :literal or :detect, `"foo".to_regexp` will return nil.
    #
    # @param [optional, Hash] options
    # @option options [true,false] :literal Treat meta characters and other regexp codes as just text; always return a regexp
    # @option options [true,false] :detect If string starts and ends with valid regexp delimiters, treat it as a regexp; otherwise, interpret it literally
    # @option options [true,false] :ignore_case /foo/i
    # @option options [true,false] :multiline /foo/m
    # @option options [true,false] :extended /foo/x
    # @option options [true,false] :lang /foo/[nesu]
    # @return [::Regexp, nil] nil when the string cannot be converted
    # @raise [::ArgumentError] if +options+ is not a Hash
    def to_regexp(options = {})
      args = as_regexp(options)
      return unless args

      source, flags, lang = args
      return ::Regexp.new(source, flags) if lang.nil?
      # Ruby 1.8 has no Regexp::NOENCODING; there the language code is still
      # handed over as the third argument.
      return ::Regexp.new(source, flags, lang) unless defined?(::Regexp::NOENCODING)

      # Newer Rubies reject a third argument to Regexp.new. The only code the
      # three-argument form acted on was one starting with "n" (no encoding),
      # so express that through the flags and a binary source instead.
      if lang[0, 1].downcase == 'n'
        flags |= ::Regexp::NOENCODING
        source = source.dup.force_encoding('ASCII-8BIT')
      end
      ::Regexp.new(source, flags)
    end

    # Return arguments that can be passed to `Regexp.new`
    #
    # The +options+ hash is only read, never modified, so it can safely be
    # reused across calls.
    #
    # @see to_regexp
    # @return [Array, nil] +[source, flags]+ or +[source, flags, lang]+; nil
    #   when the string cannot be converted
    # @raise [::ArgumentError] if +options+ is not a Hash
    def as_regexp(options = {})
      unless options.is_a?(::Hash)
        raise ::ArgumentError, "[to_regexp] Options must be a Hash"
      end
      str = self

      return if options[:detect] && str == ''

      ignore_case = options[:ignore_case]
      multiline = options[:multiline]
      extended = options[:extended]
      lang = options[:lang]

      delimited = options[:literal] ? nil : ToRegexp::String.split_delimited(str)
      if delimited
        content, inline_options = delimited
        content = content.gsub('\\/', '/')
        ignore_case = true if inline_options.include?('i')
        multiline = true if inline_options.include?('m')
        extended = true if inline_options.include?('x')
        # 'n' = none, 'e' = EUC, 's' = SJIS, 'u' = UTF-8
        lang = inline_options.scan(/[nesu]/).join
      elsif options[:literal] || options[:detect]
        content = ::Regexp.escape(str)
      else
        return
      end

      flags = 0
      flags |= ::Regexp::IGNORECASE if ignore_case
      flags |= ::Regexp::MULTILINE if multiline
      flags |= ::Regexp::EXTENDED if extended

      lang ||= ''
      if ::RUBY_VERSION > '1.9' && lang.include?('u')
        lang = lang.delete('u')
      end

      lang.empty? ? [content, flags] : [content, flags, lang]
    end
  end
end

::String.send :include, ::ToRegexp::String
::Regexp.send :include, ::ToRegexp::Regexp
89 |
--------------------------------------------------------------------------------
/lib/to_regexp/version.rb:
--------------------------------------------------------------------------------
module ToRegexp
  # Gem version; to_regexp.gemspec reads this constant. Frozen so it cannot
  # be modified in place at runtime.
  VERSION = '0.2.1'.freeze
end
4 |
--------------------------------------------------------------------------------
/test/helper.rb:
--------------------------------------------------------------------------------
# Shared bootstrap for the test suite: loads Bundler and test/unit, then puts
# the test directory and lib/ on the load path before requiring the library.
require 'rubygems' if RUBY_VERSION < '1.9'
require 'bundler'
Bundler.setup
require 'test/unit'

test_dir = File.dirname(__FILE__)
# lib/ ends up first on the load path, followed by the test directory.
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'), test_dir)
require 'to_regexp'

class Test::Unit::TestCase
end
12 |
--------------------------------------------------------------------------------
/test/test_to_regexp.rb:
--------------------------------------------------------------------------------
1 | # encoding: UTF-8
2 | require 'helper'
3 |
# Exercises String#to_regexp / String#as_regexp / Regexp#to_regexp, plus the
# core methods (Regexp.try_convert, Regexp.union, URI.regexp) whose results
# the tests below expect to stay sane once String responds to #to_regexp.
class TestToRegexp < Test::Unit::TestCase
  def test_000_versus_eval_ascii
    str = "/finalis(e)/im"
    old_way = eval(str)
    new_way = str.to_regexp
    assert_equal old_way, new_way
  end

  def test_000a_versus_eval_utf8
    str = "/finalis(é)/im"
    old_way = eval(str)
    new_way = str.to_regexp
    assert_equal old_way, new_way
  end

  def test_001_utf8
    assert_equal 'ë', '/(ë)/'.to_regexp.match('Citroën').captures[0]
  end

  def test_002_multiline
    assert_nil '/foo.*(bar)/'.to_regexp.match("foo\n\nbar")
    assert_equal 'bar', '/foo.*(bar)/m'.to_regexp.match("foo\n\nbar").captures[0]
  end

  def test_003_ignore_case
    assert_nil '/(FOO)/'.to_regexp.match('foo')
    assert_equal 'foo', '/(FOO)/i'.to_regexp.match('foo').captures[0]
  end

  def test_004_percentage_r_notation
    assert_equal '/', '%r{(/)}'.to_regexp.match('/').captures[0]
  end

  def test_005_multiline_and_ignore_case
    assert_equal 'bar', '/FOO.*(BAR)/mi'.to_regexp.match("foo\n\nbar").captures[0]
  end

  def test_006_cant_fix_garbled_input
    if RUBY_VERSION >= '1.9'
      garbled = 'finalisé'.force_encoding('ASCII-8BIT') # like if it was misinterpreted
      assert_raises(Encoding::CompatibilityError) do
        '/finalis(é)/'.to_regexp.match(garbled)
      end
    else # not applicable to ruby 1.8
      garbled = 'finalisé'
      assert_nothing_raised do
        '/finalis(é)/'.to_regexp.match(garbled)
      end
    end
  end

  def test_007_possible_garbled_input_fix_using_manfreds_gem
    if RUBY_VERSION >= '1.9'
      require 'ensure/encoding'
      garbled = 'finalisé'.force_encoding('ASCII-8BIT') # like if it was misinterpreted
      assert_equal 'é', '/finalis(é)/'.to_regexp.match(garbled.ensure_encoding('UTF-8')).captures[0]
    else # not applicable to ruby 1.8
      garbled = 'finalisé'
      assert_equal 'é', '/finalis(é)/'.to_regexp.match(garbled).captures[0]
    end
  end

  def test_008_as_regexp
    str = '/finalis(é)/in'
    assert_equal ['finalis(é)', ::Regexp::IGNORECASE, 'n'], str.as_regexp
    assert_equal Regexp.new(*str.as_regexp), str.to_regexp
  end

  def test_009_ruby_19_splat
    assert_nil 'hi'.to_regexp
  end

  def test_010_regexp_to_regexp
    a = /foo/
    assert_equal a, a.to_regexp
  end

  def test_011_ignore_case_option
    assert_nil '/(FOO)/'.to_regexp(:ignore_case => false).match('foo')
    assert_equal 'foo', '/(FOO)/'.to_regexp(:ignore_case => true).match('foo').captures[0]
    assert_equal 'foo', '/(FOO)/i'.to_regexp(:ignore_case => true).match('foo').captures[0]
  end

  def test_012_literal_option
    assert '/(FOO)/'.to_regexp(:literal => true).match('hello/(FOO)/there')
  end

  def test_013_combine_literal_and_ignore_case
    assert '/(FOO)/'.to_regexp(:literal => true, :ignore_case => true).match('hello/(foo)/there')

    # can't use inline options obviously
    assert_nil '/(FOO)/i'.to_regexp(:literal => true).match('hello/(foo)/there')
    assert '/(FOO)/i'.to_regexp(:literal => true).match('hello/(FOO)/ithere')
  end

  def test_014_try_convert
    if RUBY_VERSION >= '1.9'
      assert_equal(/foo/i, Regexp.try_convert('/foo/i'))
      assert_equal(//, Regexp.try_convert('//'))
    end
  end

  # seen in the wild - from rack-1.2.5/lib/rack/utils.rb - converted to array to preserve order in 1.8.7
  ESCAPE_HTML_KEYS = [
    "&",
    "<",
    ">",
    "'",
    '"',
    "/"
  ].freeze

  def test_015_union
    assert_equal(/penzance/, Regexp.union('penzance'))
    assert_equal(/skiing|sledding/, Regexp.union('skiing', 'sledding'))
    assert_equal(/skiing|sledding/, Regexp.union(['skiing', 'sledding']))
    assert_equal(/(?-mix:dogs)|(?i-mx:cats)/, Regexp.union(/dogs/, /cats/i))
    assert_equal(/(?-mix:dogs)|(?i-mx:cats)/, Regexp.union('/dogs/', /cats/i))
    assert_equal(/(?-mix:dogs)|(?i-mx:cats)/, Regexp.union(/dogs/, '/cats/i'))
    assert_equal %r{&|<|>|'|"|\/}.inspect, Regexp.union(*ESCAPE_HTML_KEYS).inspect
  end

  def test_016_detect
    assert_nil ''.to_regexp(:detect => true)
    assert_equal(//, '//'.to_regexp(:detect => true))
    assert_equal(/foo/, 'foo'.to_regexp(:detect => true))
    assert_equal %r{foo\\b}, 'foo\b'.to_regexp(:detect => true)
    assert_equal %r{foo\b}, '/foo\b/'.to_regexp(:detect => true)
    assert_equal %r{foo\\b/}, 'foo\b/'.to_regexp(:detect => true)
    assert_equal %r{foo\b}i, '/foo\b/i'.to_regexp(:detect => true)
    assert_equal %r{foo\\b/i}, 'foo\b/i'.to_regexp(:detect => true)
    assert_equal(/FOO.*(BAR)/mi, '/FOO.*(BAR)/mi'.to_regexp(:detect => true))
  end

  # https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L474 "test_union2"
  def test_mri_union2
    assert_equal(/(?!)/, Regexp.union)
    assert_equal(/foo/, Regexp.union(/foo/))
    assert_equal(/foo/, Regexp.union([/foo/]))
    assert_equal(/\t/, Regexp.union("\t"))
    assert_equal(/(?-mix:\u3042)|(?-mix:\u3042)/, Regexp.union(/\u3042/, /\u3042/))
    assert_equal("\u3041", "\u3041"[Regexp.union(/\u3042/, "\u3041")])
  end

  # https://github.com/ruby/ruby/blob/trunk/test/ruby/test_regexp.rb#L464 "test_try_convert"
  def test_mri_try_convert
    assert_equal(/re/, Regexp.try_convert(/re/))
    assert_nil(Regexp.try_convert("re"))

    o = Object.new
    assert_nil(Regexp.try_convert(o))
    def o.to_regexp
      /foo/
    end
    assert_equal(/foo/, Regexp.try_convert(o))
  end

  # https://github.com/jruby/jruby/blob/master/spec/ruby/core/regexp/try_convert_spec.rb#L5
  def test_jruby_returns_argument_if_given_regexp
    assert_equal(/foo/s, Regexp.try_convert(/foo/s))
  end

  # https://github.com/jruby/jruby/blob/master/spec/ruby/core/regexp/try_convert_spec.rb#L9
  def test_jruby_returns_nil_if_given_arg_cant_be_converted
    ['', 'glark', [], Object.new, :pat].each do |arg|
      assert_nil Regexp.try_convert(arg)
    end
  end

  # https://github.com/jruby/jruby/blob/master/test/externals/ruby1.9/uri/test_common.rb#L32
  def test_jruby_uri_common_regexp
    # URI is not loaded by anything visible in this file or the helper, so require it here
    require 'uri'
    assert_instance_of Regexp, URI.regexp
    assert_instance_of Regexp, URI.regexp(['http'])
    assert_equal URI.regexp, URI.regexp
    assert_equal 'http://', 'x http:// x'.slice(URI.regexp)
    assert_equal 'http://', 'x http:// x'.slice(URI.regexp(['http']))
    assert_equal 'http://', 'x http:// x ftp://'.slice(URI.regexp(['http']))
    assert_nil 'http://'.slice(URI.regexp([]))
    assert_nil ''.slice(URI.regexp)
    assert_nil 'xxxx'.slice(URI.regexp)
    assert_nil ':'.slice(URI.regexp)
    assert_equal 'From:', 'From:'.slice(URI.regexp)
  end

  # https://github.com/jruby/jruby/blob/master/spec/ruby/core/regexp/union_spec.rb#L14
  def test_jruby_quotes_string_arguments
    assert_equal(/n|\./, Regexp.union("n", "."))
  end
end
192 |
--------------------------------------------------------------------------------
/to_regexp.gemspec:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | require File.expand_path("../lib/to_regexp/version", __FILE__)
3 |
# Gem metadata for to_regexp. The file lists are taken from git, so the gem
# must be built from a git checkout.
Gem::Specification.new do |s|
  s.name        = "to_regexp"
  s.version     = ToRegexp::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Seamus Abshere"]
  s.email       = ["seamus@abshere.net"]
  s.homepage    = "https://github.com/seamusabshere/to_regexp"
  s.summary     = %q{Provides String#to_regexp}
  s.description = %q{Provides String#to_regexp, for example if you want to make regexps out of a CSV you just imported.}
  # Matches the LICENSE file shipped with the gem.
  s.license     = "MIT"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency 'ensure-encoding'
  s.add_development_dependency 'yard'
end
24 |
--------------------------------------------------------------------------------