├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ ├── precompile-gem.yml │ ├── docs.yml │ ├── dependencies.yml │ └── tests.yml ├── .rspec ├── .yardopts ├── .gitignore ├── scripts ├── test-gem-install └── update-dependencies ├── Gemfile ├── dependencies.yml ├── lib ├── re2 │ ├── version.rb │ ├── scanner.rb │ ├── string.rb │ └── regexp.rb └── re2.rb ├── spec ├── spec_helper.rb ├── kernel_spec.rb ├── re2 │ ├── string_spec.rb │ ├── set_spec.rb │ ├── scanner_spec.rb │ ├── match_data_spec.rb │ └── regexp_spec.rb └── re2_spec.rb ├── LICENSE.txt ├── re2.gemspec ├── ext └── re2 │ ├── recipes.rb │ └── extconf.rb ├── Rakefile ├── LICENSE-DEPENDENCIES.txt ├── README.md └── CHANGELOG.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: mudge 2 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --require spec_helper 3 | --format documentation 4 | -------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --markup markdown 2 | --title "RE2: Ruby bindings to RE2" 3 | ext/**/*.cc 4 | lib/**/*.rb 5 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Makefile 2 | mkmf.log 3 | *.bundle 4 | *.o 5 | *.so 6 | conftest.dSYM 7 | tmp 8 | *.gem 9 | .yardoc 10 | doc 11 | Gemfile.lock 12 | ports/ 13 | pkg/ 14 | 
-------------------------------------------------------------------------------- /scripts/test-gem-install: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -eu 4 | 5 | gem install --no-document pkg/*.gem -- "$@" 6 | cd "$(dirname "$(gem which re2)")/.." 7 | bundle install 8 | bundle exec rake spec 9 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source "https://rubygems.org" 4 | 5 | gemspec 6 | 7 | gem "rake", "> 12.3.2" 8 | 9 | group :memcheck, optional: true do 10 | gem "ruby_memcheck" 11 | end 12 | -------------------------------------------------------------------------------- /dependencies.yml: -------------------------------------------------------------------------------- 1 | --- 2 | libre2: 3 | version: '2025-11-05' 4 | sha256: 87f6029d2f6de8aa023654240a03ada90e876ce9a4676e258dd01ea4c26ffd67 5 | abseil: 6 | version: '20250814.1' 7 | sha256: 1692f77d1739bacf3f94337188b78583cf09bab7e420d2dc6c5605a4f86785a1 8 | -------------------------------------------------------------------------------- /lib/re2/version.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # re2 (https://github.com/mudge/re2) 4 | # Ruby bindings to RE2, a "fast, safe, thread-friendly alternative to 5 | # backtracking regular expression engines like those used in PCRE, Perl, and 6 | # Python". 
module RE2
  # Ruby-level additions to the scanner: makes it Enumerable on top of #scan.
  # NOTE(review): #scan is not defined in this file; presumably it is supplied
  # by the native extension — confirm against ext/re2.
  class Scanner
    include Enumerable

    # Repeatedly call #scan, handing every truthy result to the block, and stop
    # as soon as #scan returns nil (or false).
    #
    # @yieldparam matches [Object] the value returned by one call to #scan
    # @return [Enumerator] an enumerator over #each when no block is given
    # @return [nil] once #scan is exhausted, when a block is given
    def each
      return to_enum(:each) unless block_given?

      # Prime the loop with the first result, then fetch the next one only
      # after the current one has been yielded.
      matches = scan
      while matches
        yield matches
        matches = scan
      end
    end
  end
end
29 | config.order = :random 30 | Kernel.srand config.seed 31 | end 32 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | build: 11 | runs-on: "ubuntu-latest" 12 | steps: 13 | - uses: actions/checkout@v5 14 | - uses: ruby/setup-ruby@v1 15 | with: 16 | ruby-version: "3.4" 17 | - run: gem install --no-document yard redcarpet 18 | - run: yardoc 19 | - uses: actions/upload-pages-artifact@v4 20 | with: 21 | path: "doc/" 22 | 23 | deploy: 24 | needs: build 25 | permissions: 26 | pages: write 27 | id-token: write 28 | environment: 29 | name: github-pages 30 | url: ${{ steps.deployment.outputs.page_url }} 31 | runs-on: ubuntu-latest 32 | steps: 33 | - name: Deploy to GitHub Pages 34 | id: deployment 35 | uses: actions/deploy-pages@v4 36 | -------------------------------------------------------------------------------- /spec/kernel_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe Kernel do 4 | describe ".RE2" do 5 | it "returns an RE2::Regexp instance given a pattern" do 6 | expect(RE2('w(o)(o)')).to be_a(RE2::Regexp) 7 | end 8 | 9 | it "returns an RE2::Regexp instance given a pattern and options" do 10 | re = RE2('w(o)(o)', case_sensitive: false) 11 | 12 | expect(re).not_to be_case_sensitive 13 | end 14 | 15 | it "accepts patterns containing null bytes" do 16 | re = RE2("a\0b") 17 | 18 | expect(re.pattern).to eq("a\0b") 19 | end 20 | 21 | it "raises an error if given an inappropriate type" do 22 | expect { RE2(nil) }.to raise_error(TypeError) 23 | end 24 | 25 | it "allows invalid patterns to be created" do 26 | re = RE2('???', log_errors: false) 27 | 28 | expect(re).to be_a(RE2::Regexp) 29 | end 30 | 31 | it "supports 
module RE2
  # Convenience mixin that forwards to {RE2} and {RE2::Regexp} using the
  # including object as the text to operate on.
  #
  # @deprecated Use methods on {RE2} and {RE2::Regexp} instead.
  module String
    # Forward to {RE2.Replace} with the receiver as the first argument.
    #
    # @deprecated Use {RE2.Replace} instead.
    def re2_sub(*args) = RE2.Replace(self, *args)

    # Forward to {RE2.GlobalReplace} with the receiver as the first argument.
    #
    # @deprecated Use {RE2.GlobalReplace} instead.
    def re2_gsub(*args) = RE2.GlobalReplace(self, *args)

    # Build an {RE2::Regexp} from `pattern` and match it against the receiver,
    # passing any extra arguments straight through to {RE2::Regexp#match}.
    #
    # @deprecated Use {RE2::Regexp#match} instead.
    def re2_match(pattern, *args)
      regexp = RE2::Regexp.new(pattern)
      regexp.match(self, *args)
    end

    # Forward to {RE2.QuoteMeta} with the receiver as the argument.
    #
    # @deprecated Use {RE2.QuoteMeta} instead.
    def re2_escape = RE2.QuoteMeta(self)

    # re2_quote is a second name for re2_escape.
    alias re2_quote re2_escape
  end
end
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /re2.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'lib/re2/version' 4 | 5 | Gem::Specification.new do |s| 6 | s.name = "re2" 7 | s.summary = "Ruby bindings to RE2." 8 | s.description = 'Ruby bindings to RE2, "a fast, safe, thread-friendly alternative to backtracking regular expression engines like those used in PCRE, Perl, and Python".' 
#!/usr/bin/env ruby
# frozen_string_literal: true

# Look up the latest GitHub releases of RE2 and Abseil, download their release
# archives, and rewrite dependencies.yml (one directory above this script) with
# each release's tag and the SHA-256 digest of its archive.
#
# Any unexpected HTTP response aborts with a message on stderr and exit
# status 1, so a failed scheduled run shows which request went wrong.

require "net/http"
require "digest/sha2"
require "yaml"

# Perform a single GET (redirects are not followed automatically) and return
# the response if it is of the expected kind; otherwise abort the script.
#
# @param url [String] the URL to request
# @param expected [Class] the Net::HTTPResponse subclass the caller requires
# @return [Net::HTTPResponse] the response, guaranteed to be an `expected`
def expect_response(url, expected)
  response = Net::HTTP.get_response(URI(url))
  return response if response.is_a?(expected)

  abort "update-dependencies: GET #{url} returned #{response.code} #{response.message}; expected #{expected}"
end

# Resolve the newest release of a GitHub repository and checksum its archive.
#
# The tag is taken from the last path segment of the redirect issued by
# `/releases/latest`; the archive is then fetched by following the redirect
# issued for the release asset `<archive_prefix>-<tag>.tar.gz`.
#
# @param repository [String] GitHub "owner/name"
# @param archive_prefix [String] file name prefix of the release archive
# @return [Hash{String => String}] "version" (the tag) and "sha256" (hex digest)
def latest_release(repository, archive_prefix)
  releases = "https://github.com/#{repository}/releases"

  latest = expect_response("#{releases}/latest", Net::HTTPRedirection)
  tag = File.basename(URI(latest["Location"]).path)

  asset = expect_response("#{releases}/download/#{tag}/#{archive_prefix}-#{tag}.tar.gz", Net::HTTPRedirection)
  archive = expect_response(asset["Location"], Net::HTTPSuccess)

  { "version" => tag, "sha256" => Digest::SHA2.hexdigest(archive.body) }
end

# RE2 is resolved before Abseil, matching the key order written to the file.
File.write(
  File.expand_path("../dependencies.yml", __dir__),
  {
    "libre2" => latest_release("google/re2", "re2"),
    "abseil" => latest_release("abseil/abseil-cpp", "abseil-cpp")
  }.to_yaml
)
Paulson".re2_gsub('a', 'e')).to eq("My neme is Robert Peulson") 25 | end 26 | 27 | it "doesn't perform an in-place replacement" do 28 | string = "My name is Robert Paulson" 29 | 30 | expect(string.re2_gsub('a', 'e')).not_to equal(string) 31 | end 32 | end 33 | 34 | describe "#re2_match" do 35 | it "delegates to RE2::Regexp#match to perform matches", :aggregate_failures do 36 | md = "My name is Robert Paulson".re2_match('My name is (\S+) (\S+)') 37 | 38 | expect(md).to be_a(RE2::MatchData) 39 | expect(md[0]).to eq("My name is Robert Paulson") 40 | expect(md[1]).to eq("Robert") 41 | expect(md[2]).to eq("Paulson") 42 | end 43 | 44 | it "supports limiting the number of matches" do 45 | md = "My name is Robert Paulson".re2_match('My name is (\S+) (\S+)', 0) 46 | 47 | expect(md).to eq(true) 48 | end 49 | end 50 | 51 | describe "#re2_escape" do 52 | it "escapes the string for use in regular expressions" do 53 | expect("1.5-2.0?".re2_escape).to eq('1\.5\-2\.0\?') 54 | end 55 | end 56 | 57 | describe "#re2_quote" do 58 | it "escapes the string for use in regular expressions" do 59 | expect("1.5-2.0?".re2_quote).to eq('1\.5\-2\.0\?') 60 | end 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /ext/re2/recipes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # re2 (https://github.com/mudge/re2) 4 | # Ruby bindings to RE2, a "fast, safe, thread-friendly alternative to 5 | # backtracking regular expression engines like those used in PCRE, Perl, and 6 | # Python". 
# Root of the package: two directories above this file's directory.
PACKAGE_ROOT_DIR = File.expand_path('../..', __dir__)
REQUIRED_MINI_PORTILE_VERSION = '~> 2.8.9' # keep this version in sync with the one in the gemspec

# Build the recipes for the vendored libraries from dependencies.yml.
#
# Each recipe's download URL points at the release archive
# (`releases/download/<tag>/<name>-<tag>.tar.gz`). These are the same files
# scripts/update-dependencies downloads to compute the checksums stored in
# dependencies.yml, so the URLs here must stay in sync with that script or
# the checksum will not match the downloaded file.
#
# @return [Array] the Abseil recipe followed by the RE2 recipe
def load_recipes
  require 'yaml'
  dependencies = YAML.load_file(File.join(PACKAGE_ROOT_DIR, 'dependencies.yml'))

  abseil_recipe = build_recipe('abseil', dependencies['abseil']['version']) do |recipe|
    recipe.files = [{
      url: "https://github.com/abseil/abseil-cpp/releases/download/#{recipe.version}/abseil-cpp-#{recipe.version}.tar.gz",
      sha256: dependencies['abseil']['sha256']
    }]
  end

  re2_recipe = build_recipe('libre2', dependencies['libre2']['version']) do |recipe|
    recipe.files = [{
      url: "https://github.com/google/re2/releases/download/#{recipe.version}/re2-#{recipe.version}.tar.gz",
      sha256: dependencies['libre2']['sha256']
    }]
  end

  [abseil_recipe, re2_recipe]
end

# Create a CMake-based recipe targeting PACKAGE_ROOT_DIR/ports, append the
# shared CMake options, and yield it so the caller can finish configuring it
# (e.g. set its files).
#
# @param name [String] recipe name
# @param version [String] version of the library to build
# @yieldparam recipe [MiniPortileCMake] the recipe being configured
# @return [MiniPortileCMake] the configured recipe
def build_recipe(name, version)
  require 'rubygems'
  gem('mini_portile2', REQUIRED_MINI_PORTILE_VERSION) # gemspec is not respected at install time
  require 'mini_portile2'

  MiniPortileCMake.new(name, version).tap do |recipe|
    recipe.target = File.join(PACKAGE_ROOT_DIR, 'ports')
    recipe.configure_options += [
      # abseil needs a C++17 compiler
      '-DCMAKE_CXX_STANDARD=17',
      # needed for building the C extension shared library with -fPIC
      '-DCMAKE_POSITION_INDEPENDENT_CODE=ON',
      # ensures pkg-config and installed libraries will be in lib, not lib64
      '-DCMAKE_INSTALL_LIBDIR=lib',
      '-DCMAKE_CXX_VISIBILITY_PRESET=hidden'
    ]

    yield recipe
  end
end
'rake/extensiontask' 4 | require 'rake_compiler_dock' 5 | require 'rspec/core/rake_task' 6 | 7 | require_relative 'ext/re2/recipes' 8 | 9 | re2_gemspec = Gem::Specification.load('re2.gemspec') 10 | abseil_recipe, re2_recipe = load_recipes 11 | 12 | # Add Abseil and RE2's latest archives to the gem files. (Note these will be 13 | # removed from the precompiled native gems.) 14 | abseil_archive = File.join("ports/archives", File.basename(abseil_recipe.files[0][:url])) 15 | re2_archive = File.join("ports/archives", File.basename(re2_recipe.files[0][:url])) 16 | 17 | re2_gemspec.files << abseil_archive 18 | re2_gemspec.files << re2_archive 19 | 20 | cross_platforms = %w[ 21 | aarch64-linux-gnu 22 | aarch64-linux-musl 23 | arm-linux-gnu 24 | arm-linux-musl 25 | arm64-darwin 26 | x64-mingw-ucrt 27 | x64-mingw32 28 | x86-linux-gnu 29 | x86-linux-musl 30 | x86-mingw32 31 | x86_64-darwin 32 | x86_64-linux-gnu 33 | x86_64-linux-musl 34 | ].freeze 35 | 36 | RakeCompilerDock.set_ruby_cc_version("~> 3.1") 37 | 38 | Gem::PackageTask.new(re2_gemspec).define 39 | 40 | Rake::ExtensionTask.new('re2', re2_gemspec) do |e| 41 | e.cross_compile = true 42 | e.cross_config_options << '--enable-cross-build' 43 | e.config_options << '--disable-system-libraries' 44 | e.cross_platform = cross_platforms 45 | e.cross_compiling do |spec| 46 | spec.files.reject! { |path| File.fnmatch?('ports/*', path) } 47 | spec.dependencies.reject! { |dep| dep.name == 'mini_portile2' } 48 | end 49 | end 50 | 51 | RSpec::Core::RakeTask.new(:spec) 52 | 53 | begin 54 | require 'ruby_memcheck' 55 | require 'ruby_memcheck/rspec/rake_task' 56 | 57 | namespace :spec do 58 | RubyMemcheck::RSpec::RakeTask.new(valgrind: :compile) 59 | end 60 | rescue LoadError 61 | # Only define the spec:valgrind task if ruby_memcheck is installed 62 | end 63 | 64 | namespace :gem do 65 | cross_platforms.each do |platform| 66 | 67 | # Compile each platform's native gem, packaging up the result. 
Note we add 68 | # /usr/local/bin to the PATH as it contains the newest version of CMake in 69 | # the rake-compiler-dock images. 70 | desc "Compile and build native gem for #{platform} platform" 71 | task platform do 72 | RakeCompilerDock.sh <<~SCRIPT, platform: platform, verbose: true 73 | wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && 74 | echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ focal main' | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null && 75 | sudo apt-get update && 76 | sudo apt-get install -y cmake=3.22.2-0kitware1ubuntu20.04.1 cmake-data=3.22.2-0kitware1ubuntu20.04.1 && 77 | rbenv shell 3.1.6 && 78 | gem install bundler --no-document && 79 | bundle install && 80 | bundle exec rake native:#{platform} pkg/#{re2_gemspec.full_name}-#{Gem::Platform.new(platform)}.gem PATH="/usr/local/bin:$PATH" 81 | SCRIPT 82 | end 83 | end 84 | end 85 | 86 | # Set up file tasks for Abseil and RE2's archives so they are automatically 87 | # downloaded when required by the gem task. 88 | file abseil_archive do 89 | abseil_recipe.download 90 | end 91 | 92 | file re2_archive do 93 | re2_recipe.download 94 | end 95 | 96 | task default: :spec 97 | 98 | CLEAN.add("lib/**/*.{o,so,bundle}", "pkg") 99 | CLOBBER.add("ports") 100 | -------------------------------------------------------------------------------- /lib/re2/regexp.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # re2 (https://github.com/mudge/re2) 4 | # Ruby bindings to RE2, a "fast, safe, thread-friendly alternative to 5 | # backtracking regular expression engines like those used in PCRE, Perl, and 6 | # Python". 
module RE2
  class Regexp
    # Search for the pattern anywhere inside `text`.
    #
    # Delegates to #match with the given options plus `anchor: :unanchored`
    # (any `:anchor` supplied by the caller is overridden). The result is a
    # {RE2::MatchData} with the requested number of submatches (by default,
    # one per capturing group) or a boolean when no submatches are requested.
    #
    # Fewer submatches means less work: asking for one is much faster than
    # asking for several, and asking for none is faster still.
    #
    # @param [String] text the text to search
    # @param [Hash] options the options with which to perform the match
    # @option options [Integer] :submatches how many submatches to extract (0
    #   is fastest), defaults to the total number of capturing groups
    # @return [RE2::MatchData, nil] if extracting any submatches
    # @return [Boolean] if not extracting any submatches
    # @raise [ArgumentError] if given a negative number of submatches
    # @raise [NoMemoryError] if there was not enough memory to allocate the
    #   matches
    # @raise [TypeError] if given non-numeric submatches or non-hash options
    # @example
    #   r = RE2::Regexp.new('w(o)(o)')
    #   r.partial_match('woot')                #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
    #   r.partial_match('nope')                #=> nil
    #   r.partial_match('woot', submatches: 1) #=> #<RE2::MatchData "woo" 1:"o">
    #   r.partial_match('woot', submatches: 0) #=> true
    def partial_match(text, options = {})
      unanchored = Hash(options).merge(anchor: :unanchored)
      match(text, unanchored)
    end

    # Require the pattern to match the whole of `text`.
    #
    # Delegates to #match with the given options plus `anchor: :anchor_both`
    # (any `:anchor` supplied by the caller is overridden). The result is a
    # {RE2::MatchData} with the requested number of submatches (by default,
    # one per capturing group) or a boolean when no submatches are requested.
    #
    # Fewer submatches means less work: asking for one is much faster than
    # asking for several, and asking for none is faster still.
    #
    # @param [String] text the text to search
    # @param [Hash] options the options with which to perform the match
    # @option options [Integer] :submatches how many submatches to extract (0
    #   is fastest), defaults to the total number of capturing groups
    # @return [RE2::MatchData, nil] if extracting any submatches
    # @return [Boolean] if not extracting any submatches
    # @raise [ArgumentError] if given a negative number of submatches
    # @raise [NoMemoryError] if there was not enough memory to allocate the
    #   matches
    # @raise [TypeError] if given non-numeric submatches or non-hash options
    # @example
    #   r = RE2::Regexp.new('w(o)(o)')
    #   r.full_match('woo')                #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
    #   r.full_match('woot')               #=> nil
    #   r.full_match('woo', submatches: 1) #=> #<RE2::MatchData "woo" 1:"o">
    #   r.full_match('woo', submatches: 0) #=> true
    def full_match(text, options = {})
      anchored = Hash(options).merge(anchor: :anchor_both)
      match(text, anchored)
    end
  end
end
on regular expressions" do 22 | expect(RE2.Replace("woo", "o+", "e")).to eq("we") 23 | end 24 | 25 | it "supports flags in patterns" do 26 | expect(RE2.Replace("Good morning", "(?i)gOOD MORNING", "hi")).to eq("hi") 27 | end 28 | 29 | it "does not perform replacements in-place", :aggregate_failures do 30 | name = "Robert" 31 | replacement = RE2.Replace(name, "R", "Cr") 32 | 33 | expect(name).to eq("Robert") 34 | expect(replacement).to eq("Crobert") 35 | end 36 | 37 | it "supports passing an RE2::Regexp as the pattern" do 38 | re = RE2::Regexp.new('wo{2}') 39 | 40 | expect(RE2.Replace("woo", re, "miaow")).to eq("miaow") 41 | end 42 | 43 | it "respects any passed RE2::Regexp's flags" do 44 | re = RE2::Regexp.new('gOOD MORNING', case_sensitive: false) 45 | 46 | expect(RE2.Replace("Good morning", re, "hi")).to eq("hi") 47 | end 48 | 49 | it "supports passing something that can be coerced to a String as input" do 50 | expect(RE2.Replace(StringLike.new("woo"), "oo", "ah")).to eq("wah") 51 | end 52 | 53 | it "supports passing something that can be coerced to a String as a pattern" do 54 | expect(RE2.Replace("woo", StringLike.new("oo"), "ah")).to eq("wah") 55 | end 56 | 57 | it "supports passing something that can be coerced to a String as a replacement" do 58 | expect(RE2.Replace("woo", "oo", StringLike.new("ah"))).to eq("wah") 59 | end 60 | 61 | it "returns UTF-8 strings if the pattern is UTF-8" do 62 | original = "Foo".encode("ISO-8859-1") 63 | replacement = RE2.Replace(original, "oo", "ah") 64 | 65 | expect(replacement.encoding).to eq(Encoding::UTF_8) 66 | end 67 | 68 | it "returns ISO-8859-1 strings if the pattern is not UTF-8" do 69 | original = "Foo" 70 | replacement = RE2.Replace(original, RE2("oo", utf8: false), "ah") 71 | 72 | expect(replacement.encoding).to eq(Encoding::ISO_8859_1) 73 | end 74 | 75 | it "returns UTF-8 strings when given a String pattern" do 76 | replacement = RE2.Replace("Foo", "oo".encode("ISO-8859-1"), "ah") 77 | 78 | 
expect(replacement.encoding).to eq(Encoding::UTF_8) 79 | end 80 | 81 | it "raises a Type Error for input that can't be converted to String" do 82 | expect { RE2.Replace(0, "oo", "ah") }.to raise_error(TypeError) 83 | end 84 | 85 | it "raises a Type Error for a non-RE2::Regexp pattern that can't be converted to String" do 86 | expect { RE2.Replace("woo", 0, "ah") }.to raise_error(TypeError) 87 | end 88 | 89 | it "raises a Type Error for a replacement that can't be converted to String" do 90 | expect { RE2.Replace("woo", "oo", 0) }.to raise_error(TypeError) 91 | end 92 | end 93 | 94 | describe ".GlobalReplace" do 95 | it "replaces every occurrence of a pattern" do 96 | expect(RE2.GlobalReplace("woo", "o", "a")).to eq("waa") 97 | end 98 | 99 | it "supports inputs with null bytes" do 100 | expect(RE2.GlobalReplace("w\0oo", "o", "a")).to eq("w\0aa") 101 | end 102 | 103 | it "supports patterns with null bytes" do 104 | expect(RE2.GlobalReplace("w\0\0oo", "\0", "a")).to eq("waaoo") 105 | end 106 | 107 | it "supports replacements with null bytes" do 108 | expect(RE2.GlobalReplace("woo", "o", "\0")).to eq("w\0\0") 109 | end 110 | 111 | it "performs replacement based on regular expressions" do 112 | expect(RE2.GlobalReplace("woohoo", "o+", "e")).to eq("wehe") 113 | end 114 | 115 | it "supports flags in patterns" do 116 | expect(RE2.GlobalReplace("Robert", "(?i)r", "w")).to eq("wobewt") 117 | end 118 | 119 | it "does not perform replacement in-place", :aggregate_failures do 120 | name = "Robert" 121 | replacement = RE2.GlobalReplace(name, "(?i)R", "w") 122 | 123 | expect(name).to eq("Robert") 124 | expect(replacement).to eq("wobewt") 125 | end 126 | 127 | it "supports passing an RE2::Regexp as the pattern" do 128 | re = RE2::Regexp.new('wo{2,}') 129 | 130 | expect(RE2.GlobalReplace("woowooo", re, "miaow")).to eq("miaowmiaow") 131 | end 132 | 133 | it "respects any passed RE2::Regexp's flags" do 134 | re = RE2::Regexp.new('gOOD MORNING', case_sensitive: false) 135 | 136 | 
expect(RE2.GlobalReplace("Good morning Good morning", re, "hi")).to eq("hi hi") 137 | end 138 | 139 | it "supports passing something that can be coerced to a String as input" do 140 | expect(RE2.GlobalReplace(StringLike.new("woo"), "o", "a")).to eq("waa") 141 | end 142 | 143 | it "supports passing something that can be coerced to a String as a pattern" do 144 | expect(RE2.GlobalReplace("woo", StringLike.new("o"), "a")).to eq("waa") 145 | end 146 | 147 | it "supports passing something that can be coerced to a String as a replacement" do 148 | expect(RE2.GlobalReplace("woo", "o", StringLike.new("a"))).to eq("waa") 149 | end 150 | 151 | it "returns UTF-8 strings if the pattern is UTF-8" do 152 | original = "Foo".encode("ISO-8859-1") 153 | replacement = RE2.GlobalReplace(original, "oo", "ah") 154 | 155 | expect(replacement.encoding).to eq(Encoding::UTF_8) 156 | end 157 | 158 | it "returns ISO-8859-1 strings if the pattern is not UTF-8" do 159 | original = "Foo" 160 | replacement = RE2.GlobalReplace(original, RE2("oo", utf8: false), "ah") 161 | 162 | expect(replacement.encoding).to eq(Encoding::ISO_8859_1) 163 | end 164 | 165 | it "returns UTF-8 strings when given a String pattern" do 166 | replacement = RE2.GlobalReplace("Foo", "oo".encode("ISO-8859-1"), "ah") 167 | 168 | expect(replacement.encoding).to eq(Encoding::UTF_8) 169 | end 170 | 171 | it "raises a Type Error for input that can't be converted to String" do 172 | expect { RE2.GlobalReplace(0, "o", "a") }.to raise_error(TypeError) 173 | end 174 | 175 | it "raises a Type Error for a non-RE2::Regexp pattern that can't be converted to String" do 176 | expect { RE2.GlobalReplace("woo", 0, "a") }.to raise_error(TypeError) 177 | end 178 | 179 | it "raises a Type Error for a replacement that can't be converted to String" do 180 | expect { RE2.GlobalReplace("woo", "o", 0) }.to raise_error(TypeError) 181 | end 182 | end 183 | 184 | describe "#QuoteMeta" do 185 | it "escapes a string so it can be used as a regular 
expression" do 186 | expect(RE2.QuoteMeta("1.5-2.0?")).to eq('1\.5\-2\.0\?') 187 | end 188 | 189 | it "raises a Type Error for input that can't be converted to String" do 190 | expect { RE2.QuoteMeta(0) }.to raise_error(TypeError) 191 | end 192 | 193 | it "supports passing something that can be coerced to a String as input" do 194 | expect(RE2.QuoteMeta(StringLike.new("1.5"))).to eq('1\.5') 195 | end 196 | 197 | it "supports strings containing null bytes" do 198 | expect(RE2.QuoteMeta("abc\0def")).to eq('abc\x00def') 199 | end 200 | end 201 | end 202 | -------------------------------------------------------------------------------- /spec/re2/set_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe RE2::Set do 4 | describe "#initialize" do 5 | it "returns an instance given no args" do 6 | set = RE2::Set.new 7 | 8 | expect(set).to be_a(RE2::Set) 9 | end 10 | 11 | it "returns an instance given only an anchor of :unanchored" do 12 | set = RE2::Set.new(:unanchored) 13 | 14 | expect(set).to be_a(RE2::Set) 15 | end 16 | 17 | it "returns an instance given only an anchor of :anchor_start" do 18 | set = RE2::Set.new(:anchor_start) 19 | 20 | expect(set).to be_a(RE2::Set) 21 | end 22 | 23 | it "returns an instance given only an anchor of :anchor_both" do 24 | set = RE2::Set.new(:anchor_both) 25 | 26 | expect(set).to be_a(RE2::Set) 27 | end 28 | 29 | it "returns an instance given an anchor and options" do 30 | set = RE2::Set.new(:unanchored, case_sensitive: false) 31 | 32 | expect(set).to be_a(RE2::Set) 33 | end 34 | 35 | it "raises an error if given an inappropriate type" do 36 | expect { RE2::Set.new(0) }.to raise_error(TypeError) 37 | end 38 | 39 | it "raises an error if given an invalid anchor" do 40 | expect { RE2::Set.new(:not_a_valid_anchor) }.to raise_error( 41 | ArgumentError, 42 | "anchor should be one of: :unanchored, :anchor_start, :anchor_both" 43 | ) 44 | end 45 | 46 | it 
"raises an error if given an invalid anchor and options" do 47 | expect { RE2::Set.new(:not_a_valid_anchor, case_sensitive: false) }.to raise_error( 48 | ArgumentError, 49 | "anchor should be one of: :unanchored, :anchor_start, :anchor_both" 50 | ) 51 | end 52 | end 53 | 54 | describe "#add" do 55 | it "allows multiple patterns to be added", :aggregate_failures do 56 | set = RE2::Set.new 57 | 58 | expect(set.add("abc")).to eq(0) 59 | expect(set.add("def")).to eq(1) 60 | expect(set.add("ghi")).to eq(2) 61 | end 62 | 63 | it "rejects invalid patterns when added" do 64 | set = RE2::Set.new(:unanchored, log_errors: false) 65 | 66 | expect { set.add("???") }.to raise_error(ArgumentError, /str rejected by RE2::Set->Add\(\)/) 67 | end 68 | 69 | it "includes the full error message" do 70 | set = RE2::Set.new(:unanchored, log_errors: false) 71 | 72 | expect { set.add("(?P<#{'o' * 200}") }.to raise_error(ArgumentError, "str rejected by RE2::Set->Add(): invalid named capture group: (?P 146 | int main() { return 0; } 147 | SRC 148 | 149 | re2_requires_version_flag = checking_for("re2 that requires explicit C++ version flag") do 150 | !try_compile(minimal_program, compile_options) 151 | end 152 | 153 | if re2_requires_version_flag 154 | # Recent versions of RE2 depend directly on Abseil, which requires a 155 | # compiler with C++17 support. 156 | abort "Cannot compile re2 with your compiler: recent versions require C++17 support." unless %w[c++20 c++17 c++11 c++0x].any? do |std| 157 | checking_for("re2 that compiles with #{std} standard") do 158 | if try_compile(minimal_program, compile_options + " -std=#{std}") 159 | compile_options << " -std=#{std}" 160 | $CPPFLAGS << " -std=#{std}" 161 | 162 | true 163 | end 164 | end 165 | end 166 | end 167 | 168 | # Determine which version of re2 the user has installed. 169 | # Revision d9f8806c004d added an `endpos` argument to the 170 | # generic Match() function. 
171 | # 172 | # To test for this, try to compile a simple program that uses 173 | # the newer form of Match() and set a flag if it is successful. 174 | checking_for("RE2::Match() with endpos argument") do 175 | test_re2_match_signature = <<~SRC 176 | #include <re2/re2.h> 177 | 178 | int main() { 179 | RE2 pattern("test"); 180 | re2::StringPiece *match; 181 | pattern.Match("test", 0, 0, RE2::UNANCHORED, match, 0); 182 | 183 | return 0; 184 | } 185 | SRC 186 | 187 | if try_compile(test_re2_match_signature, compile_options) 188 | $defs.push("-DHAVE_ENDPOS_ARGUMENT") 189 | end 190 | end 191 | 192 | checking_for("RE2::Set::Match() with error information") do 193 | test_re2_set_match_signature = <<~SRC 194 | #include <vector> 195 | #include <re2/re2.h> 196 | #include <re2/set.h> 197 | 198 | int main() { 199 | RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED); 200 | s.Add("foo", NULL); 201 | s.Compile(); 202 | 203 | std::vector<int> v; 204 | RE2::Set::ErrorInfo ei; 205 | s.Match("foo", &v, &ei); 206 | 207 | return 0; 208 | } 209 | SRC 210 | 211 | if try_compile(test_re2_set_match_signature, compile_options) 212 | $defs.push("-DHAVE_ERROR_INFO_ARGUMENT") 213 | end 214 | end 215 | 216 | checking_for("RE2::Set::Size()") do 217 | test_re2_set_size = <<~SRC 218 | #include <re2/re2.h> 219 | #include <re2/set.h> 220 | 221 | int main() { 222 | RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED); 223 | s.Size(); 224 | 225 | return 0; 226 | } 227 | SRC 228 | 229 | if try_compile(test_re2_set_size, compile_options) 230 | $defs.push("-DHAVE_SET_SIZE") 231 | end 232 | end 233 | end 234 | 235 | def static_pkg_config(pc_file, pkg_config_paths) 236 | ENV["PKG_CONFIG_PATH"] = [*pkg_config_paths, ENV["PKG_CONFIG_PATH"]].compact.join(File::PATH_SEPARATOR) 237 | 238 | static_library_paths = minimal_pkg_config(pc_file, '--libs-only-L', '--static') 239 | .shellsplit 240 | .map { |flag| flag.delete_prefix('-L') } 241 | 242 | # Replace all -l flags that can be found in one of the static library 243 | # paths with the absolute path instead.
244 | minimal_pkg_config(pc_file, '--libs-only-l', '--static') 245 | .shellsplit 246 | .each do |flag| 247 | lib = "lib#{flag.delete_prefix('-l')}.#{$LIBEXT}" 248 | 249 | if (static_lib_path = static_library_paths.find { |path| File.exist?(File.join(path, lib)) }) 250 | $libs << ' ' << File.join(static_lib_path, lib).shellescape 251 | else 252 | $libs << ' ' << flag.shellescape 253 | end 254 | end 255 | 256 | append_ldflags(minimal_pkg_config(pc_file, '--libs-only-other', '--static')) 257 | 258 | incflags = minimal_pkg_config(pc_file, '--cflags-only-I') 259 | $INCFLAGS = [incflags, $INCFLAGS].join(" ").strip 260 | 261 | cflags = minimal_pkg_config(pc_file, '--cflags-only-other') 262 | $CFLAGS = [$CFLAGS, cflags].join(" ").strip 263 | $CXXFLAGS = [$CXXFLAGS, cflags].join(" ").strip 264 | end 265 | 266 | def process_recipe(recipe) 267 | cross_build_p = config_cross_build? 268 | message "Cross build is #{cross_build_p ? "enabled" : "disabled"}.\n" 269 | 270 | recipe.host = target_host 271 | # Ensure x64-mingw-ucrt and x64-mingw32 use different library paths since the host 272 | # is the same (x86_64-w64-mingw32). 273 | recipe.target = File.join(recipe.target, target_arch) if cross_build_p 274 | 275 | yield recipe 276 | 277 | checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{recipe.host}.installed" 278 | name = recipe.name 279 | version = recipe.version 280 | 281 | if File.exist?(checkpoint) 282 | message("Building re2 with a packaged version of #{name}-#{version}.\n") 283 | else 284 | message(<<~EOM) 285 | ---------- IMPORTANT NOTICE ---------- 286 | Building re2 with a packaged version of #{name}-#{version}. 287 | Configuration options: #{recipe.configure_options.shelljoin} 288 | EOM 289 | 290 | # Use a temporary base directory to reduce filename lengths since 291 | # Windows can hit a limit of 250 characters (CMAKE_OBJECT_PATH_MAX). 
292 | Dir.mktmpdir { |dir| Dir.chdir(dir) { recipe.cook } } 293 | 294 | FileUtils.touch(checkpoint) 295 | end 296 | end 297 | 298 | # See MiniPortile2's minimal_pkg_config: 299 | # https://github.com/flavorjones/mini_portile/blob/52fb0bc41c89a10f1ac7b5abcf0157e059194374/lib/mini_portile2/mini_portile.rb#L760-L783 300 | # and Ruby's pkg_config: 301 | # https://github.com/ruby/ruby/blob/c505bb0ca0fd61c7ae931d26451f11122a2644e9/lib/mkmf.rb#L1916-L2004 302 | def minimal_pkg_config(pc_file, *options) 303 | if ($PKGCONFIG ||= 304 | (pkgconfig = MakeMakefile.with_config("pkg-config") {MakeMakefile.config_string("PKG_CONFIG") || "pkg-config"}) && 305 | MakeMakefile.find_executable0(pkgconfig) && pkgconfig) 306 | pkgconfig = $PKGCONFIG 307 | else 308 | raise RuntimeError, "pkg-config is not found" 309 | end 310 | 311 | response = xpopen([pkgconfig, *options, pc_file], err: %i[child out], &:read) 312 | raise RuntimeError, response unless $?.success? 313 | 314 | response.strip 315 | end 316 | 317 | def config_system_libraries? 318 | enable_config("system-libraries", ENV.key?('RE2_USE_SYSTEM_LIBRARIES')) 319 | end 320 | 321 | def config_cross_build? 322 | enable_config("cross-build") 323 | end 324 | 325 | # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And 326 | # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits). 327 | def target_host 328 | host = RbConfig::CONFIG["host_alias"].empty? ? 
RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"] 329 | host.gsub(/i386/, "i686") 330 | end 331 | 332 | def target_arch 333 | RbConfig::CONFIG['arch'] 334 | end 335 | end 336 | end 337 | 338 | extconf = RE2::Extconf.new 339 | 340 | if arg_config('--help') 341 | extconf.print_help 342 | exit!(true) 343 | end 344 | 345 | extconf.configure 346 | -------------------------------------------------------------------------------- /LICENSE-DEPENDENCIES.txt: -------------------------------------------------------------------------------- 1 | # Vendored Dependency Licenses 2 | 3 | The library re2 (which lives at https://github.com/mudge/re2) may include the source code for RE2 (which lives at https://github.com/google/re2) and Abseil (which lives at https://abseil.io). 4 | 5 | The license terms shipped with RE2 are included here for your convenience: 6 | 7 | // Copyright (c) 2009 The RE2 Authors. All rights reserved. 8 | // 9 | // Redistribution and use in source and binary forms, with or without 10 | // modification, are permitted provided that the following conditions are 11 | // met: 12 | // 13 | // * Redistributions of source code must retain the above copyright 14 | // notice, this list of conditions and the following disclaimer. 15 | // * Redistributions in binary form must reproduce the above 16 | // copyright notice, this list of conditions and the following disclaimer 17 | // in the documentation and/or other materials provided with the 18 | // distribution. 19 | // * Neither the name of Google Inc. nor the names of its 20 | // contributors may be used to endorse or promote products derived from 21 | // this software without specific prior written permission. 22 | // 23 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 27 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 29 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 30 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 31 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 32 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 33 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 | 35 | The license terms shipped with Abseil are included here for your convenience: 36 | 37 | Apache License 38 | Version 2.0, January 2004 39 | https://www.apache.org/licenses/ 40 | 41 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 42 | 43 | 1. Definitions. 44 | 45 | "License" shall mean the terms and conditions for use, reproduction, 46 | and distribution as defined by Sections 1 through 9 of this document. 47 | 48 | "Licensor" shall mean the copyright owner or entity authorized by 49 | the copyright owner that is granting the License. 50 | 51 | "Legal Entity" shall mean the union of the acting entity and all 52 | other entities that control, are controlled by, or are under common 53 | control with that entity. For the purposes of this definition, 54 | "control" means (i) the power, direct or indirect, to cause the 55 | direction or management of such entity, whether by contract or 56 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 57 | outstanding shares, or (iii) beneficial ownership of such entity. 58 | 59 | "You" (or "Your") shall mean an individual or Legal Entity 60 | exercising permissions granted by this License. 61 | 62 | "Source" form shall mean the preferred form for making modifications, 63 | including but not limited to software source code, documentation 64 | source, and configuration files. 
65 | 66 | "Object" form shall mean any form resulting from mechanical 67 | transformation or translation of a Source form, including but 68 | not limited to compiled object code, generated documentation, 69 | and conversions to other media types. 70 | 71 | "Work" shall mean the work of authorship, whether in Source or 72 | Object form, made available under the License, as indicated by a 73 | copyright notice that is included in or attached to the work 74 | (an example is provided in the Appendix below). 75 | 76 | "Derivative Works" shall mean any work, whether in Source or Object 77 | form, that is based on (or derived from) the Work and for which the 78 | editorial revisions, annotations, elaborations, or other modifications 79 | represent, as a whole, an original work of authorship. For the purposes 80 | of this License, Derivative Works shall not include works that remain 81 | separable from, or merely link (or bind by name) to the interfaces of, 82 | the Work and Derivative Works thereof. 83 | 84 | "Contribution" shall mean any work of authorship, including 85 | the original version of the Work and any modifications or additions 86 | to that Work or Derivative Works thereof, that is intentionally 87 | submitted to Licensor for inclusion in the Work by the copyright owner 88 | or by an individual or Legal Entity authorized to submit on behalf of 89 | the copyright owner. For the purposes of this definition, "submitted" 90 | means any form of electronic, verbal, or written communication sent 91 | to the Licensor or its representatives, including but not limited to 92 | communication on electronic mailing lists, source code control systems, 93 | and issue tracking systems that are managed by, or on behalf of, the 94 | Licensor for the purpose of discussing and improving the Work, but 95 | excluding communication that is conspicuously marked or otherwise 96 | designated in writing by the copyright owner as "Not a Contribution." 
97 | 98 | "Contributor" shall mean Licensor and any individual or Legal Entity 99 | on behalf of whom a Contribution has been received by Licensor and 100 | subsequently incorporated within the Work. 101 | 102 | 2. Grant of Copyright License. Subject to the terms and conditions of 103 | this License, each Contributor hereby grants to You a perpetual, 104 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 105 | copyright license to reproduce, prepare Derivative Works of, 106 | publicly display, publicly perform, sublicense, and distribute the 107 | Work and such Derivative Works in Source or Object form. 108 | 109 | 3. Grant of Patent License. Subject to the terms and conditions of 110 | this License, each Contributor hereby grants to You a perpetual, 111 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 112 | (except as stated in this section) patent license to make, have made, 113 | use, offer to sell, sell, import, and otherwise transfer the Work, 114 | where such license applies only to those patent claims licensable 115 | by such Contributor that are necessarily infringed by their 116 | Contribution(s) alone or by combination of their Contribution(s) 117 | with the Work to which such Contribution(s) was submitted. If You 118 | institute patent litigation against any entity (including a 119 | cross-claim or counterclaim in a lawsuit) alleging that the Work 120 | or a Contribution incorporated within the Work constitutes direct 121 | or contributory patent infringement, then any patent licenses 122 | granted to You under this License for that Work shall terminate 123 | as of the date such litigation is filed. 124 | 125 | 4. Redistribution. 
You may reproduce and distribute copies of the 126 | Work or Derivative Works thereof in any medium, with or without 127 | modifications, and in Source or Object form, provided that You 128 | meet the following conditions: 129 | 130 | (a) You must give any other recipients of the Work or 131 | Derivative Works a copy of this License; and 132 | 133 | (b) You must cause any modified files to carry prominent notices 134 | stating that You changed the files; and 135 | 136 | (c) You must retain, in the Source form of any Derivative Works 137 | that You distribute, all copyright, patent, trademark, and 138 | attribution notices from the Source form of the Work, 139 | excluding those notices that do not pertain to any part of 140 | the Derivative Works; and 141 | 142 | (d) If the Work includes a "NOTICE" text file as part of its 143 | distribution, then any Derivative Works that You distribute must 144 | include a readable copy of the attribution notices contained 145 | within such NOTICE file, excluding those notices that do not 146 | pertain to any part of the Derivative Works, in at least one 147 | of the following places: within a NOTICE text file distributed 148 | as part of the Derivative Works; within the Source form or 149 | documentation, if provided along with the Derivative Works; or, 150 | within a display generated by the Derivative Works, if and 151 | wherever such third-party notices normally appear. The contents 152 | of the NOTICE file are for informational purposes only and 153 | do not modify the License. You may add Your own attribution 154 | notices within Derivative Works that You distribute, alongside 155 | or as an addendum to the NOTICE text from the Work, provided 156 | that such additional attribution notices cannot be construed 157 | as modifying the License. 
158 | 159 | You may add Your own copyright statement to Your modifications and 160 | may provide additional or different license terms and conditions 161 | for use, reproduction, or distribution of Your modifications, or 162 | for any such Derivative Works as a whole, provided Your use, 163 | reproduction, and distribution of the Work otherwise complies with 164 | the conditions stated in this License. 165 | 166 | 5. Submission of Contributions. Unless You explicitly state otherwise, 167 | any Contribution intentionally submitted for inclusion in the Work 168 | by You to the Licensor shall be under the terms and conditions of 169 | this License, without any additional terms or conditions. 170 | Notwithstanding the above, nothing herein shall supersede or modify 171 | the terms of any separate license agreement you may have executed 172 | with Licensor regarding such Contributions. 173 | 174 | 6. Trademarks. This License does not grant permission to use the trade 175 | names, trademarks, service marks, or product names of the Licensor, 176 | except as required for reasonable and customary use in describing the 177 | origin of the Work and reproducing the content of the NOTICE file. 178 | 179 | 7. Disclaimer of Warranty. Unless required by applicable law or 180 | agreed to in writing, Licensor provides the Work (and each 181 | Contributor provides its Contributions) on an "AS IS" BASIS, 182 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 183 | implied, including, without limitation, any warranties or conditions 184 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 185 | PARTICULAR PURPOSE. You are solely responsible for determining the 186 | appropriateness of using or redistributing the Work and assume any 187 | risks associated with Your exercise of permissions under this License. 188 | 189 | 8. Limitation of Liability. 
In no event and under no legal theory, 190 | whether in tort (including negligence), contract, or otherwise, 191 | unless required by applicable law (such as deliberate and grossly 192 | negligent acts) or agreed to in writing, shall any Contributor be 193 | liable to You for damages, including any direct, indirect, special, 194 | incidental, or consequential damages of any character arising as a 195 | result of this License or out of the use or inability to use the 196 | Work (including but not limited to damages for loss of goodwill, 197 | work stoppage, computer failure or malfunction, or any and all 198 | other commercial damages or losses), even if such Contributor 199 | has been advised of the possibility of such damages. 200 | 201 | 9. Accepting Warranty or Additional Liability. While redistributing 202 | the Work or Derivative Works thereof, You may choose to offer, 203 | and charge a fee for, acceptance of support, warranty, indemnity, 204 | or other liability obligations and/or rights consistent with this 205 | License. However, in accepting such obligations, You may act only 206 | on Your own behalf and on Your sole responsibility, not on behalf 207 | of any other Contributor, and only if You agree to indemnify, 208 | defend, and hold each Contributor harmless for any liability 209 | incurred by, or claims asserted against, such Contributor by reason 210 | of your accepting any such warranty or additional liability. 211 | 212 | END OF TERMS AND CONDITIONS 213 | 214 | APPENDIX: How to apply the Apache License to your work. 215 | 216 | To apply the Apache License to your work, attach the following 217 | boilerplate notice, with the fields enclosed by brackets "[]" 218 | replaced with your own identifying information. (Don't include 219 | the brackets!) The text should be enclosed in the appropriate 220 | comment syntax for the file format. 
We also recommend that a 221 | file or class name and description of purpose be included on the 222 | same "printed page" as the copyright notice for easier 223 | identification within third-party archives. 224 | 225 | Copyright [yyyy] [name of copyright owner] 226 | 227 | Licensed under the Apache License, Version 2.0 (the "License"); 228 | you may not use this file except in compliance with the License. 229 | You may obtain a copy of the License at 230 | 231 | https://www.apache.org/licenses/LICENSE-2.0 232 | 233 | Unless required by applicable law or agreed to in writing, software 234 | distributed under the License is distributed on an "AS IS" BASIS, 235 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 236 | See the License for the specific language governing permissions and 237 | limitations under the License. 238 | -------------------------------------------------------------------------------- /spec/re2/match_data_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'objspace' 4 | 5 | RSpec.describe RE2::MatchData do 6 | it "reports a larger consuming memory size when it has more matches" do 7 | matches1 = RE2::Regexp.new('w(o)').match('woo') 8 | matches2 = RE2::Regexp.new('w(o)(o)').match('woo') 9 | 10 | expect(ObjectSpace.memsize_of(matches1)).to be < ObjectSpace.memsize_of(matches2) 11 | end 12 | 13 | describe "#to_a" do 14 | it "is populated with the match and capturing groups" do 15 | a = RE2::Regexp.new('w(o)(o)').match('woo').to_a 16 | 17 | expect(a).to eq(["woo", "o", "o"]) 18 | end 19 | 20 | it "populates optional capturing groups with nil if they are missing" do 21 | a = RE2::Regexp.new('(\d?)(a)(b)').match('ab').to_a 22 | 23 | expect(a).to eq(["ab", nil, "a", "b"]) 24 | end 25 | 26 | it "returns UTF-8 strings if the pattern is UTF-8" do 27 | a = RE2::Regexp.new('w(o)(o)').match('woo').to_a 28 | 29 | expect(a.map(&:encoding)).to all 
eq(Encoding::UTF_8) 30 | end 31 | 32 | it "returns ISO-8859-1 strings if the pattern is not UTF-8" do 33 | a = RE2::Regexp.new('w(o)(o)', utf8: false).match('woo').to_a 34 | 35 | expect(a.map(&:encoding)).to all eq(Encoding::ISO_8859_1) 36 | end 37 | end 38 | 39 | describe "#[]" do 40 | it "accesses capturing groups by numerical index", :aggregate_failures do 41 | md = RE2::Regexp.new('(\d)(\d{2})').match("123") 42 | 43 | expect(md[1]).to eq("1") 44 | expect(md[2]).to eq("23") 45 | end 46 | 47 | it "returns a UTF-8 string by numerical index if the pattern is UTF-8" do 48 | md = RE2::Regexp.new('(\d)(\d{2})').match("123") 49 | 50 | expect(md[1].encoding).to eq(Encoding::UTF_8) 51 | end 52 | 53 | it "returns a ISO-8859-1 string by numerical index if the pattern is not UTF-8" do 54 | md = RE2::Regexp.new('(\d)(\d{2})', utf8: false).match("123") 55 | 56 | expect(md[1].encoding).to eq(Encoding::ISO_8859_1) 57 | end 58 | 59 | it "has the whole match as the 0th item" do 60 | md = RE2::Regexp.new('(\d)(\d{2})').match("123") 61 | 62 | expect(md[0]).to eq("123") 63 | end 64 | 65 | it "supports access by numerical ranges", :aggregate_failures do 66 | md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789") 67 | 68 | expect(md[1..3]).to eq(["123", "456", "789"]) 69 | expect(md[1...3]).to eq(["123", "456"]) 70 | end 71 | 72 | it "supports slicing", :aggregate_failures do 73 | md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789") 74 | 75 | expect(md[1, 3]).to eq(["123", "456", "789"]) 76 | expect(md[1, 2]).to eq(["123", "456"]) 77 | end 78 | 79 | it "returns nil if attempting to access non-existent capturing groups by index", :aggregate_failures do 80 | md = RE2::Regexp.new('(\d+)').match('bob 123') 81 | 82 | expect(md[2]).to be_nil 83 | expect(md[3]).to be_nil 84 | end 85 | 86 | it "allows access by string names when there are named groups" do 87 | md = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123') 88 | 89 | expect(md["numbers"]).to eq("123") 90 | end 91 | 92 | it
"allows access by symbol names when there are named groups" do 93 | md = RE2::Regexp.new('(?P<numbers>\d+)').match('bob 123') 94 | 95 | expect(md[:numbers]).to eq("123") 96 | end 97 | 98 | it "allows access by names and indices with mixed groups", :aggregate_failures do 99 | md = RE2::Regexp.new('(?P<name>\w+)(\s*)(?P<numbers>\d+)').match("bob 123") 100 | 101 | expect(md["name"]).to eq("bob") 102 | expect(md[:name]).to eq("bob") 103 | expect(md[2]).to eq(" ") 104 | expect(md["numbers"]).to eq("123") 105 | expect(md[:numbers]).to eq("123") 106 | end 107 | 108 | it "returns nil if no such named group exists", :aggregate_failures do 109 | md = RE2::Regexp.new('(\d+)').match("bob 123") 110 | 111 | expect(md["missing"]).to be_nil 112 | expect(md[:missing]).to be_nil 113 | end 114 | 115 | it "raises an error if given an inappropriate index" do 116 | md = RE2::Regexp.new('(\d+)').match("bob 123") 117 | 118 | expect { md[nil] }.to raise_error(TypeError) 119 | end 120 | 121 | it "returns UTF-8 encoded strings by default", :aggregate_failures do 122 | md = RE2::Regexp.new('(?P<name>\S+)').match("bob") 123 | 124 | expect(md[0].encoding.name).to eq("UTF-8") 125 | expect(md["name"].encoding.name).to eq("UTF-8") 126 | expect(md[:name].encoding.name).to eq("UTF-8") 127 | end 128 | 129 | it "returns Latin 1 strings encoding when utf-8 is false", :aggregate_failures do 130 | md = RE2::Regexp.new('(?P<name>\S+)', utf8: false).match('bob') 131 | 132 | expect(md[0].encoding.name).to eq("ISO-8859-1") 133 | expect(md["name"].encoding.name).to eq("ISO-8859-1") 134 | expect(md[:name].encoding.name).to eq("ISO-8859-1") 135 | end 136 | 137 | it "supports GC compaction" do 138 | md = RE2::Regexp.new('(wo{2})').match('woohoo' * 5) 139 | GC.compact 140 | 141 | expect(md[1]).to eq("woo") 142 | end 143 | end 144 | 145 | describe "#string" do 146 | it "returns the original string to match against" do 147 | re = RE2::Regexp.new('(\D+)').match("bob") 148 | 149 | expect(re.string).to eq("bob") 150 | end 151 | 152 | it "returns a copy, 
not the actual original" do 153 | string = +"bob" 154 | re = RE2::Regexp.new('(\D+)').match(string) 155 | 156 | expect(re.string).to_not equal(string) 157 | end 158 | 159 | it "returns a frozen string" do 160 | re = RE2::Regexp.new('(\D+)').match("bob") 161 | 162 | expect(re.string).to be_frozen 163 | end 164 | 165 | it "does not copy the string if it was already frozen" do 166 | string = "bob" 167 | re = RE2::Regexp.new('(\D+)').match(string) 168 | 169 | expect(re.string).to equal(string) 170 | end 171 | end 172 | 173 | describe "#size" do 174 | it "returns the number of capturing groups plus the matching string" do 175 | md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") 176 | 177 | expect(md.size).to eq(3) 178 | end 179 | end 180 | 181 | describe "#length" do 182 | it "returns the number of capturing groups plus the matching string" do 183 | md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") 184 | 185 | expect(md.length).to eq(3) 186 | end 187 | end 188 | 189 | describe "#regexp" do 190 | it "returns the original RE2::Regexp used" do 191 | re = RE2::Regexp.new('(\d+)') 192 | md = re.match("123") 193 | 194 | expect(md.regexp).to equal(re) 195 | end 196 | end 197 | 198 | describe "#inspect" do 199 | it "returns a text representation of the object and indices" do 200 | md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") 201 | 202 | expect(md.inspect).to eq('#<RE2::MatchData "1234 56" 1:"1234" 2:"56">') 203 | end 204 | 205 | it "represents missing matches as nil" do 206 | md = RE2::Regexp.new('(\d+) (\d+)?').match("1234 ") 207 | 208 | expect(md.inspect).to eq('#<RE2::MatchData "1234 " 1:"1234" 2:nil>') 209 | end 210 | 211 | it "supports matches with null bytes" do 212 | md = RE2::Regexp.new("(\\w\0\\w) (\\w\0\\w)").match("a\0b c\0d") 213 | 214 | expect(md.inspect).to eq("#<RE2::MatchData \"a\0b c\0d\" 1:\"a\0b\" 2:\"c\0d\">") 215 | end 216 | end 217 | 218 | describe "#to_s" do 219 | it "returns the matching part of the original string" do 220 | md = RE2::Regexp.new('(\d{2,5})').match("one two 23456") 221 | 222 | expect(md.to_s).to eq("23456") 223 | end 224 | end 225 | 226 | describe 
"#to_ary" do 227 | it "allows the object to be expanded with an asterisk", :aggregate_failures do 228 | md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") 229 | m1, m2, m3 = *md 230 | 231 | expect(m1).to eq("1234 56") 232 | expect(m2).to eq("1234") 233 | expect(m3).to eq("56") 234 | end 235 | end 236 | 237 | describe "#begin" do 238 | it "returns the offset of the start of a match by index" do 239 | md = RE2::Regexp.new('(wo{2})').match('a woohoo') 240 | 241 | expect(md.string[md.begin(0)..-1]).to eq('woohoo') 242 | end 243 | 244 | it "returns the offset of the start of a match by string name" do 245 | md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') 246 | 247 | expect(md.string[md.begin('foo')..-1]).to eq('foobar') 248 | end 249 | 250 | it "returns the offset of the start of a match by symbol name" do 251 | md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') 252 | 253 | expect(md.string[md.begin(:foo)..-1]).to eq('foobar') 254 | end 255 | 256 | it "returns the offset of the start of a match by something that can be coerced to a String" do 257 | md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') 258 | 259 | expect(md.string[md.begin(StringLike.new("foo"))..-1]).to eq('foobar') 260 | end 261 | 262 | it "returns the offset despite multibyte characters" do 263 | md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby') 264 | 265 | expect(md.string[md.begin(0)..-1]).to eq('Ruby') 266 | end 267 | 268 | it "returns nil for non-existent numerical matches" do 269 | md = RE2::Regexp.new('(\d)').match('123') 270 | 271 | expect(md.begin(10)).to be_nil 272 | end 273 | 274 | it "returns nil for negative numerical matches" do 275 | md = RE2::Regexp.new('(\d)').match('123') 276 | 277 | expect(md.begin(-4)).to be_nil 278 | end 279 | 280 | it "returns nil for non-existent named matches" do 281 | md = RE2::Regexp.new('(\d)').match('123') 282 | 283 | expect(md.begin('foo')).to be_nil 284 | end 285 | 286 | it "returns nil for non-existent symbol named matches" do 287 | md = 
RE2::Regexp.new('(\d)').match('123') 288 | 289 | expect(md.begin(:foo)).to be_nil 290 | end 291 | 292 | it "raises a type error if given an invalid name or number" do 293 | md = RE2::Regexp.new('(\d)').match('123') 294 | 295 | expect { md.begin(nil) }.to raise_error(TypeError) 296 | end 297 | 298 | it "supports GC compaction" do 299 | md = RE2::Regexp.new('(wo{2})').match('woohoo' * 5) 300 | GC.compact 301 | 302 | expect(md.string[md.begin(0)..-1]).to eq('woohoo' * 5) 303 | end 304 | end 305 | 306 | describe "#end" do 307 | it "returns the offset of the character following the end of a match" do 308 | md = RE2::Regexp.new('(wo{2})').match('a woohoo') 309 | 310 | expect(md.string[0...md.end(0)]).to eq('a woo') 311 | end 312 | 313 | it "returns the offset of a match by string name" do 314 | md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') 315 | 316 | expect(md.string[0...md.end('foo')]).to eq('a foo') 317 | end 318 | 319 | it "returns the offset of a match by symbol name" do 320 | md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') 321 | 322 | expect(md.string[0...md.end(:foo)]).to eq('a foo') 323 | end 324 | 325 | it "returns the offset of a match by something that can be coerced to a String" do 326 | md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') 327 | 328 | expect(md.string[0...md.end(StringLike.new("foo"))]).to eq('a foo') 329 | end 330 | 331 | it "returns the offset despite multibyte characters" do 332 | md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby') 333 | 334 | expect(md.string[0...md.end(0)]).to eq('I ♥ Ruby') 335 | end 336 | 337 | it "returns nil for non-existent numerical matches" do 338 | md = RE2::Regexp.new('(\d)').match('123') 339 | 340 | expect(md.end(10)).to be_nil 341 | end 342 | 343 | it "returns nil for negative numerical matches" do 344 | md = RE2::Regexp.new('(\d)').match('123') 345 | 346 | expect(md.end(-4)).to be_nil 347 | end 348 | 349 | it "returns nil for non-existent named matches" do 350 | md = RE2::Regexp.new('(\d)').match('123') 
351 | 352 | expect(md.end('foo')).to be_nil 353 | end 354 | 355 | it "returns nil for non-existent symbol named matches" do 356 | md = RE2::Regexp.new('(\d)').match('123') 357 | 358 | expect(md.end(:foo)).to be_nil 359 | end 360 | 361 | it "raises a type error if given an invalid name or number" do 362 | md = RE2::Regexp.new('(\d)').match('123') 363 | 364 | expect { md.end(nil) }.to raise_error(TypeError) 365 | end 366 | 367 | it "supports GC compaction" do 368 | md = RE2::Regexp.new('(wo{2})').match('woohoo' * 5) 369 | GC.compact 370 | 371 | expect(md.string[0...md.end(0)]).to eq('woo') 372 | end 373 | end 374 | 375 | describe "#deconstruct" do 376 | it "returns all capturing groups" do 377 | md = RE2::Regexp.new('w(o)(o)').match('woo') 378 | 379 | expect(md.deconstruct).to eq(['o', 'o']) 380 | end 381 | 382 | it "includes optional capturing groups as nil" do 383 | md = RE2::Regexp.new('w(.)(.)(.)?').match('woo') 384 | 385 | expect(md.deconstruct).to eq(['o', 'o', nil]) 386 | end 387 | end 388 | 389 | describe "#deconstruct_keys" do 390 | it "returns all named captures if given nil" do 391 | md = RE2::Regexp.new('(?P\d+) (?P[a-zA-Z]+)').match('123 abc') 392 | 393 | expect(md.deconstruct_keys(nil)).to eq(numbers: '123', letters: 'abc') 394 | end 395 | 396 | it "returns only named captures if given names" do 397 | md = RE2::Regexp.new('(?P\d+) (?P[a-zA-Z]+)').match('123 abc') 398 | 399 | expect(md.deconstruct_keys([:numbers])).to eq(numbers: '123') 400 | end 401 | 402 | it "returns named captures up until an invalid name is given" do 403 | md = RE2::Regexp.new('(?P\d+) (?P[a-zA-Z]+)').match('123 abc') 404 | 405 | expect(md.deconstruct_keys([:numbers, :punctuation])).to eq(numbers: '123') 406 | end 407 | 408 | it "returns an empty hash if given more capture names than exist" do 409 | md = RE2::Regexp.new('(?P\d+) (?P[a-zA-Z]+)').match('123 abc') 410 | 411 | expect(md.deconstruct_keys([:numbers, :letters, :punctuation])).to eq({}) 412 | end 413 | 414 | it "returns an 
empty hash if there are no named capturing groups" do 415 | md = RE2::Regexp.new('(\d+) ([a-zA-Z]+)').match('123 abc') 416 | 417 | expect(md.deconstruct_keys(nil)).to eq({}) 418 | end 419 | 420 | it "raises an error if given a non-array of keys" do 421 | md = RE2::Regexp.new('(?P\d+) (?P[a-zA-Z]+)').match('123 abc') 422 | 423 | expect { md.deconstruct_keys(0) }.to raise_error(TypeError) 424 | end 425 | 426 | it "raises an error if given keys as non-symbols" do 427 | md = RE2::Regexp.new('(?P\d+) (?P[a-zA-Z]+)').match('123 abc') 428 | 429 | expect { md.deconstruct_keys([0]) }.to raise_error(TypeError) 430 | end 431 | end 432 | end 433 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: re2 Tests 2 | 3 | concurrency: 4 | group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" 5 | cancel-in-progress: true 6 | 7 | on: 8 | workflow_dispatch: 9 | push: 10 | branches: 11 | - main 12 | pull_request: 13 | schedule: 14 | - cron: "30 4 * * 4" 15 | 16 | env: 17 | RUBYOPT: "--enable-frozen-string-literal --debug-frozen-string-literal" 18 | 19 | jobs: 20 | build-cruby-gem: 21 | runs-on: "ubuntu-latest" 22 | steps: 23 | - uses: actions/checkout@v5 24 | - uses: actions/cache@v4 25 | with: 26 | path: ports/archives 27 | key: archives-ubuntu-${{ hashFiles('dependencies.yml') }} 28 | - uses: ruby/setup-ruby@v1 29 | with: 30 | ruby-version: "3.4" 31 | bundler-cache: true 32 | - run: bundle exec rake gem 33 | - uses: actions/upload-artifact@v4 34 | with: 35 | name: cruby-gem 36 | path: pkg/*.gem 37 | 38 | precompile-aarch64-linux-gnu: 39 | uses: ./.github/workflows/precompile-gem.yml 40 | with: 41 | platform: aarch64-linux-gnu 42 | 43 | precompile-aarch64-linux-musl: 44 | uses: ./.github/workflows/precompile-gem.yml 45 | with: 46 | platform: aarch64-linux-musl 47 | 48 | precompile-arm-linux-gnu: 49 | uses: 
./.github/workflows/precompile-gem.yml 50 | with: 51 | platform: arm-linux-gnu 52 | 53 | precompile-arm-linux-musl: 54 | uses: ./.github/workflows/precompile-gem.yml 55 | with: 56 | platform: arm-linux-musl 57 | 58 | precompile-x86_64-linux-gnu: 59 | uses: ./.github/workflows/precompile-gem.yml 60 | with: 61 | platform: x86_64-linux-gnu 62 | 63 | precompile-x86_64-linux-musl: 64 | uses: ./.github/workflows/precompile-gem.yml 65 | with: 66 | platform: x86_64-linux-musl 67 | 68 | precompile-arm64-darwin: 69 | uses: ./.github/workflows/precompile-gem.yml 70 | with: 71 | platform: arm64-darwin 72 | 73 | precompile-x86_64-darwin: 74 | uses: ./.github/workflows/precompile-gem.yml 75 | with: 76 | platform: x86_64-darwin 77 | 78 | precompile-x64-mingw-ucrt: 79 | uses: ./.github/workflows/precompile-gem.yml 80 | with: 81 | platform: x64-mingw-ucrt 82 | 83 | test-re2-abi: 84 | needs: "build-cruby-gem" 85 | runs-on: ubuntu-22.04 86 | strategy: 87 | matrix: 88 | ruby: ["3.4", "3.1"] # oldest and newest 89 | libre2: 90 | - version: "20150501" 91 | soname: 0 92 | - version: "20200302" 93 | soname: 1 94 | - version: "20200303" 95 | soname: 6 96 | - version: "20200501" 97 | soname: 7 98 | - version: "20200706" 99 | soname: 8 100 | - version: "20201101" 101 | soname: 9 102 | - version: "20221201" 103 | soname: 10 104 | - version: "20230701" 105 | soname: 11 106 | steps: 107 | - uses: actions/checkout@v5 108 | - name: Remove any existing libre2 installation 109 | run: sudo apt-get remove -y libre2-dev libre2-9 110 | - name: Download and install specific release of libre2 111 | run: | 112 | curl -Lo libre2-dev.deb https://github.com/mudge/re2-ci/releases/download/v22.04/libre2-dev_${{ matrix.libre2.version }}_amd64.deb 113 | sudo apt -f install ./libre2-dev.deb 114 | - uses: ruby/setup-ruby@v1 115 | with: 116 | ruby-version: "${{ matrix.ruby }}" 117 | bundler-cache: true 118 | - uses: actions/download-artifact@v5 119 | with: 120 | name: cruby-gem 121 | path: pkg 122 | - run: 
./scripts/test-gem-install --enable-system-libraries 123 | env: 124 | BUNDLE_PATH: ${{ github.workspace }}/vendor/bundle 125 | 126 | test-ubuntu: 127 | needs: "build-cruby-gem" 128 | strategy: 129 | fail-fast: false 130 | matrix: 131 | ruby: ["3.1", "3.2", "3.3", "3.4", "4.0"] 132 | sys: ["enable", "disable"] 133 | runs-on: "ubuntu-latest" 134 | steps: 135 | - uses: actions/checkout@v5 136 | - uses: ruby/setup-ruby-pkgs@v1 137 | with: 138 | ruby-version: ${{ matrix.ruby }} 139 | apt-get: libre2-dev 140 | bundler-cache: true 141 | - uses: actions/download-artifact@v5 142 | with: 143 | name: cruby-gem 144 | path: pkg 145 | - run: ./scripts/test-gem-install --${{ matrix.sys }}-system-libraries 146 | env: 147 | BUNDLE_PATH: ${{ github.workspace }}/vendor/bundle 148 | 149 | test-macos: 150 | needs: "build-cruby-gem" 151 | strategy: 152 | fail-fast: false 153 | matrix: 154 | ruby: ["3.1", "3.2", "3.3", "3.4", "4.0"] 155 | sys: ["enable", "disable"] 156 | runs-on: "macos-14" 157 | steps: 158 | - uses: actions/checkout@v5 159 | - uses: ruby/setup-ruby-pkgs@v1 160 | with: 161 | ruby-version: ${{ matrix.ruby }} 162 | brew: re2 163 | bundler-cache: true 164 | - uses: actions/download-artifact@v5 165 | with: 166 | name: cruby-gem 167 | path: pkg 168 | - run: ./scripts/test-gem-install --${{ matrix.sys }}-system-libraries 169 | env: 170 | BUNDLE_PATH: ${{ github.workspace }}/vendor/bundle 171 | 172 | test-windows-2022: 173 | needs: "build-cruby-gem" 174 | strategy: 175 | fail-fast: false 176 | matrix: 177 | ruby: ["3.4", "head"] 178 | sys: ["enable", "disable"] 179 | runs-on: "windows-2022" 180 | steps: 181 | - uses: actions/checkout@v5 182 | - uses: ruby/setup-ruby-pkgs@v1 183 | with: 184 | ruby-version: ${{ matrix.ruby }} 185 | mingw: re2 186 | bundler-cache: true 187 | - uses: actions/download-artifact@v5 188 | with: 189 | name: cruby-gem 190 | path: pkg 191 | - run: ./scripts/test-gem-install --${{ matrix.sys }}-system-libraries 192 | shell: bash 193 | env: 194 | 
BUNDLE_PATH: ${{ github.workspace }}/vendor/bundle 195 | 196 | test-windows-2025: 197 | needs: "build-cruby-gem" 198 | strategy: 199 | fail-fast: false 200 | matrix: 201 | ruby: ["3.4", "head"] 202 | sys: ["enable", "disable"] 203 | runs-on: "windows-2025" 204 | steps: 205 | - uses: actions/checkout@v5 206 | - uses: ruby/setup-ruby-pkgs@v1 207 | with: 208 | ruby-version: ${{ matrix.ruby }} 209 | mingw: re2 210 | bundler-cache: true 211 | - uses: actions/download-artifact@v5 212 | with: 213 | name: cruby-gem 214 | path: pkg 215 | - run: ./scripts/test-gem-install --${{ matrix.sys }}-system-libraries 216 | shell: bash 217 | env: 218 | BUNDLE_PATH: ${{ github.workspace }}/vendor/bundle 219 | 220 | test-freebsd: 221 | needs: "build-cruby-gem" 222 | strategy: 223 | fail-fast: false 224 | matrix: 225 | sys: ["enable", "disable"] 226 | runs-on: ubuntu-latest 227 | steps: 228 | - uses: actions/checkout@v5 229 | - uses: actions/download-artifact@v5 230 | with: 231 | name: cruby-gem 232 | path: pkg 233 | - uses: vmactions/freebsd-vm@v1 234 | with: 235 | usesh: true 236 | copyback: false 237 | prepare: pkg install -y ruby devel/ruby-gems sysutils/rubygem-bundler devel/pkgconf devel/cmake shells/bash devel/re2 238 | run: ./scripts/test-gem-install --${{ matrix.sys }}-system-libraries 239 | 240 | test-vendored-and-system: 241 | needs: "build-cruby-gem" 242 | runs-on: ubuntu-latest 243 | steps: 244 | - uses: actions/checkout@v5 245 | - name: Install RE2 246 | run: sudo apt-get install -y libre2-dev 247 | - uses: ruby/setup-ruby@v1 248 | id: setup-ruby 249 | with: 250 | ruby-version: "3.4" 251 | bundler-cache: true 252 | - uses: actions/download-artifact@v5 253 | with: 254 | name: cruby-gem 255 | path: pkg 256 | - name: "Link libre2 into Ruby's lib directory" 257 | run: ln -s /usr/lib/x86_64-linux-gnu/libre2.so ${{ steps.setup-ruby.outputs.ruby-prefix }}/lib/libre2.so 258 | - run: ./scripts/test-gem-install 259 | env: 260 | BUNDLE_PATH: ${{ github.workspace }}/vendor/bundle 261 | 
262 | test-precompiled-aarch64-linux-gnu: 263 | needs: "precompile-aarch64-linux-gnu" 264 | strategy: 265 | fail-fast: false 266 | matrix: 267 | ruby: ["3.1", "3.2", "3.3", "3.4"] 268 | runs-on: ubuntu-latest 269 | steps: 270 | - uses: actions/checkout@v5 271 | - uses: actions/download-artifact@v5 272 | with: 273 | name: cruby-aarch64-linux-gnu-gem 274 | path: pkg 275 | - name: Enable execution of multi-architecture containers by QEMU 276 | run: docker run --rm --privileged multiarch/qemu-user-static --reset -p yes 277 | - run: | 278 | docker run --rm -v "$(pwd):/re2" -w /re2 \ 279 | --platform=linux/arm64 \ 280 | ruby:${{ matrix.ruby }} \ 281 | ./scripts/test-gem-install 282 | 283 | test-precompiled-aarch64-linux-musl: 284 | needs: "precompile-aarch64-linux-musl" 285 | strategy: 286 | fail-fast: false 287 | matrix: 288 | ruby: ["3.1", "3.2", "3.3", "3.4"] 289 | runs-on: ubuntu-latest 290 | steps: 291 | - uses: actions/checkout@v5 292 | - uses: actions/download-artifact@v5 293 | with: 294 | name: cruby-aarch64-linux-musl-gem 295 | path: pkg 296 | - name: Enable execution of multi-architecture containers by QEMU 297 | run: docker run --rm --privileged multiarch/qemu-user-static --reset -p yes 298 | - run: | 299 | docker run --rm -v "$(pwd):/re2" -w /re2 \ 300 | --platform=linux/arm64 \ 301 | ruby:${{ matrix.ruby }}-alpine \ 302 | /bin/sh -c "apk update && apk add libstdc++ && ./scripts/test-gem-install" 303 | 304 | test-precompiled-arm-linux-gnu: 305 | needs: "precompile-arm-linux-gnu" 306 | strategy: 307 | fail-fast: false 308 | matrix: 309 | ruby: ["3.1", "3.2", "3.3", "3.4"] 310 | runs-on: ubuntu-latest 311 | steps: 312 | - uses: actions/checkout@v5 313 | - uses: actions/download-artifact@v5 314 | with: 315 | name: cruby-arm-linux-gnu-gem 316 | path: pkg 317 | - name: enable execution of multi-architecture containers by qemu 318 | run: docker run --rm --privileged multiarch/qemu-user-static --reset -p yes 319 | - run: | 320 | docker run --rm -v "$(pwd):/re2" -w 
/re2 \ 321 | --platform=linux/arm/v7 \ 322 | ruby:${{ matrix.ruby }} \ 323 | ./scripts/test-gem-install 324 | 325 | test-precompiled-arm-linux-musl: 326 | needs: "precompile-arm-linux-musl" 327 | strategy: 328 | fail-fast: false 329 | matrix: 330 | ruby: ["3.1", "3.2", "3.3", "3.4"] 331 | runs-on: ubuntu-latest 332 | steps: 333 | - uses: actions/checkout@v5 334 | - uses: actions/download-artifact@v5 335 | with: 336 | name: cruby-arm-linux-musl-gem 337 | path: pkg 338 | - name: enable execution of multi-architecture containers by qemu 339 | run: docker run --rm --privileged multiarch/qemu-user-static --reset -p yes 340 | - run: | 341 | docker run --rm -v "$(pwd):/re2" -w /re2 \ 342 | --platform=linux/arm/v7 \ 343 | ruby:${{ matrix.ruby }}-alpine \ 344 | /bin/sh -c "apk update && apk add libstdc++ && ./scripts/test-gem-install" 345 | 346 | test-precompiled-x86_64-linux-gnu: 347 | needs: "precompile-x86_64-linux-gnu" 348 | strategy: 349 | fail-fast: false 350 | matrix: 351 | ruby: ["3.1", "3.2", "3.3", "3.4"] 352 | runs-on: ubuntu-latest 353 | steps: 354 | - uses: actions/checkout@v5 355 | - uses: ruby/setup-ruby@v1 356 | with: 357 | ruby-version: "${{ matrix.ruby }}" 358 | bundler-cache: true 359 | - uses: actions/download-artifact@v5 360 | with: 361 | name: cruby-x86_64-linux-gnu-gem 362 | path: pkg 363 | - run: ./scripts/test-gem-install 364 | env: 365 | BUNDLE_PATH: ${{ github.workspace }}/vendor/bundle 366 | 367 | test-precompiled-x86_64-linux-musl: 368 | needs: "precompile-x86_64-linux-musl" 369 | strategy: 370 | fail-fast: false 371 | matrix: 372 | ruby: ["3.1", "3.2", "3.3", "3.4"] 373 | runs-on: ubuntu-latest 374 | container: 375 | image: "ruby:${{ matrix.ruby }}-alpine" 376 | steps: 377 | - uses: actions/checkout@v5 378 | - uses: actions/download-artifact@v5 379 | with: 380 | name: cruby-x86_64-linux-musl-gem 381 | path: pkg 382 | - run: apk update && apk add libstdc++ 383 | - run: ./scripts/test-gem-install 384 | 385 | test-precompiled-arm64-darwin: 386 | 
needs: "precompile-arm64-darwin" 387 | strategy: 388 | fail-fast: false 389 | matrix: 390 | ruby: ["3.1", "3.2", "3.3", "3.4"] 391 | runs-on: "macos-14" 392 | steps: 393 | - uses: actions/checkout@v5 394 | - uses: ruby/setup-ruby@v1 395 | with: 396 | ruby-version: "${{ matrix.ruby }}" 397 | bundler-cache: true 398 | - uses: actions/download-artifact@v5 399 | with: 400 | name: cruby-arm64-darwin-gem 401 | path: pkg 402 | - run: ./scripts/test-gem-install 403 | env: 404 | BUNDLE_PATH: ${{ github.workspace }}/vendor/bundle 405 | 406 | test-precompiled-x86_64-darwin: 407 | needs: "precompile-x86_64-darwin" 408 | strategy: 409 | fail-fast: false 410 | matrix: 411 | ruby: ["3.1", "3.2", "3.3", "3.4"] 412 | runs-on: "macos-15-intel" 413 | steps: 414 | - uses: actions/checkout@v5 415 | - uses: ruby/setup-ruby@v1 416 | with: 417 | ruby-version: "${{ matrix.ruby }}" 418 | bundler-cache: true 419 | - uses: actions/download-artifact@v5 420 | with: 421 | name: cruby-x86_64-darwin-gem 422 | path: pkg 423 | - run: ./scripts/test-gem-install 424 | env: 425 | BUNDLE_PATH: ${{ github.workspace }}/vendor/bundle 426 | 427 | test-precompiled-x64-mingw-ucrt: 428 | needs: "precompile-x64-mingw-ucrt" 429 | strategy: 430 | fail-fast: false 431 | matrix: 432 | ruby: ["3.1", "3.2", "3.3", "3.4"] 433 | runs-on: windows-2022 434 | steps: 435 | - uses: actions/checkout@v5 436 | - uses: MSP-Greg/setup-ruby-pkgs@v1 437 | with: 438 | ruby-version: "${{ matrix.ruby }}" 439 | bundler-cache: true 440 | - uses: actions/download-artifact@v5 441 | with: 442 | name: cruby-x64-mingw-ucrt-gem 443 | path: pkg 444 | - run: ./scripts/test-gem-install 445 | shell: bash 446 | env: 447 | BUNDLE_PATH: ${{ github.workspace }}/vendor/bundle 448 | 449 | ruby-memcheck: 450 | runs-on: "ubuntu-latest" 451 | env: 452 | BUNDLE_WITH: memcheck 453 | steps: 454 | - uses: actions/checkout@v5 455 | - uses: actions/cache@v4 456 | with: 457 | path: ports/archives 458 | key: archives-ubuntu-${{ hashFiles('dependencies.yml') }} 
459 | - uses: ruby/setup-ruby-pkgs@v1 460 | with: 461 | ruby-version: "3.4" 462 | apt-get: valgrind 463 | bundler-cache: true 464 | - run: bundle exec rake spec:valgrind 465 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # re2 - safer regular expressions in Ruby 2 | 3 | Ruby bindings to [RE2][], a "fast, safe, thread-friendly alternative to 4 | backtracking regular expression engines like those used in PCRE, Perl, and 5 | Python". 6 | 7 | [![Build Status](https://github.com/mudge/re2/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/mudge/re2/actions) 8 | 9 | **Current version:** 2.22.0 10 | **Bundled RE2 version:** libre2.11 (2025-11-05) 11 | 12 | ```ruby 13 | RE2('h.*o').full_match?("hello") #=> true 14 | RE2('e').full_match?("hello") #=> false 15 | RE2('h.*o').partial_match?("hello") #=> true 16 | RE2('e').partial_match?("hello") #=> true 17 | RE2('(\w+):(\d+)').full_match("ruby:1234") 18 | #=> #<RE2::MatchData "ruby:1234" 1:"ruby" 2:"1234"> 19 | ``` 20 | 21 | ## Table of Contents 22 | 23 | * [Why RE2?](#why-re2) 24 | * [Usage](#usage) 25 | * [Compiling regular expressions](#compiling-regular-expressions) 26 | * [Matching interface](#matching-interface) 27 | * [Submatch extraction](#submatch-extraction) 28 | * [Scanning text incrementally](#scanning-text-incrementally) 29 | * [Searching simultaneously](#searching-simultaneously) 30 | * [Encoding](#encoding) 31 | * [Requirements](#requirements) 32 | * [Native gems](#native-gems) 33 | * [Verifying the gems](#verifying-the-gems) 34 | * [Installing the `ruby` platform gem](#installing-the-ruby-platform-gem) 35 | * [Using system libraries](#using-system-libraries) 36 | * [Thanks](#thanks) 37 | * [Contact](#contact) 38 | * [License](#license) 39 | * [Dependencies](#dependencies) 40 | 41 | ## Why RE2?
42 | 43 | While [recent 44 | versions](https://www.ruby-lang.org/en/news/2022/12/25/ruby-3-2-0-released/) of 45 | Ruby have improved defences against [regular expression denial of service 46 | (ReDoS) attacks](https://en.wikipedia.org/wiki/ReDoS), it is still possible for 47 | users to craft malicious patterns that take a long time to process by using 48 | syntactic features such as [back-references, lookaheads and possessive 49 | quantifiers](https://bugs.ruby-lang.org/issues/19104#note-3). RE2 aims to 50 | eliminate ReDoS by design: 51 | 52 | > **_Safety is RE2's raison d'être._** 53 | > 54 | > RE2 was designed and implemented with an explicit goal of being able to 55 | > handle regular expressions from untrusted users without risk. One of its 56 | > primary guarantees is that the match time is linear in the length of the 57 | > input string. It was also written with production concerns in mind: the 58 | > parser, the compiler and the execution engines limit their memory usage by 59 | > working within a configurable budget – failing gracefully when exhausted – 60 | > and they avoid stack overflow by eschewing recursion. 61 | 62 | — [Why RE2?](https://github.com/google/re2/wiki/WhyRE2) 63 | 64 | ## Usage 65 | 66 | Install re2 as a dependency: 67 | 68 | ```ruby 69 | # In your Gemfile 70 | gem "re2" 71 | 72 | # Or without Bundler 73 | gem install re2 74 | ``` 75 | 76 | Include in your code: 77 | 78 | ```ruby 79 | require "re2" 80 | ``` 81 | 82 | Full API documentation automatically generated from the latest version is 83 | available at https://mudge.name/re2/. 
84 | 85 | While re2 uses the same naming scheme as Ruby's built-in regular expression 86 | library (with [`Regexp`](https://mudge.name/re2/RE2/Regexp.html) and 87 | [`MatchData`](https://mudge.name/re2/RE2/MatchData.html)), its API is slightly 88 | different: 89 | 90 | ### Compiling regular expressions 91 | 92 | > [!WARNING] 93 | > RE2's regular expression syntax differs from PCRE and Ruby's built-in 94 | > [`Regexp`](https://docs.ruby-lang.org/en/3.2/Regexp.html) library, see the 95 | > [official syntax page](https://github.com/google/re2/wiki/Syntax) for more 96 | > details. 97 | 98 | The core class is [`RE2::Regexp`](https://mudge.name/re2/RE2/Regexp.html) which 99 | takes a regular expression as a string and compiles it internally into an `RE2` 100 | object. A global function `RE2` is available to concisely compile a new 101 | `RE2::Regexp`: 102 | 103 | ```ruby 104 | re = RE2('(\w+):(\d+)') 105 | #=> #<RE2::Regexp /(\w+):(\d+)/> 106 | re.ok? #=> true 107 | 108 | re = RE2('abc(def') 109 | re.ok? #=> false 110 | re.error #=> "missing ): abc(def" 111 | ``` 112 | 113 | > [!TIP] 114 | > Note the use of *single quotes* when passing the regular expression as 115 | > a string to `RE2` so that the backslashes aren't interpreted as escapes. 116 | 117 | When compiling a regular expression, an optional second argument can be used to change RE2's default options, e.g. stop logging syntax and execution errors to stderr with `log_errors`: 118 | 119 | ```ruby 120 | RE2('abc)def', log_errors: false) 121 | ``` 122 | 123 | See the API documentation for [`RE2::Regexp#initialize`](https://mudge.name/re2/RE2/Regexp.html#initialize-instance_method) for all the available options.
124 | 125 | ### Matching interface 126 | 127 | There are two main methods for matching: [`RE2::Regexp#full_match?`](https://mudge.name/re2/RE2/Regexp.html#full_match%3F-instance_method) requires the regular expression to match the entire input text, and [`RE2::Regexp#partial_match?`](https://mudge.name/re2/RE2/Regexp.html#partial_match%3F-instance_method) looks for a match for a substring of the input text, returning a boolean to indicate whether a match was successful or not. 128 | 129 | ```ruby 130 | RE2('h.*o').full_match?("hello") #=> true 131 | RE2('e').full_match?("hello") #=> false 132 | 133 | RE2('h.*o').partial_match?("hello") #=> true 134 | RE2('e').partial_match?("hello") #=> true 135 | ``` 136 | 137 | ### Submatch extraction 138 | 139 | > [!TIP] 140 | > Only extract the number of submatches you need as performance is improved 141 | > with fewer submatches (with the best performance when avoiding submatch 142 | > extraction altogether). 143 | 144 | Both matching methods have a second form that can extract submatches as [`RE2::MatchData`](https://mudge.name/re2/RE2/MatchData.html) objects: [`RE2::Regexp#full_match`](https://mudge.name/re2/RE2/Regexp.html#full_match-instance_method) and [`RE2::Regexp#partial_match`](https://mudge.name/re2/RE2/Regexp.html#partial_match-instance_method). 
145 | 146 | ```ruby 147 | m = RE2('(\w+):(\d+)').full_match("ruby:1234") 148 | #=> #<RE2::MatchData "ruby:1234" 1:"ruby" 2:"1234"> 149 | 150 | m[0] #=> "ruby:1234" 151 | m[1] #=> "ruby" 152 | m[2] #=> "1234" 153 | 154 | m = RE2('(\w+):(\d+)').full_match("r") 155 | #=> nil 156 | ``` 157 | 158 | `RE2::MatchData` supports retrieving submatches by numeric index or by name if present in the regular expression: 159 | 160 | ```ruby 161 | m = RE2('(?P<word>\w+):(?P<number>\d+)').full_match("ruby:1234") 162 | #=> #<RE2::MatchData "ruby:1234" 1:"ruby" 2:"1234"> 163 | 164 | m["word"] #=> "ruby" 165 | m["number"] #=> "1234" 166 | ``` 167 | 168 | They can also be used with Ruby's [pattern matching](https://docs.ruby-lang.org/en/3.2/syntax/pattern_matching_rdoc.html): 169 | 170 | ```ruby 171 | case RE2('(\w+):(\d+)').full_match("ruby:1234") 172 | in [word, number] 173 | puts "Word: #{word}, Number: #{number}" 174 | else 175 | puts "No match" 176 | end 177 | # Word: ruby, Number: 1234 178 | 179 | case RE2('(?P<word>\w+):(?P<number>\d+)').full_match("ruby:1234") 180 | in word:, number: 181 | puts "Word: #{word}, Number: #{number}" 182 | else 183 | puts "No match" 184 | end 185 | # Word: ruby, Number: 1234 186 | ``` 187 | 188 | By default, both `full_match` and `partial_match` will extract all submatches into the `RE2::MatchData` based on the number of capturing groups in the regular expression. This can be changed by passing an optional second argument when matching: 189 | 190 | ```ruby 191 | m = RE2('(\w+):(\d+)').full_match("ruby:1234", submatches: 1) 192 | #=> #<RE2::MatchData "ruby:1234" 1:"ruby"> 193 | ``` 194 | 195 | > [!WARNING] 196 | > If the regular expression has no capturing groups or you pass `submatches: 197 | > 0`, the matching method will behave like its `full_match?` or 198 | > `partial_match?` form and only return `true` or `false` rather than 199 | > `RE2::MatchData`.
200 | 201 | ### Scanning text incrementally 202 | 203 | If you want to repeatedly match regular expressions from the start of some input text, you can use [`RE2::Regexp#scan`](https://mudge.name/re2/RE2/Regexp.html#scan-instance_method) to return an `Enumerable` [`RE2::Scanner`](https://mudge.name/re2/RE2/Scanner.html) object which will lazily consume matches as you iterate over it: 204 | 205 | ```ruby 206 | scanner = RE2('(\w+)').scan(" one two three 4") 207 | scanner.each do |match| 208 | puts match.inspect 209 | end 210 | # ["one"] 211 | # ["two"] 212 | # ["three"] 213 | # ["4"] 214 | ``` 215 | 216 | ### Searching simultaneously 217 | 218 | [`RE2::Set`](https://mudge.name/re2/RE2/Set.html) represents a collection of 219 | regular expressions that can be searched for simultaneously. Calling 220 | [`RE2::Set#add`](https://mudge.name/re2/RE2/Set.html#add-instance_method) with 221 | a regular expression will return the integer index at which it is stored within 222 | the set. After all patterns have been added, the set can be compiled using 223 | [`RE2::Set#compile`](https://mudge.name/re2/RE2/Set.html#compile-instance_method), 224 | and then 225 | [`RE2::Set#match`](https://mudge.name/re2/RE2/Set.html#match-instance_method) 226 | will return an array containing the indices of all the patterns that matched. 227 | [`RE2::Set#size`](https://mudge.name/re2/RE2/Set.html#size-instance_method) 228 | will return the number of patterns in the set. 
229 | 230 | ```ruby 231 | set = RE2::Set.new 232 | set.add("abc") #=> 0 233 | set.add("def") #=> 1 234 | set.add("ghi") #=> 2 235 | set.size #=> 3 236 | set.compile #=> true 237 | set.match("abcdefghi") #=> [0, 1, 2] 238 | set.match("ghidefabc") #=> [2, 1, 0] 239 | ``` 240 | 241 | ### Encoding 242 | 243 | > [!WARNING] 244 | > Note RE2 only supports UTF-8 and ISO-8859-1 encoding so strings will be 245 | > returned in UTF-8 by default or ISO-8859-1 if the `:utf8` option for the 246 | > `RE2::Regexp` is set to `false` (any other encoding's behaviour is undefined). 247 | 248 | For backward compatibility: re2 won't automatically convert string inputs to 249 | the right encoding so this is the responsibility of the caller, e.g. 250 | 251 | ```ruby 252 | # By default, RE2 will process patterns and text as UTF-8 253 | RE2(non_utf8_pattern.encode("UTF-8")).match(non_utf8_text.encode("UTF-8")) 254 | 255 | # If the :utf8 option is false, RE2 will process patterns and text as ISO-8859-1 256 | RE2(non_latin1_pattern.encode("ISO-8859-1"), utf8: false).match(non_latin1_text.encode("ISO-8859-1")) 257 | ``` 258 | 259 | ## Requirements 260 | 261 | This gem requires the following to run: 262 | 263 | * [Ruby](https://www.ruby-lang.org/en/) 3.1 to 3.4 264 | 265 | It supports the following RE2 ABI versions: 266 | 267 | * libre2.0 (prior to release 2020-03-02) to libre2.11 (2023-07-01 to 2025-11-05) 268 | 269 | ### Native gems 270 | 271 | Where possible, a pre-compiled native gem will be provided for the following platforms: 272 | 273 | * Linux 274 | * `aarch64-linux`, `arm-linux`, and `x86_64-linux` (requires [glibc](https://www.gnu.org/software/libc/) 2.29+, RubyGems 3.3.22+ and Bundler 2.3.21+) 275 | * [musl](https://musl.libc.org/)-based systems such as [Alpine](https://alpinelinux.org) are supported with Bundler 2.5.6+ 276 | * macOS 10.14+ `x86_64-darwin` and `arm64-darwin` 277 | * Windows 2022+ `x64-mingw-ucrt` 278 | 279 | ### Verifying the gems 280 | 281 | SHA256 checksums are 
included in the [release notes](https://github.com/mudge/re2/releases) for each version and can be checked with `sha256sum`, e.g. 282 | 283 | ```console 284 | $ gem fetch re2 -v 2.18.0 285 | Fetching re2-2.18.0-arm64-darwin.gem 286 | Downloaded re2-2.18.0-arm64-darwin 287 | $ sha256sum re2-2.18.0-arm64-darwin.gem 288 | 953063f0491420163d3484ed256fe2ff616c777ec66ee20aa5ec1a1a1fc39ff5 re2-2.18.0-arm64-darwin.gem 289 | ``` 290 | 291 | [GPG](https://www.gnupg.org/) signatures are attached to each release (the assets ending in `.sig`) and can be verified if you import [our signing key `0x39AC3530070E0F75`](https://mudge.name/39AC3530070E0F75.asc) (or fetch it from a public keyserver, e.g. `gpg --keyserver keyserver.ubuntu.com --recv-key 0x39AC3530070E0F75`): 292 | 293 | ```console 294 | $ gpg --verify re2-2.18.0-arm64-darwin.gem.sig re2-2.18.0-arm64-darwin.gem 295 | gpg: Signature made Sun 3 Aug 11:02:26 2025 BST 296 | gpg: using RSA key 702609D9C790F45B577D7BEC39AC3530070E0F75 297 | gpg: Good signature from "Paul Mucur " [unknown] 298 | gpg: aka "Paul Mucur " [unknown] 299 | gpg: WARNING: This key is not certified with a trusted signature! 300 | gpg: There is no indication that the signature belongs to the owner. 301 | Primary key fingerprint: 7026 09D9 C790 F45B 577D 7BEC 39AC 3530 070E 0F75 302 | ``` 303 | 304 | The fingerprint should be as shown above or you can independently verify it with the ones shown in the footer of https://mudge.name. 305 | 306 | ### Installing the `ruby` platform gem 307 | 308 | > [!WARNING] 309 | > We strongly recommend using the native gems where possible to avoid the need 310 | > for compiling the C++ extension and its dependencies which will take longer 311 | > and be less reliable. 
312 | 313 | If you wish to compile the gem, you will need to explicitly install the `ruby` platform gem: 314 | 315 | ```ruby 316 | # In your Gemfile with Bundler 2.3.18+ 317 | gem "re2", force_ruby_platform: true 318 | 319 | # With Bundler 2.1+ 320 | bundle config set force_ruby_platform true 321 | 322 | # With older versions of Bundler 323 | bundle config force_ruby_platform true 324 | 325 | # Without Bundler 326 | gem install re2 --platform=ruby 327 | ``` 328 | 329 | You will need a full compiler toolchain for compiling Ruby C extensions (see 330 | [Nokogiri's "The Compiler 331 | Toolchain"](https://nokogiri.org/tutorials/installing_nokogiri.html#appendix-a-the-compiler-toolchain)) 332 | plus the toolchain required for compiling the vendored version of RE2 and its 333 | dependency [Abseil][] which includes [CMake](https://cmake.org), a compiler 334 | with C++17 support such as [clang](http://clang.llvm.org/) 5 or 335 | [gcc](https://gcc.gnu.org/) 8 and a recent version of 336 | [pkg-config](https://www.freedesktop.org/wiki/Software/pkg-config/). On 337 | Windows, you'll also need pkgconf 2.1.0+ to avoid [`undefined reference` 338 | errors](https://github.com/pkgconf/pkgconf/issues/322) when attempting to 339 | compile Abseil. 340 | 341 | ### Using system libraries 342 | 343 | If you already have RE2 installed, you can instruct the gem not to use its own vendored version: 344 | 345 | ```ruby 346 | gem install re2 --platform=ruby -- --enable-system-libraries 347 | 348 | # If RE2 is not installed in /usr/local, /usr, or /opt/homebrew: 349 | gem install re2 --platform=ruby -- --enable-system-libraries --with-re2-dir=/path/to/re2/prefix 350 | ``` 351 | 352 | Alternatively, you can set the `RE2_USE_SYSTEM_LIBRARIES` environment variable instead of passing `--enable-system-libraries` to the `gem` command. 
353 | 354 | 355 | ## Thanks 356 | 357 | * Thanks to [Jason Woods](https://github.com/driskell) who contributed the 358 | original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`. 359 | * Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed 360 | C++11 support. 361 | * Thanks to [Stan Hu](https://github.com/stanhu) for reporting a bug with empty 362 | patterns and `RE2::Regexp#scan`, contributing support for libre2.11 363 | (2023-07-01) and for vendoring RE2 and abseil and compiling native gems in 364 | 2.0. 365 | * Thanks to [Sebastian Reitenbach](https://github.com/buzzdeee) for reporting 366 | the deprecation and removal of the `utf8` encoding option in RE2. 367 | * Thanks to [Sergio Medina](https://github.com/serch) for reporting a bug when 368 | using `RE2::Scanner#scan` with an invalid regular expression. 369 | * Thanks to [Pritam Baral](https://github.com/pritambaral) for contributing the 370 | initial support for `RE2::Set`. 371 | * Thanks to [Mike Dalessio](https://github.com/flavorjones) for reviewing the 372 | precompilation of native gems in 2.0. 373 | * Thanks to [Peter Zhu](https://github.com/peterzhu2118) for 374 | [ruby_memcheck](https://github.com/Shopify/ruby_memcheck) and helping find 375 | the memory leaks fixed in 2.1.3. 376 | * Thanks to [Jean Boussier](https://github.com/byroot) for contributing the 377 | switch to Ruby's `TypedData` API and the resulting garbage collection 378 | improvements in 2.4.0. 379 | * Thanks to [Manuel Jacob](https://github.com/manueljacob) for reporting a bug 380 | when passing strings with null bytes. 381 | * Thanks to [Maciej Gajewski](https://github.com/konieczkow) for helping 382 | confirm issues with GC compaction and mutable strings. 383 | 384 | ## Contact 385 | 386 | All issues and suggestions should go to [GitHub Issues](https://github.com/mudge/re2/issues). 
387 | 388 | ## License 389 | 390 | This library is licensed under the BSD 3-Clause License, see `LICENSE.txt`. 391 | 392 | Copyright © 2010, Paul Mucur. 393 | 394 | ### Dependencies 395 | 396 | The source code of [RE2][] is distributed in the `ruby` platform gem. This code is licensed under the BSD 3-Clause License, see `LICENSE-DEPENDENCIES.txt`. 397 | 398 | The source code of [Abseil][] is distributed in the `ruby` platform gem. This code is licensed under the Apache License 2.0, see `LICENSE-DEPENDENCIES.txt`. 399 | 400 | [RE2]: https://github.com/google/re2 401 | [Abseil]: https://abseil.io 402 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. This 3 | project adheres to [Semantic Versioning](http://semver.org/). 4 | 5 | Older versions are detailed as [GitHub 6 | releases](https://github.com/mudge/re2/releases) for this project. 7 | 8 | ## [2.22.0] - 2025-11-30 9 | ### Changed 10 | - RE2::Set#add now includes the full error message from the underlying 11 | RE2 library if it fails rather than only the first 100 characters. 12 | - Matching no longer allocates an RE2::MatchData object if there are no 13 | matches. 14 | - Matching now better guards against integer overflows when specifying 15 | the number of matches. 16 | - Be more defensive against allocations failing. 17 | 18 | ## [2.21.0] - 2025-11-10 19 | ### Fixed 20 | - Fixed bugs in RE2::Scanner and RE2::MatchData after Ruby's GC compaction 21 | where references to string data would no longer be valid. 22 | 23 | ### Changed 24 | - Strings given to RE2::Regexp#scan are now frozen (if they aren't already) so 25 | that mutating a string after creating an RE2::Scanner does not affect its 26 | matches. 27 | - Upgrade the bundled version of RE2 to 2025-11-05. 
28 | 29 | ## [2.20.0] - 2025-09-26 30 | ### Removed 31 | - Compilation on macOS now targets macOS 10.14 so older versions of macOS are 32 | no longer supported. 33 | - Remove support and native gems for Ruby 2.6, 2.7, and 3.0 (as they account 34 | for less than 1% of downloads in the past 60 days). 35 | - Remove native gems for 32-bit platforms, specifically x86-linux-gnu, 36 | x86-linux-musl, and x86-mingw32. 37 | 38 | ### Changed 39 | - Upgrade the bundled version of Abseil to 20250814.1. Note this now requires 40 | a C++17 compiler. 41 | - Upgrade the bundled version of RE2 to 2025-08-12. Note this now requires 42 | a C++17 compiler. 43 | 44 | ## [2.19.1] - 2025-11-11 45 | ### Changed 46 | - Backport GC compaction fixes and string freezing inside RE2::Scanner from 47 | 2.21.0 for users of Ruby older than 3.1. 48 | 49 | ## [2.19.0] - 2025-08-07 50 | ### Changed 51 | - Upgrade the bundled version of RE2 to 2025-08-05. Note this now requires 52 | CMake 3.22 or newer when compiling yourself. 53 | 54 | ## [2.18.0] - 2025-08-03 55 | ### Changed 56 | - Upgrade the bundled version of RE2 to 2025-07-22. 57 | 58 | ## [2.17.0] - 2025-07-18 59 | ### Changed 60 | - Upgrade the bundled version of RE2 to 2025-07-17. 61 | 62 | ## [2.16.0] - 2025-06-27 63 | ### Changed 64 | - Upgrade the bundled version of RE2 to 2025-06-26b. 65 | - Upgrade the bundled version of Abseil to 20250127.1. 66 | 67 | ### Added 68 | - Added `RE2::Set#size` to return the number of patterns in a set. 69 | 70 | ## [2.15.0] - 2025-01-06 71 | ### Added 72 | - Add support for Ruby 3.4 in precompiled, native gems. 73 | - Restored support for Ruby 2.6, 2.7, and 3.0 after dropping them in 2.15.0.rc1. 74 | 75 | ### Changed 76 | - Provide separate precompiled, native gems for GNU and Musl. 77 | - Require glibc 2.29+ for x86-linux-gnu and x86_64-linux-gnu (and recommend 78 | RubyGems 3.3.22+ and Bundler 2.3.21+). 
79 | 80 | ## [2.15.0.rc1] - 2024-12-16 81 | ### Added 82 | - Add support for Ruby 3.4.0-rc1 in precompiled, native gems. 83 | 84 | ### Changed 85 | - Provide separate precompiled, native gems for GNU and Musl. 86 | - Require glibc 2.29+ for x86-linux-gnu and x86_64-linux-gnu (and recommend 87 | RubyGems 3.3.22+ and Bundler 2.3.21+). 88 | 89 | ### Removed 90 | - Drop support for Ruby versions older than 3.1 as they do not ship with a 91 | version of RubyGems new enough to handle the new Musl gems. 92 | 93 | ## [2.14.0] - 2024-08-02 94 | ### Changed 95 | - Upgrade the bundled version of Abseil to 20240722.0, removing the need to 96 | patch it for CMake 3.30.0. 97 | 98 | ## [2.13.2] - 2024-07-16 99 | ### Fixed 100 | - Patch the bundled Abseil to work with CMake 3.30.0. 101 | 102 | ## [2.13.1] - 2024-07-01 103 | ### Changed 104 | - Upgrade the bundled version of RE2 to 2024-07-02. 105 | 106 | ## [2.13.0] - 2024-06-30 107 | ### Changed 108 | - Upgrade the bundled version of RE2 to 2024-07-01. 109 | 110 | ## [2.12.0] - 2024-06-02 111 | ### Changed 112 | - Upgrade the bundled version of RE2 to 2024-06-01. 113 | - Upgrade MiniPortile to 2.8.7. 114 | 115 | ## [2.11.0] - 2024-04-30 116 | ### Changed 117 | - Upgrade the bundled version of RE2 to 2024-05-01. 118 | 119 | ## [2.10.0] - 2024-04-01 120 | ### Changed 121 | - Upgrade the bundled version of RE2 to 2024-04-01. 122 | - Refactor how we statically link RE2 into the gem to also include any 123 | preprocessor and compiler flags specified by its pkg-config file. 124 | - Ensure the gem compiles when frozen strings are enabled. 125 | 126 | ## [2.9.0] - 2024-02-29 127 | ### Changed 128 | - Upgrade the bundled version of RE2 to 2024-03-01. 129 | - Upgrade the bundled version of Abseil to 20240116.1. 130 | 131 | ## [2.8.0] - 2024-01-31 132 | ### Changed 133 | - Upgrade the bundled version of RE2 to 2024-02-01. 134 | - Upgrade the bundled version of Abseil to 20240116.0. 
135 | 136 | ## [2.7.0] - 2024-01-20 137 | ### Added 138 | - Support strings with null bytes as patterns and input throughout the library. 139 | Note this means strings with null bytes will no longer be truncated at the 140 | first null byte. Thanks to @manueljacob for reporting this bug. 141 | 142 | ## [2.6.0] - 2023-12-27 143 | ### Added 144 | - Add precompiled native gem for Ruby 3.3.0. 145 | 146 | ## [2.6.0.rc1] - 2023-12-13 147 | ### Added 148 | - Add precompiled native gem for Ruby 3.3.0-rc1. 149 | 150 | ## [2.5.0] - 2023-12-05 151 | ### Added 152 | - Add a new matching interface that more closely resembles the underlying RE2 153 | library's: `RE2::Regexp#full_match?`, `RE2::Regexp#partial_match?` for 154 | matching without extracting submatches and `RE2::Regexp#full_match` and 155 | `RE2::Regexp#partial_match` for extracting submatches. The latter two are 156 | built on the existing `RE2::Regexp#match` method which now exposes more of 157 | RE2's general matching interface by accepting new `anchor`, `startpos` and 158 | `endpos` (where supported) arguments. 159 | 160 | ### Changed 161 | - Overhaul much of the documentation to better explain the library and more 162 | closely match the underlying RE2 library's interface, primarily promoting 163 | the new full and partial matching APIs. 164 | - Remove workarounds for building Abseil on Windows now that pkgconf 2.1.0 has 165 | been released. 166 | 167 | ## [2.4.3] - 2023-11-22 168 | ### Fixed 169 | - Restore support for compiling the gem with gcc 7.3 (as used on Amazon Linux 170 | 2), fixing the "non-trivial designated initializers not supported" error 171 | introduced in version 2.4.0. Thanks to @stanhu for reporting this bug. 172 | 173 | ## [2.4.2] - 2023-11-17 174 | ### Changed 175 | - Improve the reported consuming memory size of RE2::Regexp, RE2::Set and 176 | RE2::Scanner objects. 
177 | 178 | ## [2.4.1] - 2023-11-12 179 | ### Changed 180 | - Improve the reported consuming memory size of RE2::MatchData objects. Thanks 181 | to @byroot again for suggesting a better way to calculate this. 182 | 183 | ## [2.4.0] - 2023-11-11 184 | ### Added 185 | - Improve garbage collection and support compaction in newer versions of Ruby. 186 | Thanks to @byroot for contributing this by switching to Ruby's TypedData API. 187 | 188 | ### Changed 189 | - No longer needlessly return copies of frozen strings passed to 190 | `RE2::Regexp#match` and return the original instead. 191 | 192 | ## [2.3.0] - 2023-10-31 193 | ### Changed 194 | - Upgrade the bundled version of RE2 to 2023-11-01. 195 | 196 | ## [2.2.0] - 2023-10-23 197 | ### Changed 198 | - Upgrade the bundled version of Abseil to 20230802.1. 199 | - Upgrade MiniPortile to 2.8.5 which significantly reduces the size of the 200 | precompiled native gems due to its switch to build Abseil in Release mode. 201 | 202 | ## [2.1.3] - 2023-09-23 203 | ### Fixed 204 | - Fixed memory leaks reported by 205 | [ruby_memcheck](https://github.com/Shopify/ruby_memcheck) when rewinding an 206 | `RE2::Scanner` and when passing invalid input to `RE2::Regexp#scan`, 207 | `RE2::Regexp#initialize`, `RE2.Replace`, `RE2.GlobalReplace` and 208 | `RE2::Set#add`. Thanks to @peterzhu2118 for maintaining ruby_memcheck and 209 | their assistance in finding the source of these leaks. 210 | 211 | ## [2.1.2] - 2023-09-20 212 | ### Fixed 213 | - Removed use of a C++17 extension from the gem to restore support for users 214 | compiling against system libraries with older C++ compilers. 215 | 216 | ## [2.1.1] - 2023-09-18 217 | ### Fixed 218 | - Worked around a deprecation warning re the use of ANYARGS when compiling 219 | against recent Ruby versions. 220 | 221 | ### Changed 222 | - Various internal C++ style improvements to reduce unnecessary memory usage 223 | when accessing named capturing groups. 
224 | 225 | ## [2.1.0] - 2023-09-16 226 | ### Fixed 227 | - As RE2 only supports UTF-8 and ISO-8859-1 encodings, fix an inconsistency 228 | when using string patterns with `RE2.Replace` and `RE2.GlobalReplace` where 229 | the result would match the encoding of the pattern rather than UTF-8 which is 230 | what RE2 will use. This behaviour and limitation is now documented on all 231 | APIs that produce string results. 232 | 233 | ### Added 234 | - The `RE2::Set` API now accepts anything that can be coerced to a string where 235 | previously only strings were permitted, e.g. `RE2::Set#add`, 236 | `RE2::Set#match`. 237 | - Added the licences of all vendored dependencies: RE2 and Abseil. 238 | - Document the behaviour of `RE2::Regexp#match` when given a pattern with no 239 | capturing groups: it will return true or false whether there was a match or 240 | not rather than a `RE2::MatchData` instance. 241 | 242 | ## [2.0.0] - 2023-09-13 243 | ### Added 244 | - The gem now comes bundled with the underlying RE2 library and its dependency, 245 | Abseil. Installing the gem will compile those dependencies automatically. As 246 | this can take a while, precompiled native gems are available for Linux, 247 | Windows and macOS. (Thanks to Stan Hu for contributing this.) 248 | 249 | ### Changed 250 | - By default, the gem will use its own bundled version of RE2 rather than 251 | looking for the library on the system. To opt back into that behaviour, pass 252 | `--enable-system-libraries` when installing. (Thanks to Stan Hu for 253 | contributing this.) 254 | 255 | ### Removed 256 | - Due to the new dependency on MiniPortile2, the gem no longer supports Ruby 257 | versions older than 2.6. 258 | 259 | ## [2.0.0.beta2] - 2023-09-10 260 | ### Added 261 | - Restored support for Ruby 2.6. 262 | 263 | ### Changed 264 | - Upgraded the vendored version of RE2 to 2023-09-01. 
265 | 266 | ### Fixed 267 | - When using the Ruby platform gem (skipping the precompiled, native gems) and 268 | opting out of the vendored dependencies in favour of a system install of 269 | RE2 with `--enable-system-libraries`, the gem will now compile correctly 270 | against the library if it is installed in Ruby's `exec_prefix` directory. 271 | (Thanks to Stan Hu.) 272 | 273 | ## [2.0.0.beta1] - 2023-09-08 274 | ### Added 275 | - The gem now comes bundled with the underlying RE2 library and its dependency, 276 | Abseil. Installing the gem will compile those dependencies automatically. As 277 | this can take a while, precompiled native gems are available for Linux, 278 | Windows and macOS. (Thanks to Stan Hu for contributing this.) 279 | 280 | ### Changed 281 | - By default, the gem will use its own bundled version of RE2 rather than 282 | looking for the library on the system. To opt back into that behaviour, pass 283 | `--enable-system-libraries` when installing. (Thanks to Stan Hu for 284 | contributing this.) 285 | 286 | ### Removed 287 | - Due to the new dependency on MiniPortile2, the gem no longer supports Ruby 288 | versions older than 2.7. 
289 | 290 | ## [1.7.0] - 2023-07-04 291 | ### Added 292 | - Added support for libre2.11 (thanks to Stan Hu for contributing this) 293 | 294 | ## [1.6.0] - 2022-10-22 295 | ### Added 296 | - Added RE2::MatchData#deconstruct and RE2::MatchData#deconstruct_keys so they 297 | can be used with Ruby pattern matching 298 | 299 | ## [1.5.0] - 2022-10-16 300 | ### Added 301 | - Added RE2::Set for simultaneously searching a collection of patterns 302 | 303 | ## [1.4.0] - 2021-03-29 304 | ### Fixed 305 | - Fixed a crash when using RE2::Scanner#scan with an invalid regular expression 306 | (thanks to Sergio Medina for reporting this) 307 | - Fixed RE2::Regexp#match raising a NoMemoryError instead of an ArgumentError 308 | when given a negative number of matches 309 | 310 | ## [1.3.0] - 2021-03-12 311 | ### Added 312 | - Add Homebrew's prefix on Apple Silicon and /usr as fallback locations 313 | searched when looking for the underlying re2 library if not found in 314 | /usr/local 315 | 316 | ## [1.2.0] - 2020-04-18 317 | ### Changed 318 | - Stop using the now-deprecated utf8 API and re-implement it in terms of the 319 | encoding API in order to support both existing and upcoming releases of re2 320 | 321 | ## [1.1.1] - 2017-07-24 322 | ### Fixed 323 | - Ensure that any compilers passed via the CC and CXX environment variables are 324 | used throughout the compilation process including both the final Makefile and 325 | any preceding checks 326 | 327 | ## [1.1.0] - 2017-07-23 328 | ### Fixed 329 | - Fixed RE2::Scanner not advancing when calling scan with an empty pattern or 330 | pattern with empty capturing groups (thanks to Stan Hu for reporting this) 331 | 332 | ### Added 333 | - Added eof? 
to RE2::Scanner to detect when the input has been fully consumed by 334 | matches (used internally by the fixed scan) 335 | - Added support for specifying the C and C++ compiler using the standard CC and 336 | CXX environment variables when installing the gem 337 | 338 | ## [1.0.0] - 2016-11-14 339 | ### Added 340 | - Added support for versions of the underlying re2 library that require C++11 341 | compiler support 342 | 343 | ## [0.7.0] - 2015-01-25 344 | ### Added 345 | - Added RE2::MatchData#begin and RE2::MatchData#end for finding the offset of 346 | matches in your searches 347 | 348 | ## [0.6.1] - 2014-10-25 349 | ### Fixed 350 | - Fix crash when non-strings are passed to match 351 | 352 | ## [0.6.0] - 2014-02-01 353 | ### Added 354 | - Added RE2::Regexp#scan which returns a new RE2::Scanner instance for 355 | incrementally scanning a string for matches 356 | 357 | ### Removed 358 | - Methods that altered strings in place are gone: re2_sub! and re2_gsub! 359 | 360 | ### Changed 361 | - RE2.Replace and RE2.GlobalReplace now return new strings rather than modifying 362 | their input 363 | 364 | ### Fixed 365 | - In Ruby 1.9.2 and later, re2 will now set the correct encoding for strings 366 | 367 | [2.22.0]: https://github.com/mudge/re2/releases/tag/v2.22.0 368 | [2.21.0]: https://github.com/mudge/re2/releases/tag/v2.21.0 369 | [2.20.0]: https://github.com/mudge/re2/releases/tag/v2.20.0 370 | [2.19.1]: https://github.com/mudge/re2/releases/tag/v2.19.1 371 | [2.19.0]: https://github.com/mudge/re2/releases/tag/v2.19.0 372 | [2.18.0]: https://github.com/mudge/re2/releases/tag/v2.18.0 373 | [2.17.0]: https://github.com/mudge/re2/releases/tag/v2.17.0 374 | [2.16.0]: https://github.com/mudge/re2/releases/tag/v2.16.0 375 | [2.15.0]: https://github.com/mudge/re2/releases/tag/v2.15.0 376 | [2.15.0.rc1]: https://github.com/mudge/re2/releases/tag/v2.15.0.rc1 377 | [2.14.0]: https://github.com/mudge/re2/releases/tag/v2.14.0 378 | [2.13.2]: 
https://github.com/mudge/re2/releases/tag/v2.13.2 379 | [2.13.1]: https://github.com/mudge/re2/releases/tag/v2.13.1 380 | [2.13.0]: https://github.com/mudge/re2/releases/tag/v2.13.0 381 | [2.12.0]: https://github.com/mudge/re2/releases/tag/v2.12.0 382 | [2.11.0]: https://github.com/mudge/re2/releases/tag/v2.11.0 383 | [2.10.0]: https://github.com/mudge/re2/releases/tag/v2.10.0 384 | [2.9.0]: https://github.com/mudge/re2/releases/tag/v2.9.0 385 | [2.8.0]: https://github.com/mudge/re2/releases/tag/v2.8.0 386 | [2.7.0]: https://github.com/mudge/re2/releases/tag/v2.7.0 387 | [2.6.0]: https://github.com/mudge/re2/releases/tag/v2.6.0 388 | [2.6.0.rc1]: https://github.com/mudge/re2/releases/tag/v2.6.0.rc1 389 | [2.5.0]: https://github.com/mudge/re2/releases/tag/v2.5.0 390 | [2.4.3]: https://github.com/mudge/re2/releases/tag/v2.4.3 391 | [2.4.2]: https://github.com/mudge/re2/releases/tag/v2.4.2 392 | [2.4.1]: https://github.com/mudge/re2/releases/tag/v2.4.1 393 | [2.4.0]: https://github.com/mudge/re2/releases/tag/v2.4.0 394 | [2.3.0]: https://github.com/mudge/re2/releases/tag/v2.3.0 395 | [2.2.0]: https://github.com/mudge/re2/releases/tag/v2.2.0 396 | [2.1.3]: https://github.com/mudge/re2/releases/tag/v2.1.3 397 | [2.1.2]: https://github.com/mudge/re2/releases/tag/v2.1.2 398 | [2.1.1]: https://github.com/mudge/re2/releases/tag/v2.1.1 399 | [2.1.0]: https://github.com/mudge/re2/releases/tag/v2.1.0 400 | [2.0.0]: https://github.com/mudge/re2/releases/tag/v2.0.0 401 | [2.0.0.beta2]: https://github.com/mudge/re2/releases/tag/v2.0.0.beta2 402 | [2.0.0.beta1]: https://github.com/mudge/re2/releases/tag/v2.0.0.beta1 403 | [1.7.0]: https://github.com/mudge/re2/releases/tag/v1.7.0 404 | [1.6.0]: https://github.com/mudge/re2/releases/tag/v1.6.0 405 | [1.5.0]: https://github.com/mudge/re2/releases/tag/v1.5.0 406 | [1.4.0]: https://github.com/mudge/re2/releases/tag/v1.4.0 407 | [1.3.0]: https://github.com/mudge/re2/releases/tag/v1.3.0 408 | [1.2.0]: 
https://github.com/mudge/re2/releases/tag/v1.2.0 409 | [1.1.1]: https://github.com/mudge/re2/releases/tag/v1.1.1 410 | [1.1.0]: https://github.com/mudge/re2/releases/tag/v1.1.0 411 | [1.0.0]: https://github.com/mudge/re2/releases/tag/v1.0.0 412 | [0.7.0]: https://github.com/mudge/re2/releases/tag/v0.7.0 413 | [0.6.1]: https://github.com/mudge/re2/releases/tag/v0.6.1 414 | [0.6.0]: https://github.com/mudge/re2/releases/tag/v0.6.0 415 | -------------------------------------------------------------------------------- /spec/re2/regexp_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "rbconfig/sizeof" 4 | 5 | RSpec.describe RE2::Regexp do 6 | INT_MAX = 2**(RbConfig::SIZEOF.fetch("int") * 8 - 1) - 1 7 | 8 | describe "#initialize" do 9 | it "returns an instance given only a pattern" do 10 | re = RE2::Regexp.new('woo') 11 | 12 | expect(re).to be_a(RE2::Regexp) 13 | end 14 | 15 | it "returns an instance given a pattern and options" do 16 | re = RE2::Regexp.new('woo', case_sensitive: false) 17 | 18 | expect(re).to be_a(RE2::Regexp) 19 | end 20 | 21 | it "accepts patterns containing null bytes" do 22 | re = RE2::Regexp.new("a\0b") 23 | 24 | expect(re.pattern).to eq("a\0b") 25 | end 26 | 27 | it "raises an error if given an inappropriate type" do 28 | expect { RE2::Regexp.new(nil) }.to raise_error(TypeError) 29 | end 30 | 31 | it "allows invalid patterns to be created" do 32 | re = RE2::Regexp.new('???', log_errors: false) 33 | 34 | expect(re).to be_a(RE2::Regexp) 35 | end 36 | 37 | it "supports passing something that can be coerced to a String as input" do 38 | re = RE2::Regexp.new(StringLike.new('w(o)(o)')) 39 | 40 | expect(re).to be_a(RE2::Regexp) 41 | end 42 | end 43 | 44 | describe ".compile" do 45 | it "returns an instance given only a pattern" do 46 | re = RE2::Regexp.compile('woo') 47 | 48 | expect(re).to be_a(RE2::Regexp) 49 | end 50 | 51 | it "returns an instance given a pattern and options" do 52 | re = RE2::Regexp.compile('woo', 
case_sensitive: false) 53 | expect(re).to be_a(RE2::Regexp) 54 | end 55 | 56 | it "accepts patterns containing null bytes" do 57 | re = RE2::Regexp.compile("a\0b") 58 | 59 | expect(re.pattern).to eq("a\0b") 60 | end 61 | 62 | it "raises an error if given an inappropriate type" do 63 | expect { RE2::Regexp.compile(nil) }.to raise_error(TypeError) 64 | end 65 | 66 | it "allows invalid patterns to be created" do 67 | re = RE2::Regexp.compile('???', log_errors: false) 68 | 69 | expect(re).to be_a(RE2::Regexp) 70 | end 71 | 72 | it "supports passing something that can be coerced to a String as input" do 73 | re = RE2::Regexp.compile(StringLike.new('w(o)(o)')) 74 | 75 | expect(re).to be_a(RE2::Regexp) 76 | end 77 | end 78 | 79 | describe "#options" do 80 | it "returns a hash of options" do 81 | options = RE2::Regexp.new('woo').options 82 | expect(options).to be_a(Hash) 83 | end 84 | 85 | it "is populated with default options when nothing has been set" do 86 | expect(RE2::Regexp.new('woo').options).to include( 87 | utf8: true, 88 | posix_syntax: false, 89 | longest_match: false, 90 | log_errors: true, 91 | literal: false, 92 | never_nl: false, 93 | case_sensitive: true, 94 | perl_classes: false, 95 | word_boundary: false, 96 | one_line: false 97 | ) 98 | end 99 | 100 | it "is populated with overridden options when specified" do 101 | options = RE2::Regexp.new('woo', case_sensitive: false).options 102 | 103 | expect(options).to include(case_sensitive: false) 104 | end 105 | end 106 | 107 | describe "#error" do 108 | it "returns nil if there is no error" do 109 | error = RE2::Regexp.new('woo').error 110 | 111 | expect(error).to be_nil 112 | end 113 | 114 | # Use log_errors: false to suppress RE2's logging to STDERR. 
115 | it "contains the error string if there is an error" do 116 | error = RE2::Regexp.new('wo(o', log_errors: false).error 117 | 118 | expect(error).to eq("missing ): wo(o") 119 | end 120 | end 121 | 122 | describe "#error_arg" do 123 | it "returns nil if there is no error" do 124 | error_arg = RE2::Regexp.new('woo').error_arg 125 | 126 | expect(error_arg).to be_nil 127 | end 128 | 129 | it "returns the offending portion of the pattern if there is an error" do 130 | error_arg = RE2::Regexp.new('wo(o', log_errors: false).error_arg 131 | 132 | expect(error_arg).to eq("wo(o") 133 | end 134 | end 135 | 136 | describe "#program_size" do 137 | it "returns a numeric value" do 138 | program_size = RE2::Regexp.new('w(o)(o)').program_size 139 | 140 | expect(program_size).to be_an(Integer) 141 | end 142 | 143 | it "returns -1 for an invalid pattern" do 144 | program_size = RE2::Regexp.new('???', log_errors: false).program_size 145 | 146 | expect(program_size).to eq(-1) 147 | end 148 | end 149 | 150 | describe "#to_str" do 151 | it "returns the original pattern" do 152 | string = RE2::Regexp.new('w(o)(o)').to_str 153 | 154 | expect(string).to eq("w(o)(o)") 155 | end 156 | 157 | it "returns the pattern even if invalid" do 158 | string = RE2::Regexp.new('???', log_errors: false).to_str 159 | 160 | expect(string).to eq("???") 161 | end 162 | end 163 | 164 | describe "#pattern" do 165 | it "returns the original pattern" do 166 | pattern = RE2::Regexp.new('w(o)(o)').pattern 167 | 168 | expect(pattern).to eq("w(o)(o)") 169 | end 170 | 171 | it "returns the pattern even if invalid" do 172 | pattern = RE2::Regexp.new('???', log_errors: false).pattern 173 | 174 | expect(pattern).to eq("???") 175 | end 176 | end 177 | 178 | describe "#inspect" do 179 | it "shows the class name and original pattern" do 180 | string = RE2::Regexp.new('w(o)(o)').inspect 181 | 182 | expect(string).to eq("#") 183 | end 184 | 185 | it "respects the pattern's original encoding" do 186 | string = 
RE2::Regexp.new('w(o)(o)', utf8: false).inspect 187 | 188 | expect(string.encoding).to eq(Encoding::ISO_8859_1) 189 | end 190 | end 191 | 192 | describe "#utf8?" do 193 | it "returns true by default" do 194 | expect(RE2::Regexp.new('woo')).to be_utf8 195 | end 196 | 197 | it "can be overridden on initialization" do 198 | re = RE2::Regexp.new('woo', utf8: false) 199 | 200 | expect(re).to_not be_utf8 201 | end 202 | end 203 | 204 | describe "#posix_syntax?" do 205 | it "returns false by default" do 206 | expect(RE2::Regexp.new('woo')).to_not be_posix_syntax 207 | end 208 | 209 | it "can be overridden on initialization" do 210 | re = RE2::Regexp.new('woo', posix_syntax: true) 211 | 212 | expect(re).to be_posix_syntax 213 | end 214 | end 215 | 216 | describe "#literal?" do 217 | it "returns false by default" do 218 | expect(RE2::Regexp.new('woo')).to_not be_literal 219 | end 220 | 221 | it "can be overridden on initialization" do 222 | re = RE2::Regexp.new('woo', literal: true) 223 | 224 | expect(re).to be_literal 225 | end 226 | end 227 | 228 | describe "#never_nl?" do 229 | it "returns false by default" do 230 | expect(RE2::Regexp.new('woo')).to_not be_never_nl 231 | end 232 | 233 | it "can be overridden on initialization" do 234 | re = RE2::Regexp.new('woo', never_nl: true) 235 | 236 | expect(re).to be_never_nl 237 | end 238 | end 239 | 240 | describe "#case_sensitive?" do 241 | it "returns true by default" do 242 | expect(RE2::Regexp.new('woo')).to be_case_sensitive 243 | end 244 | 245 | it "can be overridden on initialization" do 246 | re = RE2::Regexp.new('woo', case_sensitive: false) 247 | expect(re).to_not be_case_sensitive 248 | end 249 | end 250 | 251 | describe "#case_insensitive?" 
do 252 | it "returns false by default" do 253 | expect(RE2::Regexp.new('woo')).to_not be_case_insensitive 254 | end 255 | 256 | it "can be overridden on initialization" do 257 | re = RE2::Regexp.new('woo', case_sensitive: false) 258 | 259 | expect(re).to be_case_insensitive 260 | end 261 | end 262 | 263 | describe "#casefold?" do 264 | it "returns true by default" do 265 | expect(RE2::Regexp.new('woo')).to_not be_casefold 266 | end 267 | 268 | it "can be overridden on initialization" do 269 | re = RE2::Regexp.new('woo', case_sensitive: false) 270 | 271 | expect(re).to be_casefold 272 | end 273 | end 274 | 275 | describe "#longest_match?" do 276 | it "returns false by default" do 277 | expect(RE2::Regexp.new('woo')).to_not be_casefold 278 | end 279 | 280 | it "can be overridden on initialization" do 281 | re = RE2::Regexp.new('woo', longest_match: true) 282 | 283 | expect(re).to be_longest_match 284 | end 285 | end 286 | 287 | describe "#log_errors?" do 288 | it "returns true by default" do 289 | expect(RE2::Regexp.new('woo')).to be_log_errors 290 | end 291 | 292 | it "can be overridden on initialization" do 293 | re = RE2::Regexp.new('woo', log_errors: false) 294 | 295 | expect(re).to_not be_log_errors 296 | end 297 | end 298 | 299 | describe "#perl_classes?" do 300 | it "returns false by default" do 301 | expect(RE2::Regexp.new('woo')).to_not be_perl_classes 302 | end 303 | 304 | it "can be overridden on initialization" do 305 | re = RE2::Regexp.new('woo', perl_classes: true) 306 | 307 | expect(re).to be_perl_classes 308 | end 309 | end 310 | 311 | describe "#word_boundary?" do 312 | it "returns false by default" do 313 | expect(RE2::Regexp.new('woo')).to_not be_word_boundary 314 | end 315 | 316 | it "can be overridden on initialization" do 317 | re = RE2::Regexp.new('woo', word_boundary: true) 318 | 319 | expect(re).to be_word_boundary 320 | end 321 | end 322 | 323 | describe "#one_line?" 
do 324 | it "returns false by default" do 325 | expect(RE2::Regexp.new('woo')).to_not be_one_line 326 | end 327 | 328 | it "can be overridden on initialization" do 329 | re = RE2::Regexp.new('woo', one_line: true) 330 | 331 | expect(re).to be_one_line 332 | end 333 | end 334 | 335 | describe "#max_mem" do 336 | it "returns the default max memory" do 337 | expect(RE2::Regexp.new('woo').max_mem).to eq(8388608) 338 | end 339 | 340 | it "can be overridden on initialization" do 341 | re = RE2::Regexp.new('woo', max_mem: 1024) 342 | 343 | expect(re.max_mem).to eq(1024) 344 | end 345 | end 346 | 347 | describe "#match" do 348 | it "returns match data given only text if the pattern has capturing groups" do 349 | re = RE2::Regexp.new('My name is (\w+) (\w+)') 350 | 351 | expect(re.match("My name is Alice Bloggs")).to be_a(RE2::MatchData) 352 | end 353 | 354 | it "returns only true or false given only text if the pattern has no capturing groups" do 355 | re = RE2::Regexp.new('My name is \w+ \w+') 356 | 357 | expect(re.match("My name is Alice Bloggs")).to eq(true) 358 | end 359 | 360 | it "supports matching against text containing null bytes" do 361 | re = RE2::Regexp.new("a\0b") 362 | 363 | expect(re.match("a\0b")).to eq(true) 364 | end 365 | 366 | it "returns nil if the text does not match the pattern" do 367 | re = RE2::Regexp.new('My name is (\w+) (\w+)') 368 | 369 | expect(re.match("My age is 99")).to be_nil 370 | end 371 | 372 | it "accepts text that can be coerced to a string" do 373 | re = RE2::Regexp.new('My name is (\w+) (\w+)') 374 | 375 | expect(re.match(StringLike.new("My name is Alice Bloggs"))).to be_a(RE2::MatchData) 376 | end 377 | 378 | it "raises an exception when given text that cannot be coerced to a string" do 379 | re = RE2::Regexp.new('My name is (\w+) (\w+)') 380 | 381 | expect { re.match(nil) }.to raise_error(TypeError) 382 | end 383 | 384 | it "returns nil with an invalid pattern" do 385 | re = RE2::Regexp.new('???', log_errors: false) 386 | 387 | 
expect(re.match("My name is Alice Bloggs")).to be_nil 388 | end 389 | 390 | it "returns nil with an invalid pattern and options" do 391 | re = RE2::Regexp.new('???', log_errors: false) 392 | 393 | expect(re.match('foo bar', startpos: 1)).to be_nil 394 | end 395 | 396 | it "accepts an offset at which to start matching", :aggregate_failures do 397 | re = RE2::Regexp.new('(\w+) (\w+)') 398 | md = re.match("one two three", startpos: 4) 399 | 400 | expect(md[1]).to eq("two") 401 | expect(md[2]).to eq("three") 402 | end 403 | 404 | it "returns nil if using a starting offset past the end of the text" do 405 | skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument? 406 | 407 | re = RE2::Regexp.new('(\w+) (\w+)', log_errors: false) 408 | 409 | expect(re.match("one two three", startpos: 20, endpos: 21)).to be_nil 410 | end 411 | 412 | it "raises an exception when given a negative starting offset" do 413 | re = RE2::Regexp.new('(\w+) (\w+)') 414 | 415 | expect { re.match("one two three", startpos: -1) }.to raise_error(ArgumentError, "startpos should be >= 0") 416 | end 417 | 418 | it "raises an exception when given a starting offset past the default ending offset" do 419 | re = RE2::Regexp.new('(\w+) (\w+)') 420 | 421 | expect { re.match("one two three", startpos: 30) }.to raise_error(ArgumentError, "startpos should be <= endpos") 422 | end 423 | 424 | it "accepts an offset at which to end matching", :aggregate_failures do 425 | skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument? 426 | 427 | re = RE2::Regexp.new('(\w+) (\w+)') 428 | md = re.match("one two three", endpos: 6) 429 | 430 | expect(md[1]).to eq("one") 431 | expect(md[2]).to eq("tw") 432 | end 433 | 434 | it "returns nil if using a ending offset at the start of the text" do 435 | skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument? 
436 | 437 | re = RE2::Regexp.new('(\w+) (\w+)') 438 | 439 | expect(re.match("one two three", endpos: 0)).to be_nil 440 | end 441 | 442 | it "raises an exception when given a negative ending offset" do 443 | skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument? 444 | 445 | re = RE2::Regexp.new('(\w+) (\w+)') 446 | 447 | expect { re.match("one two three", endpos: -1) }.to raise_error(ArgumentError, "endpos should be >= 0") 448 | end 449 | 450 | it "raises an exception when given an ending offset before the starting offset" do 451 | skip "Underlying RE2::Match does not have endpos argument" unless RE2::Regexp.match_has_endpos_argument? 452 | 453 | re = RE2::Regexp.new('(\w+) (\w+)') 454 | 455 | expect { re.match("one two three", startpos: 3, endpos: 0) }.to raise_error(ArgumentError, "startpos should be <= endpos") 456 | end 457 | 458 | it "raises an error if given an ending offset and RE2 does not support it" do 459 | skip "Underlying RE2::Match has endpos argument" if RE2::Regexp.match_has_endpos_argument? 
460 | 461 | re = RE2::Regexp.new('(\w+) (\w+)') 462 | 463 | expect { re.match("one two three", endpos: 3) }.to raise_error(RE2::Regexp::UnsupportedError) 464 | end 465 | 466 | it "does not anchor matches by default when extracting submatches" do 467 | re = RE2::Regexp.new('(two)') 468 | 469 | expect(re.match("one two three")).to be_a(RE2::MatchData) 470 | end 471 | 472 | it "does not anchor matches by default without extracting submatches" do 473 | re = RE2::Regexp.new('(two)') 474 | 475 | expect(re.match("one two three", submatches: 0)).to eq(true) 476 | end 477 | 478 | it "can explicitly match without anchoring when extracting submatches" do 479 | re = RE2::Regexp.new('(two)') 480 | 481 | expect(re.match("one two three", anchor: :unanchored)).to be_a(RE2::MatchData) 482 | end 483 | 484 | it "can explicitly match with neither anchoring nor extracting submatches" do 485 | re = RE2::Regexp.new('(two)') 486 | 487 | expect(re.match("one two three", anchor: :unanchored, submatches: 0)).to eq(true) 488 | end 489 | 490 | it "can anchor matches at the start when extracting submatches", :aggregate_failures do 491 | re = RE2::Regexp.new('(two)') 492 | 493 | expect(re.match("two three", anchor: :anchor_start)).to be_a(RE2::MatchData) 494 | expect(re.match("one two three", anchor: :anchor_start)).to be_nil 495 | end 496 | 497 | it "can anchor matches at the start without extracting submatches", :aggregate_failures do 498 | re = RE2::Regexp.new('(two)') 499 | 500 | expect(re.match("two three", anchor: :anchor_start, submatches: 0)).to eq(true) 501 | expect(re.match("one two three", anchor: :anchor_start, submatches: 0)).to eq(false) 502 | end 503 | 504 | it "can anchor matches at both ends when extracting submatches", :aggregate_failures do 505 | re = RE2::Regexp.new('(two)') 506 | 507 | expect(re.match("two three", anchor: :anchor_both)).to be_nil 508 | expect(re.match("two", anchor: :anchor_both)).to be_a(RE2::MatchData) 509 | end 510 | 511 | it "does not anchor matches when 
given a nil anchor" do 512 | re = RE2::Regexp.new('(two)') 513 | 514 | expect(re.match("one two three", anchor: nil)).to be_a(RE2::MatchData) 515 | end 516 | 517 | it "raises an exception when given an invalid anchor" do 518 | re = RE2::Regexp.new('(two)') 519 | 520 | expect { re.match("one two three", anchor: :invalid) }.to raise_error(ArgumentError, "anchor should be one of: :unanchored, :anchor_start, :anchor_both") 521 | end 522 | 523 | it "raises an exception when given a non-symbol anchor" do 524 | re = RE2::Regexp.new('(two)') 525 | 526 | expect { re.match("one two three", anchor: 0) }.to raise_error(TypeError) 527 | end 528 | 529 | it "extracts all submatches by default", :aggregate_failures do 530 | re = RE2::Regexp.new('(\w+) (\w+) (\w+)') 531 | md = re.match("one two three") 532 | 533 | expect(md[1]).to eq("one") 534 | expect(md[2]).to eq("two") 535 | expect(md[3]).to eq("three") 536 | end 537 | 538 | it "supports extracting submatches containing null bytes" do 539 | re = RE2::Regexp.new("(a\0b)") 540 | md = re.match("a\0bc") 541 | 542 | expect(md[1]).to eq("a\0b") 543 | end 544 | 545 | it "extracts a specific number of submatches", :aggregate_failures do 546 | re = RE2::Regexp.new('(\w+) (\w+) (\w+)') 547 | md = re.match("one two three", submatches: 2) 548 | 549 | expect(md[1]).to eq("one") 550 | expect(md[2]).to eq("two") 551 | expect(md[3]).to be_nil 552 | end 553 | 554 | it "pads submatches with nil when requesting more than the number of capturing groups" do 555 | re = RE2::Regexp.new('(\w+) (\w+) (\w+)') 556 | md = re.match("one two three", submatches: 5) 557 | 558 | expect(md.to_a).to eq(["one two three", "one", "two", "three", nil, nil]) 559 | end 560 | 561 | it "raises an exception when given a negative number of submatches" do 562 | re = RE2::Regexp.new('(\w+) (\w+) (\w+)') 563 | 564 | expect { re.match("one two three", submatches: -1) }.to raise_error(ArgumentError, "number of matches should be >= 0") 565 | end 566 | 567 | it "raises an 
exception when given a non-numeric number of submatches" do 568 | re = RE2::Regexp.new('(\w+) (\w+) (\w+)') 569 | 570 | expect { re.match("one two three", submatches: :invalid) }.to raise_error(TypeError) 571 | end 572 | 573 | it "raises an exception when given too large a number of submatches" do 574 | re = RE2::Regexp.new('(\w+) (\w+) (\w+)') 575 | 576 | expect { re.match("one two three", submatches: INT_MAX) }.to raise_error(RangeError, "number of matches should be < #{INT_MAX}") 577 | end 578 | 579 | it "defaults to extracting all submatches when given nil", :aggregate_failures do 580 | re = RE2::Regexp.new('(\w+) (\w+) (\w+)') 581 | md = re.match("one two three", submatches: nil) 582 | 583 | expect(md[1]).to eq("one") 584 | expect(md[2]).to eq("two") 585 | expect(md[3]).to eq("three") 586 | end 587 | 588 | it "accepts passing the number of submatches instead of options for backward compatibility", :aggregate_failures do 589 | re = RE2::Regexp.new('(\w+) (\w+) (\w+)') 590 | md = re.match("one two three", 2) 591 | 592 | expect(md[1]).to eq("one") 593 | expect(md[2]).to eq("two") 594 | expect(md[3]).to be_nil 595 | end 596 | 597 | it "raises an exception if given too large a number of submatches instead of options" do 598 | re = RE2::Regexp.new('(\w+) (\w+) (\w+)') 599 | md = re.match("one two three", 2) 600 | 601 | expect { re.match("one two three", INT_MAX) }.to raise_error(RangeError, "number of matches should be < #{INT_MAX}") 602 | end 603 | 604 | it "raises an exception when given invalid options" do 605 | re = RE2::Regexp.new('(\w+) (\w+) (\w+)') 606 | 607 | expect { re.match("one two three", :invalid) }.to raise_error(TypeError) 608 | end 609 | 610 | it "accepts anything that can be coerced to a hash as options", :aggregate_failures do 611 | re = RE2::Regexp.new('(\w+) (\w+) (\w+)') 612 | 613 | expect(re.match("one two three", nil)).to be_a(RE2::MatchData) 614 | end 615 | end 616 | 617 | describe "#match?" 
do 618 | it "returns only true or false even if there are capturing groups", :aggregate_failures do 619 | re = RE2::Regexp.new('My name is (\S+) (\S+)') 620 | 621 | expect(re.match?("My name is Alice Bloggs")).to eq(true) 622 | expect(re.match?("My age is 99")).to eq(false) 623 | end 624 | 625 | it "returns false if the pattern is invalid" do 626 | re = RE2::Regexp.new('???', log_errors: false) 627 | 628 | expect(re.match?("My name is Alice Bloggs")).to eq(false) 629 | end 630 | 631 | it "raises an exception if text cannot be coerced to a string" do 632 | re = RE2::Regexp.new('My name is (\S+) (\S+)') 633 | 634 | expect { re.match?(0) }.to raise_error(TypeError) 635 | end 636 | end 637 | 638 | describe "#partial_match?" do 639 | it "returns only true or false even if there are capturing groups", :aggregate_failures do 640 | re = RE2::Regexp.new('My name is (\S+) (\S+)') 641 | 642 | expect(re.partial_match?("My name is Alice Bloggs")).to eq(true) 643 | expect(re.partial_match?("My age is 99")).to eq(false) 644 | end 645 | 646 | it "supports matching against text containing null bytes", :aggregate_failures do 647 | re = RE2::Regexp.new("a\0b") 648 | 649 | expect(re.partial_match?("a\0b")).to eq(true) 650 | expect(re.partial_match?("ab")).to eq(false) 651 | end 652 | 653 | it "returns false if the pattern is invalid" do 654 | re = RE2::Regexp.new('???', log_errors: false) 655 | 656 | expect(re.partial_match?("My name is Alice Bloggs")).to eq(false) 657 | end 658 | 659 | it "raises an exception if text cannot be coerced to a string" do 660 | re = RE2::Regexp.new('My name is (\S+) (\S+)') 661 | 662 | expect { re.partial_match?(0) }.to raise_error(TypeError) 663 | end 664 | end 665 | 666 | describe "#=~" do 667 | it "returns only true or false even if there are capturing groups", :aggregate_failures do 668 | re = RE2::Regexp.new('My name is (\S+) (\S+)') 669 | 670 | expect(re =~ "My name is Alice Bloggs").to eq(true) 671 | expect(re =~ "My age is 99").to eq(false) 672 | 
end 673 | 674 | it "supports matching against text containing null bytes", :aggregate_failures do 675 | re = RE2::Regexp.new("a\0b") 676 | 677 | expect(re =~ "a\0b").to eq(true) 678 | expect(re =~ "ab").to eq(false) 679 | end 680 | 681 | it "returns false if the pattern is invalid" do 682 | re = RE2::Regexp.new('???', log_errors: false) 683 | 684 | expect(re =~ "My name is Alice Bloggs").to eq(false) 685 | end 686 | 687 | it "raises an exception if text cannot be coerced to a string" do 688 | re = RE2::Regexp.new('My name is (\S+) (\S+)') 689 | 690 | expect { re =~ 0 }.to raise_error(TypeError) 691 | end 692 | end 693 | 694 | describe "#===" do 695 | it "returns only true or false even if there are capturing groups", :aggregate_failures do 696 | re = RE2::Regexp.new('My name is (\S+) (\S+)') 697 | 698 | expect(re === "My name is Alice Bloggs").to eq(true) 699 | expect(re === "My age is 99").to eq(false) 700 | end 701 | 702 | it "returns false if the pattern is invalid" do 703 | re = RE2::Regexp.new('???', log_errors: false) 704 | 705 | expect(re === "My name is Alice Bloggs").to eq(false) 706 | end 707 | 708 | it "raises an exception if text cannot be coerced to a string" do 709 | re = RE2::Regexp.new('My name is (\S+) (\S+)') 710 | 711 | expect { re === 0 }.to raise_error(TypeError) 712 | end 713 | end 714 | 715 | describe "#full_match?" 
do 716 | it "returns only true or false even if there are capturing groups", :aggregate_failures do 717 | re = RE2::Regexp.new('My name is (\S+) (\S+)') 718 | 719 | expect(re.full_match?("My name is Alice Bloggs")).to eq(true) 720 | expect(re.full_match?("My name is Alice Bloggs and I am 99")).to eq(false) 721 | end 722 | 723 | it "supports matching against text containing null bytes", :aggregate_failures do 724 | re = RE2::Regexp.new("a\0b") 725 | 726 | expect(re.full_match?("a\0b")).to eq(true) 727 | expect(re.full_match?("a\0bc")).to eq(false) 728 | end 729 | 730 | it "returns false if the pattern is invalid" do 731 | re = RE2::Regexp.new('???', log_errors: false) 732 | 733 | expect(re.full_match?("My name is Alice Bloggs")).to eq(false) 734 | end 735 | 736 | it "raises an exception if text cannot be coerced to a string" do 737 | re = RE2::Regexp.new('My name is (\S+) (\S+)') 738 | 739 | expect { re.full_match?(0) }.to raise_error(TypeError) 740 | end 741 | end 742 | 743 | describe "#ok?" 
do 744 | it "returns true for valid patterns", :aggregate_failures do 745 | expect(RE2::Regexp.new('woo')).to be_ok 746 | expect(RE2::Regexp.new('wo(o)')).to be_ok 747 | expect(RE2::Regexp.new('((\d)\w+){3,}')).to be_ok 748 | end 749 | 750 | it "returns false for invalid patterns", :aggregate_failures do 751 | expect(RE2::Regexp.new('wo(o', log_errors: false)).to_not be_ok 752 | expect(RE2::Regexp.new('wo[o', log_errors: false)).to_not be_ok 753 | expect(RE2::Regexp.new('*', log_errors: false)).to_not be_ok 754 | end 755 | end 756 | 757 | describe ".escape" do 758 | it "transforms a string into a regexp" do 759 | expect(RE2::Regexp.escape("1.5-2.0?")).to eq('1\.5\-2\.0\?') 760 | end 761 | end 762 | 763 | describe ".quote" do 764 | it "transforms a string into a regexp" do 765 | expect(RE2::Regexp.quote("1.5-2.0?")).to eq('1\.5\-2\.0\?') 766 | end 767 | end 768 | 769 | describe "#number_of_capturing_groups" do 770 | it "returns the number of groups in a pattern", :aggregate_failures do 771 | expect(RE2::Regexp.new('(a)(b)(c)').number_of_capturing_groups).to eq(3) 772 | expect(RE2::Regexp.new('abc').number_of_capturing_groups).to eq(0) 773 | expect(RE2::Regexp.new('a((b)c)').number_of_capturing_groups).to eq(2) 774 | end 775 | 776 | it "returns -1 for an invalid pattern" do 777 | expect(RE2::Regexp.new('???', log_errors: false).number_of_capturing_groups).to eq(-1) 778 | end 779 | end 780 | 781 | describe "#named_capturing_groups" do 782 | it "returns a hash of names to indices" do 783 | expect(RE2::Regexp.new('(?Pa)').named_capturing_groups).to be_a(Hash) 784 | end 785 | 786 | it "maps names to indices with only one group" do 787 | groups = RE2::Regexp.new('(?Pa)').named_capturing_groups 788 | 789 | expect(groups).to eq("bob" => 1) 790 | end 791 | 792 | it "maps names to indices with several groups" do 793 | groups = RE2::Regexp.new('(?Pa)(o)(?Pe)').named_capturing_groups 794 | 795 | expect(groups).to eq("bob" => 1, "rob" => 3) 796 | end 797 | 798 | it "returns an 
empty hash for an invalid regexp" do 799 | expect(RE2::Regexp.new('???', log_errors: false).named_capturing_groups).to be_empty 800 | end 801 | end 802 | 803 | describe "#scan" do 804 | it "returns a scanner" do 805 | r = RE2::Regexp.new('(\w+)') 806 | scanner = r.scan("It is a truth universally acknowledged") 807 | 808 | expect(scanner).to be_a(RE2::Scanner) 809 | end 810 | 811 | it "raises a type error if given invalid input" do 812 | r = RE2::Regexp.new('(\w+)') 813 | 814 | expect { r.scan(nil) }.to raise_error(TypeError) 815 | end 816 | end 817 | 818 | describe "#partial_match" do 819 | it "matches the pattern anywhere within the given text" do 820 | r = RE2::Regexp.new('f(o+)') 821 | 822 | expect(r.partial_match("foo bar")).to be_a(RE2::MatchData) 823 | end 824 | 825 | it "returns true or false if there are no capturing groups" do 826 | r = RE2::Regexp.new('fo+') 827 | 828 | expect(r.partial_match("foo bar")).to eq(true) 829 | end 830 | 831 | it "can set the number of submatches to extract", :aggregate_failures do 832 | r = RE2::Regexp.new('f(o+)(a+)') 833 | m = r.partial_match("fooaa bar", submatches: 1) 834 | 835 | expect(m[1]).to eq("oo") 836 | expect(m[2]).to be_nil 837 | 838 | m = r.partial_match("fooaa bar", submatches: 2) 839 | 840 | expect(m[1]).to eq("oo") 841 | expect(m[2]).to eq("aa") 842 | end 843 | 844 | it "raises an error if given non-hash options" do 845 | r = RE2::Regexp.new('f(o+)(a+)') 846 | 847 | expect { r.partial_match("fooaa bar", "not a hash") }.to raise_error(TypeError) 848 | end 849 | 850 | it "accepts options that can be coerced to a hash", :aggregate_failures do 851 | r = RE2::Regexp.new('f(o+)(a+)') 852 | 853 | m = r.partial_match("fooaa bar", nil) 854 | expect(m[1]).to eq('oo') 855 | 856 | m = r.partial_match("fooaa bar", []) 857 | expect(m[1]).to eq('oo') 858 | end 859 | 860 | it "accepts anything that can be coerced to a string" do 861 | r = RE2::Regexp.new('f(o+)(a+)') 862 | 863 | expect(r.partial_match(StringLike.new("fooaa 
bar"))).to be_a(RE2::MatchData) 864 | end 865 | 866 | it "does not allow the anchor to be overridden" do 867 | r = RE2::Regexp.new('(\d+)') 868 | 869 | expect(r.partial_match('ruby:1234', anchor: :anchor_both)).to be_a(RE2::MatchData) 870 | end 871 | end 872 | 873 | describe "#full_match" do 874 | it "only matches the pattern if all of the given text matches", :aggregate_failures do 875 | r = RE2::Regexp.new('f(o+)') 876 | 877 | expect(r.full_match("foo")).to be_a(RE2::MatchData) 878 | expect(r.full_match("foo bar")).to be_nil 879 | end 880 | 881 | it "returns true or false if there are no capturing groups" do 882 | r = RE2::Regexp.new('fo+') 883 | 884 | expect(r.full_match("foo")).to eq(true) 885 | end 886 | 887 | it "can set the number of submatches to extract", :aggregate_failures do 888 | r = RE2::Regexp.new('f(o+)(a+)') 889 | m = r.full_match("fooaa", submatches: 1) 890 | 891 | expect(m[1]).to eq("oo") 892 | expect(m[2]).to be_nil 893 | 894 | m = r.full_match("fooaa", submatches: 2) 895 | 896 | expect(m[1]).to eq("oo") 897 | expect(m[2]).to eq("aa") 898 | end 899 | 900 | it "raises an error if given non-hash options" do 901 | r = RE2::Regexp.new('f(o+)(a+)') 902 | 903 | expect { r.full_match("fooaa", "not a hash") }.to raise_error(TypeError) 904 | end 905 | 906 | it "accepts options that can be coerced to a hash", :aggregate_failures do 907 | r = RE2::Regexp.new('f(o+)(a+)') 908 | 909 | m = r.full_match("fooaa", nil) 910 | expect(m[1]).to eq("oo") 911 | 912 | m = r.full_match("fooaa", []) 913 | expect(m[1]).to eq("oo") 914 | end 915 | 916 | it "accepts anything that can be coerced to a string" do 917 | r = RE2::Regexp.new('f(o+)(a+)') 918 | 919 | expect(r.full_match(StringLike.new("fooaa"), submatches: 0)).to eq(true) 920 | end 921 | 922 | it "does not allow the anchor to be overridden" do 923 | r = RE2::Regexp.new('(\d+)') 924 | 925 | expect(r.full_match('ruby:1234', anchor: :unanchored)).to be_nil 926 | end 927 | end 928 | end 929 | 
--------------------------------------------------------------------------------