├── .rspec
├── lib
│   ├── pcre2
│   │   ├── version.rb
│   │   ├── error.rb
│   │   ├── matchdata.rb
│   │   ├── string_utils.rb
│   │   ├── regexp.rb
│   │   ├── lib.rb
│   │   └── lib
│   │       └── constants.rb
│   └── pcre2.rb
├── spec
│   ├── pcre2_spec.rb
│   ├── regexp_spec.rb
│   ├── lib
│   │   └── pcre2
│   │       ├── error_spec.rb
│   │       ├── lib_spec.rb
│   │       ├── matchdata_spec.rb
│   │       ├── string_utils_spec.rb
│   │       └── regexp_spec.rb
│   └── spec_helper.rb
├── Gemfile
├── bin
│   ├── setup
│   └── console
├── Rakefile
├── .gitignore
├── .github
│   └── workflows
│       └── tests.yml
├── LICENSE.txt
├── pcre2.gemspec
├── benchmark.rake
└── README.md
/.rspec:
--------------------------------------------------------------------------------
1 | --format documentation
2 | --color
3 | --require spec_helper
4 |
--------------------------------------------------------------------------------
/lib/pcre2/version.rb:
--------------------------------------------------------------------------------
1 | module PCRE2
2 | VERSION = "0.2.0"
3 | end
4 |
--------------------------------------------------------------------------------
/spec/pcre2_spec.rb:
--------------------------------------------------------------------------------
1 | RSpec.describe PCRE2 do
2 | it "has a version number" do
3 | expect(PCRE2::VERSION).not_to be nil
4 | end
5 | end
6 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source "https://rubygems.org"
2 |
3 | # Specify your gem's dependencies in pcre2.gemspec
4 | gemspec
5 |
6 | gem "rake", "~> 12.0"
7 | gem "rspec", "~> 3.0"
8 |
--------------------------------------------------------------------------------
/bin/setup:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -euo pipefail
3 | IFS=$'\n\t'
4 | set -vx
5 |
6 | bundle install
7 |
8 | # Do any other automated setup that you need to do here
9 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require "bundler/gem_tasks"
2 | require "rspec/core/rake_task"
3 |
4 | RSpec::Core::RakeTask.new(:spec)
5 |
6 | task :default => :spec
7 |
8 | load "benchmark.rake"
9 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.bundle/
2 | /.yardoc
3 | /_yardoc/
4 | /coverage/
5 | /doc/
6 | /pkg/
7 | /spec/reports/
8 | /tmp/
9 |
10 | # rspec failure tracking
11 | .rspec_status
12 | /Gemfile.lock
13 |
--------------------------------------------------------------------------------
/lib/pcre2.rb:
--------------------------------------------------------------------------------
1 | require "pcre2/version"
2 | require "pcre2/lib"
3 | require "pcre2/lib/constants"
4 | require "pcre2/string_utils"
5 |
6 | # Classes
7 | require "pcre2/error"
8 | require "pcre2/regexp"
9 | require "pcre2/matchdata"
10 |
11 | module PCRE2
12 | # Your code goes here...
13 | end
14 |
--------------------------------------------------------------------------------
/spec/regexp_spec.rb:
--------------------------------------------------------------------------------
1 | RSpec.describe Regexp do
2 | it "responds to `to_pcre2`", :skip do
3 | regexp = /hello/
4 |
5 | pcre2_regexp = regexp.to_pcre2
6 |
7 | expect(pcre2_regexp).to be_a(PCRE2::Regexp)
8 | expect(pcre2_regexp.source).to eq(regexp.source)
9 | end
10 | end
11 |
--------------------------------------------------------------------------------
/spec/lib/pcre2/error_spec.rb:
--------------------------------------------------------------------------------
1 | RSpec.describe PCRE2::Error do
2 | describe ".from_error_code" do
3 | it "has the correct error message" do
4 | error = PCRE2::Error.from_error_code(PCRE2::PCRE2_ERROR_BADDATA)
5 |
6 | expect(error.message).to match(/bad data value/)
7 | end
8 | end
9 | end
10 |
--------------------------------------------------------------------------------
/lib/pcre2/error.rb:
--------------------------------------------------------------------------------
# Error raised for failures reported by the PCRE2 library.
class PCRE2::Error < StandardError
  # Builds an error whose message combines the numeric PCRE2 error code, the
  # text returned by PCRE2::Lib.get_error_message for that code, and an
  # optional caller-supplied suffix.
  #
  # error_code    - the PCRE2 error code to describe
  # extra_message - optional extra text appended after " - "
  #
  # Returns a new instance of the receiving class.
  def self.from_error_code(error_code, extra_message = nil)
    description = PCRE2::Lib.get_error_message(error_code)
    suffix = extra_message ? " - #{extra_message}" : ""

    new("Error #{error_code}: #{description}#{suffix}")
  end
end
--------------------------------------------------------------------------------
/bin/console:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 |
3 | require "bundler/setup"
4 | require "pcre2"
5 |
6 | # You can add fixtures and/or initialization code here to make experimenting
7 | # with your gem easier. You can also use a different console, if you like.
8 |
9 | # (If you use this, don't forget to add pry to your Gemfile!)
10 | # require "pry"
11 | # Pry.start
12 |
13 | require "irb"
14 | IRB.start(__FILE__)
15 |
--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
1 | require "bundler/setup"
2 | require "pcre2"
3 |
4 | RSpec.configure do |config|
5 | # Enable flags like --only-failures and --next-failure
6 | config.example_status_persistence_file_path = ".rspec_status"
7 |
8 | # Disable RSpec exposing methods globally on `Module` and `main`
9 | config.disable_monkey_patching!
10 |
11 | config.expect_with :rspec do |c|
12 | c.syntax = :expect
13 | end
14 | end
15 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | # Setup Ruby, install gems, cache gems, and run test suite
2 | # - https://github.com/ruby/setup-ruby
3 |
4 | name: Tests
5 | on: [push, pull_request]
6 |
7 | jobs:
8 | test:
9 | runs-on: ubuntu-latest
10 | strategy:
11 | matrix:
12 | ruby: [ '2.5', '2.6' ]
13 | name: Ruby ${{ matrix.ruby }} tests
14 | steps:
15 | - uses: actions/checkout@v2
16 | - uses: ruby/setup-ruby@v1
17 | with:
18 | bundler-cache: true
19 | ruby-version: ${{ matrix.ruby }}
20 | - run: bundle install
21 | - run: bundle exec rake -t
22 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2020 David Verhasselt
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/lib/pcre2/matchdata.rb:
--------------------------------------------------------------------------------
# Holds the result of a successful match: the regexp that produced it, the
# subject string, and the offset pairs of the whole match and its capture groups.
class PCRE2::MatchData
  attr_reader :regexp, :pairs, :string

  # regexp - object whose #named_captures maps group names to arrays of group indexes
  # string - the subject string that was matched
  # pairs  - array of [start, end] offsets; index 0 is the whole match
  def initialize(regexp, string, pairs)
    @regexp = regexp
    @string = string
    @pairs = pairs
  end

  # Returns the matched text for a group, looked up by number or by name.
  #
  # A non-numeric key is converted with #to_s and resolved through
  # regexp.named_captures; when a name maps to several groups the first index
  # is used. Returns nil when there is no pair at the resolved index.
  #
  # Raises IndexError when the name is not a named group of the regexp.
  def [](key)
    index = key.is_a?(Numeric) ? key : index_for_name(key)
    pair = pairs[index]

    string_from_pair(*pair) if pair
  end

  # Returns the [start, end] pair of group n (0 is the whole match).
  def offset(n)
    pairs[n]
  end

  # Returns the offset pairs of the capture groups only (whole match excluded).
  def capture_pairs
    pairs[1..-1]
  end

  # Returns the whole match followed by every capture group, as strings.
  # The array is computed once and cached.
  def to_a
    @to_a ||= pairs.map { |pair| string_from_pair(*pair) }
  end

  # Returns the text of the capture groups only (whole match excluded).
  def captures
    to_a[1..-1]
  end

  # Returns the length of the whole match (0 for a zero-length match).
  def length
    end_of_match - start_of_match
  end

  # Returns the part of the subject before the match.
  def pre_match
    string[0...start_of_match]
  end

  # Returns the part of the subject after the match.
  def post_match
    string[end_of_match..-1]
  end

  # Returns the start offset of the whole match.
  def start_of_match
    offset(0)[0]
  end

  # Returns the end offset of the whole match.
  def end_of_match
    offset(0)[1]
  end

  private

  # Resolves a group name to the first group index registered for it.
  def index_for_name(name)
    indexes = regexp.named_captures[name.to_s]

    if indexes.nil? || indexes.empty?
      raise IndexError, "undefined group name reference: #{name}"
    end

    indexes.first
  end

  # Extracts the substring between two offsets.
  # NOTE(review): String#slice indexes by character; if the offsets in `pairs`
  # are byte offsets this is wrong for multibyte subjects. Confirm against
  # PCRE2::Lib.get_ovector_pairs.
  def string_from_pair(start, ending)
    string.slice(start, ending - start)
  end
end
--------------------------------------------------------------------------------
/spec/lib/pcre2/lib_spec.rb:
--------------------------------------------------------------------------------
1 | RSpec.describe PCRE2::Lib do
2 | describe ".get_error_message" do
3 | it "returns an error message" do
4 | result = PCRE2::Lib.get_error_message(PCRE2::PCRE2_ERROR_NOMATCH)
5 |
6 | expect(result).to eq("no match")
7 | end
8 |
9 | it "accepts a MemoryPointer" do
10 | error_code = FFI::MemoryPointer.new(:int, 1)
11 | error_code.write_int(PCRE2::PCRE2_ERROR_NOMATCH)
12 |
13 | result = PCRE2::Lib.get_error_message(error_code)
14 |
15 | expect(result).to eq("no match")
16 | end
17 | end
18 |
19 | describe ".match" do
20 | it "returns 0 when no matches" do
21 | pattern_ptr = PCRE2::Lib.compile_pattern("hello")
22 | result_count, match_data_ptr = PCRE2::Lib.match(pattern_ptr, "goodbye")
23 |
24 | expect(result_count).to eq(0)
25 | end
26 | end
27 |
28 | describe ".compile_pattern" do
29 | errors = {
30 | '(?<>.' => /Error 162: subpattern name expected/,
31 | '(.*' => /Error 114: missing closing parenthesis/,
32 | }
33 |
34 | errors.each do |pattern, error|
35 | it "raises the correct error for '#{pattern}'" do
36 | expect { PCRE2::Lib.compile_pattern(pattern) }.to raise_error(PCRE2::Error, error)
37 | end
38 | end
39 | end
40 | end
41 |
--------------------------------------------------------------------------------
/pcre2.gemspec:
--------------------------------------------------------------------------------
1 | require_relative 'lib/pcre2/version'
2 |
3 | Gem::Specification.new do |spec|
4 | spec.name = "pcre2"
5 | spec.version = PCRE2::VERSION
6 | spec.authors = ["David Verhasselt"]
7 | spec.email = ["david@crowdway.com"]
8 |
9 | spec.summary = %q{Use the PCRE2 library inside your Ruby projects}
10 | spec.description = %q{Wraps the PCRE2 library using FFI so it and the advanced functionality it provides can be used in Ruby projects}
11 | spec.homepage = "https://github.com/dv/pcre2"
12 | spec.license = "MIT"
13 | spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14 |
15 | spec.metadata["homepage_uri"] = spec.homepage
16 | spec.metadata["source_code_uri"] = spec.homepage
17 | spec.metadata["changelog_uri"] = spec.homepage
18 |
19 | # Specify which files should be added to the gem when it is released.
20 | # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
21 | spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
22 | `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
23 | end
24 | spec.bindir = "exe"
25 | spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
26 | spec.require_paths = ["lib"]
27 |
28 | spec.add_dependency "ffi"
29 | end
30 |
--------------------------------------------------------------------------------
/lib/pcre2/string_utils.rb:
--------------------------------------------------------------------------------
# String-like helpers for any object that provides `matches(string) { |matchdata| ... }`.
module PCRE2::StringUtils
  # Yields every match found in `string`, or returns them all as an Array when
  # no block is given.
  #
  # When the match has capture groups, the array of captures is yielded;
  # otherwise the whole matched text is yielded.
  def scan(string, &block)
    return enum_for(:scan, string).to_a unless block_given?

    matches(string) do |matchdata|
      captures = matchdata.captures

      # `empty?` rather than `any?`: `any?` tests element truthiness, so a match
      # whose groups are all nil would be mistaken for a pattern without groups.
      yield(captures.empty? ? matchdata[0] : captures)
    end
  end

  # Splits `string` around every match, yielding each part (and every capture of
  # each separator match), or returns the parts as an Array when no block is given.
  def split(string, &block)
    return enum_for(:split, string).to_a unless block_given?

    previous_position = 0
    matches(string) do |matchdata|
      beginning, ending = matchdata.offset(0)

      # If zero-length match and the previous_position is equal to the match position, just skip
      # it. The next zero-length match will have a different previous_position and generate a split
      # which results in the appearance of a "per character split" but without empty parts in the
      # beginning. Note that we're also skipping adding capture groups.
      next if beginning == ending && previous_position == beginning

      yield string[previous_position...beginning]

      matchdata.captures.each do |capture|
        yield capture
      end

      previous_position = ending
    end

    # Also return the ending of the string from the last match
    yield string[previous_position..-1] if previous_position < string.length
  end
end
--------------------------------------------------------------------------------
/spec/lib/pcre2/matchdata_spec.rb:
--------------------------------------------------------------------------------
1 | RSpec.describe PCRE2::MatchData do
2 | let(:pattern) { "(?<a>hello) (world)" }
3 | let(:string) { "one two three hello world today!" }
4 | let(:re) { PCRE2::Regexp.new(pattern) }
5 |
6 | subject(:matchdata) { re.match(string) }
7 |
8 | describe "#[]" do
9 | it "returns the full match at 0" do
10 | expect(matchdata[0]).to eq("hello world")
11 | end
12 |
13 | it "returns the subpattern match at 1" do
14 | expect(matchdata[1]).to eq("hello")
15 | end
16 |
17 | it "returns the subpattern match at 2" do
18 | expect(matchdata[2]).to eq("world")
19 | end
20 |
21 | it "returns nil for unexisting subpattern" do
22 | expect(matchdata[3]).to be_nil
23 | end
24 |
25 | it "returns the named subpattern match at 'a'" do
26 | expect(matchdata["a"]).to eq("hello")
27 | end
28 | end
29 |
30 | describe "#to_a" do
31 | it "returns an array of all matches" do
32 | expect(matchdata.to_a).to eq(["hello world", "hello", "world"])
33 | end
34 | end
35 |
36 | describe "#pre_match and #post_match" do
37 | it "returns the correct results" do
38 | expect(matchdata.pre_match).to eq("one two three ")
39 | expect(matchdata.post_match).to eq(" today!")
40 | end
41 | end
42 |
43 | describe "#length" do
44 | it "returns 0 for a zero-length match" do
45 | re = PCRE2::Regexp.new("")
46 | matchdata = re.match("string")
47 |
48 | expect(matchdata.length).to eq(0)
49 | end
50 | end
51 | end
52 |
--------------------------------------------------------------------------------
/lib/pcre2/regexp.rb:
--------------------------------------------------------------------------------
module PCRE2
  # A compiled PCRE2 pattern with a subset of the ::Regexp API.
  class Regexp
    include StringUtils

    attr_reader :source, :pattern_ptr

    # Accepts a String, Regexp or another PCRE2::Regexp, followed by any number
    # of option flags which are handed to Lib.compile_pattern.
    def initialize(pattern, *options)
      @source =
        case pattern
        when ::Regexp, PCRE2::Regexp then pattern.source
        else pattern
        end

      @pattern_ptr = Lib.compile_pattern(source, options)
    end

    # Compiles the Regexp into a JIT optimised version. Returns whether it was successful
    def jit!
      jit_flags = PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD

      Lib.pcre2_jit_compile_8(pattern_ptr, jit_flags) == 0
    end

    # Matches `str`, optionally starting at `pos`. Returns a MatchData, or nil
    # when Lib.match reports a result count of 0.
    def match(str, pos = nil)
      count, data_ptr = Lib.match(@pattern_ptr, str, position: pos)
      return nil if count == 0

      MatchData.new(self, str, PCRE2::Lib.get_ovector_pairs(data_ptr, count))
    end

    # Yields a MatchData for each successive match in `str`, starting at `pos`
    # (default 0). Returns an Enumerator when no block is given.
    def matches(str, pos = nil, &block)
      return enum_for(:matches, str, pos) unless block_given?

      cursor = pos || 0
      while cursor < str.length
        found = match(str, cursor)
        break unless found

        yield found

        match_end = found.offset(0)[1]

        # Step forward manually when the match did not advance, to avoid infinite loops
        cursor = cursor == match_end ? cursor + 1 : match_end
      end
    end

    # Returns the mapping produced by Lib.named_captures for this pattern (cached).
    def named_captures
      @named_captures ||= Lib.named_captures(pattern_ptr)
    end

    # Returns the keys of #named_captures.
    def names
      named_captures.keys
    end
  end
end
--------------------------------------------------------------------------------
/benchmark.rake:
--------------------------------------------------------------------------------
require "benchmark"
require "pcre2"

desc "Run a benchmark to compare PCRE2 vs Ruby's built-in Regexp"
task :benchmark do
  # Times 100,000 full scans of `string` for `pattern` using Ruby's Regexp,
  # PCRE2::Regexp, and a PCRE2::Regexp on which #jit! has been called.
  def benchmark!(pattern, string)
    # Finds every match in `string`, restarting one position past the end of
    # the previous match until `match` returns nil.
    task = ->(re) {
      pos = 0

      while (matchdata = re.match(string, pos))
        pos = matchdata.offset(0)[1] + 1
      end
    }

    GC.disable
    begin
      Benchmark.bmbm do |benchmark|
        ruby_re = Regexp.new(pattern)
        pcre2_re = PCRE2::Regexp.new(pattern)
        pcre2_re_jit = PCRE2::Regexp.new(pattern).tap(&:jit!)

        benchmark.report("Ruby Regexp") do
          100_000.times { task.call(ruby_re) }
        end

        GC.start

        benchmark.report("PCRE2 Regexp") do
          100_000.times { task.call(pcre2_re) }
        end

        GC.start

        benchmark.report("PCRE2 Regexp - JIT enhanced") do
          100_000.times { task.call(pcre2_re_jit) }
        end
      end
    ensure
      # Turn the GC back on even when a report raises
      GC.enable
    end

    3.times { puts }
  end

  puts "Benchmark 1: Small pattern, big string"
  puts

  pattern = "hello"
  string = ("abab" * 1000) + "hello" + ("abab" * 1000)

  benchmark!(pattern, string)


  puts "Benchmark 2: Big pattern, big string"
  puts

  pattern = "hello" * 50
  string = ("abab" * 1000) + "hello" + ("abab" * 1000) + pattern + ("abab" * 1000)

  benchmark!(pattern, string)


  puts "Benchmark 3: Small pattern, small string"
  puts

  pattern = "hello"
  string = "abababab" + "hello" + "abababab"

  benchmark!(pattern, string)


  puts "Benchmark 4: Multiple matches"
  puts

  pattern = "hello"
  string = (("abab" * 5) + "hello" + ("abab" * 5)) * 20

  benchmark!(pattern, string)
end
--------------------------------------------------------------------------------
/spec/lib/pcre2/string_utils_spec.rb:
--------------------------------------------------------------------------------
1 | RSpec.describe PCRE2::StringUtils do
2 | describe "#scan" do
3 | it "returns all matched strings" do
4 | subject = "and a 1 and a 2 and a 345"
5 | regexp = PCRE2::Regexp.new('\d+')
6 |
7 | result = regexp.scan(subject)
8 |
9 | expect(result).to eq(["1", "2", "345"])
10 | end
11 |
12 | it "accepts a block which iterates over all matches" do
13 | subject = "and a 1 and a 2 and a 345"
14 | regexp = PCRE2::Regexp.new('\d+')
15 |
16 | result = ""
17 | regexp.scan(subject) { |match| result += match }
18 |
19 | expect(result).to eq("12345")
20 | end
21 |
22 | it "returns captures if arity is higher than 1" do
23 | subject = "and a 1 and a 2 and a 345"
24 | regexp = PCRE2::Regexp.new('(and a) (\d+)')
25 |
26 | result = regexp.scan(subject)
27 |
28 | expect(result).to eq([["and a", "1"], ["and a", "2"], ["and a", "345"]])
29 | end
30 | end
31 |
32 | describe "#split" do
33 | let(:string) { "and a 1 and a 2 and a 345 congrats!" }
34 |
35 | it "splits where the regexp matches" do
36 | regexp = PCRE2::Regexp.new('\d+')
37 |
38 | result = regexp.split(string)
39 |
40 | expect(result).to eq(["and a ", " and a ", " and a ", " congrats!"])
41 | end
42 |
43 | it "returns captures in the result too" do
44 | regexp = PCRE2::Regexp.new('\d(\d*)')
45 |
46 | result = regexp.split(string)
47 |
48 | expect(result).to eq(["and a ", "", " and a ", "", " and a ", "45", " congrats!"])
49 | end
50 |
51 | it "splits each character for zero-length matches" do
52 | regexp = PCRE2::Regexp.new('')
53 |
54 | result = regexp.split(string)
55 |
56 | expect(result).to eq(string.chars)
57 | end
58 |
59 | # These are tests that were reverse-engineered from what String#split returns, since the documentation
60 | # or the original source code is not very clear about what the exact specification of `split` should be
61 | # when there are zero-length matches.
62 | context "edge-cases" do
63 | let(:string) { "abcde" }
64 |
65 | it "has an empty part at the beginning" do
66 | regexp = PCRE2::Regexp.new("a")
67 |
68 | result = regexp.split(string)
69 |
70 | expect(result).to eq(["", "bcde"])
71 | end
72 |
73 | it "has a zero-length match so split by characters without empty part at the beginning" do
74 | regexp = PCRE2::Regexp.new("")
75 |
76 | result = regexp.split(string)
77 |
78 | expect(result).to eq(["a", "b", "c", "d", "e"])
79 | end
80 |
81 | it "has a zero-length match in the middle so split into two parts" do
82 | regexp = PCRE2::Regexp.new("(?=c)")
83 |
84 | result = regexp.split(string)
85 |
86 | expect(result).to eq(["ab", "cde"])
87 | end
88 |
89 | it "has a zero-length match at the start and in the middle but again only split into two parts without empty part at the beginning" do
90 | regexp = PCRE2::Regexp.new("^|(?=c)")
91 |
92 | result = regexp.split(string)
93 |
94 | expect(result).to eq(["ab", "cde"])
95 | end
96 |
97 | it "has multiple zero-length matches including empty capture groups so split by chars and also include lots of empty results" do
98 | regexp = PCRE2::Regexp.new("()|^|(?=c)")
99 |
100 | result = regexp.split(string)
101 |
102 | expect(result).to eq(["a", "", "b", "", "c", "", "d", "", "e"])
103 | end
104 | end
105 | end
106 | end
107 |
--------------------------------------------------------------------------------
/spec/lib/pcre2/regexp_spec.rb:
--------------------------------------------------------------------------------
1 | RSpec.describe PCRE2::Regexp do
2 | describe "#new" do
3 | # Apparently this does not throw an error
4 | it "returns an error when given a broken pattern", :skip do
5 | pattern = "hell(o"
6 |
7 | expect do
8 | PCRE2::Regexp.new(pattern)
9 | end.to raise_error(/unmatched parenthesis/)
10 | end
11 |
12 | it "accepts a ::Regexp" do
13 | re = PCRE2::Regexp.new(/a/)
14 |
15 | expect(re.source).to eq("a")
16 | end
17 |
18 | it "accepts another PCRE2::Regexp" do
19 | re = PCRE2::Regexp.new(PCRE2::Regexp.new("a"))
20 |
21 | expect(re.source).to eq("a")
22 | end
23 | end
24 |
25 | describe "#match" do
26 | let(:regexp) { PCRE2::Regexp.new("hello") }
27 |
28 | it "returns a matchdata when a match" do
29 | subject = "well hello there!"
30 |
31 | result = regexp.match(subject)
32 |
33 | expect(result.offset(0)).to eq([5, 10])
34 | end
35 |
36 | it "matches very long patterns" do
37 | part = "aa"
38 | pattern = part * 100
39 | subject = part * 99
40 |
41 | regexp = PCRE2::Regexp.new(pattern)
42 |
43 | expect(regexp.match(subject)).to be_nil
44 | expect(regexp.match(subject + part)).not_to be_nil
45 | end
46 |
47 | it "returns nil when no match" do
48 | subject = "goodbye"
49 |
50 | result = regexp.match(subject)
51 |
52 | expect(result).to be_nil
53 | end
54 |
55 | it "starts from a given position" do
56 | subject = "well hello hello hello there!"
57 | # ^ start here
58 |
59 | result = regexp.match(subject, 11)
60 |
61 | expect(result.offset(0)).to eq([11, 16])
62 | end
63 | end
64 |
65 | describe "#matches" do
66 | let(:string) { "well hello hello hello there!"}
67 |
68 | it "yields all matchdatas" do
69 | regexp = PCRE2::Regexp.new("hello")
70 |
71 | matchdatas = regexp.matches(string).to_a
72 |
73 | expect(matchdatas.length).to eq(3)
74 | expect(matchdatas[0].offset(0)).to eq([5, 10])
75 | expect(matchdatas[1].offset(0)).to eq([11, 16])
76 | expect(matchdatas[2].offset(0)).to eq([17, 22])
77 | end
78 |
79 | it "does not get stuck in an infinte loop with zero-length matches" do
80 | regexp = PCRE2::Regexp.new("")
81 | enum = regexp.matches(string)
82 |
83 | first_pair = enum.next.offset(0)
84 | second_pair = enum.next.offset(0)
85 |
86 | expect(first_pair).not_to eq(second_pair)
87 | end
88 | end
89 |
90 | context "with named captures" do
91 | describe "#named_captures" do
92 | it "returns a list of named subpatterns and positions" do
93 | pattern = '(?<a>\w+)(?<b>\W+)(?<c>\w+)(?<c>aaa)'
94 | re = PCRE2::Regexp.new(pattern, PCRE2::PCRE2_DUPNAMES)
95 |
96 | expect(re.named_captures).to eq(
97 | {
98 | "a" => [1],
99 | "b" => [2],
100 | "c" => [3, 4]
101 | }
102 | )
103 | end
104 | end
105 |
106 | describe "#names" do
107 | it "returns names of the named captures" do
108 | pattern = '(?<a>\w+)(?<b>\W+)(?<c>\w+)'
109 | re = PCRE2::Regexp.new(pattern)
110 |
111 | expect(re.names).to eq(["a", "b", "c"])
112 | end
113 | end
114 | end
115 |
116 | context "with options" do
117 | it "matches case insensitive" do
118 | re = PCRE2::Regexp.new("HELLO")
119 | expect(re.match("hello!")).to be_nil
120 |
121 | re = PCRE2::Regexp.new("HELLO", PCRE2::PCRE2_CASELESS)
122 | expect(re.match("hello!")).not_to be_nil
123 | end
124 |
125 | it "allows duplicate named subpatterns" do\
126 | pattern = "(?<a>.)(?<a>)"
127 |
128 | expect { PCRE2::Regexp.new(pattern) }.to raise_error(/two named subpatterns have the same name/)
129 | expect { PCRE2::Regexp.new(pattern, PCRE2::PCRE2_DUPNAMES) }.not_to raise_error
130 | end
131 |
132 | it "accepts multiple options" do
133 | re = PCRE2::Regexp.new("HELLO|(?<a>world)(?<a>country)", PCRE2::PCRE2_DUPNAMES, PCRE2::PCRE2_CASELESS)
134 |
135 | expect(re.match("hello!")).not_to be_nil
136 | end
137 | end
138 |
139 | describe "#jit!" do
140 | it "compiles successfully" do
141 | expect(PCRE2::Regexp.new("hello").jit!).to be_truthy
142 | end
143 | end
144 | end
145 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PCRE2
2 |
3 | This library provides a Ruby interface for the PCRE2 library, which supports more advanced regular expression functionality than the built-in Ruby `Regexp`.
4 |
5 | ## Why?
6 |
7 | Ruby's `Regexp` is actually quite fast! For simple regexps without backtracking (for instance, regexps that don't contain wildcard matches like `.*`), you should probably keep using the Ruby `Regexp`. It needs no extra dependencies and it'll be faster than using an external library, including PCRE2.
8 |
9 | The main reason I built this was so I could use the [backtracking control verbs](https://www.rexegg.com/backtracking-control-verbs.html#mainverbs) such as `(*SKIP)(*FAIL)` that are not supported by Ruby's `Regexp`. Using these, and other features, `PCRE2` supports some pretty wild and advanced regular expressions which you cannot do with Ruby's `Regexp`.
10 |
11 | `PCRE2` also supports JIT (just-in-time) compilation of the regular expression. From [the manual](https://www.pcre.org/current/doc/html/pcre2jit.html):
12 | > Just-in-time compiling is a heavyweight optimization that can greatly speed up pattern matching. However, it comes at the cost of extra processing before the match is performed, so it is of most benefit when the same pattern is going to be matched many times. This does not necessarily mean many calls of a matching function; if the pattern is not anchored, matching attempts may take place many times at various positions in the subject, even for a single call. Therefore, if the subject string is very long, it may still pay to use JIT even for one-off matches.
13 |
14 | You can enable JIT by calling `regexp.jit!` on the `PCRE2::Regexp` object. With JIT enabled, `PCRE2` matching can be more than 2x faster than Ruby's built-in `Regexp`.
15 |
16 | ## Installation
17 |
18 | Install the PCRE2 library:
19 |
20 | ```bash
21 | brew install pcre2
22 | ```
23 |
24 | Add this line to your application's Gemfile:
25 |
26 | ```ruby
27 | gem 'pcre2'
28 | ```
29 |
30 | And then execute:
31 |
32 | $ bundle install
33 |
34 | Or install it yourself as:
35 |
36 | $ gem install pcre2
37 |
38 | ## Usage
39 |
40 | `PCRE2::Regexp` aims to act as much like Ruby's `Regexp` as possible. It has implemented a subset of the `Regexp` and `MatchData` APIs so it can be used as a drop-in replacement.
41 |
42 | ```ruby
43 | regexp = PCRE2::Regexp.new("hello")
44 | subject = "well hello there!"
45 | matchdata = regexp.match(subject)
46 |
47 | matchdata.offset(0) # => [5, 10] - start and end of the match
48 | matchdata[0] # => "hello"
49 |
50 | matchdata = regexp.match(subject, 11) # find next match
51 | ```
52 |
53 | Also some of the utility methods on `String` are reimplemented on `PCRE2::Regexp`:
54 |
55 | ```ruby
56 | regexp = PCRE2::Regexp.new('\d+')
57 | subject = "and a 1 and a 2 and a 345"
58 |
59 | regexp.scan(subject) # => ["1", "2", "345"]
60 | regexp.split(subject) # => ["and a ", " and a ", " and a "]
61 | ```
62 |
63 | There is one new method not available on `Regexp`: `PCRE2::Regexp#matches`, which loops over all matches in the string and yields the corresponding `MatchData` for each:
64 |
65 | ```ruby
66 | string = "well hello hello hello there!"
67 | re = PCRE2::Regexp.new("hello")
68 |
69 | re.matches(string) do |matchdata|
70 | puts "Matchdata found between #{matchdata.offset(0)[0]} and #{matchdata.offset(0)[1]}"
71 | end
72 | ```
73 |
74 | ## Benchmark
75 |
76 | You can run the benchmark that compares `PCRE2::Regexp` with Ruby's built-in `Regexp` as follows:
77 |
78 | ```bash
79 | bundle exec rake benchmark
80 | ```
81 |
82 | ## Resources
83 |
84 | - [PCRE2 Library](https://www.pcre.org/current/doc/html/)
85 | - [PCRE2 demo](https://www.pcre.org/current/doc/html/pcre2demo.html)
86 |
87 | ## Development
88 |
89 | After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
90 |
91 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
92 |
93 | ## Contributing
94 |
95 | Bug reports and pull requests are welcome on GitHub at https://github.com/dv/pcre2.
96 |
97 | ## License
98 |
99 | The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
100 |
--------------------------------------------------------------------------------
/lib/pcre2/lib.rb:
--------------------------------------------------------------------------------
1 | require "ffi"
2 |
# Low-level FFI bindings to the 8-bit PCRE2 library (`pcre2-8`), together with
# a handful of helper methods that wrap the raw C calls in friendlier Ruby
# signatures.
module PCRE2::Lib
  RETURN_CODE_NO_ERROR = 100

  extend FFI::Library

  ffi_lib 'pcre2-8' # Able to do 16 or 32 too

  PCRE2_SIZE    = typedef :size_t,   :PCRE2_SIZE
  PCRE2_SPTR    = typedef :pointer,  :PCRE2_SPTR
  PCRE2_UCHAR8  = typedef :uint8_t,  :PCRE2_UCHAR8
  PCRE2_UCHAR16 = typedef :uint16_t, :PCRE2_UCHAR16
  PCRE2_UCHAR32 = typedef :uint32_t, :PCRE2_UCHAR32

  # For 8-bit PCRE
  PCRE2_UCHAR = typedef :PCRE2_UCHAR8, :PCRE2_UCHAR

  # int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, PCRE2_SIZE bufflen);
  attach_function :pcre2_get_error_message_8, [ :int, :pointer, :PCRE2_SIZE ], :int

  # pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext);
  attach_function :pcre2_compile_8, [ :PCRE2_SPTR, :PCRE2_SIZE, :uint32_t, :pointer, :pointer, :pointer ], :pointer
  attach_function :pcre2_code_free_8, [ :pointer ], :void

  # int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where);
  attach_function :pcre2_pattern_info_8, [ :pointer, :uint32_t, :pointer ], :int

  # pcre2_match_data *pcre2_match_data_create_from_pattern( const pcre2_code *code, pcre2_general_context *gcontext);
  attach_function :pcre2_match_data_create_from_pattern_8, [ :pointer, :pointer ], :pointer
  attach_function :pcre2_match_data_free_8, [ :pointer ], :void

  # int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, pcre2_match_data *match_data, pcre2_match_context *mcontext);
  attach_function :pcre2_match_8, [ :pointer, :PCRE2_SPTR, :PCRE2_SIZE, :PCRE2_SIZE, :uint32_t, :pointer, :pointer ], :int

  attach_function :pcre2_get_ovector_count_8, [ :pointer ], :uint32_t
  attach_function :pcre2_get_ovector_pointer_8, [ :pointer ], :pointer

  # int pcre2_jit_compile(pcre2_code *code, uint32_t options)
  attach_function :pcre2_jit_compile_8, [ :pointer, :uint32_t ], :int

  # Returns the human-readable message PCRE2 associates with +error_code+.
  #
  # error_code - an Integer, or an FFI::MemoryPointer holding an int.
  #
  # Returns a String. For codes PCRE2 does not know (PCRE2_ERROR_BADDATA) a
  # generic "unknown" message is returned instead.
  # Raises PCRE2::Error when the message does not fit in the local buffer.
  def self.get_error_message(error_code)
    error_code = error_code.read_int if error_code.kind_of?(FFI::MemoryPointer)

    buffer = FFI::MemoryPointer.new(PCRE2_UCHAR, 120)
    result = pcre2_get_error_message_8(error_code, buffer, buffer.size)

    case result
    when PCRE2::PCRE2_ERROR_BADDATA
      "Error number #{error_code} unknown"
    when PCRE2::PCRE2_ERROR_NOMEMORY
      raise PCRE2::Error, "Buffer of #{buffer.size} is not large enough to contain message"
    else
      buffer.read_string
    end
  end

  # Some utility functions to help make the above more palatable

  # Compiles +pattern+ and returns a pointer to the compiled code.
  #
  # pattern - the pattern source as a String.
  # options - PCRE2 compile option bits: an Integer, or a (possibly nested)
  #           Array of Integers which are OR-ed together.
  #
  # Returns an FFI::AutoPointer that releases the compiled pattern through
  # pcre2_code_free_8 when it is garbage collected.
  # Raises the error built by PCRE2::Error.from_error_code when compilation fails.
  def self.compile_pattern(pattern, options = [])
    pattern_string_ptr = FFI::MemoryPointer.from_string(pattern)
    error_code_ptr     = FFI::MemoryPointer.new(:int, 1)
    error_offset_ptr   = FFI::MemoryPointer.new(PCRE2_SIZE, 1)
    option_bits        = Array(options).flatten.inject(0) { |memo, option| memo | option }

    # The length argument is expressed in code units, i.e. bytes for the
    # 8-bit library. String#size counts characters and would truncate any
    # pattern containing multibyte characters.
    pattern_ptr = PCRE2::Lib.pcre2_compile_8(
      pattern_string_ptr,
      pattern.bytesize,
      option_bits,
      error_code_ptr,
      error_offset_ptr,
      nil
    )

    if pattern_ptr.null?
      error_code   = error_code_ptr.read_int
      error_offset = error_offset_ptr.read(PCRE2_SIZE)

      raise PCRE2::Error.from_error_code(error_code, "while compiling pattern #{pattern} @ #{error_offset}")
    end

    FFI::AutoPointer.new(pattern_ptr, PCRE2::Lib.method(:pcre2_code_free_8))
  end

  # Allocates a match data block sized for the compiled pattern +pattern_ptr+.
  #
  # Returns an FFI::AutoPointer that releases the block through
  # pcre2_match_data_free_8 when it is garbage collected.
  # Raises PCRE2::Error when PCRE2 returns a NULL block.
  def self.create_match_data_for_pattern(pattern_ptr)
    match_data_ptr = PCRE2::Lib.pcre2_match_data_create_from_pattern_8(pattern_ptr, nil)
    raise PCRE2::Error, "Could not allocate match data for pattern" if match_data_ptr.null?

    FFI::AutoPointer.new(match_data_ptr, PCRE2::Lib.method(:pcre2_match_data_free_8))
  end

  # Runs the compiled pattern +pattern_ptr+ against +body+.
  #
  # position       - start offset handed to pcre2_match (nil is treated as 0).
  # match_data_ptr - an existing match data block to reuse; a new one is
  #                  created when omitted.
  #
  # Returns [result_count, match_data_ptr]; result_count is 0 when there is
  # no match, otherwise the positive return code of pcre2_match.
  # Raises PCRE2::Error when the match data block is too small or when
  # pcre2_match reports any error other than "no match".
  def self.match(pattern_ptr, body, position: 0, match_data_ptr: nil)
    position ||= 0
    match_data_ptr ||= create_match_data_for_pattern(pattern_ptr)

    body_ptr = FFI::MemoryPointer.from_string(body)

    # Pass the subject's byte length. body_ptr.size is one larger because
    # from_string appends a NUL terminator, which would make that NUL part of
    # the subject and break end-of-subject anchors.
    return_code =
      PCRE2::Lib.pcre2_match_8(
        pattern_ptr,
        body_ptr,
        body.bytesize,
        position,
        0,
        match_data_ptr,
        nil
      )

    case return_code
    when 0
      raise PCRE2::Error, "Not enough memory in MatchData to store all captures"
    when PCRE2::PCRE2_ERROR_NOMATCH
      result_count = 0
    else
      raise PCRE2::Error.from_error_code(return_code) if return_code < 0

      result_count = return_code
    end

    [result_count, match_data_ptr]
  end

  # Reads +pair_count+ pairs of size_t values from the ovector of
  # +match_data_ptr+ (PCRE2 stores one pair of offsets per group there).
  # When +pair_count+ is nil, the full ovector count reported by PCRE2 is used.
  #
  # Returns an Array of two-element Integer Arrays.
  def self.get_ovector_pairs(match_data_ptr, pair_count)
    pair_count ||= PCRE2::Lib.pcre2_get_ovector_count_8(match_data_ptr)

    ovector_ptr = PCRE2::Lib.pcre2_get_ovector_pointer_8(match_data_ptr)
    type_size   = FFI.type_size(:size_t)

    pair_count.times.map do |i|
      first_slot = i * 2

      [
        ovector_ptr.get(:size_t, first_slot * type_size),
        ovector_ptr.get(:size_t, (first_slot + 1) * type_size)
      ]
    end
  end

  # Reads the name table of the compiled pattern +pattern_ptr+.
  #
  # Returns a Hash of group name => Array of group numbers (a name maps to
  # several numbers when duplicate names are in use). The Hash keeps a default
  # block that yields an empty Array for unknown names without storing it.
  # Raises RuntimeError when any pcre2_pattern_info call fails.
  def self.named_captures(pattern_ptr)
    count_ptr      = FFI::MemoryPointer.new(:uint32_t, 1)
    entry_size_ptr = FFI::MemoryPointer.new(:uint32_t, 1)
    table_ptr_ptr  = FFI::MemoryPointer.new(:pointer, 1)

    {
      PCRE2::PCRE2_INFO_NAMECOUNT     => count_ptr,
      PCRE2::PCRE2_INFO_NAMEENTRYSIZE => entry_size_ptr,
      PCRE2::PCRE2_INFO_NAMETABLE     => table_ptr_ptr
    }.each do |what, where|
      if PCRE2::Lib.pcre2_pattern_info_8(pattern_ptr, what, where) != 0
        raise "Something went wrong"
      end
    end

    name_count = count_ptr.read_uint32
    entry_size = entry_size_ptr.read_uint32
    table_ptr  = table_ptr_ptr.read_pointer

    names_and_positions =
      name_count.times.map do |i|
        entry = table_ptr + (i * entry_size)

        # Each entry starts with the group number in two bytes, most
        # significant first, followed by the NUL-terminated name. The bytes
        # must be read unsigned: a signed read corrupts numbers >= 128.
        ovector_position = (entry.get_uint8(0) << 8) + entry.get_uint8(1)
        match_name       = (entry + 2).read_string_to_null

        [match_name, ovector_position]
      end

    # Convert an array of [name, position] into a Hash of name => [position (, position, ...)], with possible duplicate names.
    # The default block hands out a fresh Array; `<<=` is what stores it under the key.
    names_and_positions.each_with_object(Hash.new { [] }) do |(name, position), hash|
      hash[name] <<= position
    end
  end
end
169 |
--------------------------------------------------------------------------------
/lib/pcre2/lib/constants.rb:
--------------------------------------------------------------------------------
1 | #
2 | # Use replace:
3 | #
4 | # "#define ([^\W]+) \W* (.*)/"" -> "\1 = \2"
5 | # "(0x[^u]+)u" -> "\1"
6 | #
# Numeric constants used when talking to the PCRE2 C API: option bits, error
# codes and request types. Names and values are kept exactly as given.
module PCRE2
  # Option bits accepted by pcre2_compile(), pcre2_match() and
  # pcre2_dfa_match(). PCRE2_NO_UTF_CHECK only affects the function it is
  # passed to. These sit at the most significant end of the options word so
  # that further bits can be added next to them.

  PCRE2_ANCHORED     = 0x80000000
  PCRE2_NO_UTF_CHECK = 0x40000000
  PCRE2_ENDANCHORED  = 0x20000000

  # Option bits accepted only by pcre2_compile(). They may nevertheless
  # influence compilation, JIT compilation and/or interpretive execution;
  # the tags after each value say which:
  #   C  alters what is compiled by pcre2_compile()
  #   J  alters what is compiled by pcre2_jit_compile()
  #   M  is inspected during pcre2_match() execution
  #   D  is inspected during pcre2_dfa_match() execution

  PCRE2_ALLOW_EMPTY_CLASS   = 0x00000001 # C
  PCRE2_ALT_BSUX            = 0x00000002 # C
  PCRE2_AUTO_CALLOUT        = 0x00000004 # C
  PCRE2_CASELESS            = 0x00000008 # C
  PCRE2_DOLLAR_ENDONLY      = 0x00000010 #   J M D
  PCRE2_DOTALL              = 0x00000020 # C
  PCRE2_DUPNAMES            = 0x00000040 # C
  PCRE2_EXTENDED            = 0x00000080 # C
  PCRE2_FIRSTLINE           = 0x00000100 #   J M D
  PCRE2_MATCH_UNSET_BACKREF = 0x00000200 # C J M
  PCRE2_MULTILINE           = 0x00000400 # C
  PCRE2_NEVER_UCP           = 0x00000800 # C
  PCRE2_NEVER_UTF           = 0x00001000 # C
  PCRE2_NO_AUTO_CAPTURE     = 0x00002000 # C
  PCRE2_NO_AUTO_POSSESS     = 0x00004000 # C
  PCRE2_NO_DOTSTAR_ANCHOR   = 0x00008000 # C
  PCRE2_NO_START_OPTIMIZE   = 0x00010000 #   J M D
  PCRE2_UCP                 = 0x00020000 # C J M D
  PCRE2_UNGREEDY            = 0x00040000 # C
  PCRE2_UTF                 = 0x00080000 # C J M D
  PCRE2_NEVER_BACKSLASH_C   = 0x00100000 # C
  PCRE2_ALT_CIRCUMFLEX      = 0x00200000 #   J M D
  PCRE2_ALT_VERBNAMES       = 0x00400000 # C
  PCRE2_USE_OFFSET_LIMIT    = 0x00800000 #   J M D
  PCRE2_EXTENDED_MORE       = 0x01000000 # C
  PCRE2_LITERAL             = 0x02000000 # C

  # Bits for the additional compile options word held in the compile context.

  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES = 0x00000001 # C
  PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL   = 0x00000002 # C
  PCRE2_EXTRA_MATCH_WORD              = 0x00000004 # C
  PCRE2_EXTRA_MATCH_LINE              = 0x00000008 # C
  PCRE2_EXTRA_ESCAPED_CR_IS_LF        = 0x00000010 # C
  PCRE2_EXTRA_ALT_BSUX                = 0x00000020 # C

  # Options for pcre2_jit_compile().

  PCRE2_JIT_COMPLETE     = 0x00000001 # For full matching
  PCRE2_JIT_PARTIAL_SOFT = 0x00000002
  PCRE2_JIT_PARTIAL_HARD = 0x00000004
  PCRE2_JIT_INVALID_UTF  = 0x00000100

  # Options for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match() and
  # pcre2_substitute(). Where an option is valid for only one of those
  # functions, that is noted next to it. PCRE2_ANCHORED, PCRE2_ENDANCHORED and
  # PCRE2_NO_UTF_CHECK may be passed to these functions as well (although
  # pcre2_jit_match() ignores the latter since it bypasses all sanity checks).

  PCRE2_NOTBOL                     = 0x00000001
  PCRE2_NOTEOL                     = 0x00000002
  PCRE2_NOTEMPTY                   = 0x00000004 # ) These two must be kept
  PCRE2_NOTEMPTY_ATSTART           = 0x00000008 # ) adjacent to each other.
  PCRE2_PARTIAL_SOFT               = 0x00000010
  PCRE2_PARTIAL_HARD               = 0x00000020
  PCRE2_DFA_RESTART                = 0x00000040 # pcre2_dfa_match() only
  PCRE2_DFA_SHORTEST               = 0x00000080 # pcre2_dfa_match() only
  PCRE2_SUBSTITUTE_GLOBAL          = 0x00000100 # pcre2_substitute() only
  PCRE2_SUBSTITUTE_EXTENDED        = 0x00000200 # pcre2_substitute() only
  PCRE2_SUBSTITUTE_UNSET_EMPTY     = 0x00000400 # pcre2_substitute() only
  PCRE2_SUBSTITUTE_UNKNOWN_UNSET   = 0x00000800 # pcre2_substitute() only
  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH = 0x00001000 # pcre2_substitute() only
  PCRE2_NO_JIT                     = 0x00002000 # Not for pcre2_dfa_match()
  PCRE2_COPY_MATCHED_SUBJECT       = 0x00004000

  # Options for pcre2_pattern_convert().

  PCRE2_CONVERT_UTF                    = 0x00000001
  PCRE2_CONVERT_NO_UTF_CHECK           = 0x00000002
  PCRE2_CONVERT_POSIX_BASIC            = 0x00000004
  PCRE2_CONVERT_POSIX_EXTENDED         = 0x00000008
  PCRE2_CONVERT_GLOB                   = 0x00000010
  PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR = 0x00000030
  PCRE2_CONVERT_GLOB_NO_STARSTAR       = 0x00000050

  # Newline and \R settings for use in compile contexts. The newline values
  # have to stay in step with the values set in config.h, and both sets must
  # all be greater than zero.

  PCRE2_NEWLINE_CR      = 1
  PCRE2_NEWLINE_LF      = 2
  PCRE2_NEWLINE_CRLF    = 3
  PCRE2_NEWLINE_ANY     = 4
  PCRE2_NEWLINE_ANYCRLF = 5
  PCRE2_NEWLINE_NUL     = 6
  PCRE2_BSR_UNICODE     = 1
  PCRE2_BSR_ANYCRLF     = 2

  # Error codes for pcre2_compile(). Some of them are also used by
  # pcre2_pattern_convert().

  PCRE2_ERROR_END_BACKSLASH                  = 101
  PCRE2_ERROR_END_BACKSLASH_C                = 102
  PCRE2_ERROR_UNKNOWN_ESCAPE                 = 103
  PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER        = 104
  PCRE2_ERROR_QUANTIFIER_TOO_BIG             = 105
  PCRE2_ERROR_MISSING_SQUARE_BRACKET         = 106
  PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS        = 107
  PCRE2_ERROR_CLASS_RANGE_ORDER              = 108
  PCRE2_ERROR_QUANTIFIER_INVALID             = 109
  PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT     = 110
  PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY     = 111
  PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS       = 112
  PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING     = 113
  PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS    = 114
  PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE       = 115
  PCRE2_ERROR_NULL_PATTERN                   = 116
  PCRE2_ERROR_BAD_OPTIONS                    = 117
  PCRE2_ERROR_MISSING_COMMENT_CLOSING        = 118
  PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP      = 119
  PCRE2_ERROR_PATTERN_TOO_LARGE              = 120
  PCRE2_ERROR_HEAP_FAILED                    = 121
  PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS  = 122
  PCRE2_ERROR_INTERNAL_CODE_OVERFLOW         = 123
  PCRE2_ERROR_MISSING_CONDITION_CLOSING      = 124
  PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH    = 125
  PCRE2_ERROR_ZERO_RELATIVE_REFERENCE        = 126
  PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES    = 127
  PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED   = 128
  PCRE2_ERROR_BAD_RELATIVE_REFERENCE         = 129
  PCRE2_ERROR_UNKNOWN_POSIX_CLASS            = 130
  PCRE2_ERROR_INTERNAL_STUDY_ERROR           = 131
  PCRE2_ERROR_UNICODE_NOT_SUPPORTED          = 132
  PCRE2_ERROR_PARENTHESES_STACK_CHECK        = 133
  PCRE2_ERROR_CODE_POINT_TOO_BIG             = 134
  PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED     = 135
  PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C = 136
  PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE    = 137
  PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG         = 138
  PCRE2_ERROR_MISSING_CALLOUT_CLOSING        = 139
  PCRE2_ERROR_ESCAPE_INVALID_IN_VERB         = 140
  PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P     = 141
  PCRE2_ERROR_MISSING_NAME_TERMINATOR        = 142
  PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME      = 143
  PCRE2_ERROR_INVALID_SUBPATTERN_NAME        = 144
  PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE = 145
  PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY     = 146
  PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY       = 147
  PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG       = 148
  PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS     = 149
  PCRE2_ERROR_CLASS_INVALID_RANGE            = 150
  PCRE2_ERROR_OCTAL_BYTE_TOO_BIG             = 151
  PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE     = 152
  PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN    = 153
  PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES       = 154
  PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE      = 155
  PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE       = 156
  PCRE2_ERROR_BACKSLASH_G_SYNTAX             = 157
  PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING = 158

  # Error 159 is obsolete and should now never occur

  PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED      = 159
  PCRE2_ERROR_VERB_UNKNOWN                   = 160
  PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG      = 161
  PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED       = 162
  PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW       = 163
  PCRE2_ERROR_INVALID_OCTAL                  = 164
  PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH      = 165
  PCRE2_ERROR_MARK_MISSING_ARGUMENT          = 166
  PCRE2_ERROR_INVALID_HEXADECIMAL            = 167
  PCRE2_ERROR_BACKSLASH_C_SYNTAX             = 168
  PCRE2_ERROR_BACKSLASH_K_SYNTAX             = 169
  PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS  = 170
  PCRE2_ERROR_BACKSLASH_N_IN_CLASS           = 171
  PCRE2_ERROR_CALLOUT_STRING_TOO_LONG        = 172
  PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT  = 173
  PCRE2_ERROR_UTF_IS_DISABLED                = 174
  PCRE2_ERROR_UCP_IS_DISABLED                = 175
  PCRE2_ERROR_VERB_NAME_TOO_LONG             = 176
  PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG = 177
  PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS    = 178
  PCRE2_ERROR_VERSION_CONDITION_SYNTAX       = 179
  PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS = 180
  PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER    = 181
  PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER   = 182
  PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED    = 183
  PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP      = 184
  PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED   = 185
  PCRE2_ERROR_PATTERN_TOO_COMPLICATED        = 186
  PCRE2_ERROR_LOOKBEHIND_TOO_LONG            = 187
  PCRE2_ERROR_PATTERN_STRING_TOO_LONG        = 188
  PCRE2_ERROR_INTERNAL_BAD_CODE              = 189
  PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP      = 190
  PCRE2_ERROR_NO_SURROGATES_IN_UTF16         = 191
  PCRE2_ERROR_BAD_LITERAL_OPTIONS            = 192
  PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE      = 193
  PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS      = 194
  PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN        = 195
  PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE       = 196

  # "Expected" matching error codes: no match and partial match.

  PCRE2_ERROR_NOMATCH = -1
  PCRE2_ERROR_PARTIAL = -2

  # Error codes for UTF-8 validity checks

  PCRE2_ERROR_UTF8_ERR1  = -3
  PCRE2_ERROR_UTF8_ERR2  = -4
  PCRE2_ERROR_UTF8_ERR3  = -5
  PCRE2_ERROR_UTF8_ERR4  = -6
  PCRE2_ERROR_UTF8_ERR5  = -7
  PCRE2_ERROR_UTF8_ERR6  = -8
  PCRE2_ERROR_UTF8_ERR7  = -9
  PCRE2_ERROR_UTF8_ERR8  = -10
  PCRE2_ERROR_UTF8_ERR9  = -11
  PCRE2_ERROR_UTF8_ERR10 = -12
  PCRE2_ERROR_UTF8_ERR11 = -13
  PCRE2_ERROR_UTF8_ERR12 = -14
  PCRE2_ERROR_UTF8_ERR13 = -15
  PCRE2_ERROR_UTF8_ERR14 = -16
  PCRE2_ERROR_UTF8_ERR15 = -17
  PCRE2_ERROR_UTF8_ERR16 = -18
  PCRE2_ERROR_UTF8_ERR17 = -19
  PCRE2_ERROR_UTF8_ERR18 = -20
  PCRE2_ERROR_UTF8_ERR19 = -21
  PCRE2_ERROR_UTF8_ERR20 = -22
  PCRE2_ERROR_UTF8_ERR21 = -23

  # Error codes for UTF-16 validity checks

  PCRE2_ERROR_UTF16_ERR1 = -24
  PCRE2_ERROR_UTF16_ERR2 = -25
  PCRE2_ERROR_UTF16_ERR3 = -26

  # Error codes for UTF-32 validity checks

  PCRE2_ERROR_UTF32_ERR1 = -27
  PCRE2_ERROR_UTF32_ERR2 = -28

  # Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
  # functions, context functions and serializing functions, listed in
  # numerical order. They used to be in alphabetical order as well, but since
  # PCRE2 has been released the numbers must not be changed.

  PCRE2_ERROR_BADDATA           = -29
  PCRE2_ERROR_MIXEDTABLES       = -30 # Name was changed
  PCRE2_ERROR_BADMAGIC          = -31
  PCRE2_ERROR_BADMODE           = -32
  PCRE2_ERROR_BADOFFSET         = -33
  PCRE2_ERROR_BADOPTION         = -34
  PCRE2_ERROR_BADREPLACEMENT    = -35
  PCRE2_ERROR_BADUTFOFFSET      = -36
  PCRE2_ERROR_CALLOUT           = -37 # Never used by PCRE2 itself
  PCRE2_ERROR_DFA_BADRESTART    = -38
  PCRE2_ERROR_DFA_RECURSE       = -39
  PCRE2_ERROR_DFA_UCOND         = -40
  PCRE2_ERROR_DFA_UFUNC         = -41
  PCRE2_ERROR_DFA_UITEM         = -42
  PCRE2_ERROR_DFA_WSSIZE        = -43
  PCRE2_ERROR_INTERNAL          = -44
  PCRE2_ERROR_JIT_BADOPTION     = -45
  PCRE2_ERROR_JIT_STACKLIMIT    = -46
  PCRE2_ERROR_MATCHLIMIT        = -47
  PCRE2_ERROR_NOMEMORY          = -48
  PCRE2_ERROR_NOSUBSTRING       = -49
  PCRE2_ERROR_NOUNIQUESUBSTRING = -50
  PCRE2_ERROR_NULL              = -51
  PCRE2_ERROR_RECURSELOOP       = -52
  PCRE2_ERROR_DEPTHLIMIT        = -53
  PCRE2_ERROR_RECURSIONLIMIT    = -53 # Obsolete synonym
  PCRE2_ERROR_UNAVAILABLE       = -54
  PCRE2_ERROR_UNSET             = -55
  PCRE2_ERROR_BADOFFSETLIMIT    = -56
  PCRE2_ERROR_BADREPESCAPE      = -57
  PCRE2_ERROR_REPMISSINGBRACE   = -58
  PCRE2_ERROR_BADSUBSTITUTION   = -59
  PCRE2_ERROR_BADSUBSPATTERN    = -60
  PCRE2_ERROR_TOOMANYREPLACE    = -61
  PCRE2_ERROR_BADSERIALIZEDDATA = -62
  PCRE2_ERROR_HEAPLIMIT         = -63
  PCRE2_ERROR_CONVERT_SYNTAX    = -64
  PCRE2_ERROR_INTERNAL_DUPMATCH = -65

  # Request types for pcre2_pattern_info()

  PCRE2_INFO_ALLOPTIONS     = 0
  PCRE2_INFO_ARGOPTIONS     = 1
  PCRE2_INFO_BACKREFMAX     = 2
  PCRE2_INFO_BSR            = 3
  PCRE2_INFO_CAPTURECOUNT   = 4
  PCRE2_INFO_FIRSTCODEUNIT  = 5
  PCRE2_INFO_FIRSTCODETYPE  = 6
  PCRE2_INFO_FIRSTBITMAP    = 7
  PCRE2_INFO_HASCRORLF      = 8
  PCRE2_INFO_JCHANGED       = 9
  PCRE2_INFO_JITSIZE        = 10
  PCRE2_INFO_LASTCODEUNIT   = 11
  PCRE2_INFO_LASTCODETYPE   = 12
  PCRE2_INFO_MATCHEMPTY     = 13
  PCRE2_INFO_MATCHLIMIT     = 14
  PCRE2_INFO_MAXLOOKBEHIND  = 15
  PCRE2_INFO_MINLENGTH      = 16
  PCRE2_INFO_NAMECOUNT      = 17
  PCRE2_INFO_NAMEENTRYSIZE  = 18
  PCRE2_INFO_NAMETABLE      = 19
  PCRE2_INFO_NEWLINE        = 20
  PCRE2_INFO_DEPTHLIMIT     = 21
  PCRE2_INFO_RECURSIONLIMIT = 21 # Obsolete synonym
  PCRE2_INFO_SIZE           = 22
  PCRE2_INFO_HASBACKSLASHC  = 23
  PCRE2_INFO_FRAMESIZE      = 24
  PCRE2_INFO_HEAPLIMIT      = 25
  PCRE2_INFO_EXTRAOPTIONS   = 26

  # Request types for pcre2_config().

  PCRE2_CONFIG_BSR               = 0
  PCRE2_CONFIG_JIT               = 1
  PCRE2_CONFIG_JITTARGET         = 2
  PCRE2_CONFIG_LINKSIZE          = 3
  PCRE2_CONFIG_MATCHLIMIT        = 4
  PCRE2_CONFIG_NEWLINE           = 5
  PCRE2_CONFIG_PARENSLIMIT       = 6
  PCRE2_CONFIG_DEPTHLIMIT        = 7
  PCRE2_CONFIG_RECURSIONLIMIT    = 7 # Obsolete synonym
  PCRE2_CONFIG_STACKRECURSE      = 8 # Obsolete
  PCRE2_CONFIG_UNICODE           = 9
  PCRE2_CONFIG_UNICODE_VERSION   = 10
  PCRE2_CONFIG_VERSION           = 11
  PCRE2_CONFIG_HEAPLIMIT         = 12
  PCRE2_CONFIG_NEVER_BACKSLASH_C = 13
  PCRE2_CONFIG_COMPILED_WIDTHS   = 14
end
350 |
--------------------------------------------------------------------------------