├── .github
    └── workflows
    │   ├── gouteur.yml
    │   ├── lint.yml
    │   └── tests.yml
├── .gitignore
├── .gouteur.yml
├── .rubocop.yml
├── CHANGELOG.md
├── Gemfile
├── LICENSE
├── README.md
├── Rakefile
├── bin
    ├── console
    └── setup
├── lib
    ├── regexp_parser.rb
    └── regexp_parser
    │   ├── error.rb
    │   ├── expression.rb
    │   ├── expression
    │       ├── base.rb
    │       ├── classes
    │       │   ├── alternation.rb
    │       │   ├── anchor.rb
    │       │   ├── backreference.rb
    │       │   ├── character_set.rb
    │       │   ├── character_set
    │       │   │   ├── intersection.rb
    │       │   │   └── range.rb
    │       │   ├── character_type.rb
    │       │   ├── conditional.rb
    │       │   ├── escape_sequence.rb
    │       │   ├── free_space.rb
    │       │   ├── group.rb
    │       │   ├── keep.rb
    │       │   ├── literal.rb
    │       │   ├── posix_class.rb
    │       │   ├── root.rb
    │       │   └── unicode_property.rb
    │       ├── methods
    │       │   ├── construct.rb
    │       │   ├── escape_sequence_char.rb
    │       │   ├── escape_sequence_codepoint.rb
    │       │   ├── human_name.rb
    │       │   ├── match.rb
    │       │   ├── match_length.rb
    │       │   ├── negative.rb
    │       │   ├── options.rb
    │       │   ├── parts.rb
    │       │   ├── printing.rb
    │       │   ├── referenced_expressions.rb
    │       │   ├── strfregexp.rb
    │       │   ├── tests.rb
    │       │   └── traverse.rb
    │       ├── quantifier.rb
    │       ├── sequence.rb
    │       ├── sequence_operation.rb
    │       ├── shared.rb
    │       └── subexpression.rb
    │   ├── lexer.rb
    │   ├── parser.rb
    │   ├── scanner
    │       ├── char_type.rl
    │       ├── errors
    │       │   ├── premature_end_error.rb
    │       │   ├── scanner_error.rb
    │       │   └── validation_error.rb
    │       ├── properties
    │       │   ├── long.csv
    │       │   └── short.csv
    │       ├── property.rl
    │       └── scanner.rl
    │   ├── syntax.rb
    │   ├── syntax
    │       ├── any.rb
    │       ├── base.rb
    │       ├── token.rb
    │       ├── token
    │       │   ├── anchor.rb
    │       │   ├── assertion.rb
    │       │   ├── backreference.rb
    │       │   ├── character_set.rb
    │       │   ├── character_type.rb
    │       │   ├── conditional.rb
    │       │   ├── escape.rb
    │       │   ├── group.rb
    │       │   ├── keep.rb
    │       │   ├── meta.rb
    │       │   ├── posix_class.rb
    │       │   ├── quantifier.rb
    │       │   ├── unicode_property.rb
    │       │   └── virtual.rb
    │       ├── version_lookup.rb
    │       ├── versions.rb
    │       └── versions
    │       │   ├── 1.8.6.rb
    │       │   ├── 1.9.1.rb
    │       │   ├── 1.9.3.rb
    │       │   ├── 2.0.0.rb
    │       │   ├── 2.2.0.rb
    │       │   ├── 2.3.0.rb
    │       │   ├── 2.4.0.rb
    │       │   ├── 2.4.1.rb
    │       │   ├── 2.5.0.rb
    │       │   ├── 2.6.0.rb
    │       │   ├── 2.6.2.rb
    │       │   ├── 2.6.3.rb
    │       │   ├── 3.1.0.rb
    │       │   └── 3.2.0.rb
    │   ├── token.rb
    │   └── version.rb
├── regexp_parser.gemspec
├── spec
    ├── expression
    │   ├── base_spec.rb
    │   ├── clone_spec.rb
    │   ├── conditional_spec.rb
    │   ├── free_space_spec.rb
    │   ├── methods
    │   │   ├── construct_spec.rb
    │   │   ├── human_name_spec.rb
    │   │   ├── match_length_spec.rb
    │   │   ├── match_spec.rb
    │   │   ├── negative_spec.rb
    │   │   ├── parts_spec.rb
    │   │   ├── printing_spec.rb
    │   │   ├── strfregexp_spec.rb
    │   │   ├── tests_spec.rb
    │   │   └── traverse_spec.rb
    │   ├── options_spec.rb
    │   ├── subexpression_spec.rb
    │   ├── te_ts_spec.rb
    │   ├── to_h_spec.rb
    │   └── to_s_spec.rb
    ├── lexer
    │   ├── all_spec.rb
    │   ├── conditionals_spec.rb
    │   ├── delimiters_spec.rb
    │   ├── escapes_spec.rb
    │   ├── keep_spec.rb
    │   ├── literals_spec.rb
    │   ├── nesting_spec.rb
    │   └── refcalls_spec.rb
    ├── parser
    │   ├── all_spec.rb
    │   ├── alternation_spec.rb
    │   ├── anchors_spec.rb
    │   ├── conditionals_spec.rb
    │   ├── errors_spec.rb
    │   ├── escapes_spec.rb
    │   ├── free_space_spec.rb
    │   ├── groups_spec.rb
    │   ├── keep_spec.rb
    │   ├── options_spec.rb
    │   ├── posix_classes_spec.rb
    │   ├── properties_spec.rb
    │   ├── quantifiers_spec.rb
    │   ├── refcalls_spec.rb
    │   ├── set
    │   │   ├── intersections_spec.rb
    │   │   └── ranges_spec.rb
    │   ├── sets_spec.rb
    │   └── types_spec.rb
    ├── scanner
    │   ├── all_spec.rb
    │   ├── anchors_spec.rb
    │   ├── conditionals_spec.rb
    │   ├── delimiters_spec.rb
    │   ├── errors_spec.rb
    │   ├── escapes_spec.rb
    │   ├── free_space_spec.rb
    │   ├── groups_spec.rb
    │   ├── keep_spec.rb
    │   ├── literals_spec.rb
    │   ├── meta_spec.rb
    │   ├── options_spec.rb
    │   ├── properties_spec.rb
    │   ├── quantifiers_spec.rb
    │   ├── refcalls_spec.rb
    │   ├── sets_spec.rb
    │   └── types_spec.rb
    ├── spec_helper.rb
    ├── support
    │   ├── capturing_stderr.rb
    │   └── shared_examples.rb
    ├── syntax
    │   ├── syntax_spec.rb
    │   ├── syntax_token_map_spec.rb
    │   └── versions
    │   │   ├── 1.8.6_spec.rb
    │   │   ├── 1.9.1_spec.rb
    │   │   ├── 1.9.3_spec.rb
    │   │   ├── 2.0.0_spec.rb
    │   │   ├── 2.2.0_spec.rb
    │   │   └── 3.2.0_spec.rb
    └── token
    │   └── token_spec.rb
└── tasks
    ├── benchmark.rake
    ├── benchmarks
        ├── log
        ├── minimal_regexp.rb
        └── uri_regexp.rb
    ├── props.rake
    └── ragel.rake


/.github/workflows/gouteur.yml:
--------------------------------------------------------------------------------
 1 | name: gouteur
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   build:
 7 |     runs-on: ubuntu-latest
 8 | 
 9 |     steps:
10 |       - uses: actions/checkout@v4
11 |       - name: Set up Ruby
12 |         uses: ruby/setup-ruby@v1
13 |         with:
14 |           ruby-version: '3.2' # quoted so YAML reads a string, not the float 3.2
15 |           bundler-cache: true
16 |       - name: Install and run ragel
17 |         run: |
18 |           sudo apt-get install -yqq ragel
19 |           bundle exec rake ragel:rb
20 |       - name: Test
21 |         run: bundle exec gouteur
22 | 


--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
 1 | # based on https://github.com/rails/rails/blob/4a78dcb/.github/workflows/rubocop.yml
 2 | 
 3 | name: rubocop linting
 4 | 
 5 | on: [push, pull_request]
 6 | 
 7 | jobs:
 8 |   build:
 9 |     runs-on: ubuntu-latest
10 | 
11 |     steps:
12 |     - uses: actions/checkout@v4
13 |     - name: Set up Ruby
14 |       uses: ruby/setup-ruby@v1
15 |       with:
16 |         ruby-version: '3.2' # quoted so YAML reads a string, not the float 3.2
17 |         bundler-cache: true
18 |     - name: Install and run ragel
19 |       run: |
20 |         sudo apt-get install -yqq ragel
21 |         bundle exec rake ragel:rb
22 |     - name: Run rubocop
23 |       run: bundle exec rubocop
24 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | name: tests
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 |   schedule:
 7 |     - cron: '11 11 14 * *' # at 11:11 am on the 14th of every month
 8 | 
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-latest
12 | 
13 |     strategy:
14 |       matrix:
15 |         ruby: [ '2.3', '2.4', '2.5', '2.6', '2.7', '3.0', '3.1', '3.2', '3.3', 'ruby-head' ]
16 | 
17 |     steps:
18 |       - uses: actions/checkout@v4
19 |       - name: Set up Ruby ${{ matrix.ruby }}
20 |         uses: ruby/setup-ruby@v1
21 |         with:
22 |           ruby-version: ${{ matrix.ruby }}
23 |           bundler-cache: true
24 |       - name: Install ragel
25 |         run: sudo apt-get install -yqq ragel
26 |       - name: Test with Rake
27 |         run: bundle exec rake test:full
28 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | .*.swp
 3 | .DS_Store
 4 | .ruby-version
 5 | .tags
 6 | .tags1
 7 | .tool-versions
 8 | 
 9 | Gemfile.lock
10 | 
11 | lib/regexp_parser/scanner.rb
12 | 
13 | doc
14 | .yardoc
15 | 
16 | .bundle/*
17 | pkg/*
18 | coverage/*
19 | tmp/*
20 | 


--------------------------------------------------------------------------------
/.gouteur.yml:
--------------------------------------------------------------------------------
 1 | # Usage: https://github.com/jaynetics/gouteur/blob/main/README.md
 2 | 
 3 | repos:
 4 |   - uri: https://github.com/jaynetics/js_regex
 5 | 
 6 |   - uri: https://github.com/jaynetics/repper
 7 | 
 8 |   - uri: https://github.com/rubocop-hq/rubocop
 9 |     tasks: rspec --pattern "**/{,*}regexp{,*,*/**/*}_spec.rb"
10 | 
11 |   - uri: https://github.com/mbj/mutant
12 |     tasks: rspec --pattern "**/{,*}regexp{,*,*/**/*}_spec.rb"
13 | 
14 |   - uri: https://github.com/teamcapybara/capybara
15 |     tasks: rspec spec/regexp_dissassembler_spec.rb
16 | 


--------------------------------------------------------------------------------
/.rubocop.yml:
--------------------------------------------------------------------------------
 1 | AllCops:
 2 |   DisabledByDefault: true
 3 |   Exclude:
 4 |     - '{bin,pkg,tmp,vendor}/**/*' # vendored dependencies etc.
 5 |     - 'lib/regexp_parser/scanner.rb' # Ragel-generated code
 6 |   NewCops: enable
 7 |   RubyInterpreters:
 8 |     - ruby
 9 |     - rake
10 |   SuggestExtensions: false
11 |   TargetRubyVersion: 2.6 # really 2.0, but 2.6 is lowest supported by rubocop
12 | 
13 | Lint:
14 |   Enabled: true
15 | 
16 | # ignore weird looking regexps in specs, we have these on purpose
17 | Lint/DuplicateRegexpCharacterClassElement:
18 |   Exclude: ['spec/**/*']
19 | Lint/MixedRegexpCaptureTypes:
20 |   Exclude: ['spec/**/*']
21 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
 1 | source 'https://rubygems.org'
 2 | 
 3 | # Pull in the dependencies declared in the gemspec.
 4 | gemspec
 5 | 
 6 | # Gems used only for developing and testing this gem.
 7 | group :development, :test do
 8 |   gem 'leto', '~> 2.1'
 9 |   gem 'rake', '~> 13.1'
10 |   gem 'regexp_property_values', '~> 1.5'
11 |   gem 'rspec', '~> 3.10'
12 |   # The following tools are only installed when running on Ruby 2.7 or newer.
13 |   # NOTE(review): presumably they do not support older Rubies - confirm.
14 |   if RUBY_VERSION.to_f >= 2.7
15 |     gem 'benchmark-ips', '~> 2.1'
16 |     gem 'gouteur', '~> 1.1'
17 |     gem 'rubocop', '~> 1.59'
18 |   end
19 | end
20 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2010, 2012-2024,  Ammar Ali
 2 | 
 3 | Permission is hereby granted, free of charge, to any person
 4 | obtaining a copy of this software and associated documentation
 5 | files (the "Software"), to deal in the Software without
 6 | restriction, including without limitation the rights to use,
 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the
 9 | Software is furnished to do so, subject to the following
10 | conditions:
11 | 
12 | The above copyright notice and this permission notice shall be
13 | included in all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'bundler'
 2 | require 'rubygems'
 3 | require 'rubygems/package_task'
 4 | require 'rake'
 5 | require 'rake/testtask'
 6 | require 'rspec/core/rake_task'
 7 | 
 8 | # Load every task definition found below tasks/.
 9 | Dir['tasks/**/*.rake'].each { |file| load(file) }
10 | 
11 | # Add the gem tasks provided by Bundler's GemHelper.
12 | Bundler::GemHelper.install_tasks
13 | 
14 | # Define the :spec task for the RSpec suite.
15 | RSpec::Core::RakeTask.new(:spec)
16 | 
17 | # Running plain `rake` performs the full test run.
18 | task :default => [:'test:full']
19 | 
20 | namespace :test do
21 |   # Run ragel:rb first, then the specs.
22 |   task full: [:'ragel:rb', :spec]
23 | end
24 | 
25 | # Add ragel task as a prerequisite for building the gem to ensure that the
26 | # latest scanner code is generated and included in the build.
27 | desc "Runs ragel:rb before building the gem"
28 | task :build => ['ragel:rb']
29 | 


--------------------------------------------------------------------------------
/bin/console:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | require 'bundler/setup'
 4 | require 'regexp_parser'
 5 | require 'regexp_property_values'
 6 | 
 7 | RL = Regexp::Lexer
 8 | RP = Regexp::Parser
 9 | RS = Regexp::Scanner
10 | PV = RegexpPropertyValues
11 | 
12 | def lex(...);   Regexp::Lexer.lex(...)    end
13 | def parse(...); Regexp::Parser.parse(...) end
14 | def scan(...);  Regexp::Scanner.scan(...) end
15 | 
16 | require 'irb'
17 | IRB.start(__FILE__)
18 | 


--------------------------------------------------------------------------------
/bin/setup:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -euo pipefail
3 | 
4 | # install gems
5 | bundle
6 | 
7 | # install ragel
8 | rake ragel:install
9 | 


--------------------------------------------------------------------------------
/lib/regexp_parser.rb:
--------------------------------------------------------------------------------
1 | require_relative 'regexp_parser/version'
2 | require_relative 'regexp_parser/token'
3 | require_relative 'regexp_parser/scanner'
4 | require_relative 'regexp_parser/syntax'
5 | require_relative 'regexp_parser/lexer'
6 | require_relative 'regexp_parser/parser'
7 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/error.rb:
--------------------------------------------------------------------------------
1 | class Regexp::Parser
2 |   # Base class for all gem-specific errors, so that callers can rescue
3 |   # Regexp::Parser::Error to handle any of them.
4 |   class Error < StandardError; end
5 | end
6 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression.rb:
--------------------------------------------------------------------------------
 1 | require_relative 'error'
 2 | 
 3 | require_relative 'expression/shared'
 4 | require_relative 'expression/base'
 5 | require_relative 'expression/quantifier'
 6 | require_relative 'expression/subexpression'
 7 | require_relative 'expression/sequence'
 8 | require_relative 'expression/sequence_operation'
 9 | 
10 | require_relative 'expression/classes/alternation'
11 | require_relative 'expression/classes/anchor'
12 | require_relative 'expression/classes/backreference'
13 | require_relative 'expression/classes/character_set'
14 | require_relative 'expression/classes/character_set/intersection'
15 | require_relative 'expression/classes/character_set/range'
16 | require_relative 'expression/classes/character_type'
17 | require_relative 'expression/classes/conditional'
18 | require_relative 'expression/classes/escape_sequence'
19 | require_relative 'expression/classes/free_space'
20 | require_relative 'expression/classes/group'
21 | require_relative 'expression/classes/keep'
22 | require_relative 'expression/classes/literal'
23 | require_relative 'expression/classes/posix_class'
24 | require_relative 'expression/classes/root'
25 | require_relative 'expression/classes/unicode_property'
26 | 
27 | require_relative 'expression/methods/construct'
28 | require_relative 'expression/methods/escape_sequence_char'
29 | require_relative 'expression/methods/escape_sequence_codepoint'
30 | require_relative 'expression/methods/human_name'
31 | require_relative 'expression/methods/match'
32 | require_relative 'expression/methods/match_length'
33 | require_relative 'expression/methods/negative'
34 | require_relative 'expression/methods/options'
35 | require_relative 'expression/methods/parts'
36 | require_relative 'expression/methods/printing'
37 | require_relative 'expression/methods/referenced_expressions'
38 | require_relative 'expression/methods/strfregexp'
39 | require_relative 'expression/methods/tests'
40 | require_relative 'expression/methods/traverse'
41 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/base.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   class Base
 3 |     include Regexp::Expression::Shared
 4 | 
 5 |     def initialize(token, options = {})
 6 |       init_from_token_and_options(token, options)
 7 |     end
 8 | 
 9 |     def to_re(format = :full)
10 |       if set_level > 0
11 |         warn "Calling #to_re on character set members is deprecated - "\
12 |              "their behavior might not be equivalent outside of the set."
13 |       end
14 |       ::Regexp.new(to_s(format))
15 |     end
16 | 
17 |     def quantify(*args)
18 |       self.quantifier = Quantifier.new(*args)
19 |     end
20 | 
21 |     def unquantified_clone
22 |       clone.tap { |exp| exp.quantifier = nil }
23 |     end
24 | 
25 |     # Deprecated. Prefer `#repetitions` which has a more uniform interface.
26 |     def quantity
27 |       return [nil,nil] unless quantified?
28 |       [quantifier.min, quantifier.max]
29 |     end
30 | 
31 |     def repetitions
32 |       @repetitions ||=
33 |         if quantified?
34 |           min = quantifier.min
35 |           max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
36 |           range = min..max
37 |           # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
38 |           if RUBY_VERSION.to_f < 2.7
39 |             range.define_singleton_method(:minmax) { [min, max] }
40 |           end
41 |           range
42 |         else
43 |           1..1
44 |         end
45 |     end
46 | 
47 |     def greedy?
48 |       quantified? and quantifier.greedy?
49 |     end
50 | 
51 |     def reluctant?
52 |       quantified? and quantifier.reluctant?
53 |     end
54 |     alias :lazy? :reluctant?
55 | 
56 |     def possessive?
57 |       quantified? and quantifier.possessive?
58 |     end
59 | 
60 |     def to_h
61 |       {
62 |         type:              type,
63 |         token:             token,
64 |         text:              to_s(:base),
65 |         starts_at:         ts,
66 |         length:            full_length,
67 |         level:             level,
68 |         set_level:         set_level,
69 |         conditional_level: conditional_level,
70 |         options:           options,
71 |         quantifier:        quantified? ? quantifier.to_h : nil,
72 |       }
73 |     end
74 |     alias :attributes :to_h
75 |   end
76 | end
77 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/alternation.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   # A sequence of expressions, used by Alternation as one of its alternatives.
 3 |   class Alternative < Regexp::Expression::Sequence; end
 4 | 
 5 |   # A sequence operation whose operands are Alternative sequences.
 6 |   class Alternation < Regexp::Expression::SequenceOperation
 7 |     # class of the operands (how OPERAND is consumed is defined outside this file)
 8 |     OPERAND = Alternative
 9 | 
10 |     # the contained expressions are the alternatives
11 |     alias :alternatives :expressions
12 |   end
13 | end
14 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/anchor.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   # Expression classes for anchors. None of them adds behavior of its own
 3 |   # here; they only differ by class.
 4 |   module Anchor
 5 |     # common superclass of all anchor expressions
 6 |     class Base < Regexp::Expression::Base; end
 7 | 
 8 |     class BeginningOfLine               < Anchor::Base; end
 9 |     class EndOfLine                     < Anchor::Base; end
10 | 
11 |     class BeginningOfString             < Anchor::Base; end
12 |     class EndOfString                   < Anchor::Base; end
13 | 
14 |     class EndOfStringOrBeforeEndOfLine  < Anchor::Base; end
15 | 
16 |     class WordBoundary                  < Anchor::Base; end
17 |     class NonWordBoundary               < Anchor::Base; end
18 | 
19 |     class MatchStart                    < Anchor::Base; end
20 | 
21 |     # short aliases for some of the classes above
22 |     BOL      = BeginningOfLine
23 |     EOL      = EndOfLine
24 |     BOS      = BeginningOfString
25 |     EOS      = EndOfString
26 |     EOSobEOL = EndOfStringOrBeforeEndOfLine
27 |   end
28 | end
29 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/backreference.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   module Backreference
 3 |     class Base < Regexp::Expression::Base; end
 4 | 
 5 |     class Number < Backreference::Base
 6 |       attr_reader :number
 7 |       alias reference number
 8 | 
 9 |       def initialize(token, options = {})
10 |         @number = token.text[/-?\d+/].to_i
11 |         super
12 |       end
13 |     end
14 | 
15 |     class Name < Backreference::Base
16 |       attr_reader :name
17 |       alias reference name
18 | 
19 |       def initialize(token, options = {})
20 |         @name = token.text[3..-2]
21 |         super
22 |       end
23 |     end
24 | 
25 |     class NumberRelative     < Backreference::Number
26 |       attr_accessor :effective_number
27 |       alias reference effective_number
28 |     end
29 | 
30 |     class NumberCall         < Backreference::Number; end
31 |     class NameCall           < Backreference::Name; end
32 |     class NumberCallRelative < Backreference::NumberRelative; end
33 | 
34 |     class NumberRecursionLevel < Backreference::NumberRelative
35 |       attr_reader :recursion_level
36 | 
37 |       def initialize(token, options = {})
38 |         super
39 |         @number, @recursion_level = token.text[3..-2].split(/(?=[+-])/).map(&:to_i)
40 |       end
41 |     end
42 | 
43 |     class NameRecursionLevel < Backreference::Name
44 |       attr_reader :recursion_level
45 | 
46 |       def initialize(token, options = {})
47 |         super
48 |         @name, recursion_level = token.text[3..-2].split(/(?=[+-])/)
49 |         @recursion_level = recursion_level.to_i
50 |       end
51 |     end
52 |   end
53 | 
54 |   # alias for symmetry between token symbol and Expression class name
55 |   Backref = Backreference
56 | end
57 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/character_set.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   class CharacterSet < Regexp::Expression::Subexpression
 3 |     attr_accessor :closed, :negative
 4 |     alias :closed? :closed
 5 | 
 6 |     def initialize(token, options = {})
 7 |       self.negative = false
 8 |       self.closed   = false
 9 |       super
10 |     end
11 | 
12 |     def negate
13 |       self.negative = true
14 |     end
15 | 
16 |     def close
17 |       self.closed = true
18 |     end
19 |   end
20 | 
21 |   # alias for symmetry between token symbol and Expression class name
22 |   Set = CharacterSet
23 | end # module Regexp::Expression
24 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/character_set/intersection.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   class CharacterSet < Regexp::Expression::Subexpression
 3 |     # A sequence of expressions, used by Intersection as its operand class.
 4 |     class IntersectedSequence < Regexp::Expression::Sequence; end
 5 | 
 6 |     # A sequence operation whose operands are IntersectedSequence instances.
 7 |     class Intersection < Regexp::Expression::SequenceOperation
 8 |       # class of the operands (how OPERAND is consumed is defined outside this file)
 9 |       OPERAND = IntersectedSequence
10 |     end
11 |   end
12 | end
13 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/character_set/range.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   class CharacterSet < Regexp::Expression::Subexpression
 3 |     class Range < Regexp::Expression::Subexpression
 4 |       def ts
 5 |         (head = expressions.first) ? head.ts : @ts
 6 |       end
 7 | 
 8 |       def <<(exp)
 9 |         complete? and raise Regexp::Parser::Error,
10 |           "Can't add more than 2 expressions to a Range"
11 |         super
12 |       end
13 | 
14 |       def complete?
15 |         count == 2
16 |       end
17 |     end
18 |   end
19 | end
20 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/character_type.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   # Expression classes for character types. None of them adds behavior of
 3 |   # its own here; they only differ by class.
 4 |   module CharacterType
 5 |     # common superclass of all character type expressions
 6 |     class Base < Regexp::Expression::Base; end
 7 | 
 8 |     class Any              < CharacterType::Base; end
 9 |     class Digit            < CharacterType::Base; end
10 |     class NonDigit         < CharacterType::Base; end
11 |     class Hex              < CharacterType::Base; end
12 |     class NonHex           < CharacterType::Base; end
13 |     class Word             < CharacterType::Base; end
14 |     class NonWord          < CharacterType::Base; end
15 |     class Space            < CharacterType::Base; end
16 |     class NonSpace         < CharacterType::Base; end
17 |     class Linebreak        < CharacterType::Base; end
18 |     class ExtendedGrapheme < CharacterType::Base; end
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/conditional.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   module Conditional
 3 |     class TooManyBranches < Regexp::Parser::Error
 4 |       def initialize
 5 |         super('The conditional expression has more than 2 branches')
 6 |       end
 7 |     end
 8 | 
 9 |     class Condition < Regexp::Expression::Base
10 |       # Name or number of the referenced capturing group that determines state.
11 |       # Returns a String if reference is by name, Integer if by number.
12 |       def reference
13 |         ref = text.tr("'<>()", "")
14 |         ref =~ /\D/ ? ref : Integer(ref)
15 |       end
16 |     end
17 | 
18 |     class Branch < Regexp::Expression::Sequence; end
19 | 
20 |     class Expression < Regexp::Expression::Subexpression
21 |       def <<(exp)
22 |         expressions.last << exp
23 |       end
24 | 
25 |       def add_sequence(active_opts = {}, params = { ts: 0 })
26 |         raise TooManyBranches.new if branches.length == 2
27 |         params = params.merge({ conditional_level: conditional_level + 1 })
28 |         Branch.add_to(self, params, active_opts)
29 |       end
30 |       alias :branch :add_sequence
31 | 
32 |       def condition=(exp)
33 |         expressions.delete(condition)
34 |         expressions.unshift(exp)
35 |       end
36 | 
37 |       def condition
38 |         find { |subexp| subexp.is_a?(Condition) }
39 |       end
40 | 
41 |       def branches
42 |         select { |subexp| subexp.is_a?(Sequence) }
43 |       end
44 | 
45 |       def reference
46 |         condition.reference
47 |       end
48 |     end
49 |   end
50 | end
51 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/escape_sequence.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   # Expression classes for escape sequences, all created with Class.new.
 3 |   module EscapeSequence
 4 |     # common superclass of all escape sequence expressions
 5 |     Base        = Class.new(Regexp::Expression::Base)
 6 | 
 7 |     AsciiEscape = Class.new(Base) # \e
 8 |     Backspace   = Class.new(Base) # \b
 9 |     Bell        = Class.new(Base) # \a
10 |     FormFeed    = Class.new(Base) # \f
11 |     Newline     = Class.new(Base) # \n
12 |     Return      = Class.new(Base) # \r
13 |     Tab         = Class.new(Base) # \t
14 |     VerticalTab = Class.new(Base) # \v
15 | 
16 |     Literal     = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
17 | 
18 |     Octal       = Class.new(Base) # e.g. \012
19 |     Hex         = Class.new(Base) # e.g. \x0A
20 |     Codepoint   = Class.new(Base) # e.g. \u000A
21 | 
22 |     CodepointList = Class.new(Base) # e.g. \u{A B}
23 | 
24 |     # shared superclass of the control and meta sequences below
25 |     AbstractMetaControlSequence = Class.new(Base)
26 |     Control                     = Class.new(AbstractMetaControlSequence) # e.g. \cB
27 |     Meta                        = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
28 |     MetaControl                 = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
29 |   end
30 | 
31 |   # alias for symmetry between Token::* and Expression::*
32 |   Escape = EscapeSequence
33 | end
34 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/free_space.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   # Common superclass of Comment and WhiteSpace.
 3 |   class FreeSpace < Regexp::Expression::Base
 4 |     # Free space can not be quantified, so this always raises.
 5 |     def quantify(*_args)
 6 |       raise Regexp::Parser::Error, 'Can not quantify a free space object'
 7 |     end
 8 |   end
 9 | 
10 |   class Comment < Regexp::Expression::FreeSpace; end
11 | 
12 |   class WhiteSpace < Regexp::Expression::FreeSpace
13 |     # Deprecated. Appends the text of exp to the own text.
14 |     def merge(exp)
15 |       deprecation = "#{self.class}##{__method__} is deprecated and will be removed in v3.0.0."
16 |       warn(deprecation)
17 |       text << exp.text
18 |     end
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/group.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   module Group
 3 |     class Base < Regexp::Expression::Subexpression
 4 |     end
 5 | 
 6 |     class Passive < Group::Base
 7 |       attr_writer :implicit
 8 | 
 9 |       def initialize(*)
10 |         @implicit = false
11 |         super
12 |       end
13 | 
14 |       def implicit?
15 |         @implicit
16 |       end
17 |     end
18 | 
19 |     class Absence < Group::Base; end
20 |     class Atomic  < Group::Base; end
21 |     # TODO: should split off OptionsSwitch in v3.0.0. Maybe even make it no
22 |     # longer inherit from Group because it is effectively a terminal expression.
23 |     class Options < Group::Base
24 |       attr_accessor :option_changes
25 | 
26 |       def initialize_copy(orig)
27 |         self.option_changes = orig.option_changes.dup
28 |         super
29 |       end
30 | 
31 |       def quantify(*args)
32 |         if token == :options_switch
33 |           raise Regexp::Parser::Error, 'Can not quantify an option switch'
34 |         else
35 |           super
36 |         end
37 |       end
38 |     end
39 | 
40 |     class Capture < Group::Base
41 |       attr_accessor :number, :number_at_level
42 |       alias identifier number
43 |     end
44 | 
45 |     class Named < Group::Capture
46 |       attr_reader :name
47 |       alias identifier name
48 | 
49 |       def initialize(token, options = {})
50 |         @name = token.text[3..-2]
51 |         super
52 |       end
53 | 
54 |       def initialize_copy(orig)
55 |         @name = orig.name.dup
56 |         super
57 |       end
58 |     end
59 | 
60 |     class Comment < Group::Base
61 |     end
62 |   end
63 | 
64 |   module Assertion
65 |     class Base < Regexp::Expression::Group::Base; end
66 | 
67 |     class Lookahead           < Assertion::Base; end
68 |     class NegativeLookahead   < Assertion::Base; end
69 | 
70 |     class Lookbehind          < Assertion::Base; end
71 |     class NegativeLookbehind  < Assertion::Base; end
72 |   end
73 | end
74 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/keep.rb:
--------------------------------------------------------------------------------
1 | module Regexp::Expression
2 |   module Keep
3 |     # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4 |     #       that contains all expressions to its left.
5 |     class Mark < Regexp::Expression::Base; end
6 |   end
7 | end
8 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/literal.rb:
--------------------------------------------------------------------------------
1 | module Regexp::Expression
2 |   class Literal < Regexp::Expression::Base; end
3 | end
4 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/posix_class.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   class PosixClass < Regexp::Expression::Base
 3 |     def name # first run of word characters found in text, or nil when there is none
 4 |       text[/\w+/]
 5 |     end
 6 |   end
 7 | 
 8 |   # alias for symmetry between token symbol and Expression class name
 9 |   Posixclass    = PosixClass
10 |   Nonposixclass = PosixClass
11 | end
12 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/root.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   class Root < Regexp::Expression::Subexpression
 3 |     def self.build(options = {})
 4 |       warn "`#{self.class}.build(options)` is deprecated and will raise in "\
 5 |            "regexp_parser v3.0.0. Please use `.construct(options: options)`."
 6 |       construct(options: options)
 7 |     end
 8 |   end
 9 | end
10 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/classes/unicode_property.rb:
--------------------------------------------------------------------------------
  1 | module Regexp::Expression
  2 |   module UnicodeProperty
  3 |     class Base < Regexp::Expression::Base
  4 |       def name # text between the braces when text is exactly \p{...} or \P{...}; nil otherwise
  5 |         text[/\A\\[pP]\{([^}]+)\}\z/, 1]
  6 |       end
  7 | 
  8 |       def shortcut # reverse lookup: the key in Regexp::Scanner.short_prop_map whose value equals the token name
  9 |         Regexp::Scanner.short_prop_map.key(token.to_s) # assumes the map is a Hash (Hash#key gives nil when absent) - TODO confirm
 10 |       end
 11 |     end
 12 | 
 13 |     class Alnum         < Base; end
 14 |     class Alpha         < Base; end
 15 |     class Ascii         < Base; end
 16 |     class Blank         < Base; end
 17 |     class Cntrl         < Base; end
 18 |     class Digit         < Base; end
 19 |     class Graph         < Base; end
 20 |     class Lower         < Base; end
 21 |     class Print         < Base; end
 22 |     class Punct         < Base; end
 23 |     class Space         < Base; end
 24 |     class Upper         < Base; end
 25 |     class Word          < Base; end
 26 |     class Xdigit        < Base; end
 27 |     class XPosixPunct   < Base; end
 28 | 
 29 |     class Newline       < Base; end
 30 | 
 31 |     class Any           < Base; end
 32 |     class Assigned      < Base; end
 33 | 
 34 |     module Letter
 35 |       class Base < UnicodeProperty::Base; end
 36 | 
 37 |       class Any         < Letter::Base; end
 38 |       class Cased       < Letter::Base; end
 39 |       class Uppercase   < Letter::Base; end
 40 |       class Lowercase   < Letter::Base; end
 41 |       class Titlecase   < Letter::Base; end
 42 |       class Modifier    < Letter::Base; end
 43 |       class Other       < Letter::Base; end
 44 |     end
 45 | 
 46 |     module Mark
 47 |       class Base < UnicodeProperty::Base; end
 48 | 
 49 |       class Any         < Mark::Base; end
 50 |       class Combining   < Mark::Base; end
 51 |       class Nonspacing  < Mark::Base; end
 52 |       class Spacing     < Mark::Base; end
 53 |       class Enclosing   < Mark::Base; end
 54 |     end
 55 | 
 56 |     module Number
 57 |       class Base < UnicodeProperty::Base; end
 58 | 
 59 |       class Any         < Number::Base; end
 60 |       class Decimal     < Number::Base; end
 61 |       class Letter      < Number::Base; end
 62 |       class Other       < Number::Base; end
 63 |     end
 64 | 
 65 |     module Punctuation
 66 |       class Base < UnicodeProperty::Base; end
 67 | 
 68 |       class Any         < Punctuation::Base; end
 69 |       class Connector   < Punctuation::Base; end
 70 |       class Dash        < Punctuation::Base; end
 71 |       class Open        < Punctuation::Base; end
 72 |       class Close       < Punctuation::Base; end
 73 |       class Initial     < Punctuation::Base; end
 74 |       class Final       < Punctuation::Base; end
 75 |       class Other       < Punctuation::Base; end
 76 |     end
 77 | 
 78 |     module Separator
 79 |       class Base < UnicodeProperty::Base; end
 80 | 
 81 |       class Any         < Separator::Base; end
 82 |       class Space       < Separator::Base; end
 83 |       class Line        < Separator::Base; end
 84 |       class Paragraph   < Separator::Base; end
 85 |     end
 86 | 
 87 |     module Symbol
 88 |       class Base < UnicodeProperty::Base; end
 89 | 
 90 |       class Any         < Symbol::Base; end
 91 |       class Math        < Symbol::Base; end
 92 |       class Currency    < Symbol::Base; end
 93 |       class Modifier    < Symbol::Base; end
 94 |       class Other       < Symbol::Base; end
 95 |     end
 96 | 
 97 |     module Codepoint
 98 |       class Base < UnicodeProperty::Base; end
 99 | 
100 |       class Any         < Codepoint::Base; end
101 |       class Control     < Codepoint::Base; end
102 |       class Format      < Codepoint::Base; end
103 |       class Surrogate   < Codepoint::Base; end
104 |       class PrivateUse  < Codepoint::Base; end
105 |       class Unassigned  < Codepoint::Base; end
106 |     end
107 | 
108 |     class Age        < UnicodeProperty::Base; end
109 |     class Block      < UnicodeProperty::Base; end
110 |     class Derived    < UnicodeProperty::Base; end
111 |     class Emoji      < UnicodeProperty::Base; end
112 |     class Enumerated < UnicodeProperty::Base; end
113 |     class Script     < UnicodeProperty::Base; end
114 |   end
115 | 
116 |   # alias for symmetry between token symbol and Expression class name
117 |   Property    = UnicodeProperty
118 |   Nonproperty = UnicodeProperty
119 | end # module Regexp::Expression
120 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/construct.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   module Shared
 3 |     module ClassMethods
 4 |       # Convenience method to init a valid Expression without a Regexp::Token
 5 |       def construct(params = {}) # params: :options plus any Regexp::TOKEN_KEYS; raises ArgumentError on other keys
 6 |         attrs = construct_defaults.merge(params) # caller-supplied params override the per-class defaults
 7 |         options = attrs.delete(:options) # passed to new separately from the token; nil when not given
 8 |         token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) } # token fields in TOKEN_KEYS order; missing keys become nil
 9 |         token = Regexp::Token.new(*token_args)
10 |         raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
11 | 
12 |         new(token, options)
13 |       end
14 | 
15 |       def construct_defaults # default token attributes for this class, as a Hash
16 |         if self == Root
17 |           { type: :expression, token: :root, ts: 0 }
18 |         elsif self < Sequence # strict subclass check; Sequence itself does not match
19 |           { type: :expression, token: :sequence }
20 |         else
21 |           { type: token_class::Type } # Type constant of the matching syntax token module
22 |         end.merge(level: 0, set_level: 0, conditional_level: 0, text: '') # shared defaults: all levels zero, empty text
23 |       end
24 | 
25 |       def token_class # matching Regexp::Syntax::Token constant, or nil for Root and Sequence subclasses
26 |         if self == Root || self < Sequence
27 |           nil # no token class because these objects are Parser-generated
28 |         # TODO: synch exp class, token class & type names for this in v3.0.0
29 |         elsif self == CharacterType::Any
30 |           Regexp::Syntax::Token::Meta
31 |         else
32 |           Regexp::Syntax::Token.const_get(name.split('::')[2]) # third segment of the class name, i.e. the namespace right below Regexp::Expression
33 |         end
34 |       end
35 |     end
36 | 
37 |     def token_class # instance-level shortcut to the class-level token_class
38 |       self.class.token_class
39 |     end
40 |   end
41 | end
42 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/escape_sequence_char.rb:
--------------------------------------------------------------------------------
1 | Regexp::Expression::EscapeSequence::Base.class_eval do
2 |   def char # the character for #codepoint, as a UTF-8 encoded String
3 |     codepoint.chr('utf-8')
4 |   end
5 | end
6 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression::EscapeSequence
 2 |   AsciiEscape.class_eval { def codepoint; 0x1B end }
 3 |   Backspace.class_eval   { def codepoint; 0x8  end }
 4 |   Bell.class_eval        { def codepoint; 0x7  end }
 5 |   FormFeed.class_eval    { def codepoint; 0xC  end }
 6 |   Newline.class_eval     { def codepoint; 0xA  end }
 7 |   Return.class_eval      { def codepoint; 0xD  end }
 8 |   Tab.class_eval         { def codepoint; 0x9  end }
 9 |   VerticalTab.class_eval { def codepoint; 0xB  end }
10 | 
11 |   Literal.class_eval     { def codepoint; text[1].ord end }
12 | 
13 |   Octal.class_eval       { def codepoint; text[/\d+/].to_i(8) end }
14 | 
15 |   Hex.class_eval         { def codepoint; text[/\h+/].hex end }
16 |   Codepoint.class_eval   { def codepoint; text[/\h+/].hex end }
17 | 
18 |   CodepointList.class_eval do
19 |     # Maybe this should be a unique top-level expression class?
20 |     def char # singular form is deliberately unsupported; always raises NoMethodError
21 |       raise NoMethodError, 'CodepointList responds only to #chars'
22 |     end
23 | 
24 |     def codepoint # singular form is deliberately unsupported; always raises NoMethodError
25 |       raise NoMethodError, 'CodepointList responds only to #codepoints'
26 |     end
27 | 
28 |     def chars # one UTF-8 String per entry of #codepoints
29 |       codepoints.map { |cp| cp.chr('utf-8') }
30 |     end
31 | 
32 |     def codepoints # every run of hex digits in text, parsed as a hexadecimal Integer
33 |       text.scan(/\h+/).map(&:hex)
34 |     end
35 |   end
36 | 
37 |   AbstractMetaControlSequence.class_eval do
38 |     private
39 | 
40 |     def control_sequence_to_s(control_sequence) # one-byte String holding only the 5 lowest bits of the sequence's last byte
41 |       five_lsb = control_sequence.unpack('B*').first[-5..-1] # whole string as a bit string (MSB first); keep the final 5 bits
42 |       ["000#{five_lsb}"].pack('B*') # pad to 8 bits with three leading zero bits and pack back into a single byte
43 |     end
44 | 
45 |     def meta_char_to_codepoint(meta_char) # ord of meta_char, raised by 128 when it is below 128
46 |       byte_value = meta_char.ord
47 |       byte_value < 128 ? byte_value + 128 : byte_value
48 |     end
49 |   end
50 | 
51 |   Control.class_eval do
52 |     def codepoint
53 |       control_sequence_to_s(text).ord
54 |     end
55 |   end
56 | 
57 |   Meta.class_eval do
58 |     def codepoint
59 |       meta_char_to_codepoint(text[-1])
60 |     end
61 |   end
62 | 
63 |   MetaControl.class_eval do
64 |     def codepoint
65 |       meta_char_to_codepoint(control_sequence_to_s(text))
66 |     end
67 |   end
68 | end
69 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/human_name.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   module Shared
 3 |     # default implementation, e.g. "atomic group", "hex escape", "word type", ..
 4 |     def human_name
 5 |       [token, type].compact.join(' ').tr('_', ' ')
 6 |     end
 7 |   end
 8 | 
 9 |   Alternation.class_eval                       { def human_name; 'alternation'                 end }
10 |   Alternative.class_eval                       { def human_name; 'alternative'                 end }
11 |   Anchor::BOL.class_eval                       { def human_name; 'beginning of line'           end }
12 |   Anchor::BOS.class_eval                       { def human_name; 'beginning of string'         end }
13 |   Anchor::EOL.class_eval                       { def human_name; 'end of line'                 end }
14 |   Anchor::EOS.class_eval                       { def human_name; 'end of string'               end }
15 |   Anchor::EOSobEOL.class_eval                  { def human_name; 'newline-ready end of string' end }
16 |   Anchor::MatchStart.class_eval                { def human_name; 'match start'                 end }
17 |   Anchor::NonWordBoundary.class_eval           { def human_name; 'no word boundary'            end }
18 |   Anchor::WordBoundary.class_eval              { def human_name; 'word boundary'               end }
19 |   Assertion::Lookahead.class_eval              { def human_name; 'lookahead'                   end }
20 |   Assertion::Lookbehind.class_eval             { def human_name; 'lookbehind'                  end }
21 |   Assertion::NegativeLookahead.class_eval      { def human_name; 'negative lookahead'          end }
22 |   Assertion::NegativeLookbehind.class_eval     { def human_name; 'negative lookbehind'         end }
23 |   Backreference::Name.class_eval               { def human_name; 'backreference by name'       end }
24 |   Backreference::NameCall.class_eval           { def human_name; 'subexpression call by name'  end }
25 |   Backreference::Number.class_eval             { def human_name; 'backreference'               end }
26 |   Backreference::NumberRelative.class_eval     { def human_name; 'relative backreference'      end }
27 |   Backreference::NumberCall.class_eval         { def human_name; 'subexpression call'          end }
28 |   Backreference::NumberCallRelative.class_eval { def human_name; 'relative subexpression call' end }
29 |   CharacterSet::IntersectedSequence.class_eval { def human_name; 'intersected sequence'        end }
30 |   CharacterSet::Intersection.class_eval        { def human_name; 'intersection'                end }
31 |   CharacterSet::Range.class_eval               { def human_name; 'character range'             end }
32 |   CharacterType::Any.class_eval                { def human_name; 'match-all'                   end }
33 |   Comment.class_eval                           { def human_name; 'comment'                     end }
34 |   Conditional::Branch.class_eval               { def human_name; 'conditional branch'          end }
35 |   Conditional::Condition.class_eval            { def human_name; 'condition'                   end }
36 |   Conditional::Expression.class_eval           { def human_name; 'conditional'                 end }
37 |   Group::Capture.class_eval                    { def human_name; "capture group #{number}"     end }
38 |   Group::Named.class_eval                      { def human_name; 'named capture group'         end }
39 |   Keep::Mark.class_eval                        { def human_name; 'keep-mark lookbehind'        end }
40 |   Literal.class_eval                           { def human_name; 'literal'                     end }
41 |   Root.class_eval                              { def human_name; 'root'                        end }
42 |   WhiteSpace.class_eval                        { def human_name; 'free space'                  end }
43 | end
44 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/match.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   class Base
 3 |     def match?(string) # boolean form of #match (always starts at offset 0)
 4 |       !!match(string)
 5 |     end
 6 |     alias :matches? :match?
 7 | 
 8 |     def match(string, offset = 0) # MatchData or nil, using a Regexp freshly built from this expression's to_s
 9 |       Regexp.new(to_s).match(string, offset) # NOTE(review): no regexp flags are passed to Regexp.new here
10 |     end
11 |     alias :=~ :match # note: returns what #match returns (MatchData/nil), not an index like Regexp#=~
12 |   end
13 | end
14 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/match_length.rb:
--------------------------------------------------------------------------------
  1 | class Regexp::MatchLength # describes which match lengths an expression allows, from its base length range and repetition range
  2 |   include Enumerable
  3 | 
  4 |   def self.of(obj) # accepts an Expression, or anything Regexp::Parser.parse takes; returns its match_length
  5 |     exp = obj.is_a?(Regexp::Expression::Base) ? obj : Regexp::Parser.parse(obj)
  6 |     exp.match_length
  7 |   end
  8 | 
  9 |   def initialize(exp, opts = {}) # opts: either :base, or all of :base_min, :base_max and :reify (fetch raises KeyError if missing)
 10 |     self.exp_class = exp.class
 11 |     self.min_rep = exp.repetitions.min
 12 |     self.max_rep = exp.repetitions.max
 13 |     if (base = opts[:base]) # 0 is truthy in Ruby, so base: 0 also takes this branch
 14 |       self.base_min = base
 15 |       self.base_max = base
 16 |       self.reify = ->{ '.' * base } # fixed-length stand-in pattern: one '.' per character
 17 |     else
 18 |       self.base_min = opts.fetch(:base_min)
 19 |       self.base_max = opts.fetch(:base_max)
 20 |       self.reify = opts.fetch(:reify)
 21 |     end
 22 |   end
 23 | 
 24 |   def each(opts = {}) # yields each possible length from min upwards, stopping after opts[:limit] (default 1000) results
 25 |     return enum_for(__method__, opts) unless block_given?
 26 |     limit = opts[:limit] || 1000
 27 |     yielded = 0
 28 |     (min..max).each do |num|
 29 |       next unless include?(num) # skip lengths inside the range that cannot actually occur
 30 |       yield(num)
 31 |       break if (yielded += 1) >= limit
 32 |     end
 33 |   end
 34 | 
 35 |   def endless_each # like #each but without a result limit
 36 |     return enum_for(__method__) unless block_given?
 37 |     (min..max).each { |num| yield(num) if include?(num) }
 38 |   end
 39 | 
 40 |   def include?(length) # checks a string of `length` 'X' characters against the anchored length regexp
 41 |     test_regexp.match?('X' * length)
 42 |   end
 43 | 
 44 |   def fixed? # true when only one length is possible
 45 |     min == max
 46 |   end
 47 | 
 48 |   def min # minimum repetitions times minimum base length
 49 |     min_rep * base_min
 50 |   end
 51 | 
 52 |   def max # maximum repetitions times maximum base length
 53 |     max_rep * base_max
 54 |   end
 55 | 
 56 |   def minmax # [min, max]; overrides Enumerable#minmax, so nothing is enumerated
 57 |     [min, max]
 58 |   end
 59 | 
 60 |   def inspect
 61 |     type = exp_class.name.sub('Regexp::Expression::', '') # shorten the class name for display
 62 |     "#<#{self.class}<#{type}> min=#{min} max=#{max}>"
 63 |   end
 64 | 
 65 |   def to_re # Regexp repeating the reified base pattern min_rep..max_rep times; upper bound left open when max_rep is infinite
 66 |     /(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
 67 |   end
 68 | 
 69 |   private
 70 | 
 71 |   attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
 72 | 
 73 |   if Regexp.method_defined?(:match?) # ruby >= 2.4
 74 |     def test_regexp # memoized, line-anchored version of #to_re
 75 |       @test_regexp ||= /^#{to_re}$/
 76 |     end
 77 |   else
 78 |     def test_regexp # same, plus a singleton #match? on the regexp for rubies that lack Regexp#match?
 79 |       @test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
 80 |     end
 81 |   end
 82 | end
 83 | 
 84 | module Regexp::Expression
 85 |   MatchLength = Regexp::MatchLength
 86 | 
 87 |   [
 88 |     CharacterSet,
 89 |     CharacterSet::Intersection,
 90 |     CharacterSet::IntersectedSequence,
 91 |     CharacterSet::Range,
 92 |     CharacterType::Base,
 93 |     EscapeSequence::Base,
 94 |     PosixClass,
 95 |     UnicodeProperty::Base,
 96 |   ].each do |klass|
 97 |     klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
 98 |       def match_length
 99 |         MatchLength.new(self, base: 1)
100 |       end
101 |     RUBY
102 |   end
103 | 
104 |   class Literal
105 |     def match_length
106 |       MatchLength.new(self, base: text.length)
107 |     end
108 |   end
109 | 
110 |   class Subexpression
111 |     def match_length # children are treated as a sequence: their min and max lengths are summed
112 |       MatchLength.new(self,
113 |                        base_min: map { |exp| exp.match_length.min }.inject(0, :+),
114 |                        base_max: map { |exp| exp.match_length.max }.inject(0, :+),
115 |                        reify: ->{ map { |exp| exp.match_length.to_re }.join }) # lazily concatenates the children's length regexps
116 |     end
117 | 
118 |     def inner_match_length # match length computed on a throwaway Root instead of on self
119 |       dummy = Regexp::Expression::Root.construct
120 |       dummy.expressions = expressions.map(&:clone) # cloned children, so the dummy does not share them with self
121 |       dummy.quantifier = quantifier && quantifier.clone # carries over a clone of the quantifier, if any
122 |       dummy.match_length
123 |     end
124 |   end
125 | 
126 |   [
127 |     Alternation,
128 |     Conditional::Expression,
129 |   ].each do |klass|
130 |     klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
131 |       def match_length
132 |         MatchLength.new(self,
133 |                          base_min: map { |exp| exp.match_length.min }.min,
134 |                          base_max: map { |exp| exp.match_length.max }.max,
135 |                          reify: ->{ map { |exp| exp.match_length.to_re }.join('|') })
136 |       end
137 |     RUBY
138 |   end
139 | 
140 |   [
141 |     Anchor::Base,
142 |     Assertion::Base,
143 |     Conditional::Condition,
144 |     FreeSpace,
145 |     Keep::Mark,
146 |   ].each do |klass|
147 |     klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
148 |       def match_length
149 |         MatchLength.new(self, base: 0)
150 |       end
151 |     RUBY
152 |   end
153 | 
154 |   class Backreference::Base
155 |     def match_length # match length of the referenced expression with its quantifier stripped; raises ArgumentError if unresolved
156 |       if referenced_expression.nil?
157 |         raise ArgumentError, 'Missing referenced_expression - not parsed?'
158 |       end
159 |       referenced_expression.unquantified_clone.match_length
160 |     end
161 |   end
162 | 
163 |   class EscapeSequence::CodepointList
164 |     def match_length
165 |       MatchLength.new(self, base: codepoints.count)
166 |     end
167 |   end
168 | 
169 |   # Special case. Absence group can match 0.. chars, irrespective of content.
170 |   # TODO: in theory, they *can* exclude match lengths with `.`: `(?~.{3})`
171 |   class Group::Absence
172 |     def match_length
173 |       MatchLength.new(self, base_min: 0, base_max: Float::INFINITY, reify: ->{ '.*' })
174 |     end
175 |   end
176 | end
177 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/negative.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   module Shared
 3 |     def negative?
 4 |       false
 5 |     end
 6 | 
 7 |     # not an alias so as to respect overrides of #negative?
 8 |     def negated?
 9 |       negative?
10 |     end
11 |   end
12 | 
13 |   Anchor::NonWordBoundary.class_eval       { def negative?; true                          end }
14 |   Assertion::NegativeLookahead.class_eval  { def negative?; true                          end }
15 |   Assertion::NegativeLookbehind.class_eval { def negative?; true                          end }
16 |   CharacterSet.class_eval                  { def negative?; negative                      end }
17 |   CharacterType::Base.class_eval           { def negative?; token.to_s.start_with?('non') end }
18 |   PosixClass.class_eval                    { def negative?; type == :nonposixclass        end }
19 |   UnicodeProperty::Base.class_eval         { def negative?; type == :nonproperty          end }
20 | end
21 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/options.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   class Base
 3 |     def multiline?
 4 |       options[:m] == true
 5 |     end
 6 |     alias :m? :multiline?
 7 | 
 8 |     def case_insensitive?
 9 |       options[:i] == true
10 |     end
11 |     alias :i? :case_insensitive?
12 |     alias :ignore_case? :case_insensitive?
13 | 
14 |     def free_spacing?
15 |       options[:x] == true
16 |     end
17 |     alias :x? :free_spacing?
18 |     alias :extended? :free_spacing?
19 | 
20 |     def default_classes?
21 |       options[:d] == true
22 |     end
23 |     alias :d? :default_classes?
24 | 
25 |     def ascii_classes?
26 |       options[:a] == true
27 |     end
28 |     alias :a? :ascii_classes?
29 | 
30 |     def unicode_classes?
31 |       options[:u] == true
32 |     end
33 |     alias :u? :unicode_classes?
34 |   end
35 | end
36 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/parts.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   module Shared
 3 |     # default implementation
 4 |     def parts # single-element Array holding a copy of text
 5 |       [text.dup]
 6 |     end
 7 | 
 8 |     private
 9 | 
10 |     def intersperse(expressions, separator) # Array of the expressions with separator placed between neighbours
11 |       expressions.flat_map { |exp| [exp, separator] }.slice(0...-1) # append a separator after each, then drop the trailing one
12 |     end
13 |   end
14 | 
15 |   CharacterSet.class_eval            { def parts; ["#{text}#{'^' if negated?}", *expressions, ']']        end }
16 |   CharacterSet::Range.class_eval     { def parts; intersperse(expressions, text.dup)                      end }
17 |   Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
18 |   Group::Base.class_eval             { def parts; [text.dup, *expressions, ')']                           end }
19 |   Group::Passive.class_eval          { def parts; implicit? ? expressions : super                         end }
20 |   Group::Comment.class_eval          { def parts; [text.dup]                                              end }
21 |   Subexpression.class_eval           { def parts; expressions                                             end }
22 |   SequenceOperation.class_eval       { def parts; intersperse(expressions, text.dup)                      end }
23 | end
24 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/printing.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   module Shared
 3 |     def inspect # "#<Class @ivar=value ...>" limited to the ivars chosen by pretty_print_instance_variables
 4 |       [
 5 |         "#<#{self.class}",
 6 |         pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
 7 |         ">"
 8 |       ].join
 9 |     end
10 | 
11 |     # Make pretty-print work despite #inspect implementation.
12 |     def pretty_print(q)
13 |       q.pp_object(self)
14 |     end
15 | 
16 |     # Called by pretty_print (ruby/pp) and #inspect.
17 |     def pretty_print_instance_variables # ivar names worth showing; empty or inapplicable ones are left out
18 |       [
19 |         (:@text unless text.to_s.empty?),
20 |         (:@quantifier if quantified?),
21 |         (:@options unless options.empty?),
22 |         (:@expressions unless terminal?),
23 |       ].compact # conditions that do not hold evaluate to nil and are removed here
24 |     end
25 |   end
26 | end
27 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/referenced_expressions.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   module ReferencedExpressions
 3 |     attr_accessor :referenced_expressions
 4 | 
 5 |     def referenced_expression # first referenced expression, or nil when none are set
 6 |       referenced_expressions && referenced_expressions.first
 7 |     end
 8 | 
 9 |     def initialize_copy(orig) # dup/clone hook: also copies the referenced expressions, with a guard against endless recursion
10 |       exp_id = [self.class, self.starts_at] # key identifying this expression by class and start offset
11 | 
12 |       # prevent infinite recursion for recursive subexp calls
13 |       copied = self.class.instance_eval { @copied_ref_exps ||= {} } # registry kept as an instance variable on the class object
14 |       self.referenced_expressions =
15 |         if copied[exp_id]
16 |           orig.referenced_expressions # already being copied further up the call chain: reuse the originals, no dup
17 |         else
18 |           copied[exp_id] = true # mark before dup-ing so nested copies of the same expression take the branch above
19 |           orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
20 |         end
21 |       copied.clear # empty the registry once this copy's references are assigned
22 | 
23 |       super
24 |     end
25 |   end
26 | 
27 |   Base.include ReferencedExpressions
28 | end
29 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/strfregexp.rb:
--------------------------------------------------------------------------------
  1 | module Regexp::Expression
  2 |   class Base
  3 | 
  4 |     #   %l  Level (depth) of the expression. Returns 'root' for the root
  5 |     #       expression, returns zero or higher for all others.
  6 |     #
  7 |     #   %>  Indentation at expression's level.
  8 |     #
  9 |     #   %x  Index of the expression at its depth. Available when using
 10 |     #       the sprintf_tree method only.
 11 |     #
 12 |     #   %s  Start offset within the whole expression.
 13 |     #   %e  End offset within the whole expression.
 14 |     #   %S  Length of expression.
 15 |     #
 16 |     #   %o  Coded offset and length, same as '@%s+%S'
 17 |     #
 18 |     #   %y  Type of expression.
 19 |     #   %k  Token of expression.
 20 |     #   %i  ID, same as '%y:%k'
 21 |     #   %c  Class name
 22 |     #
 23 |     #   %q  Quantifier info, as {m[,M]}
 24 |     #   %Q  Quantifier text
 25 |     #
 26 |     #   %z  Quantifier min
 27 |     #   %Z  Quantifier max
 28 |     #
 29 |     #   %t  Base text of the expression (excludes quantifier, if any)
 30 |     #   %~t Full text if the expression is terminal, otherwise %i
 31 |     #   %T  Full text of the expression (includes quantifier, if any)
 32 |     #
 33 |     #   %b  Basic info, same as '%o %i'
 34 |     #   %m  Most info, same as '%b %q'
 35 |     #   %a  All info, same as '%m %t'
 36 |     #
 37 |     def strfregexp(format = '%a', indent_offset = 0, index = nil)
 38 |       have_index    = index ? true : false
 39 | 
 40 |       part = {}
 41 | 
 42 |       print_level = nesting_level > 0 ? nesting_level - 1 : nil
 43 | 
 44 |       # Order is important! Fields that use other fields in their
 45 |       # definition must appear before the fields they use.
 46 |       part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
 47 |       part.keys.each {|k| part[k] = "<?#{k}?>"}
 48 | 
 49 |       part['>'] = print_level ? ('  ' * (print_level + indent_offset)) : ''
 50 | 
 51 |       part['l'] = print_level ? "#{'%d' % print_level}" : 'root'
 52 |       part['x'] = "#{'%d' % index}" if have_index
 53 | 
 54 |       part['s'] = starts_at
 55 |       part['S'] = full_length
 56 |       part['e'] = starts_at + full_length
 57 |       part['o'] = coded_offset
 58 | 
 59 |       part['k'] = token
 60 |       part['y'] = type
 61 |       part['i'] = '%y:%k'
 62 |       part['c'] = self.class.name
 63 | 
 64 |       if quantified?
 65 |         if quantifier.max == -1
 66 |           part['q'] = "{#{quantifier.min}, or-more}"
 67 |         else
 68 |           part['q'] = "{#{quantifier.min}, #{quantifier.max}}"
 69 |         end
 70 | 
 71 |         part['Q'] = quantifier.text
 72 |         part['z'] = quantifier.min
 73 |         part['Z'] = quantifier.max
 74 |       else
 75 |         part['q'] = '{1}'
 76 |         part['Q'] = ''
 77 |         part['z'] = '1'
 78 |         part['Z'] = '1'
 79 |       end
 80 | 
 81 |       part['t'] = to_s(:base)
 82 |       part['~t'] = terminal? ? to_s : "#{type}:#{token}"
 83 |       part['T'] = to_s(:full)
 84 | 
 85 |       part['b'] = '%o %i'
 86 |       part['m'] = '%b %q'
 87 |       part['a'] = '%m %t'
 88 | 
 89 |       out = format.dup
 90 | 
 91 |       part_keys.each do |k|
 92 |         out.gsub!(/%#{k}/, part[k].to_s)
 93 |       end
 94 | 
 95 |       out
 96 |     end
 97 | 
 98 |     alias :strfre :strfregexp
 99 |   end
100 | 
101 |   class Subexpression < Regexp::Expression::Base
102 |     def strfregexp_tree(format = '%a', include_self = true, separator = "\n") # #strfregexp output for self (optional) and the expressions below it, joined by separator
103 |       output = include_self ? [self.strfregexp(format)] : []
104 | 
105 |       output += flat_map do |exp, index| # this class's own flat_map; a two-argument block receives expression and index
106 |         exp.strfregexp(format, (include_self ? 1 : 0), index) # indent children one extra step when self is printed too
107 |       end
108 | 
109 |       output.join(separator)
110 |     end
111 | 
112 |     alias :strfre_tree :strfregexp_tree
113 |   end
114 | end
115 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/methods/traverse.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   class Subexpression < Regexp::Expression::Base
 3 | 
 4 |     # Traverses the expression, passing each recursive child to the
 5 |     # given block.
 6 |     # If the block takes two arguments, the indices of the children within
 7 |     # their parents are also passed to it.
 8 |     def each_expression(include_self = false, &block)
 9 |       return enum_for(__method__, include_self) unless block # no block given: hand back an Enumerator
10 | 
11 |       if block.arity == 1 # exactly one block parameter: pass expressions only
12 |         block.call(self) if include_self
13 |         each_expression_without_index(&block)
14 |       else # any other arity: pass expression and index
15 |         block.call(self, 0) if include_self # self is reported with index 0
16 |         each_expression_with_index(&block)
17 |       end
18 |     end
19 | 
20 |     # Traverses the subexpression (depth-first, pre-order) and calls the given
21 |     # block for each expression with three arguments; the traversal event,
22 |     # the expression, and the index of the expression within its parent.
23 |     #
24 |     # The event argument is passed as follows:
25 |     #
26 |     # - For subexpressions, :enter upon entering the subexpression, and
27 |     #   :exit upon exiting it.
28 |     #
29 |     # - For terminal expressions, :visit is called once.
30 |     #
31 |     # Returns self.
32 |     def traverse(include_self = false, &block)
33 |       return enum_for(__method__, include_self) unless block_given?
34 | 
35 |       block.call(:enter, self, 0) if include_self
36 | 
37 |       each_with_index do |exp, index|
38 |         if exp.terminal?
39 |           block.call(:visit, exp, index)
40 |         else
41 |           block.call(:enter, exp, index)
42 |           exp.traverse(&block)
43 |           block.call(:exit, exp, index)
44 |         end
45 |       end
46 | 
47 |       block.call(:exit, self, 0) if include_self
48 | 
49 |       self
50 |     end
51 |     alias :walk :traverse
52 | 
53 |     # Returns a new array with the results of calling the given block once
54 |     # for every expression. If a block is not given, returns an array with
55 |     # each expression and its level index as an array.
56 |     def flat_map(include_self = false, &block)
57 |       case block && block.arity
58 |       when nil then each_expression(include_self).to_a
59 |       when 2   then each_expression(include_self).map(&block)
60 |       else          each_expression(include_self).map { |exp| block.call(exp) }
61 |       end
62 |     end
63 | 
64 |     protected
65 | 
66 |     def each_expression_with_index(&block)
67 |       each_with_index do |exp, index|
68 |         block.call(exp, index)
69 |         exp.each_expression_with_index(&block) unless exp.terminal?
70 |       end
71 |     end
72 | 
73 |     def each_expression_without_index(&block)
74 |       each do |exp|
75 |         block.call(exp)
76 |         exp.each_expression_without_index(&block) unless exp.terminal?
77 |       end
78 |     end
79 |   end
80 | end
81 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/quantifier.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   # TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
 3 |   # call super in #initialize, but raise in #quantifier= and #quantify,
 4 |   # or introduce an Expression::Quantifiable intermediate class.
 5 |   # Or actually allow chaining as a more concise but tricky solution than PR#69.
 6 |   class Quantifier
 7 |     include Regexp::Expression::Shared
 8 | 
 9 |     MODES = %i[greedy possessive reluctant]
10 | 
11 |     def initialize(*args)
12 |       deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
13 | 
14 |       init_from_token_and_options(*args)
15 |       # TODO: remove in v3.0.0, stop removing parts of #token (?)
16 |       self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
17 |     end
18 | 
19 |     def to_h
20 |       {
21 |         token: token,
22 |         text:  text,
23 |         mode:  mode,
24 |         min:   min,
25 |         max:   max,
26 |       }
27 |     end
28 | 
29 |     MODES.each do |mode|
30 |       class_eval <<-RUBY, __FILE__, __LINE__ + 1
31 |         def #{mode}?
32 |           mode.equal?(:#{mode})
33 |         end
34 |       RUBY
35 |     end
36 |     alias :lazy? :reluctant?
37 | 
38 |     def min
39 |       derived_data[:min]
40 |     end
41 | 
42 |     def max
43 |       derived_data[:max]
44 |     end
45 | 
46 |     def mode
47 |       derived_data[:mode]
48 |     end
49 | 
50 |     private
51 | 
52 |     def deprecated_old_init(token, text, _min, _max, _mode = :greedy)
53 |       warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
54 |            "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
55 |            "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
56 |            "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
57 |            "will be derived automatically.\n"\
58 |            "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
59 |            "This is consistent with how Expression::Base instances are created. "
60 |       @token = token
61 |       @text  = text
62 |     end
63 | 
64 |     def derived_data
65 |       @derived_data ||= begin
66 |         min, max =
67 |           case text[0]
68 |           when '?'; [0, 1]
69 |           when '*'; [0, -1]
70 |           when '+'; [1, -1]
71 |           else
72 |             int_min = text[/\{(\d*)/, 1]
73 |             int_max = text[/,?(\d*)\}/, 1]
74 |             [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
75 |           end
76 | 
77 |         mod = text[/.([?+])/, 1]
78 |         mode = (mod == '?' && :reluctant) || (mod == '+' && :possessive) || :greedy
79 | 
80 |         { min: min, max: max, mode: mode }
81 |       end
82 |     end
83 |   end
84 | end
85 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/sequence.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   # A sequence of expressions. Differs from a Subexpressions by how it handles
 3 |   # quantifiers, as it applies them to its last element instead of itself as
 4 |   # a whole subexpression.
 5 |   #
 6 |   # Used as the base class for the Alternation alternatives, Conditional
 7 |   # branches, and CharacterSet::Intersection intersected sequences.
 8 |   class Sequence < Regexp::Expression::Subexpression
 9 |     class << self
10 |       def add_to(exp, params = {}, active_opts = {})
11 |         sequence = construct(
12 |           level:             exp.level,
13 |           set_level:         exp.set_level,
14 |           conditional_level: params[:conditional_level] || exp.conditional_level,
15 |           ts:                params[:ts],
16 |         )
17 |         sequence.options = active_opts
18 |         exp.expressions << sequence
19 |         sequence
20 |       end
21 |     end
22 | 
23 |     def ts
24 |       (head = expressions.first) ? head.ts : @ts
25 |     end
26 | 
27 |     def quantify(token, *args)
28 |       extract_quantifier_target(token.text).quantify(token, *args)
29 |     end
30 |   end
31 | end
32 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/sequence_operation.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   # abstract class
 3 |   class SequenceOperation < Regexp::Expression::Subexpression
 4 |     alias :sequences :expressions
 5 |     alias :operands :expressions
 6 |     alias :operator :text
 7 | 
 8 |     def ts
 9 |       (head = expressions.first) ? head.ts : @ts
10 |     end
11 | 
12 |     def <<(exp)
13 |       expressions.last << exp
14 |     end
15 | 
16 |     def add_sequence(active_opts = {}, params = { ts: 0 })
17 |       self.class::OPERAND.add_to(self, params, active_opts)
18 |     end
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/shared.rb:
--------------------------------------------------------------------------------
  1 | module Regexp::Expression
  2 |   module Shared
  3 |     module ClassMethods; end # filled in ./methods/*.rb
  4 | 
  5 |     def self.included(mod)
  6 |       mod.class_eval do
  7 |         extend Shared::ClassMethods
  8 | 
  9 |         attr_accessor :type, :token, :text, :ts, :te,
 10 |                       :level, :set_level, :conditional_level,
 11 |                       :options, :parent,
 12 |                       :custom_to_s_handling, :pre_quantifier_decorations
 13 | 
 14 |         attr_reader   :nesting_level, :quantifier
 15 |       end
 16 |     end
 17 | 
 18 |     def init_from_token_and_options(token, options = {})
 19 |       self.type              = token.type
 20 |       self.token             = token.token
 21 |       self.text              = token.text
 22 |       self.ts                = token.ts
 23 |       self.te                = token.te
 24 |       self.level             = token.level
 25 |       self.set_level         = token.set_level
 26 |       self.conditional_level = token.conditional_level
 27 |       self.nesting_level     = 0
 28 |       self.options           = options || {}
 29 |     end
 30 |     private :init_from_token_and_options
 31 | 
 32 |     def initialize_copy(orig)
 33 |       self.text       = orig.text.dup         if orig.text
 34 |       self.options    = orig.options.dup      if orig.options
 35 |       self.quantifier = orig.quantifier.clone if orig.quantifier
 36 |       self.parent     = nil # updated by Subexpression#initialize_copy
 37 |       if orig.pre_quantifier_decorations
 38 |         self.pre_quantifier_decorations = orig.pre_quantifier_decorations.map(&:dup)
 39 |       end
 40 |       super
 41 |     end
 42 | 
 43 |     def starts_at
 44 |       ts
 45 |     end
 46 | 
 47 |     def ends_at(include_quantifier = true)
 48 |       ts + (include_quantifier ? full_length : base_length)
 49 |     end
 50 | 
 51 |     def base_length
 52 |       to_s(:base).length
 53 |     end
 54 | 
 55 |     def full_length
 56 |       to_s(:original).length
 57 |     end
 58 | 
 59 |     # #to_s reproduces the original source, as an unparser would.
 60 |     #
 61 |     # It takes an optional format argument.
 62 |     #
 63 |     # Example:
 64 |     #
 65 |     # lit = Regexp::Parser.parse(/a +/x)[0]
 66 |     #
 67 |     # lit.to_s            # => 'a+'  # default; with quantifier
 68 |     # lit.to_s(:full)     # => 'a+'  # default; with quantifier
 69 |     # lit.to_s(:base)     # => 'a'   # without quantifier
 70 |     # lit.to_s(:original) # => 'a +' # with quantifier AND intermittent decorations
 71 |     #
 72 |     def to_s(format = :full)
 73 |       base = parts.each_with_object(''.dup) do |part, buff|
 74 |         if part.instance_of?(String)
 75 |           buff << part
 76 |         elsif !part.custom_to_s_handling
 77 |           buff << part.to_s(:original)
 78 |         end
 79 |       end
 80 |       "#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
 81 |     end
 82 |     alias :to_str :to_s
 83 | 
 84 |     def pre_quantifier_decoration(expression_format = :original)
 85 |       pre_quantifier_decorations.to_a.join if expression_format == :original
 86 |     end
 87 | 
 88 |     def quantifier_affix(expression_format = :full)
 89 |       quantifier.to_s if quantified? && expression_format != :base
 90 |     end
 91 | 
 92 |     def offset
 93 |       [starts_at, full_length]
 94 |     end
 95 | 
 96 |     def coded_offset
 97 |       '@%d+%d' % offset
 98 |     end
 99 | 
100 |     def nesting_level=(lvl)
101 |       @nesting_level = lvl
102 |       quantifier && quantifier.nesting_level = lvl
103 |       terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
104 |     end
105 | 
106 |     def quantifier=(qtf)
107 |       @quantifier = qtf
108 |       @repetitions = nil # clear memoized value
109 |     end
110 |   end
111 | end
112 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/expression/subexpression.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Expression
 2 |   class Subexpression < Regexp::Expression::Base
 3 |     include Enumerable
 4 | 
 5 |     attr_accessor :expressions
 6 | 
 7 |     def initialize(token, options = {})
 8 |       self.expressions = []
 9 |       super
10 |     end
11 | 
12 |     # Override base method to clone the expressions as well.
13 |     def initialize_copy(orig)
14 |       self.expressions = orig.expressions.map do |exp|
15 |         exp.clone.tap { |copy| copy.parent = self }
16 |       end
17 |       super
18 |     end
19 | 
20 |     def <<(exp)
21 |       exp.parent = self
22 |       expressions << exp
23 |     end
24 | 
25 |     %w[[] at each empty? fetch index join last length values_at].each do |method|
26 |       class_eval <<-RUBY, __FILE__, __LINE__ + 1
27 |         def #{method}(*args, &block)
28 |           expressions.#{method}(*args, &block)
29 |         end
30 |       RUBY
31 |     end
32 | 
33 |     def dig(*indices)
34 |       exp = self
35 |       indices.each { |idx| exp = exp.nil? || exp.terminal? ? nil : exp[idx] }
36 |       exp
37 |     end
38 | 
39 |     def te
40 |       ts + base_length
41 |     end
42 | 
43 |     def to_h
44 |       attributes.merge(
45 |         text:        to_s(:base),
46 |         expressions: expressions.map(&:to_h)
47 |       )
48 |     end
49 | 
50 |     def extract_quantifier_target(quantifier_description)
51 |       pre_quantifier_decorations = []
52 |       target = expressions.reverse.find do |exp|
53 |         if exp.decorative?
54 |           exp.custom_to_s_handling = true
55 |           pre_quantifier_decorations << exp.text
56 |           next
57 |         end
58 |         exp
59 |       end
60 |       target or raise Regexp::Parser::ParserError,
61 |         "No valid target found for '#{quantifier_description}' quantifier"
62 | 
63 |       target.pre_quantifier_decorations = pre_quantifier_decorations
64 |       target
65 |     end
66 |   end
67 | end
68 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/scanner/char_type.rl:
--------------------------------------------------------------------------------
%%{
  # Ragel machine for character type sequences (the d, D, h, H, s, S, w, W,
  # R and X letters handled below).
  machine re_char_type;

  single_codepoint_char_type = [dDhHsSwW];
  multi_codepoint_char_type  = [RX];

  char_type_char = single_codepoint_char_type | multi_codepoint_char_type;

  # Char types scanner
  # --------------------------------------------------------------------------
  # The pattern only matches the type letter itself. The emitted text is
  # copied starting at ts-1, i.e. it also contains the character directly
  # before the letter; the when-clauses compare it against backslash-letter
  # pairs, so that character is presumably the backslash consumed by the
  # calling machine (TODO confirm against the caller).
  # Each letter is emitted as a :type token with its own token name, then
  # fret leaves this scanner again.
  char_type := |*
    char_type_char {
      case text = copy(data, ts-1, te)
      when '\d'; emit(:type, :digit,      text)
      when '\D'; emit(:type, :nondigit,   text)
      when '\h'; emit(:type, :hex,        text)
      when '\H'; emit(:type, :nonhex,     text)
      when '\s'; emit(:type, :space,      text)
      when '\S'; emit(:type, :nonspace,   text)
      when '\w'; emit(:type, :word,       text)
      when '\W'; emit(:type, :nonword,    text)
      when '\R'; emit(:type, :linebreak,  text)
      when '\X'; emit(:type, :xgrapheme,  text)
      end
      fret;
    };
  *|;
}%%
29 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/scanner/errors/premature_end_error.rb:
--------------------------------------------------------------------------------
1 | class Regexp::Scanner
2 |   # Unexpected end of pattern
3 |   class PrematureEndError < ScannerError
4 |     def initialize(where = '')
5 |       super "Premature end of pattern at #{where}"
6 |     end
7 |   end
8 | end
9 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/scanner/errors/scanner_error.rb:
--------------------------------------------------------------------------------
1 | require_relative '../../../regexp_parser/error'
2 | 
3 | class Regexp::Scanner
4 |   # General scanner error (catch all)
5 |   class ScannerError < Regexp::Parser::Error; end
6 | end
7 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/scanner/errors/validation_error.rb:
--------------------------------------------------------------------------------
 1 | class Regexp::Scanner
 2 |   # Base for all scanner validation errors
 3 |   class ValidationError < ScannerError
 4 |     # Centralizes and unifies the handling of validation related errors.
 5 |     def self.for(type, problem, reason = nil)
 6 |       types.fetch(type).new(problem, reason)
 7 |     end
 8 | 
 9 |     def self.types
10 |       @types ||= {
11 |         backref:      InvalidBackrefError,
12 |         group:        InvalidGroupError,
13 |         group_option: InvalidGroupOption,
14 |         posix_class:  UnknownPosixClassError,
15 |         property:     UnknownUnicodePropertyError,
16 |         sequence:     InvalidSequenceError,
17 |       }
18 |     end
19 |   end
20 | 
21 |   # Invalid sequence format. Used for escape sequences, mainly.
22 |   class InvalidSequenceError < ValidationError
23 |     def initialize(what = 'sequence', where = '')
24 |       super "Invalid #{what} at #{where}"
25 |     end
26 |   end
27 | 
28 |   # Invalid group. Used for named groups.
29 |   class InvalidGroupError < ValidationError
30 |     def initialize(what, reason)
31 |       super "Invalid #{what}, #{reason}."
32 |     end
33 |   end
34 | 
35 |   # Invalid groupOption. Used for inline options.
36 |   # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
37 |   class InvalidGroupOption < ValidationError
38 |     def initialize(option, text)
39 |       super "Invalid group option #{option} in #{text}"
40 |     end
41 |   end
42 | 
43 |   # Invalid back reference. Used for name a number refs/calls.
44 |   class InvalidBackrefError < ValidationError
45 |     def initialize(what, reason)
46 |       super "Invalid back reference #{what}, #{reason}"
47 |     end
48 |   end
49 | 
50 |   # The property name was not recognized by the scanner.
51 |   class UnknownUnicodePropertyError < ValidationError
52 |     def initialize(name, _)
53 |       super "Unknown unicode character property name #{name}"
54 |     end
55 |   end
56 | 
57 |   # The POSIX class name was not recognized by the scanner.
58 |   class UnknownPosixClassError < ValidationError
59 |     def initialize(text, _)
60 |       super "Unknown POSIX class #{text}"
61 |     end
62 |   end
63 | end
64 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/scanner/property.rl:
--------------------------------------------------------------------------------
%%{
  # Ragel machine for unicode property sequences.
  machine re_property;

  property_char     = [pP];

  # p or P, an opening brace, an optional caret, one or more name
  # characters (alphanumerics, whitespace, underscore, hyphen, dot, equals
  # sign), and a closing brace.
  property_sequence = property_char . '{' . '^'? (alnum|space|[_\-\.=])+ '}';

  # Raised via the eof embedding on property_sequence below.
  action premature_property_end {
    raise PrematureEndError.new('unicode property')
  }

  # Unicode properties scanner
  # --------------------------------------------------------------------------
  # The text is copied starting at ts-1, so index 0 is the character before
  # the p or P (presumably the backslash consumed by the calling machine,
  # TODO confirm), index 1 is the p or P, index 2 the opening brace, and
  # index 3 either the caret or the first character of the name.
  #
  # The type is :nonproperty when exactly one of "uppercase P" and "caret"
  # applies (XOR); with both or neither it is :property.
  #
  # For the lookup, the name is stripped of carets, whitespace, underscores
  # and hyphens and downcased. It is looked up in the short property map
  # first, then in the long one; a name found in neither raises the
  # validation error registered for :property.
  unicode_property := |*

    property_sequence < eof(premature_property_end) {
      text = copy(data, ts-1, te)
      type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property

      name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase

      token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
      raise ValidationError.for(:property, name) unless token

      self.emit(type, token.to_sym, text)

      fret;
    };
  *|;
}%%
31 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax.rb:
--------------------------------------------------------------------------------
 1 | require_relative 'error'
 2 | 
 3 | module Regexp::Syntax
 4 |   class SyntaxError < Regexp::Parser::Error; end
 5 | end
 6 | 
 7 | require_relative 'syntax/token'
 8 | require_relative 'syntax/base'
 9 | require_relative 'syntax/any'
10 | require_relative 'syntax/version_lookup'
11 | require_relative 'syntax/versions'
12 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/any.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   # A syntax that always returns true, passing all tokens as implemented. This
 3 |   # is useful during development, testing, and should be useful for some types
 4 |   # of transformations as well.
 5 |   class Any < Base
 6 |     implements :*, [:*]
 7 | 
 8 |     def self.implements?(_type, _token) true end
 9 |   end
10 | end
11 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/base.rb:
--------------------------------------------------------------------------------
  1 | module Regexp::Syntax
  # Error for a type/token combination that a syntax does not implement.
  # The message names the syntax, the type, and the token.
  class NotImplementedError < Regexp::Syntax::SyntaxError
    def initialize(syntax, type, token)
      super("#{syntax} does not implement: [#{type}:#{token}]")
    end
  end
  7 | 
  8 |   # A lookup map of supported types and tokens in a given syntax
  9 |   class Base
 10 |     include Regexp::Syntax::Token
 11 | 
 12 |     class << self
 13 |       attr_accessor :features
 14 | 
 15 |       # automatically inherit features through the syntax class hierarchy
 16 |       def inherited(subclass)
 17 |         super
 18 |         subclass.features = features.to_h.map { |k, v| [k, v.dup] }.to_h
 19 |       end
 20 | 
 21 |       def implements(type, tokens)
 22 |         (features[type] ||= []).concat(tokens)
 23 |         added_features[type] = tokens
 24 |       end
 25 | 
 26 |       def excludes(type, tokens)
 27 |         tokens.each { |tok| features[type].delete(tok) }
 28 |         removed_features[type] = tokens
 29 |       end
 30 | 
 31 |       def implements?(type, token)
 32 |         implementations(type).include?(token)
 33 |       end
 34 |       alias :check? :implements?
 35 | 
 36 |       def implementations(type)
 37 |         features[type] || []
 38 |       end
 39 | 
 40 |       def implements!(type, token)
 41 |         raise NotImplementedError.new(self, type, token) unless
 42 |           implements?(type, token)
 43 |       end
 44 |       alias :check! :implements!
 45 | 
 46 |       def added_features
 47 |         @added_features ||= {}
 48 |       end
 49 | 
 50 |       def removed_features
 51 |         @removed_features ||= {}
 52 |       end
 53 | 
 54 |       def normalize(type, token)
 55 |         case type
 56 |         when :group
 57 |           normalize_group(type, token)
 58 |         when :backref
 59 |           normalize_backref(type, token)
 60 |         else
 61 |           [type, token]
 62 |         end
 63 |       end
 64 | 
 65 |       def normalize_group(type, token)
 66 |         case token
 67 |         when :named_ab, :named_sq
 68 |           %i[group named]
 69 |         else
 70 |           [type, token]
 71 |         end
 72 |       end
 73 | 
 74 |       def normalize_backref(type, token)
 75 |         case token
 76 |         when :name_ref_ab, :name_ref_sq
 77 |           %i[backref name_ref]
 78 |         when :name_call_ab, :name_call_sq
 79 |           %i[backref name_call]
 80 |         when :name_recursion_ref_ab, :name_recursion_ref_sq
 81 |           %i[backref name_recursion_ref]
 82 |         when :number_ref_ab, :number_ref_sq
 83 |           %i[backref number_ref]
 84 |         when :number_call_ab, :number_call_sq
 85 |           %i[backref number_call]
 86 |         when :number_rel_ref_ab, :number_rel_ref_sq
 87 |           %i[backref number_rel_ref]
 88 |         when :number_rel_call_ab, :number_rel_call_sq
 89 |           %i[backref number_rel_call]
 90 |         when :number_recursion_ref_ab, :number_recursion_ref_sq
 91 |           %i[backref number_recursion_ref]
 92 |         else
 93 |           [type, token]
 94 |         end
 95 |       end
 96 |     end
 97 | 
 98 |     # TODO: drop this backwards compatibility code in v3.0.0, do `private :new`
 99 |     def initialize
100 |       warn 'Using instances of Regexp::Parser::Syntax is deprecated ' \
101 |            "and will no longer be supported in v3.0.0."
102 |     end
103 | 
104 |     def method_missing(name, *args)
105 |       if self.class.respond_to?(name)
106 |         warn 'Using instances of Regexp::Parser::Syntax is deprecated ' \
107 |              "and will no longer be supported in v3.0.0. Please call "\
108 |              "methods on the class directly, e.g.: #{self.class}.#{name}"
109 |         self.class.send(name, *args)
110 |       else
111 |         super
112 |       end
113 |     end
114 | 
115 |     def respond_to_missing?(name, include_private = false)
116 |       self.class.respond_to?(name) || super
117 |     end
118 |     # end of backwards compatibility code
119 |   end
120 | end
121 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token.rb:
--------------------------------------------------------------------------------
 1 | # Define the base module and the simplest of tokens.
 2 | module Regexp::Syntax
 3 |   module Token
 4 |     Map = {}
 5 | 
 6 |     module Literal
 7 |       All = %i[literal]
 8 |       Type = :literal
 9 |     end
10 | 
11 |     module FreeSpace
12 |       All  = %i[comment whitespace]
13 |       Type = :free_space
14 |     end
15 | 
16 |     Map[FreeSpace::Type] = FreeSpace::All
17 |     Map[Literal::Type]   = Literal::All
18 |   end
19 | end
20 | 
21 | 
22 | # Load all the token files, they will populate the Map constant.
23 | require_relative 'token/anchor'
24 | require_relative 'token/assertion'
25 | require_relative 'token/backreference'
26 | require_relative 'token/posix_class'
27 | require_relative 'token/character_set'
28 | require_relative 'token/character_type'
29 | require_relative 'token/conditional'
30 | require_relative 'token/escape'
31 | require_relative 'token/group'
32 | require_relative 'token/keep'
33 | require_relative 'token/meta'
34 | require_relative 'token/quantifier'
35 | require_relative 'token/unicode_property'
36 | 
37 | 
# After loading all the tokens the map is full. Extract all tokens and types
# into the All and Types constants: All is the sorted list of unique tokens
# of every type, Types the list of type names. Both are frozen.
module Regexp::Syntax
  module Token
    All   = Map.values.flatten.uniq.sort.freeze
    Types = Map.keys.freeze
  end
end
46 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/anchor.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module Anchor
 4 |       Basic       = %i[bol eol]
 5 |       Extended    = Basic + %i[word_boundary nonword_boundary]
 6 |       String      = %i[bos eos eos_ob_eol]
 7 |       MatchStart  = %i[match_start]
 8 | 
 9 |       All = Extended + String + MatchStart
10 |       Type = :anchor
11 |     end
12 | 
13 |     Map[Anchor::Type] = Anchor::All
14 |   end
15 | end
16 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/assertion.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module Assertion
 4 |       Lookahead = %i[lookahead nlookahead]
 5 |       Lookbehind = %i[lookbehind nlookbehind]
 6 | 
 7 |       All = Lookahead + Lookbehind
 8 |       Type = :assertion
 9 |     end
10 | 
11 |     Map[Assertion::Type] = Assertion::All
12 |   end
13 | end
14 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/backreference.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module Backreference
 4 |       Plain     = %i[number]
 5 |       NumberRef = %i[number_ref number_rel_ref]
 6 |       Number    = Plain + NumberRef
 7 |       Name      = %i[name_ref]
 8 | 
 9 |       RecursionLevel = %i[name_recursion_ref number_recursion_ref]
10 | 
11 |       V1_8_6 = Plain
12 | 
13 |       V1_9_1 = Name + NumberRef + RecursionLevel
14 | 
15 |       All = V1_8_6 + V1_9_1
16 |       Type = :backref
17 |     end
18 | 
19 |     # Type is the same as Backreference so keeping it here, for now.
20 |     module SubexpressionCall
21 |       Name      = %i[name_call]
22 |       Number    = %i[number_call number_rel_call]
23 | 
24 |       All = Name + Number
25 |     end
26 | 
27 |     Map[Backreference::Type] = Backreference::All +
28 |                                SubexpressionCall::All
29 | 
30 |     # alias for symmetry between token symbol and Expression class name
31 |     Backref = Backreference
32 |   end
33 | end
34 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/character_set.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module CharacterSet
 4 |       Basic     = %i[open close negate range]
 5 |       Extended  = Basic + %i[intersection]
 6 | 
 7 |       All = Extended
 8 |       Type = :set
 9 |     end
10 | 
11 |     Map[CharacterSet::Type] = CharacterSet::All
12 | 
13 |     # alias for symmetry between token symbol and Token module name
14 |     Set = CharacterSet
15 |   end
16 | end
17 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/character_type.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module CharacterType
 4 |       Basic     = []
 5 |       Extended  = %i[digit nondigit space nonspace word nonword]
 6 |       Hex       = %i[hex nonhex]
 7 | 
 8 |       Clustered = %i[linebreak xgrapheme]
 9 | 
10 |       All = Basic + Extended + Hex + Clustered
11 |       Type = :type
12 |     end
13 | 
14 |     Map[CharacterType::Type] = CharacterType::All
15 |   end
16 | end
17 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/conditional.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module Conditional
 4 |       Delimiters = %i[open close]
 5 | 
 6 |       Condition  = %i[condition_open condition condition_close]
 7 |       Separator  = %i[separator]
 8 | 
 9 |       All = Conditional::Delimiters + Conditional::Condition + Conditional::Separator
10 | 
11 |       Type = :conditional
12 |     end
13 | 
14 |     Map[Conditional::Type] = Conditional::All
15 |   end
16 | end
17 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/escape.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module Escape
 4 |       Basic = %i[backslash literal]
 5 | 
 6 |       Control = %i[control meta_sequence]
 7 | 
 8 |       ASCII = %i[bell backspace escape form_feed newline carriage
 9 |                  tab vertical_tab]
10 | 
11 |       Unicode = %i[codepoint codepoint_list]
12 | 
13 |       Meta  = %i[dot alternation
14 |                  zero_or_one zero_or_more one_or_more
15 |                  bol eol
16 |                  group_open group_close
17 |                  interval_open interval_close
18 |                  set_open set_close]
19 | 
20 |       Hex   = %i[hex]
21 | 
22 |       Octal = %i[octal]
23 | 
24 |       All   = Basic + Control + ASCII + Unicode + Meta + Hex + Octal
25 |       Type  = :escape
26 |     end
27 | 
28 |     Map[Escape::Type] = Escape::All
29 | 
30 |     # alias for symmetry between Token::* and Expression::*
31 |     EscapeSequence = Escape
32 |   end
33 | end
34 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/group.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module Group
 4 |       Basic     = %i[capture close]
 5 |       Extended  = Basic + %i[options options_switch]
 6 | 
 7 |       Named     = %i[named]
 8 |       Atomic    = %i[atomic]
 9 |       Passive   = %i[passive]
10 |       Comment   = %i[comment]
11 | 
12 |       V1_8_6 = Group::Extended + Group::Named + Group::Atomic +
13 |                Group::Passive + Group::Comment
14 | 
15 |       V2_4_1 = %i[absence]
16 | 
17 |       All = V1_8_6 + V2_4_1
18 |       Type = :group
19 |     end
20 | 
21 |     Map[Group::Type] = Group::All
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/keep.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module Keep
 4 |       Mark = %i[mark]
 5 | 
 6 |       All  = Mark
 7 |       Type = :keep
 8 |     end
 9 | 
10 |     Map[Keep::Type] = Keep::All
11 |   end
12 | end
13 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/meta.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module Meta
 4 |       Basic       = %i[dot]
 5 |       Alternation = %i[alternation]
 6 |       Extended    = Basic + Alternation
 7 | 
 8 |       All = Extended
 9 |       Type = :meta
10 |     end
11 | 
12 |     Map[Meta::Type] = Meta::All
13 | 
14 |     # alias for symmetry between Token::* and Expression::*
15 |     module Alternation
16 |       All  = Meta::Alternation
17 |       Type = Meta::Type
18 |     end
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/posix_class.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module PosixClass
 4 |       Standard = %i[alnum alpha blank cntrl digit graph
 5 |                     lower print punct space upper xdigit]
 6 | 
 7 |       Extensions = %i[ascii word]
 8 | 
 9 |       All = Standard + Extensions
10 |       Type = :posixclass
11 |       NonType = :nonposixclass
12 |     end
13 | 
14 |     Map[PosixClass::Type]    = PosixClass::All
15 |     Map[PosixClass::NonType] = PosixClass::All
16 |   end
17 | end
18 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/quantifier.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module Quantifier
 4 |       Greedy = %i[
 5 |         zero_or_one
 6 |         zero_or_more
 7 |         one_or_more
 8 |       ]
 9 | 
10 |       Reluctant = %i[
11 |         zero_or_one_reluctant
12 |         zero_or_more_reluctant
13 |         one_or_more_reluctant
14 |       ]
15 | 
16 |       Possessive = %i[
17 |         zero_or_one_possessive
18 |         zero_or_more_possessive
19 |         one_or_more_possessive
20 |       ]
21 | 
22 |       Interval             = %i[interval]
23 |       IntervalReluctant    = %i[interval_reluctant]
24 |       IntervalPossessive   = %i[interval_possessive]
25 | 
26 |       IntervalAll = Interval + IntervalReluctant + IntervalPossessive
27 | 
28 |       V1_8_6 = Greedy + Reluctant + Interval + IntervalReluctant
29 |       All = Greedy + Reluctant + Possessive + IntervalAll
30 |       Type = :quantifier
31 |     end
32 | 
33 |     Map[Quantifier::Type] = Quantifier::All
34 |   end
35 | end
36 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/token/virtual.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   module Token
 3 |     module Virtual
 4 |       Root     = %i[root]
 5 |       Sequence = %i[sequence]
 6 | 
 7 |       All  = %i[root sequence]
 8 |       Type = :expression
 9 |     end
10 |   end
11 | end
12 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/version_lookup.rb:
--------------------------------------------------------------------------------
 1 | module Regexp::Syntax
 2 |   VERSION_FORMAT = '\Aruby/\d+\.\d+(\.\d+)?\z'
 3 |   VERSION_REGEXP = /#{VERSION_FORMAT}/
 4 |   VERSION_CONST_REGEXP = /\AV\d+_\d+(?:_\d+)?\z/
 5 | 
 6 |   class InvalidVersionNameError < Regexp::Syntax::SyntaxError
 7 |     def initialize(name)
 8 |       super "Invalid version name '#{name}'. Expected format is '#{VERSION_FORMAT}'"
 9 |     end
10 |   end
11 | 
12 |   class UnknownSyntaxNameError < Regexp::Syntax::SyntaxError
13 |     def initialize(name)
14 |       super "Unknown syntax name '#{name}'."
15 |     end
16 |   end
17 | 
18 |   module_function
19 | 
20 |   # Returns the syntax specification class for the given syntax
21 |   # version name. The special names 'any' and '*' return Syntax::Any.
22 |   def for(name)
23 |     (@alias_map ||= {})[name] ||= version_class(name)
24 |   end
25 | 
26 |   def new(name)
27 |     warn 'Regexp::Syntax.new is deprecated in favor of Regexp::Syntax.for. '\
28 |          'It does not return distinct instances and will be removed in v3.0.0.'
29 |     self.for(name)
30 |   end
31 | 
32 |   def supported?(name)
33 |     name =~ VERSION_REGEXP && comparable(name) >= comparable('1.8.6')
34 |   end
35 | 
36 |   def version_class(version)
37 |     return Regexp::Syntax::Any if ['*', 'any'].include?(version.to_s)
38 | 
39 |     version =~ VERSION_REGEXP || raise(InvalidVersionNameError, version)
40 |     version_const_name = "V#{version.to_s.scan(/\d+/).join('_')}"
41 |     const_get(version_const_name) || raise(UnknownSyntaxNameError, version)
42 |   end
43 | 
44 |   def const_missing(const_name)
45 |     if const_name =~ VERSION_CONST_REGEXP
46 |       return fallback_version_class(const_name)
47 |     end
48 |     super
49 |   end
50 | 
51 |   def fallback_version_class(version)
52 |     sorted = (specified_versions + [version]).sort_by { |ver| comparable(ver) }
53 |     index = sorted.index(version)
54 |     index > 0 && const_get(sorted[index - 1])
55 |   end
56 | 
57 |   def specified_versions
58 |     constants.select { |const_name| const_name =~ VERSION_CONST_REGEXP }
59 |   end
60 | 
61 |   def comparable(name)
62 |     # add .99 to treat versions without a patch value as latest patch version
63 |     Gem::Version.new((name.to_s.scan(/\d+/) << 99).join('.'))
64 |   end
65 | end
66 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions.rb:
--------------------------------------------------------------------------------
# Loads every syntax version file from the versions/ directory.
#
# Ruby 1.x is no longer a supported runtime,
# but its regex features are still recognized.
#
# Aliases for the latest patch version are provided as 'ruby/n.n',
# e.g. 'ruby/1.9' refers to Ruby v1.9.3.
version_files = Dir[File.expand_path('../versions/*.rb', __FILE__)].sort
version_files.each { |path| require_relative path }

# Syntax class looked up for the version of the running Ruby.
Regexp::Syntax::CURRENT = Regexp::Syntax.for("ruby/#{RUBY_VERSION}")
9 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/1.8.6.rb:
--------------------------------------------------------------------------------
 1 | class Regexp::Syntax::V1_8_6 < Regexp::Syntax::Base
 2 |   implements :anchor,     Anchor::All
 3 |   implements :assertion,  Assertion::Lookahead
 4 |   implements :backref,    Backreference::V1_8_6
 5 |   implements :escape,     Escape::Basic + Escape::ASCII + Escape::Meta + Escape::Control
 6 |   implements :free_space, FreeSpace::All
 7 |   implements :group,      Group::V1_8_6
 8 |   implements :literal,    Literal::All
 9 |   implements :meta,       Meta::Extended
10 |   implements :posixclass, PosixClass::Standard
11 |   implements :quantifier, Quantifier::V1_8_6
12 |   implements :set,        CharacterSet::All
13 |   implements :type,       CharacterType::Extended
14 | end
15 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/1.9.1.rb:
--------------------------------------------------------------------------------
 1 | class Regexp::Syntax::V1_9_1 < Regexp::Syntax::V1_8_6
 2 |   implements :assertion,     Assertion::Lookbehind
 3 |   implements :backref,       Backreference::V1_9_1 + SubexpressionCall::All
 4 |   implements :escape,        Escape::Unicode + Escape::Hex + Escape::Octal
 5 |   implements :posixclass,    PosixClass::Extensions
 6 |   implements :nonposixclass, PosixClass::All
 7 |   implements :property,      UnicodeProperty::V1_9_0
 8 |   implements :nonproperty,   UnicodeProperty::V1_9_0
 9 |   implements :quantifier,    Quantifier::Possessive + Quantifier::IntervalPossessive
10 |   implements :type,          CharacterType::Hex
11 | end
12 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/1.9.3.rb:
--------------------------------------------------------------------------------
# Syntax declaration for Ruby 1.9.3: declares the V1_9_3 Unicode property
# tokens for both the :property and the :nonproperty type.
class Regexp::Syntax::V1_9_3 < Regexp::Syntax::V1_9_1
  %i[property nonproperty].each { |type| implements type, UnicodeProperty::V1_9_3 }
end
5 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/2.0.0.rb:
--------------------------------------------------------------------------------
 1 | class Regexp::Syntax::V2_0_0 < Regexp::Syntax::V1_9_3
 2 |   implements :keep,        Keep::All
 3 |   implements :conditional, Conditional::All
 4 |   implements :property,    UnicodeProperty::V2_0_0
 5 |   implements :nonproperty, UnicodeProperty::V2_0_0
 6 |   implements :type,        CharacterType::Clustered
 7 | 
 8 |   excludes   :property,    %i[newline]
 9 |   excludes   :nonproperty, %i[newline]
10 | end
11 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/2.2.0.rb:
--------------------------------------------------------------------------------
# Syntax declaration for Ruby 2.2.0: declares the V2_2_0 Unicode property
# tokens for both the :property and the :nonproperty type.
class Regexp::Syntax::V2_2_0 < Regexp::Syntax::V2_0_0
  %i[property nonproperty].each { |type| implements type, UnicodeProperty::V2_2_0 }
end
5 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/2.3.0.rb:
--------------------------------------------------------------------------------
# Syntax declaration for Ruby 2.3.0: declares the V2_3_0 Unicode property
# tokens for both the :property and the :nonproperty type.
class Regexp::Syntax::V2_3_0 < Regexp::Syntax::V2_2_0
  %i[property nonproperty].each { |type| implements type, UnicodeProperty::V2_3_0 }
end
5 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/2.4.0.rb:
--------------------------------------------------------------------------------
# Syntax declaration for Ruby 2.4.0: declares the V2_4_0 Unicode property
# tokens for both the :property and the :nonproperty type.
class Regexp::Syntax::V2_4_0 < Regexp::Syntax::V2_3_0
  %i[property nonproperty].each { |type| implements type, UnicodeProperty::V2_4_0 }
end
5 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/2.4.1.rb:
--------------------------------------------------------------------------------
# Syntax declaration for Ruby 2.4.1: declares the Group::V2_4_1 tokens
# (the :absence group token) on top of V2_4_0.
class Regexp::Syntax::V2_4_1 < Regexp::Syntax::V2_4_0
  implements(:group, Group::V2_4_1)
end
4 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/2.5.0.rb:
--------------------------------------------------------------------------------
# Syntax declaration for Ruby 2.5.0: declares the V2_5_0 Unicode property
# tokens for both the :property and the :nonproperty type.
class Regexp::Syntax::V2_5_0 < Regexp::Syntax::V2_4_1
  %i[property nonproperty].each { |type| implements type, UnicodeProperty::V2_5_0 }
end
5 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/2.6.0.rb:
--------------------------------------------------------------------------------
# Syntax declaration for Ruby 2.6.0: declares the V2_6_0 Unicode property
# tokens for both the :property and the :nonproperty type.
class Regexp::Syntax::V2_6_0 < Regexp::Syntax::V2_5_0
  %i[property nonproperty].each { |type| implements type, UnicodeProperty::V2_6_0 }
end
5 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/2.6.2.rb:
--------------------------------------------------------------------------------
# Syntax declaration for Ruby 2.6.2: declares the V2_6_2 Unicode property
# tokens for both the :property and the :nonproperty type.
class Regexp::Syntax::V2_6_2 < Regexp::Syntax::V2_6_0
  %i[property nonproperty].each { |type| implements type, UnicodeProperty::V2_6_2 }
end
5 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/2.6.3.rb:
--------------------------------------------------------------------------------
# Syntax declaration for Ruby 2.6.3: declares the V2_6_3 Unicode property
# tokens for both the :property and the :nonproperty type.
class Regexp::Syntax::V2_6_3 < Regexp::Syntax::V2_6_2
  %i[property nonproperty].each { |type| implements type, UnicodeProperty::V2_6_3 }
end
5 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/3.1.0.rb:
--------------------------------------------------------------------------------
# Syntax declaration for Ruby 3.1.0: declares the V3_1_0 Unicode property
# tokens for both the :property and the :nonproperty type.
class Regexp::Syntax::V3_1_0 < Regexp::Syntax::V2_6_3
  %i[property nonproperty].each { |type| implements type, UnicodeProperty::V3_1_0 }
end
5 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/syntax/versions/3.2.0.rb:
--------------------------------------------------------------------------------
# Syntax declaration for Ruby 3.2.0: declares the V3_2_0 Unicode property
# tokens for both the :property and the :nonproperty type.
class Regexp::Syntax::V3_2_0 < Regexp::Syntax::V3_1_0
  %i[property nonproperty].each { |type| implements type, UnicodeProperty::V3_2_0 }
end
5 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/token.rb:
--------------------------------------------------------------------------------
 1 | class Regexp
 2 |   TOKEN_KEYS = %i[
 3 |     type
 4 |     token
 5 |     text
 6 |     ts
 7 |     te
 8 |     level
 9 |     set_level
10 |     conditional_level
11 |   ].freeze
12 | 
13 |   Token = Struct.new(*TOKEN_KEYS) do
14 |     attr_accessor :previous, :next
15 | 
16 |     def offset
17 |       [ts, te]
18 |     end
19 | 
20 |     def length
21 |       te - ts
22 |     end
23 |   end
24 | end
25 | 


--------------------------------------------------------------------------------
/lib/regexp_parser/version.rb:
--------------------------------------------------------------------------------
class Regexp
  class Parser
    # Version of the gem; the gemspec reads this constant.
    VERSION = "2.10.0"
  end
end
6 | 


--------------------------------------------------------------------------------
/regexp_parser.gemspec:
--------------------------------------------------------------------------------
 1 | $:.unshift File.join(File.dirname(__FILE__), 'lib')
 2 | 
 3 | require 'regexp_parser/version'
 4 | 
 5 | Gem::Specification.new do |spec|
 6 |   spec.name          = 'regexp_parser'
 7 |   spec.version       = ::Regexp::Parser::VERSION
 8 | 
 9 |   spec.summary       = "Scanner, lexer, parser for ruby's regular expressions"
10 |   spec.description   = 'A library for tokenizing, lexing, and parsing Ruby regular expressions.'
11 |   spec.homepage      = 'https://github.com/ammar/regexp_parser'
12 | 
13 |   spec.metadata['bug_tracker_uri'] = "#{spec.homepage}/issues"
14 |   spec.metadata['changelog_uri']   = "#{spec.homepage}/blob/master/CHANGELOG.md"
15 |   spec.metadata['homepage_uri']    = spec.homepage
16 |   spec.metadata['source_code_uri'] = spec.homepage
17 |   spec.metadata['wiki_uri']        = "#{spec.homepage}/wiki"
18 | 
19 |   spec.metadata['rubygems_mfa_required'] = 'true'
20 | 
21 |   spec.authors       = ['Ammar Ali', 'Janosch Müller']
22 |   spec.email         = ['ammarabuali@gmail.com', 'janosch84@gmail.com']
23 | 
24 |   spec.license       = 'MIT'
25 | 
26 |   spec.require_paths = ['lib']
27 | 
28 |   spec.files         = Dir.glob('lib/**/*.{csv,rb,rl}') +
29 |                        %w[Gemfile Rakefile LICENSE regexp_parser.gemspec]
30 | 
31 |   spec.platform      = Gem::Platform::RUBY
32 | 
33 |   spec.required_ruby_version = '>= 2.0.0'
34 | end
35 | 


--------------------------------------------------------------------------------
/spec/expression/base_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Expression::Base) do
 4 |   # test #level
 5 |   include_examples 'parse', /^a(b(c(d)))e$/,
 6 |     [0]          => [to_s: '^',         level: 0],
 7 |     [1]          => [to_s: 'a',         level: 0],
 8 |     [2]          => [to_s: '(b(c(d)))', level: 0],
 9 |     [2, 0]       => [to_s: 'b',         level: 1],
10 |     [2, 1]       => [to_s: '(c(d))',    level: 1],
11 |     [2, 1, 0]    => [to_s: 'c',         level: 2],
12 |     [2, 1, 1]    => [to_s: '(d)',       level: 2],
13 |     [2, 1, 1, 0] => [to_s: 'd',         level: 3],
14 |     [3]          => [to_s: 'e',         level: 0],
15 |     [4]          => [to_s: '$',         level: 0]
16 | 
17 |   # test #coded_offset
18 |   include_examples 'parse', /^a*(b+(c?))$/,
19 |     []        => [Root,             coded_offset: '@0+12'],
20 |     [0]       => [to_s: '^',        coded_offset: '@0+1'],
21 |     [1]       => [to_s: 'a*',       coded_offset: '@1+2'],
22 |     [2]       => [to_s: '(b+(c?))', coded_offset: '@3+8'],
23 |     [2, 0]    => [to_s: 'b+',       coded_offset: '@4+2'],
24 |     [2, 1]    => [to_s: '(c?)',     coded_offset: '@6+4'],
25 |     [2, 1, 0] => [to_s: 'c?',       coded_offset: '@7+2'],
26 |     [3]       => [to_s: '$',        coded_offset: '@11+1']
27 | 
28 |   # test #quantity
29 |   include_examples 'parse', /aa/, [0] => [quantity: [nil, nil]]
30 |   include_examples 'parse', /a?/, [0] => [quantity: [0, 1]]
31 |   include_examples 'parse', /a*/, [0] => [quantity: [0, -1]]
32 |   include_examples 'parse', /a+/, [0] => [quantity: [1, -1]]
33 | 
34 |   # test #repetitions
35 |   include_examples 'parse', /aa/, [0] => [repetitions: 1..1]
36 |   include_examples 'parse', /a?/, [0] => [repetitions: 0..1]
37 |   include_examples 'parse', /a*/, [0] => [repetitions: 0..(Float::INFINITY)]
38 |   include_examples 'parse', /a+/, [0] => [repetitions: 1..(Float::INFINITY)]
39 | 
40 |   # test #base_length, #full_length, #starts_at, #ends_at
41 |   include_examples 'parse', /(aa)/,
42 |     []     => [Root,           base_length: 4, full_length: 4, starts_at: 0, ends_at: 4],
43 |     [0]    => [Group::Capture, base_length: 4, full_length: 4, starts_at: 0, ends_at: 4],
44 |     [0, 0] => [Literal,        base_length: 2, full_length: 2, starts_at: 1, ends_at: 3]
45 |   include_examples 'parse', /(aa){42}/,
46 |     []     => [Root,           base_length: 8, full_length: 8, starts_at: 0, ends_at: 8],
47 |     [0]    => [Group::Capture, base_length: 4, full_length: 8, starts_at: 0, ends_at: 8],
48 |     [0, 0] => [Literal,        base_length: 2, full_length: 2, starts_at: 1, ends_at: 3]
49 |   include_examples 'parse', /(aa) {42}/x,
50 |     []     => [Root,           base_length: 9, full_length: 9, starts_at: 0, ends_at: 9],
51 |     [0]    => [Group::Capture, base_length: 4, full_length: 9, starts_at: 0, ends_at: 9],
52 |     [0, 0] => [Literal,        base_length: 2, full_length: 2, starts_at: 1, ends_at: 3]
53 | 
54 |   # test #to_re
55 |   include_examples 'parse', '^a*(b([cde]+))+f?$',
56 |     [] => [Root, to_re: /^a*(b([cde]+))+f?$/]
57 | 
58 |   specify '#parent' do
59 |     root = Regexp::Parser.parse(/(a(b)){42}/)
60 | 
61 |     expect(root.parent).to be_nil
62 |     expect(root[0].parent).to eq root
63 |     expect(root[0].quantifier.parent).to be_nil
64 |     expect(root[0][0].parent).to eq root[0]
65 |     expect(root[0][1].parent).to eq root[0]
66 |     expect(root[0][1][0].parent).to eq root[0][1]
67 |   end
68 | 
69 |   specify '#to_re warns when used on set members' do
70 |     expect do
71 |       result = Regexp::Parser.parse(/[\b]/)[0][0].to_re
72 |       expect(result).to eq(/\b/)
73 |     end.to output(/set member/).to_stderr
74 |   end
75 | 
76 |   specify 'updating #quantifier updates #repetitions' do
77 |     exp = Regexp::Parser.parse(/a{3}/)[0]
78 |     expect(exp.repetitions).to eq 3..3
79 |     exp.quantifier = Regexp::Parser.parse(/b{5}/)[0].quantifier
80 |     expect(exp.repetitions).to eq 5..5
81 |   end
82 | end
83 | 


--------------------------------------------------------------------------------
/spec/expression/conditional_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Expression::Conditional) do
 4 |   specify('Conditional#condition, #branches') do
 5 |     conditional = RP.parse(/(?<A>a)(?(<A>)T|F)/)[1]
 6 |     expect(conditional.condition).to eq conditional[0]
 7 |     expect(conditional.branches).to eq conditional[1..2]
 8 |   end
 9 | 
10 |   specify('Condition#referenced_expression') do
11 |     root = RP.parse(/(?<A>a)(?(<A>)T|F)/)
12 |     condition = root[1].condition
13 |     expect(condition.referenced_expression).to eq root[0]
14 |     expect(condition.referenced_expression.to_s).to eq '(?<A>a)'
15 | 
16 |     root = RP.parse(/(a)(?(1)T|F)/)
17 |     condition = root[1].condition
18 |     expect(condition.referenced_expression).to eq root[0]
19 |     expect(condition.referenced_expression.to_s).to eq '(a)'
20 |   end
21 | 
22 |   specify('parse conditional excessive branches') do
23 |     regexp = '(?<A>a)(?(<A>)T|F|X)'
24 | 
25 |     expect { RP.parse(regexp) }.to raise_error(Conditional::TooManyBranches)
26 |   end
27 | end
28 | 


--------------------------------------------------------------------------------
/spec/expression/free_space_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Expression::FreeSpace) do
 4 |   specify('white space quantify raises error') do
 5 |     regexp = /
 6 |       a # Comment
 7 |     /x
 8 | 
 9 |     root = RP.parse(regexp)
10 |     space = root[0]
11 | 
12 |     expect(space).to be_instance_of(FreeSpace::WhiteSpace)
13 |     expect { space.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
14 |   end
15 | 
16 |   specify('comment quantify raises error') do
17 |     regexp = /
18 |       a # Comment
19 |     /x
20 | 
21 |     root = RP.parse(regexp)
22 |     comment = root[3]
23 | 
24 |     expect(comment).to be_instance_of(FreeSpace::Comment)
25 |     expect { comment.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
26 |   end
27 | end
28 | 


--------------------------------------------------------------------------------
/spec/expression/methods/construct_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Expression::Shared) do
 4 |   describe '::construct' do
 5 |     {
 6 |       Alternation                       => :meta,
 7 |       Alternative                       => :expression,
 8 |       Anchor::Base                      => :anchor,
 9 |       Anchor::EndOfLine                 => :anchor,
10 |       Assertion::Base                   => :assertion,
11 |       Assertion::Lookahead              => :assertion,
12 |       Backreference::Base               => :backref,
13 |       Backreference::Number             => :backref,
14 |       CharacterSet                      => :set,
15 |       CharacterSet::IntersectedSequence => :expression,
16 |       CharacterSet::Intersection        => :set,
17 |       CharacterSet::Range               => :set,
18 |       CharacterType::Any                => :meta,
19 |       CharacterType::Base               => :type,
20 |       CharacterType::Digit              => :type,
21 |       Conditional::Branch               => :expression,
22 |       Conditional::Condition            => :conditional,
23 |       Conditional::Expression           => :conditional,
24 |       EscapeSequence::Base              => :escape,
25 |       EscapeSequence::Literal           => :escape,
26 |       FreeSpace                         => :free_space,
27 |       Group::Base                       => :group,
28 |       Group::Capture                    => :group,
29 |       Keep::Mark                        => :keep,
30 |       Literal                           => :literal,
31 |       PosixClass                        => :posixclass,
32 |       Quantifier                        => :quantifier,
33 |       Root                              => :expression,
34 |       UnicodeProperty::Base             => :property,
35 |       UnicodeProperty::Number::Decimal  => :property,
36 |     }.each do |klass, expected_type|
37 |       it "works for #{klass}" do
38 |         result = klass.construct
39 |         expect(result).to be_a klass
40 |         expect(result.type).to eq expected_type
41 |       end
42 |     end
43 | 
44 |     it 'allows overriding defaults' do
45 |       expect(Literal.construct(type: :foo).type).to eq :foo
46 |     end
47 | 
48 |     it 'allows passing options' do
49 |       expect(Literal.construct(options: { i: true }).options[:i]).to eq true
50 |     end
51 | 
52 |     it 'raises ArgumentError for unknown parameters' do
53 |       expect { Literal.construct(foo: :foo) }.to raise_error(ArgumentError)
54 |     end
55 |   end
56 | end
57 | 


--------------------------------------------------------------------------------
/spec/expression/methods/human_name_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Regexp::Expression::Shared#human_name') do
 4 |   include_examples 'parse', //,              []      => [human_name: 'root']
 5 |   include_examples 'parse', /a/,             [0]     => [human_name: 'literal']
 6 |   include_examples 'parse', /./,             [0]     => [human_name: 'match-all']
 7 |   include_examples 'parse', /[abc]/,         [0]     => [human_name: 'character set']
 8 |   include_examples 'parse', /[a-c]/,         [0, 0]  => [human_name: 'character range']
 9 |   include_examples 'parse', /\d/,            [0]     => [human_name: 'digit type']
10 |   include_examples 'parse', /\n/,            [0]     => [human_name: 'newline escape']
11 |   include_examples 'parse', /\u{61 62 63}/,  [0]     => [human_name: 'codepoint list escape']
12 |   include_examples 'parse', /\p{ascii}/,     [0]     => [human_name: 'ascii property']
13 |   include_examples 'parse', /[[:ascii:]]/,   [0, 0]  => [human_name: 'ascii posixclass']
14 |   include_examples 'parse', /a{5}/,          [0, :q] => [human_name: 'interval quantifier']
15 |   include_examples 'parse', /^/,             [0]     => [human_name: 'beginning of line']
16 |   include_examples 'parse', /(?=abc)/,       [0]     => [human_name: 'lookahead']
17 |   include_examples 'parse', /(a)(b)/,        [0]     => [human_name: 'capture group 1']
18 |   include_examples 'parse', /(a)(b)/,        [1]     => [human_name: 'capture group 2']
19 |   include_examples 'parse', /(?<x>abc)/,     [0]     => [human_name: 'named capture group']
20 |   include_examples 'parse', /   /x,          [0]     => [human_name: 'free space']
21 |   include_examples 'parse', /#comment
22 |                             /x,              [0]     => [human_name: 'comment']
23 |   include_examples 'parse', /(?#comment)/x,  [0]     => [human_name: 'comment group']
24 |   include_examples 'parse', /(abc)\1/,       [1]     => [human_name: 'backreference']
25 |   include_examples 'parse', /(?<x>)\k<x>/,   [1]     => [human_name: 'backreference by name']
26 |   include_examples 'parse', /(abc)\g<-1>/,   [1]     => [human_name: 'relative subexpression call']
27 |   include_examples 'parse', /a|bc/,          [0]     => [human_name: 'alternation']
28 |   include_examples 'parse', /a|bc/,          [0, 0]  => [human_name: 'alternative']
29 | end
30 | 


--------------------------------------------------------------------------------
/spec/expression/methods/match_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Expression::Base#match') do
 4 |   it 'returns the #match result of the respective Regexp' do
 5 |     expect(RP.parse(/a/).match('a')[0]).to eq 'a'
 6 |   end
 7 | 
 8 |   it 'can be given an offset, just like Regexp#match' do
 9 |     expect(RP.parse(/./).match('ab', 1)[0]).to eq 'b'
10 |   end
11 | 
12 |   it 'works with the #=~ alias' do
13 |     expect(RP.parse(/a/) =~ 'a').to be_a MatchData
14 |   end
15 | end
16 | 
RSpec.describe('Expression::Base#match?') do
  it 'returns true if the Respective Regexp matches' do
    expression = RP.parse(/a/)
    expect(expression.match?('a')).to be true
  end

  it 'returns false if the Respective Regexp does not match' do
    expression = RP.parse(/a/)
    expect(expression.match?('b')).to be false
  end
end
26 | 


--------------------------------------------------------------------------------
/spec/expression/methods/negative_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Expression::Base#negative?') do
 4 |   include_examples 'parse', //,            []  => [:root,             negative?: false]
 5 |   include_examples 'parse', /a/,           [0] => [:literal,          negative?: false]
 6 | 
 7 |   include_examples 'parse', /\b/,          [0] => [:word_boundary,    negative?: false]
 8 |   include_examples 'parse', /\B/,          [0] => [:nonword_boundary, negative?: true]
 9 | 
10 |   include_examples 'parse', /(?=)/,        [0] => [:lookahead,        negative?: false]
11 |   include_examples 'parse', /(?!)/,        [0] => [:nlookahead,       negative?: true]
12 | 
13 |   include_examples 'parse', /(?<=)/,       [0] => [:lookbehind,       negative?: false]
14 |   include_examples 'parse', /(?<!)/,       [0] => [:nlookbehind,      negative?: true]
15 | 
16 |   include_examples 'parse', /[a]/,         [0] => [:character,        negative?: false]
17 |   include_examples 'parse', /[^a]/,        [0] => [:character,        negative?: true]
18 | 
19 |   include_examples 'parse', /\d/,          [0] => [:digit,            negative?: false]
20 |   include_examples 'parse', /\D/,          [0] => [:nondigit,         negative?: true]
21 | 
22 |   include_examples 'parse', /[[:word:]]/,  [0, 0] => [:word,          negative?: false]
23 |   include_examples 'parse', /[[:^word:]]/, [0, 0] => [:word,          negative?: true]
24 | 
25 |   include_examples 'parse', /\p{word}/,    [0] => [:word,             negative?: false]
26 |   include_examples 'parse', /\p{^word}/,   [0] => [:word,             negative?: true]
27 | 
28 |   include_examples 'parse', //,            []  => [:root,             negated?: false]
29 |   include_examples 'parse', /[^a]/,        [0] => [:character,        negated?: true]
30 | end
31 | 


--------------------------------------------------------------------------------
/spec/expression/methods/parts_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Expression::Base#parts') do # pins the Array returned by #parts for several expression kinds
 4 |   include_examples 'parse', //,        []  => [:root,      parts: []] # NOTE(review): keys look like index paths from the root ([] = root itself) - confirm in the 'parse' shared examples
 5 |   include_examples 'parse', /a/,       [0] => [:literal,   parts: ['a']] # terminals: only their own text
 6 |   include_examples 'parse', /\K/,      [0] => [:mark,      parts: ['\K']]
 7 |   include_examples 'parse', /\p{any}/, [0] => [:any,       parts: ['\p{any}']]
 8 |   include_examples 'parse', /[a]/,     [0] => [:character, parts: ['[', s(Literal, 'a'), ']']] # opener, children, closer
 9 |   include_examples 'parse', /[^a]/,    [0] => [:character, parts: ['[^', s(Literal, 'a'), ']']] # the negation is part of the opener
10 |   include_examples 'parse', /(a)/,     [0] => [:capture,   parts: ['(', s(Literal, 'a'), ')']] # s(...) presumably builds the expected expression (class, text, children); helper is defined outside this file
11 |   include_examples 'parse', /(?>a)/,   [0] => [:atomic,    parts: ['(?>', s(Literal, 'a'), ')']]
12 |   include_examples 'parse', /(?=a)/,   [0] => [:lookahead, parts: ['(?=', s(Literal, 'a'), ')']]
13 |   include_examples 'parse', /(?#a)/,   [0] => [:comment,   parts: ['(?#a)']]
14 | 
15 |   include_examples 'parse', /(a(b(c)))/, # nested groups are expected as nested expressions, not as flattened strings
16 |     [0] => [:capture, parts: [
17 |       '(',
18 |       s(Literal, 'a'),
19 |       s(Group::Capture, '(',
20 |         s(Literal, 'b'),
21 |         s(Group::Capture, '(',
22 |           s(Literal, 'c'),
23 |         )
24 |       ),
25 |       ')'
26 |     ]]
27 | 
28 |   include_examples 'parse', /a|b|c/,
29 |     [] => [:root, parts: [ # the root adds no delimiters: its parts are just its children
30 |       s(Alternation, '|',
31 |         s(Alternative, nil, s(Literal, 'a')),
32 |         s(Alternative, nil, s(Literal, 'b')),
33 |         s(Alternative, nil, s(Literal, 'c'))
34 |       )
35 |     ]],
36 |     [0] => [:alternation, parts: [ # the '|' text is interleaved between the alternatives
37 |       s(Alternative, nil, s(Literal, 'a')),
38 |       '|',
39 |       s(Alternative, nil, s(Literal, 'b')),
40 |       '|',
41 |       s(Alternative, nil, s(Literal, 'c'))
42 |     ]]
43 | 
44 |   include_examples 'parse', /[a-z]/,
45 |     [] => [:root, parts: [
46 |       s(CharacterSet, '[',
47 |         s(CharacterSet::Range, '-', s(Literal, 'a'), s(Literal, 'z')),
48 |       )
49 |     ]],
50 |     [0] => [:character, parts: [
51 |       '[',
52 |       s(CharacterSet::Range, '-', s(Literal, 'a'), s(Literal, 'z')),
53 |       ']'
54 |     ]],
55 |     [0, 0] => [:range, parts: [ # the '-' sits between both ends of the range
56 |       s(Literal, 'a'),
57 |       '-',
58 |       s(Literal, 'z')
59 |     ]]
60 | 
61 |   include_examples 'parse', /[a&&b&&c]/,
62 |     [] => [:root, parts: [
63 |       s(CharacterSet, '[',
64 |         s(CharacterSet::Intersection, '&&',
65 |           s(CharacterSet::IntersectedSequence, nil, s(Literal, 'a')),
66 |           s(CharacterSet::IntersectedSequence, nil, s(Literal, 'b')),
67 |           s(CharacterSet::IntersectedSequence, nil, s(Literal, 'c'))
68 |         )
69 |       )
70 |     ]],
71 |     [0, 0] => [:intersection, parts: [ # '&&' is interleaved between the intersected sequences
72 |       s(CharacterSet::IntersectedSequence, nil, s(Literal, 'a')),
73 |       '&&',
74 |       s(CharacterSet::IntersectedSequence, nil, s(Literal, 'b')),
75 |       '&&',
76 |       s(CharacterSet::IntersectedSequence, nil, s(Literal, 'c'))
77 |     ]]
78 | 
79 |   include_examples 'parse', /(a)(?(1)T|F)/,
80 |     [1] => [Conditional::Expression, parts: [ # '(?' opener, condition, branches separated by '|', then the closer
81 |       '(?',
82 |       s(Conditional::Condition, '(1)'),
83 |       s(Conditional::Branch, nil, s(Literal, 'T')),
84 |       '|',
85 |       s(Conditional::Branch, nil, s(Literal, 'F')),
86 |       ')'
87 |     ]]
88 | end
89 | 


--------------------------------------------------------------------------------
/spec/expression/methods/printing_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Expression::Shared#inspect') do
 4 |   it 'includes only essential information' do
 5 |     root = Regexp::Parser.parse(//)
 6 |     expect(root.inspect).to eq '#<Regexp::Expression::Root @expressions=[]>'
 7 | 
 8 |     root = Regexp::Parser.parse(/(a)+/)
 9 |     expect(root.inspect)
10 |       .to match(/#<Regexp::Expression::Root @expressions=\[.+\]/)
11 |     expect(root[0].inspect)
12 |       .to match(/#<Regexp::Expression::Group::Capture @text=.+ @quantifier=.+ @expressions=\[.+\]/)
13 |     expect(root[0].quantifier.inspect)
14 |       .to eq    '#<Regexp::Expression::Quantifier @text="+">'
15 |     expect(root[0][0].inspect)
16 |       .to eq    '#<Regexp::Expression::Literal @text="a">'
17 |   end
18 | end
19 | 
20 | RSpec.describe('Expression::Shared#pretty_print') do
21 |   it 'works' do
22 |     require 'pp'
23 |     pp_to_s = ->(arg) { ''.dup.tap { |buffer| PP.new(buffer).pp(arg) } }
24 | 
25 |     root = Regexp::Parser.parse(/(a)+/)
26 | 
27 |     expect(pp_to_s.(root)).to               start_with '#<Regexp::Expression::Root'
28 |     expect(pp_to_s.(root[0])).to            start_with '#<Regexp::Expression::Group'
29 |     expect(pp_to_s.(root[0].quantifier)).to start_with '#<Regexp::Expression::Quantifier'
30 |     expect(pp_to_s.(root[0][0])).to         start_with '#<Regexp::Expression::Literal'
31 |   end
32 | end
33 | 


--------------------------------------------------------------------------------
/spec/expression/methods/traverse_spec.rb:
--------------------------------------------------------------------------------
  1 | require 'spec_helper'
  2 | 
  3 | RSpec.describe('Subexpression#traverse') do
  4 |   specify('Subexpression#traverse') do
  5 |     root = RP.parse(/a(b(c(d)))|g[h-i]j|klmn/)
  6 | 
  7 |     enters = 0
  8 |     visits = 0
  9 |     exits = 0
 10 | 
 11 |     root.traverse do |event, _exp, _index|
 12 |       enters = (enters + 1) if event == :enter
 13 |       visits = (visits + 1) if event == :visit
 14 |       exits = (exits + 1) if event == :exit
 15 |     end
 16 | 
 17 |     expect(enters).to eq 9
 18 |     expect(enters).to eq exits
 19 | 
 20 |     expect(visits).to eq 9
 21 |   end
 22 | 
 23 |   specify('Subexpression#traverse including self') do
 24 |     root = RP.parse(/a(b(c(d)))|g[h-i]j|klmn/)
 25 | 
 26 |     enters = 0
 27 |     visits = 0
 28 |     exits = 0
 29 | 
 30 |     root.traverse(true) do |event, _exp, _index|
 31 |       enters = (enters + 1) if event == :enter
 32 |       visits = (visits + 1) if event == :visit
 33 |       exits = (exits + 1) if event == :exit
 34 |     end
 35 | 
 36 |     expect(enters).to eq 10
 37 |     expect(enters).to eq exits
 38 | 
 39 |     expect(visits).to eq 9
 40 |   end
 41 | 
 42 |   specify('Subexpression#traverse without a block') do
 43 |     root = RP.parse(/abc/)
 44 |     enum = root.traverse
 45 | 
 46 |     expect(enum).to be_a(Enumerator)
 47 |     event, expr, idx = enum.next
 48 |     expect(event).to eq(:visit)
 49 |     expect(expr).to be_a(Regexp::Expression::Literal)
 50 |     expect(idx).to eq(0)
 51 |   end
 52 | 
 53 |   specify('Subexpression#walk alias') do
 54 |     root = RP.parse(/abc/)
 55 | 
 56 |     expect(root).to respond_to(:walk)
 57 |   end
 58 | 
 59 |   specify('Subexpression#each_expression') do
 60 |     root = RP.parse(/a(?x:b(c))|g[h-k]/)
 61 | 
 62 |     count = 0
 63 |     root.each_expression { count += 1 }
 64 | 
 65 |     expect(count).to eq 13
 66 |   end
 67 | 
 68 |   specify('Subexpression#each_expression including self') do
 69 |     root = RP.parse(/a(?x:b(c))|g[h-k]/)
 70 | 
 71 |     count = 0
 72 |     root.each_expression(true) { count += 1 }
 73 | 
 74 |     expect(count).to eq 14
 75 |   end
 76 | 
 77 |   specify('Subexpression#each_expression with block arity 1') do
 78 |     root = RP.parse(/a(b)c/)
 79 | 
 80 |     texts = []
 81 |     root.each_expression { |exp| texts << exp.text }
 82 | 
 83 |     expect(texts).to eq ['a', '(', 'b', 'c']
 84 |   end
 85 | 
 86 |   specify('Subexpression#each_expression indices') do
 87 |     root = RP.parse(/a(b)c/)
 88 | 
 89 |     indices = []
 90 |     root.each_expression { |_exp, index| (indices << index) }
 91 | 
 92 |     expect(indices).to eq [0, 1, 0, 2]
 93 |   end
 94 | 
 95 |   specify('Subexpression#each_expression indices including self') do
 96 |     root = RP.parse(/a(b)c/)
 97 | 
 98 |     indices = []
 99 |     root.each_expression(true) { |_exp, index| (indices << index) }
100 | 
101 |     expect(indices).to eq [0, 0, 1, 0, 2]
102 |   end
103 | 
104 |   specify('Subexpression#each_expression without a block') do
105 |     root = RP.parse(/abc/)
106 |     enum = root.each_expression
107 | 
108 |     expect(enum).to be_a(Enumerator)
109 |     expr, idx = enum.next
110 |     expect(expr).to be_a(Regexp::Expression::Literal)
111 |     expect(idx).to eq(0)
112 |   end
113 | 
114 |   specify('Subexpression#flat_map without block') do
115 |     root = RP.parse(/a(b([c-e]+))?/)
116 | 
117 |     array = root.flat_map
118 | 
119 |     expect(array).to be_instance_of(Array)
120 |     expect(array.length).to eq 8
121 | 
122 |     array.each do |item|
123 |       expect(item).to be_instance_of(Array)
124 |       expect(item.length).to eq 2
125 |       expect(item.first).to be_a(Regexp::Expression::Base)
126 |       expect(item.last).to be_a(Integer)
127 |     end
128 |   end
129 | 
130 |   specify('Subexpression#flat_map without block including self') do
131 |     root = RP.parse(/a(b([c-e]+))?/)
132 | 
133 |     array = root.flat_map(true)
134 | 
135 |     expect(array).to be_instance_of(Array)
136 |     expect(array.length).to eq 9
137 |   end
138 | 
139 |   specify('Subexpression#flat_map expressions for block with arity 1') do
140 |     root = RP.parse(/a(b(c(d)))/)
141 | 
142 |     result = root.flat_map { |exp| exp.text if exp.terminal? }.compact
143 | 
144 |     expect(result).to eq ['a', 'b', 'c', 'd']
145 |   end
146 | 
147 |   specify('Subexpression#flat_map indices') do
148 |     root = RP.parse(/a(b([c-e]+))?f*g/)
149 | 
150 |     indices = root.flat_map { |_exp, index| index }
151 | 
152 |     expect(indices).to eq [0, 1, 0, 1, 0, 0, 0, 1, 2, 3]
153 |   end
154 | 
155 |   specify('Subexpression#flat_map indices including self') do
156 |     root = RP.parse(/a(b([c-e]+))?f*g/)
157 | 
158 |     indices = root.flat_map(true) { |_exp, index| index }
159 | 
160 |     expect(indices).to eq [0, 0, 1, 0, 1, 0, 0, 0, 1, 2, 3]
161 |   end
162 | 
163 |   specify('Subexpression#flat_map expressions') do
164 |     root = RP.parse(/a(b(c(d)))/)
165 | 
166 |     levels = root.flat_map { |exp| [exp.level, exp.text] if exp.terminal? }.compact
167 | 
168 |     expect(levels).to eq [[0, 'a'], [1, 'b'], [2, 'c'], [3, 'd']]
169 |   end
170 | 
171 |   specify('Subexpression#flat_map expressions including self') do
172 |     root = RP.parse(/a(b(c(d)))/)
173 | 
174 |     levels = root.flat_map(true) { |exp| [exp.level, exp.to_s] }.compact
175 | 
176 |     expect(levels).to eq [[0, 'a(b(c(d)))'], [0, 'a'], [0, '(b(c(d)))'], [1, 'b'], [1, '(c(d))'], [2, 'c'], [2, '(d)'], [3, 'd']]
177 |   end
178 | end
179 | 


--------------------------------------------------------------------------------
/spec/expression/subexpression_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Expression::Subexpression) do
 4 |   # check #ts, #te: each Alternative spans its own four characters; the '|' between them is covered by the Alternation only
 5 |   include_examples 'parse', /abcd|ghij|klmn|pqur/,
 6 |     [0]    => [Alternation, ts: 0,  te: 19],
 7 |     [0, 0] => [Alternative, ts: 0,  te: 4],
 8 |     [0, 1] => [Alternative, ts: 5,  te: 9],
 9 |     [0, 2] => [Alternative, ts: 10, te: 14],
10 |     [0, 3] => [Alternative, ts: 15, te: 19]
11 | 
12 |   # check #nesting_level: in every row below it equals the length of the index path used as key
13 |   include_examples 'parse', /a(b(\d|[ef-g[h]]))/,
14 |     [0]                   => [Literal,              to_s: 'a',            nesting_level: 1],
15 |     [1, 0]                => [Literal,              to_s: 'b',            nesting_level: 2],
16 |     [1, 1, 0]             => [Alternation,          to_s: '\d|[ef-g[h]]', nesting_level: 3],
17 |     [1, 1, 0, 0]          => [Alternative,          to_s: '\d',           nesting_level: 4],
18 |     [1, 1, 0, 0, 0]       => [CharacterType::Digit, to_s: '\d',           nesting_level: 5],
19 |     [1, 1, 0, 1]          => [Alternative,          to_s: '[ef-g[h]]',    nesting_level: 4],
20 |     [1, 1, 0, 1, 0]       => [CharacterSet,         to_s: '[ef-g[h]]',    nesting_level: 5],
21 |     [1, 1, 0, 1, 0, 0]    => [Literal,              to_s: 'e',            nesting_level: 6],
22 |     [1, 1, 0, 1, 0, 1]    => [CharacterSet::Range,  to_s: 'f-g',          nesting_level: 6],
23 |     [1, 1, 0, 1, 0, 1, 0] => [Literal,              to_s: 'f',            nesting_level: 7],
24 |     [1, 1, 0, 1, 0, 2, 0] => [Literal,              to_s: 'h',            nesting_level: 7]
25 | 
26 |   specify('#dig') do # #dig follows a path of child indices and returns nil when the path cannot be followed
27 |     root = RP.parse(/(((a)))/)
28 | 
29 |     expect(root.dig(0).to_s).to eq '(((a)))'
30 |     expect(root.dig(0, 0, 0, 0).to_s).to eq 'a'
31 |     expect(root.dig(0, 0, 0, 0, 0)).to be_nil # one step past the innermost literal
32 |     expect(root.dig(3, 7)).to be_nil # the root has no child at index 3
33 |   end
34 | end
35 | 


--------------------------------------------------------------------------------
/spec/expression/te_ts_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Expression::Shared#te,ts') do
 4 |   # Many tokens/expressions have their own tests for #te and #ts.
 5 |   # This is an integration-like test to ensure they are correct in conjunction.
 6 |   it 'is correct irrespective of nesting or preceding tokens' do
 7 |     regexp = regexp_with_all_features
 8 |     source = regexp.source
 9 |     root = RP.parse(regexp)
10 | 
11 |     checked_exps = root.each_expression.with_object([]) do |(exp), acc|
12 |       acc.each { |e| fail "dupe: #{[e, exp]}" if e.to_s == exp.to_s }
13 |       acc << exp unless exp.is_a?(Sequence) || exp.is_a?(WhiteSpace)
14 |     end
15 |     expect(checked_exps).not_to be_empty
16 | 
17 |     checked_exps.each do |exp|
18 |       start = source.index(exp.to_s(:original))
19 |       expect(exp.ts).to eq(start),
20 |         "expected #{exp.class} #{exp} to start at #{start}, got #{exp.ts}"
21 | 
22 |       end_idx = start + exp.base_length
23 |       expect(exp.te).to eq(end_idx),
24 |         "expected #{exp.class} #{exp} to end at #{end_idx}, got #{exp.te}"
25 |     end
26 |   end
27 | end
28 | 


--------------------------------------------------------------------------------
/spec/expression/to_h_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Expression::Base#to_h') do # pins the Hash form of expressions and quantifiers
 4 |   include_examples 'parse', /abc/, [] => [Root, to_h: { # the root hash nests its children under :expressions
 5 |     token: :root,
 6 |     type: :expression,
 7 |     text: 'abc',
 8 |     starts_at: 0,
 9 |     length: 3,
10 |     quantifier: nil,
11 |     options: {},
12 |     level: 0,
13 |     set_level: 0,
14 |     conditional_level: 0,
15 |     expressions: [
16 |       { # the single Literal child: same keys as the root, minus :expressions
17 |         token: :literal,
18 |         type: :literal,
19 |         text: 'abc',
20 |         starts_at: 0,
21 |         length: 3,
22 |         quantifier: nil,
23 |         options: {},
24 |         level: 0,
25 |         set_level: 0,
26 |         conditional_level: 0
27 |       }
28 |     ]
29 |   }]
30 | 
31 |   include_examples 'parse', /a{2,4}/, [0, :q] => [Quantifier, to_h: { # NOTE(review): :q presumably selects the quantifier of expression [0] - confirm in the 'parse' shared examples
32 |     max: 4,
33 |     min: 2,
34 |     mode: :greedy,
35 |     text: '{2,4}',
36 |     token: :interval,
37 |   }]
38 | 
39 |   specify('Conditional#to_h') do # smoke test: only asserts that #to_h does not raise for a conditional
40 |     root = RP.parse('(?<A>a)(?(<A>)b|c)')
41 |     expect { root.to_h }.not_to(raise_error)
42 |   end
43 | end
44 | 


--------------------------------------------------------------------------------
/spec/expression/to_s_spec.rb:
--------------------------------------------------------------------------------
  1 | require 'spec_helper'
  2 | 
  3 | RSpec.describe('Expression::Base#to_s') do # round-trip checks: parse a pattern, then #to_s must reproduce it
  4 |   def parse_frozen(pattern) # parses via RP.parse and deep-freezes the result with Leto
  5 |     Leto.deep_freeze(RP.parse(pattern))
  6 |   end
  7 | 
  8 |   def expect_round_trip(pattern) # asserts that #to_s of the frozen tree is eql to the given pattern
  9 |     parsed = parse_frozen(pattern)
 10 | 
 11 |     expect(parsed.to_s).to eql(pattern)
 12 |   end
 13 | 
 14 |   specify('literal alternation') do
 15 |     expect_round_trip('abcd|ghij|klmn|pqur')
 16 |   end
 17 | 
 18 |   specify('quantified alternations') do
 19 |     expect_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
 20 |   end
 21 | 
 22 |   specify('quantified sets') do
 23 |     expect_round_trip('[abc]+|[^def]{3,6}')
 24 |   end
 25 | 
 26 |   specify('property sets') do
 27 |     expect_round_trip('[\a\b\p{Lu}\P{Z}\c\d]+')
 28 |   end
 29 | 
 30 |   specify('groups') do
 31 |     expect_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++")
 32 |   end
 33 | 
 34 |   specify('assertions') do
 35 |     expect_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?')
 36 |   end
 37 | 
 38 |   specify('comments') do
 39 |     expect_round_trip('(?#start)a(?#middle)b(?#end)')
 40 |   end
 41 | 
 42 |   specify('options') do
 43 |     expect_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
 44 |   end
 45 | 
 46 |   specify('url') do
 47 |     expect_round_trip('(^$)|(^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*' + '\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$)')
 48 |   end
 49 | 
 50 |   specify('multiline source') do
 51 |     multiline = /
 52 |           \A
 53 |           a?      # One letter
 54 |           b{2,5}  # Another one
 55 |           [c-g]+  # A set
 56 |           \z
 57 |         /x
 58 | 
 59 |     expect(parse_frozen(multiline).to_s).to eql(multiline.source) # the Regexp object is parsed; the result is compared to its #source
 60 |   end
 61 | 
 62 |   specify('multiline #to_s') do
 63 |     multiline = /
 64 |           \A
 65 |           a?      # One letter
 66 |           b{2,5}  # Another one
 67 |           [c-g]+  # A set
 68 |           \z
 69 |         /x
 70 | 
 71 |     expect_round_trip(multiline.to_s) # here the String from Regexp#to_s is parsed instead of the Regexp
 72 |   end
 73 | 
 74 |   specify('multiline with free space before quantifiers') do
 75 |     multiline = /
 76 |           \A
 77 |           a   ?             # One letter
 78 |           b {2,5}           # Another one
 79 |           [c-g]  +          # A set
 80 |           |                 #
 81 |           [h-j]  +          (?# Extra test for Sequence#quantify )
 82 |           \z
 83 |         /x
 84 | 
 85 |     expect_round_trip(multiline.to_s)
 86 |   end
 87 | 
 88 |   specify('regexp with all features') do
 89 |     expect_round_trip(regexp_with_all_features.source)
 90 |   end
 91 | 
 92 |   # special case: implicit groups used for chained quantifiers produce no parens
 93 |   specify 'chained quantifiers #to_s' do
 94 |     pattern = /a+{1}{2}/
 95 |     root = parse_frozen(pattern)
 96 |     expect(root.to_s).to eql('a+{1}{2}')
 97 |   end
 98 | 
 99 |   # regression test for https://github.com/ammar/regexp_parser/issues/74
100 |   specify('non-ascii comment') do
101 |     pattern = '(?x) 😋 # 😋'
102 |     root = RP.parse(pattern)
103 |     expect(root.last).to be_a(Regexp::Expression::Comment) # the trailing free-space comment is the last root child
104 |     expect(root.last.to_s).to eql('# 😋')
105 |     expect(root.to_s).to eql(pattern)
106 |   end
107 | end
108 | 


--------------------------------------------------------------------------------
/spec/lexer/all_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Lexer) do
 4 |   specify('lexer returns an array') do
 5 |     expect(RL.lex('abc')).to be_instance_of(Array)
 6 |   end
 7 | 
 8 |   specify('lexer returns tokens') do
 9 |     tokens = RL.lex('^abc+[^one]{2,3}\b\d\C-C$')
10 |     expect(tokens).to all(be_a Regexp::Token)
11 |     expect(tokens.map { |token| token.to_a.length }).to all(eq 8)
12 |   end
13 | 
14 |   specify('lexer token count') do
15 |     tokens = RL.lex(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
16 |     expect(tokens.length).to eq 28
17 |   end
18 | 
19 |   specify('lexer scan alias') do
20 |     expect(RL.scan(/a|b|c/)).to eq RL.lex(/a|b|c/)
21 |   end
22 | end
23 | 


--------------------------------------------------------------------------------
/spec/lexer/conditionals_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Conditional lexing') do # NOTE(review): columns look like type, token, text, ts, te, level, set_level, conditional_level - confirm in the 'lex' shared examples
 4 |   include_examples 'lex', /(?<A>a)(?(<A>)b|c)/, # last column is 1 for condition and separator, 0 for open and close
 5 |     3 => [:conditional, :open,       '(?',     7,  9, 0, 0, 0],
 6 |     4 => [:conditional, :condition,  '(<A>)',  9, 14, 0, 0, 1],
 7 |     6 => [:conditional, :separator,  '|',     15, 16, 0, 0, 1],
 8 |     8 => [:conditional, :close,      ')',     17, 18, 0, 0, 0]
 9 | 
10 |   include_examples 'lex', /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/, # conditionals nested in groups, with sets inside a branch
11 |      0 => [:group,       :capture,     '(',       0,  1, 0, 0, 0],
12 |      1 => [:group,       :named,       '(?<A>',   1,  6, 1, 0, 0],
13 |      5 => [:conditional, :open,        '(?',     13, 15, 2, 0, 0],
14 |      6 => [:conditional, :condition,   '(<A>)',  15, 20, 2, 0, 1],
15 |      8 => [:conditional, :separator,   '|',      21, 22, 2, 0, 1],
16 |     10 => [:conditional, :open,        '(?',     23, 25, 3, 0, 1],
17 |     11 => [:conditional, :condition,   '(<B>)',  25, 30, 3, 0, 2],
18 |     12 => [:set,         :open,        '[',      30, 31, 3, 0, 2],
19 |     13 => [:literal,     :literal,     'e',      31, 32, 3, 1, 2], # seventh column is 1 only for tokens between '[' and ']'
20 |     14 => [:set,         :range,       '-',      32, 33, 3, 1, 2],
21 |     15 => [:literal,     :literal,     'g',      33, 34, 3, 1, 2],
22 |     16 => [:set,         :close,       ']',      34, 35, 3, 0, 2],
23 |     17 => [:conditional, :separator,   '|',      35, 36, 3, 0, 2],
24 |     23 => [:conditional, :close,       ')',      41, 42, 3, 0, 1],
25 |     25 => [:conditional, :close,       ')',      43, 44, 2, 0, 0],
26 |     26 => [:group,       :close,       ')',      44, 45, 1, 0, 0],
27 |     27 => [:group,       :close,       ')',      45, 46, 0, 0, 0]
28 | 
29 |   include_examples 'lex', /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/, # conditionals nested three deep; last column peaks at 3
30 |      9 => [:conditional, :open,       '(?',    9, 11, 0, 0, 0],
31 |     10 => [:conditional, :condition,  '(1)',  11, 14, 0, 0, 1],
32 |     11 => [:conditional, :open,       '(?',   14, 16, 0, 0, 1],
33 |     12 => [:conditional, :condition,  '(2)',  16, 19, 0, 0, 2],
34 |     13 => [:conditional, :open,       '(?',   19, 21, 0, 0, 2],
35 |     14 => [:conditional, :condition,  '(3)',  21, 24, 0, 0, 3],
36 |     16 => [:conditional, :separator,  '|',    25, 26, 0, 0, 3],
37 |     18 => [:conditional, :close,      ')',    27, 28, 0, 0, 2],
38 |     19 => [:conditional, :close,      ')',    28, 29, 0, 0, 1],
39 |     20 => [:conditional, :separator,  '|',    29, 30, 0, 0, 1],
40 |     21 => [:conditional, :open,       '(?',   30, 32, 0, 0, 1],
41 |     22 => [:conditional, :condition,  '(3)',  32, 35, 0, 0, 2],
42 |     23 => [:conditional, :open,       '(?',   35, 37, 0, 0, 2],
43 |     24 => [:conditional, :condition,  '(2)',  37, 40, 0, 0, 3],
44 |     26 => [:conditional, :separator,  '|',    41, 42, 0, 0, 3],
45 |     28 => [:conditional, :close,      ')',    43, 44, 0, 0, 2],
46 |     29 => [:conditional, :separator,  '|',    44, 45, 0, 0, 2],
47 |     30 => [:conditional, :open,       '(?',   45, 47, 0, 0, 2],
48 |     31 => [:conditional, :condition,  '(1)',  47, 50, 0, 0, 3],
49 |     33 => [:conditional, :separator,  '|',    51, 52, 0, 0, 3],
50 |     35 => [:conditional, :close,      ')',    53, 54, 0, 0, 2],
51 |     36 => [:conditional, :close,      ')',    54, 55, 0, 0, 1],
52 |     37 => [:conditional, :close,      ')',    55, 56, 0, 0, 0]
53 | end
54 | 


--------------------------------------------------------------------------------
/spec/lexer/delimiters_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Literal delimiter lexing') do # unbalanced or non-quantifier braces and brackets are expected to lex as plain literals
 4 |   include_examples 'lex', '}',
 5 |     0 => [:literal,     :literal,       '}',       0,  1,  0, 0, 0]
 6 | 
 7 |   include_examples 'lex', '}}',
 8 |     0 => [:literal,     :literal,       '}}',      0,  2,  0, 0, 0]
 9 | 
10 |   include_examples 'lex', '{',
11 |     0 => [:literal,     :literal,       '{',       0,  1,  0, 0, 0]
12 | 
13 |   include_examples 'lex', '{{',
14 |     0 => [:literal,     :literal,       '{{',      0,  2,  0, 0, 0]
15 | 
16 |   include_examples 'lex', '{}',
17 |     0 => [:literal,     :literal,       '{}',      0,  2,  0, 0, 0]
18 | 
19 |   include_examples 'lex', '}{',
20 |     0 => [:literal,     :literal,       '}{',      0,  2,  0, 0, 0]
21 | 
22 |   include_examples 'lex', '}{+', # '+' is preceded by a token holding only '{', so '}' and '{' become separate literals
23 |     0 => [:literal,     :literal,       '}',       0,  1,  0, 0, 0],
24 |     1 => [:literal,     :literal,       '{',       1,  2,  0, 0, 0],
25 |     2 => [:quantifier,  :one_or_more,   '+',       2,  3,  0, 0, 0]
26 | 
27 |   include_examples 'lex', '{{var}}',
28 |     0 => [:literal,     :literal,       '{{var}}',  0,  7,  0, 0, 0]
29 | 
30 |   include_examples 'lex', 'a{b}c',
31 |     0 => [:literal,     :literal,       'a{b}c',    0,  5,  0, 0, 0]
32 | 
33 |   include_examples 'lex', 'a{1,2', # an unterminated interval stays part of the literal text
34 |     0 => [:literal,     :literal,       'a{1,2',    0,  5,  0, 0, 0]
35 | 
36 |   include_examples 'lex', '({.+})',
37 |     0 => [:group,       :capture,       '(',    0,  1,  0, 0, 0],
38 |     1 => [:literal,     :literal,       '{',    1,  2,  1, 0, 0],
39 |     2 => [:meta,        :dot,           '.',    2,  3,  1, 0, 0],
40 |     3 => [:quantifier,  :one_or_more,   '+',    3,  4,  1, 0, 0],
41 |     4 => [:literal,     :literal,       '}',    4,  5,  1, 0, 0],
42 |     5 => [:group,       :close,         ')',    5,  6,  0, 0, 0]
43 | 
44 |   include_examples 'lex', ']',
45 |     0 => [:literal,     :literal,       ']',        0,  1,  0, 0, 0]
46 | 
47 |   include_examples 'lex', ']]',
48 |     0 => [:literal,     :literal,       ']]',       0,  2,  0, 0, 0]
49 | 
50 |   include_examples 'lex', ']\[', # the escaped '[' is an :escape/:set_open token, not a literal
51 |     0 => [:literal,     :literal,       ']',        0,  1,  0, 0, 0],
52 |     1 => [:escape,      :set_open,      '\[',       1,  3,  0, 0, 0]
53 | 
54 |   include_examples 'lex', '()',
55 |     0 => [:group,       :capture,       '(',        0,  1,  0, 0, 0],
56 |     1 => [:group,       :close,         ')',        1,  2,  0, 0, 0]
57 | 
58 |   include_examples 'lex', '{abc:.+}}}[^}]]}', # mixes literal braces with a real set: the first ']' closes the set, the rest is literal
59 |     0 => [:literal,     :literal,       '{abc:',    0,  5,  0, 0, 0],
60 |     1 => [:meta,        :dot,           '.',        5,  6,  0, 0, 0],
61 |     2 => [:quantifier,  :one_or_more,   '+',        6,  7,  0, 0, 0],
62 |     3 => [:literal,     :literal,       '}}}',      7,  10, 0, 0, 0],
63 |     4 => [:set,         :open,          '[',        10, 11, 0, 0, 0],
64 |     5 => [:set,         :negate,        '^',        11, 12, 0, 1, 0],
65 |     6 => [:literal,     :literal,       '}',        12, 13, 0, 1, 0],
66 |     7 => [:set,         :close,         ']',        13, 14, 0, 0, 0],
67 |     8 => [:literal,     :literal,       ']}',       14, 16, 0, 0, 0]
68 | end
69 | 


--------------------------------------------------------------------------------
/spec/lexer/escapes_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Escape lexing') do # lexing of \u{...} codepoint lists, with and without a quantifier
 4 |   include_examples 'lex', '\u{62}',
 5 |     0 => [:escape,  :codepoint_list, '\u{62}',       0, 6,  0, 0, 0]
 6 | 
 7 |   include_examples 'lex', '\u{62 63 64}',
 8 |     0 => [:escape,  :codepoint_list, '\u{62 63 64}', 0, 12, 0, 0, 0]
 9 | 
10 |   include_examples 'lex', '\u{62 63 64}+', # a quantified list is split so that '+' follows a token holding only the last codepoint
11 |     0 => [:escape,     :codepoint_list, '\u{62 63}',  0,  9,  0, 0, 0],
12 |     1 => [:escape,     :codepoint_list, '\u{64}',     9,  15, 0, 0, 0], # NOTE(review): offsets follow the emitted texts (9 + 6 = 15) and so exceed the 13-character source
13 |     2 => [:quantifier, :one_or_more,    '+',          15, 16, 0, 0, 0]
14 | end
15 | 


--------------------------------------------------------------------------------
/spec/lexer/keep_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Keep lexing') do # \K is expected to lex as a :keep/:mark token
 4 |   include_examples 'lex', /ab\Kcd/,
 5 |     1 => [:keep, :mark, '\K', 2,  4,  0, 0, 0]
 6 | 
 7 |   include_examples 'lex', /(a\Kb)|(c\\\Kd)ef/, # the second \K directly follows an escaped backslash and must still be recognised
 8 |     2 => [:keep, :mark, '\K', 2,  4,  1, 0, 0],
 9 |     9 => [:keep, :mark, '\K', 11, 13, 1, 0, 0]
10 | end
11 | 


--------------------------------------------------------------------------------
/spec/lexer/literals_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Literal lexing') do # note: the offsets below advance by one per character, also for multibyte characters
 4 |   # ascii, single byte characters
 5 |   include_examples 'lex', 'a',
 6 |     0 => [:literal,     :literal,       'a',        0, 1, 0, 0, 0]
 7 | 
 8 |   include_examples 'lex', 'ab+', # the character right before a quantifier becomes its own literal token
 9 |     0 => [:literal,     :literal,       'a',        0, 1, 0, 0, 0],
10 |     1 => [:literal,     :literal,       'b',        1, 2, 0, 0, 0],
11 |     2 => [:quantifier,  :one_or_more,   '+',        2, 3, 0, 0, 0]
12 | 
13 |   # 2 byte wide characters
14 |   include_examples 'lex', 'äöü+',
15 |     0 => [:literal,     :literal,       'äö',       0, 2, 0, 0, 0],
16 |     1 => [:literal,     :literal,       'ü',        2, 3, 0, 0, 0],
17 |     2 => [:quantifier,  :one_or_more,   '+',        3, 4, 0, 0, 0]
18 | 
19 |   # 3 byte wide characters, Japanese
20 |   include_examples 'lex', 'ab?れます+cd',
21 |     0 => [:literal,     :literal,       'a',        0, 1, 0, 0, 0],
22 |     1 => [:literal,     :literal,       'b',        1, 2, 0, 0, 0],
23 |     2 => [:quantifier,  :zero_or_one,   '?',        2, 3, 0, 0, 0],
24 |     3 => [:literal,     :literal,       'れま',     3, 5, 0, 0, 0],
25 |     4 => [:literal,     :literal,       'す',       5, 6, 0, 0, 0],
26 |     5 => [:quantifier,  :one_or_more,   '+',        6, 7, 0, 0, 0],
27 |     6 => [:literal,     :literal,       'cd',       7, 9, 0, 0, 0]
28 | 
29 |   # 4 byte wide characters, Osmanya
30 |   include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
31 |     0 => [:literal,     :literal,       '𐒀',        0, 1, 0, 0, 0],
32 |     1 => [:literal,     :literal,       '𐒁',        1, 2, 0, 0, 0],
33 |     2 => [:quantifier,  :zero_or_one,   '?',        2, 3, 0, 0, 0],
34 |     3 => [:literal,     :literal,       '𐒂a',       3, 5, 0, 0, 0],
35 |     4 => [:literal,     :literal,       'b',        5, 6, 0, 0, 0],
36 |     5 => [:quantifier,  :one_or_more,   '+',        6, 7, 0, 0, 0],
37 |     6 => [:literal,     :literal,       '𐒃',        7, 8, 0, 0, 0]
38 | 
39 |   include_examples 'lex', 'mu𝄞?si*𝄫c+', # several quantifiers, each splitting off the character right before it
40 |     0 => [:literal,     :literal,       'mu',       0, 2, 0, 0, 0],
41 |     1 => [:literal,     :literal,       '𝄞',        2, 3, 0, 0, 0],
42 |     2 => [:quantifier,  :zero_or_one,   '?',        3, 4, 0, 0, 0],
43 |     3 => [:literal,     :literal,       's',        4, 5, 0, 0, 0],
44 |     4 => [:literal,     :literal,       'i',        5, 6, 0, 0, 0],
45 |     5 => [:quantifier,  :zero_or_more,  '*',        6, 7, 0, 0, 0],
46 |     6 => [:literal,     :literal,       '𝄫',        7, 8, 0, 0, 0],
47 |     7 => [:literal,     :literal,       'c',        8, 9, 0, 0, 0],
48 |     8 => [:quantifier,  :one_or_more,   '+',        9, 10, 0, 0, 0]
49 | 
50 |   specify('lex single 2 byte char') do # a lone multibyte character plus '+' must give exactly two tokens
51 |     tokens = RL.lex("\u0627+")
52 |     expect(tokens.count).to eq 2
53 |   end
54 | 
55 |   specify('lex single 3 byte char') do
56 |     tokens = RL.lex("\u308C+")
57 |     expect(tokens.count).to eq 2
58 |   end
59 | 
60 |   specify('lex single 4 byte char') do
61 |     tokens = RL.lex("\u{1D11E}+")
62 |     expect(tokens.count).to eq 2
63 |   end
64 | end
65 | 


--------------------------------------------------------------------------------
/spec/lexer/refcalls_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('RefCall lexing') do # lexer output for \k back-references and \g subexpression calls
 4 |   # Traditional numerical group back-reference. Each row maps a token index to [type, token, text, start, end, 0, 0, 0]; the trailing zeros are presumably nesting levels (confirm in the 'lex' shared examples).
 5 |   include_examples 'lex', '(abc)\1',
 6 |     3 => [:backref, :number,                '\1',         5,  7, 0, 0, 0]
 7 | 
 8 |   # Group back-references, named, numbered, and relative; every case is checked with both <...> and '...' delimiters
 9 |   include_examples 'lex', '(?<X>abc)\k<X>',
10 |     3 => [:backref, :name_ref,              '\k<X>',      9, 14, 0, 0, 0]
11 |   include_examples 'lex', "(?<X>abc)\\k'X'",
12 |     3 => [:backref, :name_ref,              "\\k'X'",     9, 14, 0, 0, 0]
13 | 
14 |   include_examples 'lex', '(abc)\k<1>',
15 |     3 => [:backref, :number_ref,            '\k<1>',      5, 10, 0, 0, 0]
16 |   include_examples 'lex', "(abc)\\k'1'",
17 |     3 => [:backref, :number_ref,            "\\k'1'",     5, 10, 0, 0, 0]
18 | 
19 |   include_examples 'lex', '(abc)\k<-1>',
20 |     3 => [:backref, :number_rel_ref,        '\k<-1>',     5, 11, 0, 0, 0]
21 |   include_examples 'lex', "(abc)\\k'-1'",
22 |     3 => [:backref, :number_rel_ref,        "\\k'-1'",    5, 11, 0, 0, 0]
23 | 
24 |   # Sub-expression invocation, named, numbered, and relative
25 |   include_examples 'lex', '(?<X>abc)\g<X>',
26 |     3 => [:backref, :name_call,             '\g<X>',      9, 14, 0, 0, 0]
27 |   include_examples 'lex', "(?<X>abc)\\g'X'",
28 |     3 => [:backref, :name_call,             "\\g'X'",     9, 14, 0, 0, 0]
29 | 
30 |   include_examples 'lex', '(abc)\g<1>',
31 |     3 => [:backref, :number_call,           '\g<1>',      5, 10, 0, 0, 0]
32 |   include_examples 'lex', "(abc)\\g'1'",
33 |     3 => [:backref, :number_call,           "\\g'1'",     5, 10, 0, 0, 0]
34 | 
35 |   include_examples 'lex', '\g<0>', # call number 0 with no group in the pattern; it is the only token (index 0)
36 |     0 => [:backref, :number_call,           '\g<0>',      0,  5, 0, 0, 0]
37 |   include_examples 'lex', "\\g'0'",
38 |     0 => [:backref, :number_call,           "\\g'0'",     0,  5, 0, 0, 0]
39 | 
40 |   include_examples 'lex', '(abc)\g<-1>',
41 |     3 => [:backref, :number_rel_call,       '\g<-1>',     5, 11, 0, 0, 0]
42 |   include_examples 'lex', "(abc)\\g'-1'",
43 |     3 => [:backref, :number_rel_call,       "\\g'-1'",    5, 11, 0, 0, 0]
44 | 
45 |   include_examples 'lex', '(abc)\g<+1>', # a positive relative number is lexed as :number_rel_call too
46 |     3 => [:backref, :number_rel_call,       '\g<+1>',     5, 11, 0, 0, 0]
47 |   include_examples 'lex', "(abc)\\g'+1'",
48 |     3 => [:backref, :number_rel_call,       "\\g'+1'",    5, 11, 0, 0, 0]
49 | 
50 |   # Group back-references, with nesting level (the "-0" suffix after the name or number)
51 |   include_examples 'lex', '(?<X>abc)\k<X-0>',
52 |     3 => [:backref, :name_recursion_ref,    '\k<X-0>',    9, 16, 0, 0, 0]
53 |   include_examples 'lex', "(?<X>abc)\\k'X-0'",
54 |     3 => [:backref, :name_recursion_ref,    "\\k'X-0'",   9, 16, 0, 0, 0]
55 | 
56 |   include_examples 'lex', '(abc)\k<1-0>',
57 |     3 => [:backref, :number_recursion_ref,  '\k<1-0>',    5, 12, 0, 0, 0]
58 |   include_examples 'lex', "(abc)\\k'1-0'",
59 |     3 => [:backref, :number_recursion_ref,  "\\k'1-0'",   5, 12, 0, 0, 0]
60 | end
61 | 


--------------------------------------------------------------------------------
/spec/parser/all_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Parser) do # basic RP.parse behaviour: return value, block form, root options, error cases
 4 |   specify('parse returns a root expression') do
 5 |     expect(RP.parse('abc')).to be_instance_of(Root)
 6 |   end
 7 | 
 8 |   specify('parse can be called with block') do
 9 |     expect(RP.parse('abc') { |root| root.class }).to eq Root # parse returns the value of the block
10 |   end
11 | 
12 |   specify('parse root contains expressions') do
13 |     root = RP.parse(/^a.c+[^one]{2,3}\b\d\\\C-C$/) # input is a Regexp literal here, not a String
14 |     expect(root.expressions).to all(be_a Regexp::Expression::Base)
15 |   end
16 | 
17 |   specify('parse root options mi') do
18 |     root = RP.parse(/[abc]/mi)
19 | 
20 |     expect(root.m?).to be true
21 |     expect(root.i?).to be true
22 |     expect(root.x?).to be false # /x was not among the flags of the literal
23 |   end
24 | 
25 |   specify('parse no quantifier target raises error') do
26 |     expect { RP.parse('?abc') }.to raise_error(Regexp::Parser::Error) # `?` has nothing before it to quantify
27 |   end
28 | 
29 |   specify('parse sequence no quantifier target raises error') do
30 |     expect { RP.parse('abc|?def') }.to raise_error(Regexp::Parser::Error) # `?` is the first thing after `|`
31 |   end
32 | end
33 | 


--------------------------------------------------------------------------------
/spec/parser/alternation_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Alternation parsing') do # keys are index paths into the parsed tree, presumably resolved by the 'parse' shared examples
 4 |   include_examples 'parse', /a|b/,
 5 |     [0]       => [Alternation, text: '|', count: 2],
 6 |     [0, 0]    => [Alternative, text: '',  count: 1],
 7 |     [0, 0, 0] => [:literal,    text: 'a'          ],
 8 |     [0, 1]    => [Alternative, text: '',  count: 1],
 9 |     [0, 1, 0] => [:literal,    text: 'b'          ]
10 | 
11 |   include_examples 'parse', /a|(b)c/, # the second alternative holds two expressions: the group and the literal
12 |     [0]       => [Alternation, text: '|', count: 2],
13 |     [0, 0]    => [Alternative, text: '',  count: 1],
14 |     [0, 0, 0] => [:literal,    text: 'a'          ],
15 |     [0, 1]    => [Alternative, text: '',  count: 2],
16 |     [0, 1, 0] => [:capture,    to_s: '(b)'        ],
17 |     [0, 1, 1] => [:literal,    text: 'c'          ]
18 | 
19 |   include_examples 'parse', /(ab??|cd*|ef+)*|(gh|ij|kl)?/, # nested alternations; only the capture groups are quantified
20 |     [0]                => [Alternation, text: '|', count: 2, quantified?: false],
21 |     [0, 0]             => [Alternative, text: '',  count: 1, quantified?: false],
22 |     [0, 0, 0]          => [:capture,               count: 1, quantified?: true ],
23 |     [0, 0, 0, 0]       => [Alternation, text: '|', count: 3                    ],
24 |     [0, 0, 0, 0, 0]    => [Alternative, text: '',  count: 2                    ],
25 |     [0, 0, 0, 0, 0, 0] => [:literal,    to_s: 'a'                              ],
26 |     [0, 0, 0, 0, 0, 1] => [:literal,    to_s: 'b??'                            ],
27 |     [0, 1]             => [Alternative, text: '',  count: 1, quantified?: false],
28 |     [0, 1, 0]          => [:capture,               count: 1, quantified?: true ]
29 | 
30 |   # test correct starts_at values for empty sequences: three pipes yield four empty alternatives
31 |   include_examples 'parse', /|||/,
32 |     [0]       => [Alternation, text: '|', count: 4, starts_at: 0],
33 |     [0, 0]    => [Alternative, to_s: '',  count: 0, starts_at: 0],
34 |     [0, 1]    => [Alternative, to_s: '',  count: 0, starts_at: 1],
35 |     [0, 2]    => [Alternative, to_s: '',  count: 0, starts_at: 2],
36 |     [0, 3]    => [Alternative, to_s: '',  count: 0, starts_at: 3]
37 | 
38 |   # test correct starts_at values for non-empty sequences: each alternative starts right after the preceding pipe
39 |   include_examples 'parse', /ab|cd|ef|gh/,
40 |     [0]       => [Alternation, text: '|',   count: 4, starts_at: 0],
41 |     [0, 0]    => [Alternative, to_s: 'ab',  count: 1, starts_at: 0],
42 |     [0, 1]    => [Alternative, to_s: 'cd',  count: 1, starts_at: 3],
43 |     [0, 2]    => [Alternative, to_s: 'ef',  count: 1, starts_at: 6],
44 |     [0, 3]    => [Alternative, to_s: 'gh',  count: 1, starts_at: 9]
45 | end
46 | 


--------------------------------------------------------------------------------
/spec/parser/anchors_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Anchor parsing') do # each row: regexp, index of the expression under test => [type, token, expected class]
 4 |   include_examples 'parse', /^a/,  0 => [:anchor, :bol,              Anchor::BOL]
 5 |   include_examples 'parse', /a$/,  1 => [:anchor, :eol,              Anchor::EOL]
 6 | 
 7 |   include_examples 'parse', /\Aa/, 0 => [:anchor, :bos,              Anchor::BOS]
 8 |   include_examples 'parse', /a\z/, 1 => [:anchor, :eos,              Anchor::EOS]
 9 |   include_examples 'parse', /a\Z/, 1 => [:anchor, :eos_ob_eol,       Anchor::EOSobEOL]
10 | 
11 |   include_examples 'parse', /a\b/, 1 => [:anchor, :word_boundary,    Anchor::WordBoundary]
12 |   include_examples 'parse', /a\B/, 1 => [:anchor, :nonword_boundary, Anchor::NonWordBoundary]
13 | 
14 |   include_examples 'parse', /a\G/, 1 => [:anchor, :match_start,      Anchor::MatchStart]
15 | 
16 |   include_examples 'parse', /\\A/, 0 => [:escape, :backslash,        EscapeSequence::Literal] # escaped backslash before "A" must not be read as the \A anchor
17 | end
18 | 


--------------------------------------------------------------------------------
/spec/parser/conditionals_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Conditional parsing') do # (?(cond)yes|no) constructs: child 0 is the condition, children 1 and 2 are the branches
 4 |   include_examples 'parse', /(?<A>a)(?(<A>)T|F)/, # condition refers to the group by name
 5 |     [1]       => [:conditional, :open, Conditional::Expression, to_s: '(?(<A>)T|F)', reference: 'A', ts: 7],
 6 |     [1, 0]    => [:conditional, :condition, Conditional::Condition, to_s: '(<A>)', reference: 'A', ts: 9],
 7 |     [1, 1]    => [:expression,  :sequence,  Conditional::Branch, to_s: 'T', ts: 14],
 8 |     [1, 1, 0] => [:literal, text: 'T', ts: 14],
 9 |     [1, 2]    => [:expression,  :sequence,  Conditional::Branch, to_s: 'F', ts: 16],
10 |     [1, 2, 0] => [:literal, text: 'F', ts: 16]
11 | 
12 |   include_examples 'parse', /(a)(?(1)T|F)/, # condition refers to the group by number; reference is an Integer here
13 |     [1]       => [:conditional, :open, Conditional::Expression, to_s: '(?(1)T|F)', reference: 1, ts: 3],
14 |     [1, 0]    => [:conditional, :condition, Conditional::Condition, to_s: '(1)', reference: 1, ts: 5],
15 |     [1, 1]    => [:expression,  :sequence,  Conditional::Branch, to_s: 'T', ts: 8],
16 |     [1, 1, 0] => [:literal, text: 'T', ts: 8],
17 |     [1, 2]    => [:expression,  :sequence,  Conditional::Branch, to_s: 'F', ts: 10],
18 |     [1, 2, 0] => [:literal, text: 'F', ts: 10]
19 | 
20 |   include_examples 'parse', /(foo)(?(1)\d+|(\w)){42}/, # the trailing {42} quantifies the whole conditional, not its condition or branches
21 |     [1]       => [Conditional::Expression, quantified?: true, to_s: '(?(1)\d+|(\w)){42}'],
22 |     [1, 0]    => [Conditional::Condition, quantified?: false],
23 |     [1, 1]    => [Conditional::Branch, quantified?: false],
24 |     [1, 1, 0] => [:digit, quantified?: true, to_s: '\d+'],
25 |     [1, 2]    => [Conditional::Branch, quantified?: false]
26 | 
27 |   # test nested and mixed with alternations; the heredoc is laid out for readability and all whitespace is stripped before parsing
28 |   include_examples 'parse', <<-EOS.gsub(/\s/, ''),
29 |       (
30 |         (a)
31 |         |
32 |         (b)
33 |         |
34 |         (
35 |           (
36 |             ?(2)
37 |             (c(d|e)+)?
38 |             |
39 |             (
40 |               ?(3)
41 |               f
42 |               |
43 |               (
44 |                 ?(4)
45 |                 (g|(h)(i))
46 |               )
47 |             )
48 |           )
49 |         )
50 |       )
51 |     EOS
52 |     [0]                         => [Group::Capture, count: 1],
53 |     [0, 0]                      => [Alternation, count: 3],
54 |     [0, 0, 2]                   => [Alternative, count: 1],
55 |     [0, 0, 2, 0]                => [Group::Capture, count: 1],
56 |     [0, 0, 2, 0, 0]             => [Conditional::Expression, count: 3, conditional_level: 0],
57 |     [0, 0, 2, 0, 0, 0]          => [Conditional::Condition, to_s: '(2)', conditional_level: 1],
58 |     [0, 0, 2, 0, 0, 1]          => [Conditional::Branch, to_s: '(c(d|e)+)?', conditional_level: 1],
59 |     [0, 0, 2, 0, 0, 2]          => [Conditional::Branch, to_s: '(?(3)f|(?(4)(g|(h)(i))))', conditional_level: 1],
60 |     [0, 0, 2, 0, 0, 2, 0]       => [Conditional::Expression, count: 3, conditional_level: 1],
61 |     [0, 0, 2, 0, 0, 2, 0, 0]    => [Conditional::Condition, to_s: '(3)', conditional_level: 2],
62 |     [0, 0, 2, 0, 0, 2, 0, 1]    => [Conditional::Branch, count: 1, to_s: 'f', conditional_level: 2],
63 |     [0, 0, 2, 0, 0, 2, 0, 1, 0] => [Literal, text: 'f', conditional_level: 2]
64 | 
65 |   # test empty branch: the empty else-branch still counts as a child (count: 3) with an empty to_s
66 |   include_examples 'parse', /(?<A>a)(?(<A>)T|)/,
67 |     [1]    => [Conditional::Expression, count: 3, to_s: '(?(<A>)T|)'],
68 |     [1, 2] => [Conditional::Branch, to_s: '', ts: 16]
69 | 
70 |   # test insignificant leading zeros in the condition's group number ref: to_s keeps '001', reference is the Integer 1
71 |   include_examples 'parse', /(a)(?(001)T)/,
72 |     [1, 0] => [Conditional::Condition, to_s: '(001)', reference: 1]
73 | end
74 | 


--------------------------------------------------------------------------------
/spec/parser/errors_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Parsing errors') do # feeds hand-built tokens to the private parse_token method to trigger its error branches
 4 |   let(:parser) { Regexp::Parser.new }
 5 |   before { parser.parse(/foo/) } # initializes ivars
 6 | 
 7 |   it('raises UnknownTokenTypeError for unknown token types') do
 8 |     expect { parser.send(:parse_token, Regexp::Token.new(:foo, :bar)) } # :foo is not a token type the parser knows
 9 |       .to raise_error(Regexp::Parser::UnknownTokenTypeError)
10 |   end
11 | 
12 |   RSpec.shared_examples 'UnknownTokenError' do |type| # known token type combined with the unknown token name :foo
13 |     it "raises for unknown tokens of type #{type}" do
14 |       expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
15 |         .to raise_error(Regexp::Parser::UnknownTokenError)
16 |     end
17 |   end
18 | 
19 |   include_examples 'UnknownTokenError', :anchor
20 |   include_examples 'UnknownTokenError', :backref
21 |   include_examples 'UnknownTokenError', :conditional
22 |   include_examples 'UnknownTokenError', :free_space
23 |   include_examples 'UnknownTokenError', :group
24 |   include_examples 'UnknownTokenError', :meta
25 |   include_examples 'UnknownTokenError', :nonproperty
26 |   include_examples 'UnknownTokenError', :property
27 |   include_examples 'UnknownTokenError', :quantifier
28 |   include_examples 'UnknownTokenError', :set
29 |   include_examples 'UnknownTokenError', :type
30 | end
31 | 


--------------------------------------------------------------------------------
/spec/parser/escapes_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('EscapeSequence parsing') do # each row: input, index of the escape => [type, token, expected class] or attribute checks
 4 |   es = EscapeSequence # local shorthand used in the rows below
 5 | 
 6 |   include_examples 'parse', /a\ac/,          1 => [:escape, :bell,           es::Bell]
 7 |   include_examples 'parse', /a\ec/,          1 => [:escape, :escape,         es::AsciiEscape]
 8 |   include_examples 'parse', /a\fc/,          1 => [:escape, :form_feed,      es::FormFeed]
 9 |   include_examples 'parse', /a\nc/,          1 => [:escape, :newline,        es::Newline]
10 |   include_examples 'parse', /a\rc/,          1 => [:escape, :carriage,       es::Return]
11 |   include_examples 'parse', /a\tc/,          1 => [:escape, :tab,            es::Tab]
12 |   include_examples 'parse', /a\vc/,          1 => [:escape, :vertical_tab,   es::VerticalTab]
13 | 
14 |   # meta character escapes: the token is named after the escaped metacharacter, the class is always es::Literal
15 |   include_examples 'parse', /a\.c/,          1 => [:escape, :dot,            es::Literal]
16 |   include_examples 'parse', /a\?c/,          1 => [:escape, :zero_or_one,    es::Literal]
17 |   include_examples 'parse', /a\*c/,          1 => [:escape, :zero_or_more,   es::Literal]
18 |   include_examples 'parse', /a\+c/,          1 => [:escape, :one_or_more,    es::Literal]
19 |   include_examples 'parse', /a\|c/,          1 => [:escape, :alternation,    es::Literal]
20 |   include_examples 'parse', /a\(c/,          1 => [:escape, :group_open,     es::Literal]
21 |   include_examples 'parse', /a\)c/,          1 => [:escape, :group_close,    es::Literal]
22 |   include_examples 'parse', /a\{c/,          1 => [:escape, :interval_open,  es::Literal]
23 |   include_examples 'parse', /a\}c/,          1 => [:escape, :interval_close, es::Literal]
24 | 
25 |   # unicode escapes: the braced form is a codepoint list even when it holds a single codepoint
26 |   include_examples 'parse', /a\u0640/,       1 => [:escape, :codepoint,      es::Codepoint]
27 |   include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, es::CodepointList]
28 |   include_examples 'parse', /a\u{10FFFF}/,   1 => [:escape, :codepoint_list, es::CodepointList]
29 | 
30 |   # hex escapes
31 |   include_examples 'parse', /a\xFF/n,        1 => [:escape, :hex,            es::Hex]
32 | 
33 |   # octal escapes
34 |   include_examples 'parse', /a\177/n,        1 => [:escape, :octal,          es::Octal]
35 | 
36 |   # test #char and #codepoint (and the plural #chars / #codepoints for codepoint lists)
37 |   include_examples 'parse', /\n/,            0 => [char:  "\n",    codepoint:  10      ]
38 |   include_examples 'parse', /\?/,            0 => [char:  '?',     codepoint:  63      ]
39 |   include_examples 'parse', /\101/,          0 => [char:  'A',     codepoint:  65      ]
40 |   include_examples 'parse', /\x42/,          0 => [char:  'B',     codepoint:  66      ]
41 |   include_examples 'parse', /\xA/,           0 => [char:  "\n",    codepoint:  10      ]
42 |   include_examples 'parse', /\u0043/,        0 => [char:  'C',     codepoint:  67      ]
43 |   include_examples 'parse', /\u{44 45}/,     0 => [chars: %w[D E], codepoints: [68, 69]]
44 | 
45 |   specify('codepoint_list #char and #codepoint raise errors') do
46 |     exp = RP.parse(/\u{44 45}/)[0]
47 |     expect { exp.char }.to raise_error(/#chars/) # the error message is expected to mention the plural method
48 |     expect { exp.codepoint }.to raise_error(/#codepoints/)
49 |   end
50 | 
51 |   # Meta/control escapes
52 |   #
53 |   # After the following fix in Ruby 3.1, a Regexp#source containing meta/control
54 |   # escapes can only be set with the Regexp::new constructor.
55 |   # In Regexp literals, these escapes are now pre-processed to hex escapes.
56 |   #
57 |   # https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
58 |   n = ->(regexp_body){ Regexp.new(regexp_body.force_encoding('ascii-8bit')) } # builds the Regexp via Regexp.new from the body forced to ASCII-8BIT
59 | 
60 |   include_examples 'parse', n.('\\\\\c2b'),  1 => [es::Control,     text: '\c2',     char: "\x12",   codepoint: 18 ]
61 |   include_examples 'parse', n.('\d\C-C\w'),  1 => [es::Control,     text: '\C-C',    char: "\x03",   codepoint: 3  ]
62 |   include_examples 'parse', n.('\Z\M-Z'),    1 => [es::Meta,        text: '\M-Z',    char: "\u00DA", codepoint: 218]
63 |   include_examples 'parse', n.('\A\M-\C-X'), 1 => [es::MetaControl, text: '\M-\C-X', char: "\u0098", codepoint: 152]
64 |   include_examples 'parse', n.('\A\M-\cX'),  1 => [es::MetaControl, text: '\M-\cX',  char: "\u0098", codepoint: 152]
65 |   include_examples 'parse', n.('\A\C-\M-X'), 1 => [es::MetaControl, text: '\C-\M-X', char: "\u0098", codepoint: 152]
66 |   include_examples 'parse', n.('\A\c\M-X'),  1 => [es::MetaControl, text: '\c\M-X',  char: "\u0098", codepoint: 152]
67 | end
68 | 


--------------------------------------------------------------------------------
/spec/parser/free_space_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('FreeSpace parsing') do # whitespace and comment handling with and without the /x flag
 4 |   include_examples 'parse', /a b c/, # without /x the spaces stay part of a single literal
 5 |     [0] => [Literal, text: 'a b c']
 6 | 
 7 |   include_examples 'parse', /a b c/x, # with /x each space becomes its own WhiteSpace expression
 8 |     [0] => [Literal, text: 'a'],
 9 |     [1] => [WhiteSpace, text: ' '],
10 |     [2] => [Literal, text: 'b'],
11 |     [3] => [WhiteSpace, text: ' '],
12 |     [4] => [Literal, text: 'c']
13 | 
14 |   include_examples 'parse', /a * b + c/x, # a quantifier separated from its target by whitespace still attaches to that target
15 |     [0] => [Literal, to_s: 'a*', quantified?: true],
16 |     [1] => [WhiteSpace, text: ' '],
17 |     [2] => [WhiteSpace, text: ' '],
18 |     [3] => [Literal, to_s: 'b+', quantified?: true],
19 |     [4] => [WhiteSpace, text: ' '],
20 |     [5] => [WhiteSpace, text: ' '],
21 |     [6] => [Literal, to_s: 'c']
22 | 
23 |   include_examples 'parse', /
24 |       a   ?     # One letter
25 |       b {2,5}   # Another one
26 |       [c-g]  +  # A set
27 |       (h|i|j)   # A group
28 |     /x, # expectations below: comments become Comment expressions whose to_s includes the trailing newline
29 |     [0]  => [WhiteSpace],
30 |     [1]  => [Literal, to_s: 'a?', quantified?: true],
31 |     [2]  => [WhiteSpace, text: '   '],
32 |     [3]  => [WhiteSpace, text: '     '],
33 |     [4]  => [Comment, to_s: "# One letter\n"],
34 |     [5]  => [WhiteSpace],
35 |     [6]  => [Literal, to_s: 'b{2,5}', quantified?: true],
36 |     [7]  => [WhiteSpace, text: ' '],
37 |     [8]  => [WhiteSpace, text: '   '],
38 |     [9]  => [Comment, to_s: "# Another one\n"],
39 |     [10] => [WhiteSpace],
40 |     [11] => [CharacterSet, to_s: '[c-g]+', quantified?: true],
41 |     [12] => [WhiteSpace],
42 |     [13] => [WhiteSpace],
43 |     [14] => [Comment, to_s: "# A set\n"],
44 |     [15] => [WhiteSpace],
45 |     [16] => [Group::Capture],
46 |     [17] => [WhiteSpace],
47 |     [18] => [Comment, to_s: "# A group\n",]
48 | 
49 |   include_examples 'parse', /
50 |       a
51 |       # comment 1
52 |       ?
53 |       (
54 |        b # comment 2
55 |        # comment 3
56 |        +
57 |       )
58 |       # comment 4
59 |       *
60 |     /x, # expectations below: quantifiers attach to their target even across intervening comments and newlines
61 |     [0]    => [WhiteSpace],
62 |     [1]    => [Literal, to_s: 'a?', quantified?: true],
63 |     [2]    => [WhiteSpace],
64 |     [3]    => [Comment],
65 |     [4]    => [WhiteSpace],
66 |     [5]    => [WhiteSpace],
67 |     [6]    => [Group::Capture, quantified?: true],
68 |     [6, 0] => [WhiteSpace],
69 |     [6, 1] => [Literal, to_s: 'b+', quantified?: true],
70 |     [6, 2] => [WhiteSpace],
71 |     [6, 3] => [Comment, to_s: "# comment 2\n"],
72 |     [6, 4] => [WhiteSpace],
73 |     [6, 5] => [Comment, to_s: "# comment 3\n"],
74 |     [6, 6] => [WhiteSpace],
75 |     [6, 7] => [WhiteSpace]
76 | end
77 | 


--------------------------------------------------------------------------------
/spec/parser/keep_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 | 
3 | RSpec.describe('Keep parsing') do # \K is parsed as a Keep::Mark, both at top level and inside a group
4 |   include_examples 'parse', /ab\Kcd/, 1      => [:keep, :mark, Keep::Mark, text: '\K']
5 |   include_examples 'parse', /(a\K)/,  [0, 1] => [:keep, :mark, Keep::Mark, text: '\K'] # path [0, 1]: second child of the capture group
6 | end
7 | 


--------------------------------------------------------------------------------
/spec/parser/options_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('passing options to parse') do
 4 |   it 'raises if if parsing from a Regexp and options are passed' do
 5 |     expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
 6 |       ArgumentError,
 7 |       'options cannot be supplied unless parsing a String'
 8 |     )
 9 |   end
10 | 
11 |   it 'sets options if parsing from a String' do
12 |     root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
13 | 
14 |     expect(root.options).to eq(m: true, x: true)
15 |   end
16 | 
17 |   it 'allows options to not be supplied when parsing from a Regexp' do
18 |     root = RP.parse(/a+/ix)
19 | 
20 |     expect(root.options).to eq(i: true, x: true)
21 |   end
22 | 
23 |   it 'has an empty option-hash when parsing from a String and passing no options' do
24 |     root = RP.parse('a+')
25 | 
26 |     expect(root.options).to be_empty
27 |   end
28 | end
29 | 


--------------------------------------------------------------------------------
/spec/parser/posix_classes_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('PosixClass parsing') do # posix classes appear as children of the enclosing CharacterSet
 4 |   include_examples 'parse', /[[:word:]]/,
 5 |     [0]    => [CharacterSet, count: 1],
 6 |     [0, 0] => [:posixclass,    :word, PosixClass, name: 'word', text: '[:word:]']
 7 |   include_examples 'parse', /[[:^word:]]/, # negated form: type is :nonposixclass, name stays 'word'
 8 |     [0]    => [CharacterSet, count: 1],
 9 |     [0, 0] => [:nonposixclass, :word, PosixClass, name: 'word', text: '[:^word:]']
10 | 
11 |   # cases treated as regular subsets by Ruby, not as (invalid) posix classes; inputs are Strings, not Regexp literals
12 |   include_examples 'parse', '[[:ab]c:]',
13 |     [0, 0]    => [CharacterSet, count: 3],
14 |     [0, 0, 0] => [Literal, text: ':']
15 | 
16 |   include_examples 'parse', '[[:a[b]c:]]',
17 |     [0, 0]    => [CharacterSet, count: 5],
18 |     [0, 0, 0] => [Literal, text: ':']
19 | end
20 | 


--------------------------------------------------------------------------------
/spec/parser/properties_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Property parsing') do # \p{...} / \P{...} unicode properties: notation variants, negation, classification
 4 |   # test various notations supported by Ruby: short name, any letter case, space / hyphen / underscore separators
 5 |   include_examples 'parse', '\p{sd}',           0 => [:property, :soft_dotted]
 6 |   include_examples 'parse', '\p{SD}',           0 => [:property, :soft_dotted]
 7 |   include_examples 'parse', '\p{Soft Dotted}',  0 => [:property, :soft_dotted]
 8 |   include_examples 'parse', '\p{Soft-Dotted}',  0 => [:property, :soft_dotted]
 9 |   include_examples 'parse', '\p{sOfT_dOtTeD}',  0 => [:property, :soft_dotted]
10 | 
11 |   # test ^-negation
12 |   include_examples 'parse', '\p{^sd}',          0 => [:nonproperty, :soft_dotted]
13 |   include_examples 'parse', '\p{^SD}',          0 => [:nonproperty, :soft_dotted]
14 |   include_examples 'parse', '\p{^Soft Dotted}', 0 => [:nonproperty, :soft_dotted]
15 |   include_examples 'parse', '\p{^Soft-Dotted}', 0 => [:nonproperty, :soft_dotted]
16 |   include_examples 'parse', '\p{^sOfT_dOtTeD}', 0 => [:nonproperty, :soft_dotted]
17 | 
18 |   # test P-negation
19 |   include_examples 'parse', '\P{sd}',           0 => [:nonproperty, :soft_dotted]
20 |   include_examples 'parse', '\P{SD}',           0 => [:nonproperty, :soft_dotted]
21 |   include_examples 'parse', '\P{Soft Dotted}',  0 => [:nonproperty, :soft_dotted]
22 |   include_examples 'parse', '\P{Soft-Dotted}',  0 => [:nonproperty, :soft_dotted]
23 |   include_examples 'parse', '\P{sOfT_dOtTeD}',  0 => [:nonproperty, :soft_dotted]
24 | 
25 |   # double negation is positive again
26 |   include_examples 'parse', '\P{^sd}',          0 => [:property, :soft_dotted]
27 |   include_examples 'parse', '\P{^SD}',          0 => [:property, :soft_dotted]
28 |   include_examples 'parse', '\P{^Soft Dotted}', 0 => [:property, :soft_dotted]
29 |   include_examples 'parse', '\P{^Soft-Dotted}', 0 => [:property, :soft_dotted]
30 |   include_examples 'parse', '\P{^sOfT_dOtTeD}', 0 => [:property, :soft_dotted]
31 | 
32 |   # test #shortcut: the short name is returned whichever notation was used, and nil is expected for in_bengali
33 |   include_examples 'parse', '\p{soft_dotted}',  0 => [:property, :soft_dotted, shortcut: 'sd']
34 |   include_examples 'parse', '\p{sd}',           0 => [:property, :soft_dotted, shortcut: 'sd']
35 |   include_examples 'parse', '\p{in_bengali}',   0 => [:property, :in_bengali, shortcut: nil]
36 | 
37 |   # test classification: the expression class depends on the kind of property
38 |   include_examples 'parse', '\p{age=5.2}',                     0 => [UnicodeProperty::Age]
39 |   include_examples 'parse', '\p{InArmenian}',                  0 => [UnicodeProperty::Block]
40 |   include_examples 'parse', '\p{Math}',                        0 => [UnicodeProperty::Derived]
41 |   include_examples 'parse', '\p{Emoji}',                       0 => [UnicodeProperty::Emoji]
42 |   include_examples 'parse', '\p{GraphemeClusterBreak=Extend}', 0 => [UnicodeProperty::Enumerated]
43 |   include_examples 'parse', '\p{Hiragana}',                    0 => [UnicodeProperty::Script]
44 | 
45 |   specify('parse abandoned newline property') do
46 |     root = RP.parse('\p{newline}', 'ruby/1.9') # second argument presumably selects the target syntax version - confirm against RP.parse
47 |     expect(root.expressions.last).to be_a(UnicodeProperty::Base)
48 | 
49 |     expect { RP.parse('\p{newline}', 'ruby/2.0') }.to raise_error(Regexp::Syntax::NotImplementedError)
50 |   end
51 | 
52 |   # cannot test older Rubies because of https://bugs.ruby-lang.org/issues/18686
53 |   if ruby_version_at_least('3.2.0')
54 |     specify('parse all properties of current ruby') do
55 |       unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
56 |         RP.parse("\\p{#{prop}}") rescue false # a parse error yields false, so the failing property stays in `unsupported`
57 |       end
58 |       expect(unsupported).to be_empty
59 |     end
60 |   end
61 | 
62 |   # Ruby 2.3 supports a short prop name (sterm) without supporting the long name
63 |   # of the same prop (sentence_terminal). Let's ignore this unique case.
64 |   if ruby_version_at_least('2.4.0')
65 |     specify('parse only properties of current ruby') do
66 |       syntax = Regexp::Syntax.for("ruby/#{RUBY_VERSION}")
67 |       excessive = syntax.features.fetch(:property, []).reject do |prop|
68 |         begin
69 |           Regexp.new("\\p{#{prop}}") # a property the running Ruby accepts is rejected from the list
70 |         rescue RegexpError, SyntaxError # error class depends on Ruby version
71 |           false
72 |         end
73 |       end
74 |       expect(excessive).to be_empty
75 |     end
76 |   end
77 | end
78 | 


--------------------------------------------------------------------------------
/spec/parser/quantifiers_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Quantifier parsing') do # the :q path element presumably selects the quantifier of the expression at that path - confirm in the 'parse' shared examples
 4 |   include_examples 'parse', /a?b/,     [0, :q] => [:zero_or_one,  text: '?',     mode: :greedy,     min: 0, max: 1,  ts: 1]
 5 |   include_examples 'parse', /a??b/,    [0, :q] => [:zero_or_one,  text: '??',    mode: :reluctant,  min: 0, max: 1,  ts: 1]
 6 |   include_examples 'parse', /a?+b/,    [0, :q] => [:zero_or_one,  text: '?+',    mode: :possessive, min: 0, max: 1,  ts: 1]
 7 |   include_examples 'parse', /a*b/,     [0, :q] => [:zero_or_more, text: '*',     mode: :greedy,     min: 0, max: -1, ts: 1]
 8 |   include_examples 'parse', /a*?b/,    [0, :q] => [:zero_or_more, text: '*?',    mode: :reluctant,  min: 0, max: -1, ts: 1]
 9 |   include_examples 'parse', /a*+b/,    [0, :q] => [:zero_or_more, text: '*+',    mode: :possessive, min: 0, max: -1, ts: 1]
10 |   include_examples 'parse', /a+b/,     [0, :q] => [:one_or_more,  text: '+',     mode: :greedy,     min: 1, max: -1, ts: 1]
11 |   include_examples 'parse', /a+?b/,    [0, :q] => [:one_or_more,  text: '+?',    mode: :reluctant,  min: 1, max: -1, ts: 1]
12 |   include_examples 'parse', /a++b/,    [0, :q] => [:one_or_more,  text: '++',    mode: :possessive, min: 1, max: -1, ts: 1]
13 |   include_examples 'parse', /a{2,4}b/, [0, :q] => [:interval,     text: '{2,4}', mode: :greedy,     min: 2, max: 4,  ts: 1]
14 |   include_examples 'parse', /a{2,}b/,  [0, :q] => [:interval,     text: '{2,}',  mode: :greedy,     min: 2, max: -1, ts: 1]
15 |   include_examples 'parse', /a{,3}b/,  [0, :q] => [:interval,     text: '{,3}',  mode: :greedy,     min: 0, max: 3,  ts: 1]
16 |   include_examples 'parse', /a{4}b/,   [0, :q] => [:interval,     text: '{4}',   mode: :greedy,     min: 4, max: 4,  ts: 1]
17 |   include_examples 'parse', /a{004}b/, [0, :q] => [:interval,     text: '{004}', mode: :greedy,     min: 4, max: 4,  ts: 1]
18 | 
19 |   # special case: exps with chained quantifiers are wrapped in implicit passive groups (the outermost group carries the last quantifier)
20 |   include_examples 'parse', /a+{2}{3}/,
21 |     [0]           => [:group,      :passive,     Group::Passive, implicit?: true, level: 0],
22 |     [0, :q]       => [:quantifier, :interval,    Quantifier,     text: '{3}',     level: 0],
23 |     [0, 0]        => [:group,      :passive,     Group::Passive, implicit?: true, level: 1],
24 |     [0, 0, :q]    => [:quantifier, :interval,    Quantifier,     text: '{2}',     level: 1],
25 |     [0, 0, 0]     => [:literal,    :literal,     Literal,        text: 'a',       level: 2],
26 |     [0, 0, 0, :q] => [:quantifier, :one_or_more, Quantifier,     text: '+',       level: 2]
27 | 
28 |   # Ruby does not support modes for intervals, following `?` and `+` are read as chained quantifiers
29 |   include_examples 'parse', /a{2,4}?b/,
30 |     [0, :q]    => [:quantifier, :zero_or_one, Quantifier, text: '?',     mode: :greedy, min: 0, max: 1, ts: 6],
31 |     [0, 0, :q] => [:quantifier, :interval,    Quantifier, text: '{2,4}', mode: :greedy, min: 2, max: 4, ts: 1]
32 |   include_examples 'parse', /a{2,4}+b/,
33 |     [0, :q]    => [:quantifier, :one_or_more, Quantifier, text: '+',     mode: :greedy, min: 1, max: -1, ts: 6],
34 |     [0, 0, :q] => [:quantifier, :interval,    Quantifier, text: '{2,4}', mode: :greedy, min: 2, max: 4,  ts: 1]
35 | 
36 |   specify('mode-checking methods') do
37 |     exp = RP.parse(/a??/).first
38 | 
39 |     expect(exp).to be_reluctant # the predicates are checked on the expression itself ...
40 |     expect(exp).to be_lazy
41 |     expect(exp).not_to be_greedy
42 |     expect(exp).not_to be_possessive
43 |     expect(exp.quantifier).to be_reluctant # ... and on its quantifier object
44 |     expect(exp.quantifier).to be_lazy
45 |     expect(exp.quantifier).not_to be_greedy
46 |     expect(exp.quantifier).not_to be_possessive
47 |   end
48 | end
49 | 


--------------------------------------------------------------------------------
/spec/parser/set/intersections_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | # edge cases with `...-&&...` and `...&&-...` are checked in ./ranges_spec.rb
 4 | 
 5 | RSpec.describe('CharacterSet::Intersection parsing') do # parsed tree shape and match behavior of `&&` inside character sets
 6 |   include_examples 'parse', /[a&&z]/, # keys look like index paths into the parsed tree - confirm against the 'parse' shared examples
 7 |     [0]          => [CharacterSet, count: 1],
 8 |     [0, 0]       => [CharacterSet::Intersection, count: 2],
 9 |     [0, 0, 0]    => [CharacterSet::IntersectedSequence, count: 1],
10 |     [0, 0, 0, 0] => [:literal, text: 'a'],
11 |     [0, 0, 1]    => [CharacterSet::IntersectedSequence, count: 1],
12 |     [0, 0, 1, 0] => [:literal, text: 'z']
13 | 
14 |   include_examples 'parse', /[a-z&&[^a]]/, # range operand on the left, nested set on the right
15 |     [0]          => [CharacterSet, count: 1],
16 |     [0, 0]       => [CharacterSet::Intersection, count: 2],
17 |     [0, 0, 0]    => [CharacterSet::IntersectedSequence, count: 1],
18 |     [0, 0, 0, 0] => [CharacterSet::Range, count: 2],
19 |     [0, 0, 1]    => [CharacterSet::IntersectedSequence, count: 1],
20 |     [0, 0, 1, 0] => [CharacterSet, count: 1]
21 | 
22 |   include_examples 'parse', /[a&&a-z]/, # literal operand on the left, range on the right
23 |     [0]          => [CharacterSet, count: 1],
24 |     [0, 0]       => [CharacterSet::Intersection, count: 2],
25 |     [0, 0, 0]    => [CharacterSet::IntersectedSequence, count: 1],
26 |     [0, 0, 0, 0] => [:literal, text: 'a'],
27 |     [0, 0, 1]    => [CharacterSet::IntersectedSequence, count: 1],
28 |     [0, 0, 1, 0] => [CharacterSet::Range, count: 2]
29 | 
30 |   include_examples 'parse', /[a&&\w]/, # only the right-hand operand (the \w type) is asserted here
31 |     [0]          => [CharacterSet, count: 1],
32 |     [0, 0]       => [CharacterSet::Intersection, count: 2],
33 |     [0, 0, 1]    => [CharacterSet::IntersectedSequence, count: 1],
34 |     [0, 0, 1, 0] => [:word, text: '\w']
35 | 
36 |   include_examples 'parse', /[\h&&\w&&efg]/, # three operands give three IntersectedSequences under one Intersection
37 |     [0]          => [CharacterSet, count: 1],
38 |     [0, 0]       => [CharacterSet::Intersection, count: 3],
39 |     [0, 0, 0]    => [CharacterSet::IntersectedSequence, count: 1],
40 |     [0, 0, 0, 0] => [:hex, text: '\h'],
41 |     [0, 0, 1]    => [CharacterSet::IntersectedSequence, count: 1],
42 |     [0, 0, 1, 0] => [:word, text: '\w'],
43 |     [0, 0, 2]    => [CharacterSet::IntersectedSequence, count: 3],
44 |     [0, 0, 2, 0] => [:literal, text: 'e'],
45 |     [0, 0, 2, 1] => [:literal, text: 'f'],
46 |     [0, 0, 2, 2] => [:literal, text: 'g']
47 | 
48 |   # empty operands on both sides of `&&` must still yield sequences with correct ts values
49 |   include_examples 'parse', /[&&]/,
50 |     [0, 0]       => [CharacterSet::Intersection, text: '&&', count: 2, ts: 1],
51 |     [0, 0, 0]    => [CharacterSet::IntersectedSequence,      count: 0, ts: 1],
52 |     [0, 0, 1]    => [CharacterSet::IntersectedSequence,      count: 0, ts: 3]
53 | 
54 |   # non-empty operands: each sequence's ts is expected where its first member begins
55 |   include_examples 'parse', /[ab&&cd&&ef]/,
56 |     [0, 0]       => [CharacterSet::Intersection,        count: 3, text: '&&', ts: 1],
57 |     [0, 0, 0]    => [CharacterSet::IntersectedSequence, count: 2, to_s: 'ab', ts: 1],
58 |     [0, 0, 1]    => [CharacterSet::IntersectedSequence, count: 2, to_s: 'cd', ts: 5],
59 |     [0, 0, 2]    => [CharacterSet::IntersectedSequence, count: 2, to_s: 'ef', ts: 9]
60 | 
61 |   # The patterns parsed above are also evaluated with #match, to make sure
62 |   # that Ruby's matching behavior still reflects the way they are parsed.
63 |   # #capturing_stderr is used to skip any warnings generated by these patterns.
64 |   specify('intersections behavior remains unchanged') do
65 |     capturing_stderr do
66 |       expect(/[a&&z]/).not_to match 'a'
67 |       expect(/[a&&z]/).not_to match '&'
68 |       expect(/[a&&z]/).not_to match 'z'
69 |       expect(/[a-z&&[^a]]/).not_to match 'a'
70 |       expect(/[a-z&&[^a]]/).not_to match '&'
71 |       expect(/[a-z&&[^a]]/).to     match 'b'
72 |       expect(/[a&&a-z]/).to     match 'a'
73 |       expect(/[a&&a-z]/).not_to match '&'
74 |       expect(/[a&&a-z]/).not_to match 'b'
75 |       expect(/[a&&\w]/).to     match 'a'
76 |       expect(/[a&&\w]/).not_to match '&'
77 |       expect(/[a&&\w]/).not_to match 'b'
78 |       expect(/[\h&&\w&&efg]/).to     match 'e'
79 |       expect(/[\h&&\w&&efg]/).to     match 'f'
80 |       expect(/[\h&&\w&&efg]/).not_to match 'a'
81 |       expect(/[\h&&\w&&efg]/).not_to match 'g' # 'g' is in \w and in efg, but is not a hex digit
82 |     end
83 |   end
84 | end
85 | 


--------------------------------------------------------------------------------
/spec/parser/set/ranges_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('CharacterSet::Range parsing') do # which `x-y` spellings inside a set parse as Range, and which leave `-` a literal
 4 |   include_examples 'parse', '[a-z]',
 5 |     [0]       => [CharacterSet, count: 1],
 6 |     [0, 0]    => [CharacterSet::Range, count: 2],
 7 |     [0, 0, 0] => [:literal, text: 'a'],
 8 |     [0, 0, 1] => [:literal, text: 'z']
 9 | 
10 |   include_examples 'parse', '[\x00-\x22]', # escapes can be range endpoints
11 |     [0]       => [CharacterSet, count: 1],
12 |     [0, 0]    => [CharacterSet::Range, count: 2],
13 |     [0, 0, 0] => [:hex, text: '\x00'],
14 |     [0, 0, 1] => [:hex, text: '\x22']
15 | 
16 |   include_examples 'parse', '[\u{40 42}-\u1234]', # a codepoint list is expected as a single range endpoint
17 |     [0]       => [CharacterSet, count: 1],
18 |     [0, 0]    => [CharacterSet::Range, count: 2],
19 |     [0, 0, 0] => [:codepoint_list, text: '\u{40 42}'],
20 |     [0, 0, 1] => [:codepoint, text: '\u1234']
21 | 
22 |   include_examples 'parse', '[--z]', # a leading `-` is the range start
23 |     [0]       => [CharacterSet, count: 1],
24 |     [0, 0]    => [CharacterSet::Range, count: 2],
25 |     [0, 0, 0] => [:literal, text: '-'],
26 |     [0, 0, 1] => [:literal, text: 'z']
27 | 
28 |   include_examples 'parse', '[!--]', # a trailing `-` is the range end
29 |     [0]       => [CharacterSet, count: 1],
30 |     [0, 0]    => [CharacterSet::Range, count: 2],
31 |     [0, 0, 0] => [:literal, text: '!'],
32 |     [0, 0, 1] => [:literal, text: '-']
33 | 
34 |   include_examples 'parse', '[!-^]', # `^` that is not in first position is an ordinary range end
35 |     [0]       => [CharacterSet, count: 1],
36 |     [0, 0]    => [CharacterSet::Range, count: 2],
37 |     [0, 0, 0] => [:literal, text: '!'],
38 |     [0, 0, 1] => [:literal, text: '^']
39 | 
40 |   # edge cases that are NOT treated as ranges
41 | 
42 |   include_examples 'parse', '[^-z]', # `-` directly after the leading `^` is a literal member
43 |     [0]    => [CharacterSet, count: 2],
44 |     [0, 0] => [:literal, text: '-'],
45 |     [0, 1] => [:literal, text: 'z']
46 | 
47 |   include_examples 'parse', '[[\-ab]&&-bc]', # `-` directly after `&&` is a literal member
48 |     [0]          => [CharacterSet, count: 1],
49 |     [0, 0]       => [CharacterSet::Intersection, count: 2],
50 |     [0, 0, 0]    => [CharacterSet::IntersectedSequence, count: 1],
51 |     [0, 0, 0, 0] => [CharacterSet, count: 3],
52 |     [0, 0, 1]    => [CharacterSet::IntersectedSequence, count: 3],
53 |     [0, 0, 1, 0] => [:literal, text: '-']
54 | 
55 |   include_examples 'parse', '[bc-&&[\-ab]]', # `-` directly before `&&` is a literal member
56 |     [0]          => [CharacterSet, count: 1],
57 |     [0, 0]       => [CharacterSet::Intersection, count: 2],
58 |     [0, 0, 0]    => [CharacterSet::IntersectedSequence, count: 3],
59 |     [0, 0, 0, 2] => [:literal, text: '-'],
60 |     [0, 0, 1]    => [CharacterSet::IntersectedSequence, count: 1],
61 |     [0, 0, 1, 0] => [CharacterSet, count: 3]
62 | 
63 |   # The edge-case patterns parsed above are also evaluated with #match, to make
64 |   # sure that Ruby's matching behavior still reflects the way they are parsed.
65 |   # #capturing_stderr is used to skip any warnings generated by these patterns.
66 |   specify('ranges behavior remains unchanged') do
67 |     capturing_stderr do
68 |       expect(Regexp.new('[\x00-\x22]')).to match "\x11"
69 |       expect(Regexp.new('[\u{40 42}-\u1234]')).to match "\u0600"
70 |       expect(Regexp.new('[--z]')).to match 'a'
71 |       expect(Regexp.new('[!--]')).to match '$'
72 |       expect(Regexp.new('[!-^]')).to match '$'
73 | 
74 |       # edge cases that are NOT treated as ranges
75 |       expect(Regexp.new('[^-z]')).to     match 'a'
76 |       expect(Regexp.new('[^-z]')).not_to match 'z'
77 |       expect(Regexp.new('[[\-ab]&&-bc]')).to     match '-'
78 |       expect(Regexp.new('[[\-ab]&&-bc]')).to     match 'b'
79 |       expect(Regexp.new('[[\-ab]&&-bc]')).not_to match 'a'
80 |       expect(Regexp.new('[[\-ab]&&-bc]')).not_to match 'c'
81 |       expect(Regexp.new('[bc-&&[\-ab]]')).to     match '-'
82 |       expect(Regexp.new('[bc-&&[\-ab]]')).to     match 'b'
83 |       expect(Regexp.new('[bc-&&[\-ab]]')).not_to match 'a'
84 |       expect(Regexp.new('[bc-&&[\-ab]]')).not_to match 'c'
85 |     end
86 |   end
87 | end
88 | 


--------------------------------------------------------------------------------
/spec/parser/sets_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('CharacterSet parsing') do # members, nesting and set_level of parsed character sets
 4 |   include_examples 'parse', /[ab]+/, # the quantifier is expected on the set itself, not on a member
 5 |     [0]    => [:set, :character, CharacterSet, text: '[', count: 2, quantified?: true],
 6 |     [0, 0] => [:literal, :literal, Literal, text: 'a', set_level: 1],
 7 |     [0, 1] => [:literal, :literal, Literal, text: 'b', set_level: 1]
 8 | 
 9 |   include_examples 'parse', /[a\dc]/,
10 |     [0]    => [:set, :character, CharacterSet, text: '[', count: 3],
11 |     [0, 1] => [:type, :digit, CharacterType::Digit]
12 | 
13 |   include_examples 'parse', /[a\bc]/, # inside a set, \b is expected as a backspace escape
14 |     [0]    => [:set, :character, CharacterSet, text: '[', count: 3],
15 |     [0, 1] => [:escape, :backspace, EscapeSequence::Backspace, text: '\b']
16 | 
17 |   include_examples 'parse', '[a\xFz]', # hex escape with a single hex digit
18 |     [0]    => [:set, :character, CharacterSet, text: '[', count: 3],
19 |     [0, 1] => [:escape, :hex, EscapeSequence::Hex, text: '\xF']
20 | 
21 |   include_examples 'parse', '[a\x20c]',
22 |     [0]    => [:set, :character, CharacterSet, text: '[', count: 3],
23 |     [0, 1] => [:escape, :hex, EscapeSequence::Hex, text: '\x20']
24 | 
25 |   include_examples 'parse', '[a\77c]',
26 |     [0]    => [:set, :character, CharacterSet, text: '[', count: 3],
27 |     [0, 1] => [:escape, :octal, EscapeSequence::Octal, text: '\77']
28 | 
29 |   include_examples 'parse', '[a\u0640c]',
30 |     [0]    => [:set, :character, CharacterSet, text: '[', count: 3],
31 |     [0, 1] => [:escape, :codepoint, EscapeSequence::Codepoint, text: '\u0640']
32 | 
33 |   include_examples 'parse', '[a\u{41 1F60D}c]', # a codepoint list counts as one member
34 |     [0]    => [:set, :character, CharacterSet, text: '[', count: 3],
35 |     [0, 1] => [:escape, :codepoint_list, EscapeSequence::CodepointList, text: '\u{41 1F60D}']
36 | 
37 |   include_examples 'parse', '[[:digit:][:^lower:]]+', # a negated posix class gets the :nonposixclass type
38 |     [0]    => [:set, :character, CharacterSet, text: '[', count: 2],
39 |     [0, 0] => [:posixclass, :digit, PosixClass, text: '[:digit:]'],
40 |     [0, 1] => [:nonposixclass, :lower, PosixClass, text: '[:^lower:]']
41 | 
42 |   include_examples 'parse', '[a[b[c]d]e]', # nested sets: members sit one set_level above the set that contains them
43 |     [0]          => [:set,     :character, CharacterSet, text: '[', count: 3, set_level: 0],
44 |     [0, 0]       => [:literal, :literal,   Literal,      text: 'a',           set_level: 1],
45 |     [0, 1]       => [:set,     :character, CharacterSet, text: '[', count: 3, set_level: 1],
46 |     [0, 2]       => [:literal, :literal,   Literal,      text: 'e',           set_level: 1],
47 |     [0, 1, 1]    => [:set,     :character, CharacterSet, text: '[', count: 1, set_level: 2],
48 |     [0, 1, 1, 0] => [:literal, :literal,   Literal,      text: 'c',           set_level: 3]
49 | 
50 |   include_examples 'parse', '[a[^b[c]]]', # the `^` of the nested set is not counted as a member
51 |     [0]          => [:set,     :character, CharacterSet, text: '[', count: 2, set_level: 0],
52 |     [0, 0]       => [:literal, :literal,   Literal,      text: 'a',           set_level: 1],
53 |     [0, 1]       => [:set,     :character, CharacterSet, text: '[', count: 2, set_level: 1],
54 |     [0, 1, 0]    => [:literal, :literal,   Literal,      text: 'b',           set_level: 2],
55 |     [0, 1, 1]    => [:set,     :character, CharacterSet, text: '[', count: 1, set_level: 2],
56 |     [0, 1, 1, 0] => [:literal, :literal,   Literal,      text: 'c',           set_level: 3]
57 | 
58 |   include_examples 'parse', '[aaa]', # repeated members are kept as separate literals
59 |     [0]     => [:set,     :character, CharacterSet, text: '[', count: 3],
60 |     [0, 0]  => [:literal, :literal,   Literal,      text: 'a'],
61 |     [0, 1]  => [:literal, :literal,   Literal,      text: 'a'],
62 |     [0, 2]  => [:literal, :literal,   Literal,      text: 'a']
63 | 
64 |   include_examples 'parse', '[   ]', # each space is its own literal member
65 |     [0]     => [:set,     :character, CharacterSet, text: '[', count: 3],
66 |     [0, 0]  => [:literal, :literal,   Literal,      text: ' '],
67 |     [0, 1]  => [:literal, :literal,   Literal,      text: ' '],
68 |     [0, 2]  => [:literal, :literal,   Literal,      text: ' ']
69 | 
70 |   include_examples 'parse', '(?x)[   ]', # shouldn't merge whitespace even in x-mode; the set is at index 1, after `(?x)`
71 |     [1]     => [:set,     :character, CharacterSet, text: '[', count: 3],
72 |     [1, 0]  => [:literal, :literal,   Literal,      text: ' '],
73 |     [1, 1]  => [:literal, :literal,   Literal,      text: ' '],
74 |     [1, 2]  => [:literal, :literal,   Literal,      text: ' ']
75 | 
76 |   include_examples 'parse', '[[.span-ll.]]', # collating sequences are disabled in Onigmo, so this is a plain nested set
77 |     [0, 0]    => [:set,     :character, CharacterSet, text: '[', count: 7],
78 |     [0, 0, 0] => [:literal, :literal,   Literal,      text: '.']
79 | 
80 |   include_examples 'parse', '[[=e=]]', # character equivalents are disabled in Onigmo, so this is a plain nested set
81 |     [0, 0]    => [:set,     :character, CharacterSet, text: '[', count: 3],
82 |     [0, 0, 0] => [:literal, :literal,   Literal,      text: '=']
83 | end
84 | 


--------------------------------------------------------------------------------
/spec/parser/types_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('CharacterType parsing') do # each type escape and its negated (uppercase) form maps to its own class
 4 |   include_examples 'parse', /a\dc/, 1 => [:type, :digit,     CharacterType::Digit] # index 1: presumably the node between the literals 'a' and 'c'
 5 |   include_examples 'parse', /a\Dc/, 1 => [:type, :nondigit,  CharacterType::NonDigit]
 6 | 
 7 |   include_examples 'parse', /a\sc/, 1 => [:type, :space,     CharacterType::Space]
 8 |   include_examples 'parse', /a\Sc/, 1 => [:type, :nonspace,  CharacterType::NonSpace]
 9 | 
10 |   include_examples 'parse', /a\hc/, 1 => [:type, :hex,       CharacterType::Hex]
11 |   include_examples 'parse', /a\Hc/, 1 => [:type, :nonhex,    CharacterType::NonHex]
12 | 
13 |   include_examples 'parse', /a\wc/, 1 => [:type, :word,      CharacterType::Word]
14 |   include_examples 'parse', /a\Wc/, 1 => [:type, :nonword,   CharacterType::NonWord]
15 | 
16 |   include_examples 'parse', 'a\Rc', 1 => [:type, :linebreak, CharacterType::Linebreak] # \R and \X are passed as Strings, not Regexp literals
17 |   include_examples 'parse', 'a\Xc', 1 => [:type, :xgrapheme, CharacterType::ExtendedGrapheme]
18 | end
19 | 


--------------------------------------------------------------------------------
/spec/scanner/all_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Scanner) do # smoke tests for the overall shape of the scanner output
 4 |   specify('scanner returns an array') do
 5 |     expect(RS.scan('abc')).to be_instance_of(Array)
 6 |   end
 7 | 
 8 |   specify('scanner returns tokens as arrays') do
 9 |     tokens = RS.scan('^abc+[^one]{2,3}\b\d\C-C$')
10 |     expect(tokens).to all(be_a Array)
11 |     expect(tokens.map(&:length)).to all(eq 5) # every token is a 5-element array
12 |   end
13 | 
14 |   specify('scanner token count') do
15 |     re = /^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i
16 |     expect(RS.scan(re).length).to eq 28 # guards against tokens being dropped or split differently
17 |   end
18 | end
19 | 


--------------------------------------------------------------------------------
/spec/scanner/anchors_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Anchor scanning') do # expectations read [type, token, text, start, end] for the token at the given index
 4 |   include_examples 'scan', '^abc',    0 => [:anchor,  :bol,              '^',    0, 1]
 5 |   include_examples 'scan', 'abc$',    1 => [:anchor,  :eol,              '$',    3, 4]
 6 | 
 7 |   include_examples 'scan', '\Aabc',   0 => [:anchor,  :bos,              '\A',   0, 2]
 8 |   include_examples 'scan', 'abc\z',   1 => [:anchor,  :eos,              '\z',   3, 5]
 9 |   include_examples 'scan', 'abc\Z',   1 => [:anchor,  :eos_ob_eol,       '\Z',   3, 5]
10 | 
11 |   include_examples 'scan', 'a\bc',    1 => [:anchor,  :word_boundary,    '\b',   1, 3]
12 |   include_examples 'scan', 'a\Bc',    1 => [:anchor,  :nonword_boundary, '\B',   1, 3]
13 | 
14 |   include_examples 'scan', 'a\Gc',    1 => [:anchor,  :match_start,      '\G',   1, 3]
15 | 
16 |   include_examples 'scan', "\\\\Ac",  0 => [:escape, :backslash,         '\\\\', 0, 2] # an escaped backslash must not fuse with the next letter into an anchor
17 |   include_examples 'scan', "a\\\\z",  1 => [:escape, :backslash,         '\\\\', 1, 3]
18 |   include_examples 'scan', "a\\\\Z",  1 => [:escape, :backslash,         '\\\\', 1, 3]
19 |   include_examples 'scan', "a\\\\bc", 1 => [:escape, :backslash,         '\\\\', 1, 3]
20 |   include_examples 'scan', "a\\\\Bc", 1 => [:escape, :backslash,         '\\\\', 1, 3]
21 | end
22 | 


--------------------------------------------------------------------------------
/spec/scanner/delimiters_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Literal delimiter scanning') do # stray `{`, `}` and `]` that open or close nothing scan as literals
 4 |   include_examples 'scan', '}',
 5 |     0 => [:literal,     :literal,       '}',        0,  1]
 6 | 
 7 |   include_examples 'scan', '}}',
 8 |     0 => [:literal,     :literal,       '}}',       0,  2]
 9 | 
10 |   include_examples 'scan', '{',
11 |     0 => [:literal,     :literal,       '{',        0,  1]
12 | 
13 |   include_examples 'scan', '{{',
14 |     0 => [:literal,     :literal,       '{{',       0,  2]
15 | 
16 |   include_examples 'scan', '{}', # an empty pair of braces is not an interval
17 |     0 => [:literal,     :literal,       '{}',       0,  2]
18 | 
19 |   include_examples 'scan', '}{',
20 |     0 => [:literal,     :literal,       '}{',       0,  2]
21 | 
22 |   include_examples 'scan', '}{+', # the trailing `+` is not part of the literal token
23 |     0 => [:literal,     :literal,       '}{',       0,  2]
24 | 
25 |   include_examples 'scan', '{{var}}', # template-like text is a single literal
26 |     0 => [:literal,     :literal,       '{{var}}',  0,  7]
27 | 
28 |   include_examples 'scan', 'a{1,2', # an unterminated interval stays part of the literal
29 |     0 => [:literal,     :literal,       'a{1,2',    0,  5]
30 | 
31 |   include_examples 'scan', '({.+})', # braces around `.+` are literals; only `+` is a quantifier
32 |     0 => [:group,       :capture,       '(',        0,  1],
33 |     1 => [:literal,     :literal,       '{',        1,  2],
34 |     2 => [:meta,        :dot,           '.',        2,  3],
35 |     3 => [:quantifier,  :one_or_more,   '+',        3,  4],
36 |     4 => [:literal,     :literal,       '}',        4,  5],
37 |     5 => [:group,       :close,         ')',        5,  6]
38 | 
39 |   include_examples 'scan', ']', # a `]` with no open set is a literal
40 |     0 => [:literal,     :literal,       ']',        0,  1]
41 | 
42 |   include_examples 'scan', ']]',
43 |     0 => [:literal,     :literal,       ']]',       0,  2]
44 | 
45 |   include_examples 'scan', ']\[', # the escaped `[` is its own :set_open escape token
46 |     0 => [:literal,     :literal,       ']',        0,  1],
47 |     1 => [:escape,      :set_open,      '\[',       1,  3]
48 | 
49 |   include_examples 'scan', '()', # an empty capture group still yields open and close tokens
50 |     0 => [:group,       :capture,       '(',        0,  1],
51 |     1 => [:group,       :close,         ')',        1,  2]
52 | end
53 | 


--------------------------------------------------------------------------------
/spec/scanner/keep_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Keep scanning') do # \K is expected as a [:keep, :mark] token
 4 |   include_examples 'scan', /ab\Kcd/,
 5 |     1 => [:keep, :mark, '\K', 2,  4]
 6 | 
 7 |   include_examples 'scan', /(a\Kb)|(c\\\Kd)ef/, # `\\` is an escaped backslash, so the \K after it is still a keep mark
 8 |     2 => [:keep, :mark, '\K', 2,  4],
 9 |     9 => [:keep, :mark, '\K', 11, 13]
10 | end
11 | 


--------------------------------------------------------------------------------
/spec/scanner/literals_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('UTF8 scanning') do # the start/end offsets expected below count characters, not bytes
 4 |   # ascii, single byte characters
 5 |   include_examples 'scan', 'a',
 6 |     0 => [:literal,     :literal,       'a',        0, 1]
 7 | 
 8 |   include_examples 'scan', 'ab+', # the literal before the quantifier is expected as one 'ab' token
 9 |     0 => [:literal,     :literal,       'ab',       0, 2],
10 |     1 => [:quantifier,  :one_or_more,   '+',        2, 3]
11 | 
12 |   # 2 byte wide characters
13 |   include_examples 'scan', 'äöü',
14 |     0 => [:literal,     :literal,        'äöü',     0, 3]
15 | 
16 |   # 3 byte wide characters, Japanese
17 |   include_examples 'scan', 'ab?れます+cd',
18 |     0 => [:literal,     :literal,       'ab',       0, 2],
19 |     1 => [:quantifier,  :zero_or_one,   '?',        2, 3],
20 |     2 => [:literal,     :literal,       'れます',    3, 6],
21 |     3 => [:quantifier,  :one_or_more,   '+',        6, 7],
22 |     4 => [:literal,     :literal,       'cd',       7, 9]
23 | 
24 |   # 4 byte wide characters, Osmanya
25 |   include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃',
26 |     0 => [:literal,     :literal,       '𐒀𐒁',       0, 2],
27 |     1 => [:quantifier,  :zero_or_one,   '?',        2, 3],
28 |     2 => [:literal,     :literal,       '𐒂ab',      3, 6],
29 |     3 => [:quantifier,  :one_or_more,   '+',        6, 7],
30 |     4 => [:literal,     :literal,       '𐒃',        7, 8]
31 | 
32 |   include_examples 'scan', 'mu𝄞?si*𝄫c+', # 4 byte wide characters (musical symbols) mixed with ascii
33 |     0 => [:literal,     :literal,       'mu𝄞',       0, 3],
34 |     1 => [:quantifier,  :zero_or_one,   '?',        3, 4],
35 |     2 => [:literal,     :literal,       'si',       4, 6],
36 |     3 => [:quantifier,  :zero_or_more,  '*',        6, 7],
37 |     4 => [:literal,     :literal,       '𝄫c',       7, 9],
38 |     5 => [:quantifier,  :one_or_more,   '+',        9, 10]
39 | end
40 | 


--------------------------------------------------------------------------------
/spec/scanner/meta_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Meta scanning') do # `|` scans as [:meta, :alternation] only when it is unescaped and outside a set
 4 |   include_examples 'scan', /abc??|def*+|ghi+/, # alternation next to reluctant and possessive quantifiers
 5 |     0  => [:literal,     :literal,                 'abc',  0,  3],
 6 |     1  => [:quantifier,  :zero_or_one_reluctant,   '??',   3,  5],
 7 |     2  => [:meta,        :alternation,             '|',    5,  6],
 8 |     3  => [:literal,     :literal,                 'def',  6,  9],
 9 |     4  => [:quantifier,  :zero_or_more_possessive, '*+',   9,  11],
10 |     5  => [:meta,        :alternation,             '|',    11, 12]
11 | 
12 |   include_examples 'scan', /(a\|b)|(c|d)\|(e[|]f)/, # `\|` is an :escape token, `|` inside a set is a :literal
13 |     2  => [:escape,      :alternation,             '\|',   2,  4],
14 |     5  => [:meta,        :alternation,             '|',    6,  7],
15 |     8  => [:meta,        :alternation,             '|',    9,  10],
16 |     11 => [:escape,      :alternation,             '\|',   12, 14],
17 |     15 => [:literal,     :literal,                 '|',    17, 18]
18 | end
19 | 


--------------------------------------------------------------------------------
/spec/scanner/options_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('passing options to scan') do
 4 |   def expect_type_tokens(tokens, type_tokens)
 5 |     expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens)
 6 |   end
 7 | 
 8 |   it 'raises if if scanning from a Regexp and options are passed' do
 9 |     expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
10 |       ArgumentError,
11 |       'options cannot be supplied unless scanning a String'
12 |     )
13 |   end
14 | 
15 |   it 'sets free_spacing based on options if scanning from a String' do
16 |     expect_type_tokens(
17 |       RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED),
18 |       [
19 |         %i[literal literal],
20 |         %i[quantifier one_or_more],
21 |         %i[free_space comment]
22 |       ]
23 |     )
24 |   end
25 | 
26 |   it 'does not set free_spacing if scanning from a String and passing no options' do
27 |     expect_type_tokens(
28 |       RS.scan('a+#c'),
29 |       [
30 |         %i[literal literal],
31 |         %i[quantifier one_or_more],
32 |         %i[literal literal]
33 |       ]
34 |     )
35 |   end
36 | end
37 | 


--------------------------------------------------------------------------------
/spec/scanner/properties_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Property scanning') do # property names in various spellings must scan to one normalized token
 4 |   RSpec.shared_examples 'scan property' do |text, token| # checks four spellings: \p{..}, \P{..}, \p{^..} and \P{^..}
 5 |     it("scans \\p{#{text}} as property #{token}") do
 6 |       result = RS.scan("\\p{#{text}}")[0]
 7 |       expect(result[0..1]).to eq [:property, token]
 8 |     end
 9 | 
10 |     it("scans \\P{#{text}} as nonproperty #{token}") do
11 |       result = RS.scan("\\P{#{text}}")[0]
12 |       expect(result[0..1]).to eq [:nonproperty, token]
13 |     end
14 | 
15 |     it("scans \\p{^#{text}} as nonproperty #{token}") do
16 |       result = RS.scan("\\p{^#{text}}")[0]
17 |       expect(result[0..1]).to eq [:nonproperty, token]
18 |     end
19 | 
20 |     it("scans double-negated \\P{^#{text}} as property #{token}") do
21 |       result = RS.scan("\\P{^#{text}}")[0]
22 |       expect(result[0..1]).to eq [:property, token]
23 |     end
24 |   end
25 | 
26 |   include_examples 'scan property', 'Alnum',                :alnum
27 | 
28 |   include_examples 'scan property', 'XPosixPunct',          :xposixpunct
29 | 
30 |   include_examples 'scan property', 'Newline',              :newline
31 | 
32 |   include_examples 'scan property', 'Any',                  :any
33 | 
34 |   include_examples 'scan property', 'Assigned',             :assigned
35 | 
36 |   include_examples 'scan property', 'Age=1.1',              :'age=1.1'
37 |   include_examples 'scan property', 'Age=10.0',             :'age=10.0' # test two-digit major version
38 | 
39 |   include_examples 'scan property', 'ahex',                 :ascii_hex_digit # test short name
40 |   include_examples 'scan property', 'ASCII_Hex_Digit',      :ascii_hex_digit # test underscore
41 | 
42 |   include_examples 'scan property', 'sd',                   :soft_dotted # test short name
43 |   include_examples 'scan property', 'Soft-Dotted',          :soft_dotted # test dash
44 | 
45 |   include_examples 'scan property', 'Egyp',                 :egyptian_hieroglyphs # test short name
46 |   include_examples 'scan property', 'Egyptian Hieroglyphs', :egyptian_hieroglyphs # test whitespace
47 | 
48 |   include_examples 'scan property', 'Linb',                 :linear_b # test short name
49 |   include_examples 'scan property', 'Linear-B',             :linear_b # test dash
50 | 
51 |   include_examples 'scan property', 'InArabic',             :in_arabic # test block
52 |   include_examples 'scan property', 'in Arabic',            :in_arabic # test block w. whitespace
53 |   include_examples 'scan property', 'In_Arabic',            :in_arabic # test block w. underscore
54 | 
55 |   include_examples 'scan property', 'Yiii',                 :yi # both spellings are expected to scan to :yi
56 |   include_examples 'scan property', 'Yi',                   :yi
57 | 
58 |   include_examples 'scan property', 'Zinh',                 :inherited # all three spellings are expected to scan to :inherited
59 |   include_examples 'scan property', 'Inherited',            :inherited
60 |   include_examples 'scan property', 'Qaai',                 :inherited
61 | 
62 |   include_examples 'scan property', 'Zzzz',                 :unknown # both spellings are expected to scan to :unknown
63 |   include_examples 'scan property', 'Unknown',              :unknown
64 | end
65 | 


--------------------------------------------------------------------------------
/spec/scanner/quantifiers_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Quantifier scanning') do # greedy, reluctant (`?` suffix) and possessive (`+` suffix) forms, plus intervals
 4 |   include_examples 'scan', 'a?',       1 => [:quantifier,  :zero_or_one,             '?',     1, 2]
 5 |   include_examples 'scan', 'a??',      1 => [:quantifier,  :zero_or_one_reluctant,   '??',    1, 3]
 6 |   include_examples 'scan', 'a?+',      1 => [:quantifier,  :zero_or_one_possessive,  '?+',    1, 3]
 7 | 
 8 |   include_examples 'scan', 'a*',       1 => [:quantifier,  :zero_or_more,            '*',     1, 2]
 9 |   include_examples 'scan', 'a*?',      1 => [:quantifier,  :zero_or_more_reluctant,  '*?',    1, 3]
10 |   include_examples 'scan', 'a*+',      1 => [:quantifier,  :zero_or_more_possessive, '*+',    1, 3]
11 | 
12 |   include_examples 'scan', 'a+',       1 => [:quantifier,  :one_or_more,             '+',     1, 2]
13 |   include_examples 'scan', 'a+?',      1 => [:quantifier,  :one_or_more_reluctant,   '+?',    1, 3]
14 |   include_examples 'scan', 'a++',      1 => [:quantifier,  :one_or_more_possessive,  '++',    1, 3]
15 | 
16 |   include_examples 'scan', 'a{2}',     1 => [:quantifier,  :interval,                '{2}',   1, 4]
17 |   include_examples 'scan', 'a{2,}',    1 => [:quantifier,  :interval,                '{2,}',  1, 5]
18 |   include_examples 'scan', 'a{,2}',    1 => [:quantifier,  :interval,                '{,2}',  1, 5]
19 |   include_examples 'scan', 'a{2,4}',   1 => [:quantifier,  :interval,                '{2,4}', 1, 6]
20 | 
21 |   # special case: chained quantifiers, each one is expected as a separate token
22 |   include_examples 'scan', 'a+{2}{3}', 1 => [:quantifier,  :one_or_more,             '+',     1, 2]
23 |   include_examples 'scan', 'a+{2}{3}', 2 => [:quantifier,  :interval,                '{2}',   2, 5]
24 |   include_examples 'scan', 'a+{2}{3}', 3 => [:quantifier,  :interval,                '{3}',   5, 8]
25 | end
26 | 


--------------------------------------------------------------------------------
/spec/scanner/refcalls_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('RefCall scanning') do
 4 |   # Traditional numerical group back-reference
 5 |   include_examples 'scan', '(abc)\1' ,          3 => [:backref, :number,                  '\1',         5, 7]
 6 | 
 7 |   # Group back-references, named, numbered, and relative
 8 |   #
 9 |   # NOTE: only \g supports forward-looking references using '+', e.g. \g<+1>
10 |   # refers to the next group, but \k<+1> refers to a group named '+1'.
11 |   # Inversely, only \k supports addition or subtraction of a recursion level.
12 |   # E.g. \k<x+0> refers to a group named 'x' at the current recursion level,
13 |   # but \g<x+0> refers to a group named 'x+0'.
14 |   #
15 |   include_examples 'scan', '(?<X>abc)\k<X>',    3 => [:backref, :name_ref_ab,             '\k<X>',      9, 14]
16 |   include_examples 'scan', "(?<X>abc)\\k'X'",   3 => [:backref, :name_ref_sq,             "\\k'X'",     9, 14]
17 | 
18 |   include_examples 'scan', '(?<+1>abc)\k<+1>',  3 => [:backref, :name_ref_ab,             '\k<+1>',    10, 16]
19 |   include_examples 'scan', "(?<+1>abc)\\k'+1'", 3 => [:backref, :name_ref_sq,             "\\k'+1'",   10, 16]
20 | 
21 |   include_examples 'scan', '(abc)\k<1>',        3 => [:backref, :number_ref_ab,           '\k<1>',      5, 10]
22 |   include_examples 'scan', "(abc)\\k'1'",       3 => [:backref, :number_ref_sq,           "\\k'1'",     5, 10]
23 |   include_examples 'scan', "(abc)\\k'001'",     3 => [:backref, :number_ref_sq,           "\\k'001'",   5, 12]
24 | 
25 |   include_examples 'scan', '(abc)\k<-1>',       3 => [:backref, :number_rel_ref_ab,       '\k<-1>',     5, 11]
26 |   include_examples 'scan', "(abc)\\k'-1'",      3 => [:backref, :number_rel_ref_sq,       "\\k'-1'",    5, 11]
27 |   include_examples 'scan', '(abc)\k<-001>',     3 => [:backref, :number_rel_ref_ab,       '\k<-001>',   5, 13]
28 | 
29 |   # Sub-expression invocation, named, numbered, and relative
30 |   include_examples 'scan', '(?<X>abc)\g<X>',    3 => [:backref, :name_call_ab,            '\g<X>',      9, 14]
31 |   include_examples 'scan', "(?<X>abc)\\g'X'",   3 => [:backref, :name_call_sq,            "\\g'X'",     9, 14]
32 | 
33 |   include_examples 'scan', '(?<X>abc)\g<X-1>',  3 => [:backref, :name_call_ab,            '\g<X-1>',    9, 16]
34 |   include_examples 'scan', "(?<X>abc)\\g'X-1'", 3 => [:backref, :name_call_sq,            "\\g'X-1'",   9, 16]
35 | 
36 |   include_examples 'scan', '(abc)\g<1>',        3 => [:backref, :number_call_ab,          '\g<1>',      5, 10]
37 |   include_examples 'scan', "(abc)\\g'1'",       3 => [:backref, :number_call_sq,          "\\g'1'",     5, 10]
38 |   include_examples 'scan', '(abc)\g<001>',      3 => [:backref, :number_call_ab,          '\g<001>',    5, 12]
39 | 
40 |   include_examples 'scan', 'a(b|\g<0>)',        4 => [:backref, :number_call_ab,          '\g<0>',      4, 9]
41 |   include_examples 'scan', "a(b|\\g'0')",       4 => [:backref, :number_call_sq,          "\\g'0'",     4, 9]
42 | 
43 |   include_examples 'scan', '(abc)\g<-1>',       3 => [:backref, :number_rel_call_ab,      '\g<-1>',     5, 11]
44 |   include_examples 'scan', "(abc)\\g'-1'",      3 => [:backref, :number_rel_call_sq,      "\\g'-1'",    5, 11]
45 |   include_examples 'scan', '(abc)\g<-001>',     3 => [:backref, :number_rel_call_ab,      '\g<-001>',   5, 13]
46 | 
47 |   include_examples 'scan', '\g<+1>(abc)',       0 => [:backref, :number_rel_call_ab,      '\g<+1>',     0, 6]
48 |   include_examples 'scan', "\\g'+1'(abc)",      0 => [:backref, :number_rel_call_sq,      "\\g'+1'",    0, 6]
49 | 
50 |   # Group back-references, with recursion level
51 |   include_examples 'scan', '(?<X>abc)\k<X-0>',  3 => [:backref, :name_recursion_ref_ab,   '\k<X-0>',    9, 16]
52 |   include_examples 'scan', "(?<X>abc)\\k'X-0'", 3 => [:backref, :name_recursion_ref_sq,   "\\k'X-0'",   9, 16]
53 | 
54 |   include_examples 'scan', '(abc)\k<1-0>',      3 => [:backref, :number_recursion_ref_ab, '\k<1-0>',    5, 12]
55 |   include_examples 'scan', "(abc)\\k'1-0'",     3 => [:backref, :number_recursion_ref_sq, "\\k'1-0'",   5, 12]
56 | 
57 |   include_examples 'scan', '(abc)\k<+1-0>',     3 => [:backref, :name_recursion_ref_ab,   '\k<+1-0>',   5, 13]
58 |   include_examples 'scan', "(abc)\\k'+1-0'",    3 => [:backref, :name_recursion_ref_sq,   "\\k'+1-0'",  5, 13]
59 | end
60 | 


--------------------------------------------------------------------------------
/spec/scanner/types_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe('Type scanning') do
 4 |   include_examples 'scan', 'a\dc', 1 => [:type, :digit,     '\d', 1, 3]
 5 |   include_examples 'scan', 'a\Dc', 1 => [:type, :nondigit,  '\D', 1, 3]
 6 |   include_examples 'scan', 'a\hc', 1 => [:type, :hex,       '\h', 1, 3]
 7 |   include_examples 'scan', 'a\Hc', 1 => [:type, :nonhex,    '\H', 1, 3]
 8 |   include_examples 'scan', 'a\sc', 1 => [:type, :space,     '\s', 1, 3]
 9 |   include_examples 'scan', 'a\Sc', 1 => [:type, :nonspace,  '\S', 1, 3]
10 |   include_examples 'scan', 'a\wc', 1 => [:type, :word,      '\w', 1, 3]
11 |   include_examples 'scan', 'a\Wc', 1 => [:type, :nonword,   '\W', 1, 3]
12 |   include_examples 'scan', 'a\Rc', 1 => [:type, :linebreak, '\R', 1, 3]
13 |   include_examples 'scan', 'a\Xc', 1 => [:type, :xgrapheme, '\X', 1, 3]
14 | end
15 | 


--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
 1 | $VERBOSE = true # run the whole suite with Ruby warnings enabled
 2 | 
 3 | require 'leto'
 4 | require 'regexp_property_values'
 5 | require_relative 'support/capturing_stderr'
 6 | require_relative 'support/shared_examples'
 7 | # Require the library with stderr captured: abort on any warning, or if `require` reports it was already loaded.
 8 | req_warn = capturing_stderr { @required_now = require('regexp_parser') }
 9 | req_warn.empty? || fail("requiring parser generated warnings:\n#{req_warn}")
10 | @required_now || fail("regexp_parser was required earlier than expected")
11 | # Short aliases for the library's main constants.
12 | RS = Regexp::Scanner
13 | RL = Regexp::Lexer
14 | RP = Regexp::Parser
15 | RE = Regexp::Expression
16 | T = Regexp::Syntax::Token
17 | # Top-level include; presumably so specs can name expression classes without the namespace.
18 | include Regexp::Expression
19 | 
20 | def ruby_version_at_least(version)
21 |   Gem::Version.new(RUBY_VERSION.dup) >= Gem::Version.new(version)
22 | end
23 | 
24 | RSpec.configure do |config|
25 |   config.around(:example) do |example|
26 |     # treat unexpected warnings as failures
27 |     expect { example.run }.not_to output.to_stderr
28 |   end
29 | end
30 | 
31 | def s(klass, text = '', *children)
32 |   exp = klass.construct(text: text.to_s)
33 |   children.each { |child| exp.expressions << child }
34 |   exp
35 | end
36 | 
37 | def regexp_with_all_features
38 |   return /dummy/ unless ruby_version_at_least('2.4.1')
39 | 
40 |   Regexp.new(<<-'REGEXP', Regexp::EXTENDED)
41 |     \A
42 |     a++
43 |     (?:
44 |       \b {2}
45 |       (?>
46 |         c ??
47 |         😀😀😀
48 |         # 😄😄😄
49 |         (?# 😃😃😃 )
50 |         (
51 |           \d *+
52 |           (
53 |             ALT1
54 |             |
55 |             ALT2
56 |           )
57 |         ) {004}
58 |         |
59 |         [ä-ü&&ö[:ascii:]\p{thai}] {6}
60 |         |
61 |         \z
62 |       )
63 |       (?=lm{8}) ?+
64 |       \K
65 |       (?~
66 |         \1
67 |         \g<-1> {10}
68 |         \uFFFF
69 |         \012
70 |       )
71 |       (?(1)
72 |         BRANCH1
73 |         |
74 |         BRANCH2
75 |       )
76 |     )
77 |   REGEXP
78 | end
79 | 


--------------------------------------------------------------------------------
/spec/support/capturing_stderr.rb:
--------------------------------------------------------------------------------
 1 | require 'stringio'
 2 | 
 3 | def capturing_stderr(&block)
 4 |   old_stderr, $stderr = $stderr, StringIO.new
 5 |   block.call
 6 |   $stderr.string
 7 | ensure
 8 |   $stderr = old_stderr
 9 | end
10 | 


--------------------------------------------------------------------------------
/spec/support/shared_examples.rb:
--------------------------------------------------------------------------------
 1 | RSpec.shared_examples 'syntax' do |opts|
 2 |   opts[:implements].each do |type, tokens|
 3 |     tokens.each do |token|
 4 |       it("implements #{token} #{type}") do
 5 |         expect(described_class.implements?(type, token)).to be true
 6 |       end
 7 |     end
 8 |   end
 9 | 
10 |   opts[:excludes] && opts[:excludes].each do |type, tokens|
11 |     tokens.each do |token|
12 |       it("does not implement #{token} #{type}") do
13 |         expect(described_class.implements?(type, token)).to be false
14 |       end
15 |     end
16 |   end
17 | end
18 | 
19 | RSpec.shared_examples 'scan' do |pattern, checks|
20 |   context "given the pattern #{pattern}" do
21 |     before(:all) { @tokens = Regexp::Scanner.scan(pattern) }
22 | 
23 |     checks.each do |index, (type, token, text, ts, te)|
24 |       it "scans token #{index} as #{token} #{type} at #{ts}..#{te}" do
25 |         result = @tokens.at(index)
26 |         result || fail("no token at index #{index}, max is #{@tokens.size - 1}")
27 | 
28 |         expect(result[0]).to eq type
29 |         expect(result[1]).to eq token
30 |         expect(result[2]).to eq text
31 |         expect(result[3]).to eq ts
32 |         expect(result[4]).to eq te
33 |       end
34 |     end
35 |   end
36 | end
37 | 
38 | RSpec.shared_examples 'lex' do |pattern, checks|
39 |   context "given the pattern #{pattern}" do
40 |     before(:all) { @tokens = Regexp::Lexer.lex(pattern) }
41 | 
42 |     checks.each do |index, (type, token, text, ts, te, lvl, set_lvl, cond_lvl)|
43 |       it "lexes token #{index} as #{token} #{type} at #{lvl}, #{set_lvl}, #{cond_lvl}" do
44 |         struct = @tokens.at(index)
45 | 
46 |         expect(struct.type).to eq type
47 |         expect(struct.token).to eq token
48 |         expect(struct.text).to eq text
49 |         expect(struct.ts).to eq ts
50 |         expect(struct.te).to eq te
51 |         expect(struct.level).to eq lvl
52 |         expect(struct.set_level).to eq set_lvl
53 |         expect(struct.conditional_level).to eq cond_lvl
54 |       end
55 |     end
56 |   end
57 | end
58 | 
59 | RSpec.shared_examples 'parse' do |pattern, checks|
60 |   context "given the pattern #{pattern}" do
61 |     before(:all) { @root = Regexp::Parser.parse(pattern, '*') }
62 | 
63 |     checks.each do |path, expectations|
64 |       path = Array(path)
65 |       inspect_quantifier = path.last == :q && path.pop
66 | 
67 |       attributes = expectations.pop if expectations.last.is_a?(Hash)
68 |       klass      = expectations.pop if expectations.last.is_a?(Class)
69 |       token      = expectations.pop
70 |       type       = expectations.pop
71 | 
72 |       description = klass || token || type || 'Expression'
73 | 
74 |       it "parses expression at #{path} as #{description}" do
75 |         exp = @root.dig(*path)
76 |         exp = exp.quantifier if inspect_quantifier
77 | 
78 |         klass && expect(exp).to(be_instance_of(klass))
79 |         type  && expect(exp.type).to(eq(type))
80 |         token && expect(exp.token).to(eq(token))
81 | 
82 |         attributes && attributes.each do |method, value|
83 |           actual = exp.send(method)
84 |           expect(actual).to eq(value),
85 |             "expected #{description} at #{path} to "\
86 |             "have #{method} #{value.inspect}, got #{actual.inspect}"
87 |         end
88 |       end
89 |     end
90 |   end
91 | end
92 | 


--------------------------------------------------------------------------------
/spec/syntax/syntax_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | # Specs for resolving Regexp::Syntax version classes by name, plus related helpers and deprecations.
 4 | RSpec.describe(Regexp::Syntax) do
 5 |   describe('::for') do
 6 |     # Each name is expected to resolve to the class on the right; names with no class of their own (e.g. 'ruby/2.1', 'ruby/3.0') map to another version's class.
 7 |     it { expect(Regexp::Syntax.for('ruby/1.8.6')).to eq  Regexp::Syntax::V1_8_6 }
 8 |     it { expect(Regexp::Syntax.for('ruby/1.8')).to eq    Regexp::Syntax::V1_8_6 }
 9 |     it { expect(Regexp::Syntax.for('ruby/1.9.1')).to eq  Regexp::Syntax::V1_9_1 }
10 |     it { expect(Regexp::Syntax.for('ruby/1.9')).to eq    Regexp::Syntax::V1_9_3 }
11 |     it { expect(Regexp::Syntax.for('ruby/2.0.0')).to eq  Regexp::Syntax::V2_0_0 }
12 |     it { expect(Regexp::Syntax.for('ruby/2.0')).to eq    Regexp::Syntax::V2_0_0 }
13 |     it { expect(Regexp::Syntax.for('ruby/2.1')).to eq    Regexp::Syntax::V2_0_0 }
14 |     it { expect(Regexp::Syntax.for('ruby/2.2.0')).to eq  Regexp::Syntax::V2_2_0 }
15 |     it { expect(Regexp::Syntax.for('ruby/2.2.10')).to eq Regexp::Syntax::V2_2_0 }
16 |     it { expect(Regexp::Syntax.for('ruby/2.2')).to eq    Regexp::Syntax::V2_2_0 }
17 |     it { expect(Regexp::Syntax.for('ruby/2.3.0')).to eq  Regexp::Syntax::V2_3_0 }
18 |     it { expect(Regexp::Syntax.for('ruby/2.3')).to eq    Regexp::Syntax::V2_3_0 }
19 |     it { expect(Regexp::Syntax.for('ruby/2.4.0')).to eq  Regexp::Syntax::V2_4_0 }
20 |     it { expect(Regexp::Syntax.for('ruby/2.4.1')).to eq  Regexp::Syntax::V2_4_1 }
21 |     it { expect(Regexp::Syntax.for('ruby/2.5.0')).to eq  Regexp::Syntax::V2_5_0 }
22 |     it { expect(Regexp::Syntax.for('ruby/2.5')).to eq    Regexp::Syntax::V2_5_0 }
23 |     it { expect(Regexp::Syntax.for('ruby/2.6.0')).to eq  Regexp::Syntax::V2_6_0 }
24 |     it { expect(Regexp::Syntax.for('ruby/2.6.2')).to eq  Regexp::Syntax::V2_6_2 }
25 |     it { expect(Regexp::Syntax.for('ruby/2.6.3')).to eq  Regexp::Syntax::V2_6_3 }
26 |     it { expect(Regexp::Syntax.for('ruby/2.6')).to eq    Regexp::Syntax::V2_6_3 }
27 |     it { expect(Regexp::Syntax.for('ruby/3.0.0')).to eq  Regexp::Syntax::V2_6_3 }
28 |     it { expect(Regexp::Syntax.for('ruby/3.0')).to eq    Regexp::Syntax::V2_6_3 }
29 |     it { expect(Regexp::Syntax.for('ruby/3.1.0')).to eq  Regexp::Syntax::V3_1_0 }
30 |     it { expect(Regexp::Syntax.for('ruby/3.1')).to eq    Regexp::Syntax::V3_1_0 }
31 |     it { expect(Regexp::Syntax.for('ruby/3.2.0')).to eq  Regexp::Syntax::V3_2_0 }
32 |     it { expect(Regexp::Syntax.for('ruby/3.2')).to eq    Regexp::Syntax::V3_2_0 }
33 | 
34 |     # 'any' and '*' both select the catch-all syntax
35 |     it { expect(Regexp::Syntax.for('any')).to eq         Regexp::Syntax::Any }
36 |     it { expect(Regexp::Syntax.for('*')).to eq           Regexp::Syntax::Any }
37 | 
38 |     it 'raises for unknown names' do
39 |       expect { Regexp::Syntax.for('ruby/1.0') }.to raise_error(Regexp::Syntax::UnknownSyntaxNameError)
40 |     end
41 | 
42 |     it 'raises for invalid names' do
43 |       expect { Regexp::Syntax.version_class('2.0.0') }.to raise_error(Regexp::Syntax::InvalidVersionNameError)
44 |       expect { Regexp::Syntax.version_class('ruby/20') }.to raise_error(Regexp::Syntax::InvalidVersionNameError)
45 |     end
46 |   end
47 | 
48 |   specify('::new is a deprecated alias of ::for') do
49 |     # still returns the syntax class, but must print a deprecation notice to stderr
50 |     expect { expect(Regexp::Syntax.new('ruby/2.0.0')).to eq Regexp::Syntax::V2_0_0 }
51 |       .to output(/deprecated/).to_stderr
52 |   end
53 | 
54 |   specify('not implemented') do
55 |     # parsing \p{alpha} against the ruby/1.8 syntax must be rejected
56 |     expect { RP.parse('\p{alpha}', 'ruby/1.8') }.to raise_error(Regexp::Syntax::NotImplementedError)
57 |   end
58 | 
59 |   specify('supported?') do
60 |     expect(Regexp::Syntax.supported?('ruby/1.1.1')).to be false
61 |     expect(Regexp::Syntax.supported?('ruby/2.4.3')).to be true
62 |     expect(Regexp::Syntax.supported?('ruby/2.5')).to be true
63 |   end
64 | 
65 |   specify('raises for unknown constant lookups') do
66 |     expect { Regexp::Syntax::V1 }.to raise_error(/V1/)
67 |   end
68 | 
69 |   specify('instantiation is deprecated but still works') do
70 |     # both creating the instance and calling a method on it must warn on stderr
71 |     expect { @instance = Regexp::Syntax::V3_1_0.new }
72 |       .to output(/deprecated/).to_stderr
73 |     expect { expect(@instance.implements?(:literal, :literal)).to be true }
74 |       .to output(/deprecated/).to_stderr
75 |   end
76 | end
77 | 


--------------------------------------------------------------------------------
/spec/syntax/syntax_token_map_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Syntax::Token::Map) do
 4 |   let(:map) { Regexp::Syntax::Token::Map }
 5 |   let(:current_syntax) { Regexp::Syntax::CURRENT }
 6 | 
 7 |   specify('is complete') do
 8 |     current_syntax.features.each do |type, tokens|
 9 |       tokens.each { |token| expect(map[type]).to include(token) }
10 |     end
11 |   end
12 | 
13 |   specify('contains no duplicate tokens') do
14 |     current_syntax.features.each do |_type, tokens|
15 |       expect(tokens).to eq tokens.uniq
16 |     end
17 |   end
18 | 
19 |   specify('contains no duplicate type/token combinations') do
20 |     combinations = map.flat_map do |type, tokens|
21 |       tokens.map { |token| "#{type} #{token}" }
22 |     end
23 | 
24 |     non_uniq = combinations.group_by { |str| str }.select { |_, v| v.count > 1 }
25 | 
26 |     expect(non_uniq.keys).to be_empty
27 |   end
28 | end
29 | 


--------------------------------------------------------------------------------
/spec/syntax/versions/1.8.6_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Syntax::V1_8_6) do
 4 |   include_examples 'syntax',
 5 |   implements: {
 6 |     assertion: T::Assertion::Lookahead,
 7 |     backref: T::Backreference::Plain,
 8 |     escape: T::Escape::Basic + T::Escape::ASCII + T::Escape::Meta + T::Escape::Control,
 9 |     group: T::Group::V1_8_6,
10 |     quantifier: T::Quantifier::Greedy + T::Quantifier::Reluctant + T::Quantifier::Interval + T::Quantifier::IntervalReluctant
11 |   },
12 |   excludes: {
13 |     assertion: T::Assertion::Lookbehind,
14 |     backref: T::Backreference::All - T::Backreference::Plain + T::SubexpressionCall::All,
15 |     quantifier: T::Quantifier::Possessive
16 |   }
17 | end
18 | 


--------------------------------------------------------------------------------
/spec/syntax/versions/1.9.1_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Syntax::V1_9_1) do
 4 |   include_examples 'syntax',
 5 |   implements: {
 6 |     escape: T::Escape::Hex + T::Escape::Octal + T::Escape::Unicode,
 7 |     type: T::CharacterType::Hex,
 8 |     quantifier: T::Quantifier::Greedy + T::Quantifier::Reluctant + T::Quantifier::Possessive
 9 |   }
10 | end
11 | 


--------------------------------------------------------------------------------
/spec/syntax/versions/1.9.3_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Syntax::V1_9_3) do
 4 |   include_examples 'syntax',
 5 |   implements: {
 6 |     property: T::UnicodeProperty::Script_V1_9_3 + T::UnicodeProperty::Age_V1_9_3,
 7 |     nonproperty: T::UnicodeProperty::Script_V1_9_3 + T::UnicodeProperty::Age_V1_9_3
 8 |   }
 9 | end
10 | 


--------------------------------------------------------------------------------
/spec/syntax/versions/2.0.0_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Syntax::V2_0_0) do
 4 |   include_examples 'syntax',
 5 |   implements: {
 6 |     property: T::UnicodeProperty::Age_V2_0_0,
 7 |     nonproperty: T::UnicodeProperty::Age_V2_0_0
 8 |   },
 9 |   excludes: {
10 |     property: %i[newline],
11 |     nonproperty: %i[newline]
12 |   }
13 | end
14 | 


--------------------------------------------------------------------------------
/spec/syntax/versions/2.2.0_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Syntax::V2_2_0) do
 4 |   include_examples 'syntax',
 5 |   implements: {
 6 |     property: T::UnicodeProperty::Script_V2_2_0 + T::UnicodeProperty::Age_V2_2_0,
 7 |     nonproperty: T::UnicodeProperty::Script_V2_2_0 + T::UnicodeProperty::Age_V2_2_0
 8 |   }
 9 | end
10 | 


--------------------------------------------------------------------------------
/spec/syntax/versions/3.2.0_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Syntax::V3_2_0) do
 4 |   include_examples 'syntax',
 5 |   implements: {
 6 |     property: T::UnicodeProperty::Script_V3_2_0 + T::UnicodeProperty::Age_V3_2_0,
 7 |     nonproperty: T::UnicodeProperty::Script_V3_2_0 + T::UnicodeProperty::Age_V3_2_0
 8 |   }
 9 | end
10 | 


--------------------------------------------------------------------------------
/spec/token/token_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | RSpec.describe(Regexp::Token) do
 4 |   specify('#offset') do
 5 |     regexp = /ab?cd/
 6 |     tokens = RL.lex(regexp)
 7 | 
 8 |     expect(tokens[1].text).to eq 'b'
 9 |     expect(tokens[1].offset).to eq [1, 2]
10 | 
11 |     expect(tokens[2].text).to eq '?'
12 |     expect(tokens[2].offset).to eq [2, 3]
13 | 
14 |     expect(tokens[3].text).to eq 'cd'
15 |     expect(tokens[3].offset).to eq [3, 5]
16 |   end
17 | 
18 |   specify('#length') do
19 |     regexp = /abc?def/
20 |     tokens = RL.lex(regexp)
21 | 
22 |     expect(tokens[0].text).to eq 'ab'
23 |     expect(tokens[0].length).to eq 2
24 | 
25 |     expect(tokens[1].text).to eq 'c'
26 |     expect(tokens[1].length).to eq 1
27 | 
28 |     expect(tokens[2].text).to eq '?'
29 |     expect(tokens[2].length).to eq 1
30 | 
31 |     expect(tokens[3].text).to eq 'def'
32 |     expect(tokens[3].length).to eq 3
33 |   end
34 | 
35 |   specify('#to_h') do
36 |     regexp = /abc?def/
37 |     tokens = RL.lex(regexp)
38 | 
39 |     expect(tokens[0].text).to eq 'ab'
40 |     expect(tokens[0].to_h).to eq type: :literal, token: :literal, text: 'ab', ts: 0, te: 2, level: 0, set_level: 0, conditional_level: 0
41 | 
42 |     expect(tokens[2].text).to eq '?'
43 |     expect(tokens[2].to_h).to eq type: :quantifier, token: :zero_or_one, text: '?', ts: 3, te: 4, level: 0, set_level: 0, conditional_level: 0
44 |   end
45 | 
46 |   specify('#next') do
47 |     regexp = /a+b?c*d{2,3}/
48 |     tokens = RL.lex(regexp)
49 | 
50 |     a = tokens.first
51 |     expect(a.text).to eq 'a'
52 | 
53 |     plus = a.next
54 |     expect(plus.text).to eq '+'
55 | 
56 |     b = plus.next
57 |     expect(b.text).to eq 'b'
58 | 
59 |     interval = tokens.last
60 |     expect(interval.text).to eq '{2,3}'
61 | 
62 |     expect(interval.next).to be_nil
63 |   end
64 | 
65 |   specify('#previous') do
66 |     regexp = /a+b?c*d{2,3}/
67 |     tokens = RL.lex(regexp)
68 | 
69 |     interval = tokens.last
70 |     expect(interval.text).to eq '{2,3}'
71 | 
72 |     d = interval.previous
73 |     expect(d.text).to eq 'd'
74 | 
75 |     star = d.previous
76 |     expect(star.text).to eq '*'
77 | 
78 |     c = star.previous
79 |     expect(c.text).to eq 'c'
80 | 
81 |     a = tokens.first
82 |     expect(a.text).to eq 'a'
83 |     expect(a.previous).to be_nil
84 |   end
85 | end
86 | 


--------------------------------------------------------------------------------
/tasks/benchmark.rake:
--------------------------------------------------------------------------------
 1 | BENCHMARKS_DIR = "#{__dir__}/benchmarks"
 2 | 
 3 | desc 'Run all IPS benchmarks'
 4 | task :benchmark do
 5 |   Dir["#{BENCHMARKS_DIR}/*.rb"].sort.each { |file| load(file) }
 6 | end
 7 | 
 8 | namespace :benchmark do
 9 |   desc 'Run all IPS benchmarks and store the comparison results in BENCHMARK.md'
10 |   task :write_to_file do
11 |     require 'stringio'
12 | 
13 |     string_io = StringIO.new
14 |     with_stdouts(STDOUT, string_io) { Rake.application[:benchmark].invoke }
15 | 
16 |     File.write "#{BENCHMARKS_DIR}/log",
17 |                "Results of rake:benchmark on #{RUBY_DESCRIPTION}\n\n" +
18 |                string_io.string.gsub(/Warming up.*?Comparison:/m, '')
19 |   end
20 | end
21 | 
22 | def with_stdouts(*ios)
23 |   old_stdout = $stdout
24 |   ios.define_singleton_method(:method_missing) { |*args| each { |io| io.send(*args) } }
25 |   ios.define_singleton_method(:respond_to?) { |*args| IO.respond_to?(*args) }
26 |   $stdout = ios
27 |   yield
28 | ensure
29 |   $stdout = old_stdout
30 | end
31 | 


--------------------------------------------------------------------------------
/tasks/benchmarks/log:
--------------------------------------------------------------------------------
 1 | Results of rake:benchmark on ruby 3.1.0p0 (2021-12-25 revision fb4df44d16) [arm64-darwin21]
 2 | 
 3 | Parsing a minimal Regexp
 4 | 
 5 |        Scanner::scan:    32069.4 i/s
 6 |           Lexer::lex:    30700.6 i/s - same-ish: difference falls within error
 7 |        Parser::parse:    26248.5 i/s - 1.22x  (± 0.00) slower
 8 | 
 9 | Parsing a complex Regexp (URI.regexp)
10 | 
11 |        Scanner::scan:      843.4 i/s
12 |           Lexer::lex:      546.3 i/s - 1.54x  (± 0.00) slower
13 |        Parser::parse:      332.5 i/s - 2.54x  (± 0.00) slower
14 | 
15 | 


--------------------------------------------------------------------------------
/tasks/benchmarks/minimal_regexp.rb:
--------------------------------------------------------------------------------
 1 | require 'benchmark/ips'
 2 | require_relative '../../lib/regexp_parser'
 3 | 
 4 | puts 'Parsing a minimal Regexp'
 5 | 
 6 | regexp = /./
 7 | 
 8 | Benchmark.ips do |x|
 9 |   x.report('Scanner::scan') { Regexp::Scanner.scan(regexp) }
10 |   x.report('Lexer::lex')    { Regexp::Lexer.lex(regexp)    }
11 |   x.report('Parser::parse') { Regexp::Parser.parse(regexp) }
12 |   x.compare!
13 | end
14 | 


--------------------------------------------------------------------------------
/tasks/benchmarks/uri_regexp.rb:
--------------------------------------------------------------------------------
 1 | require 'benchmark/ips'
 2 | require_relative '../../lib/regexp_parser'
 3 | 
 4 | puts 'Parsing a complex Regexp (URI.regexp)'
 5 | 
 6 | require 'uri'
 7 | regexp = URI::DEFAULT_PARSER.make_regexp
 8 | 
 9 | Benchmark.ips do |x|
10 |   x.report('Scanner::scan') { Regexp::Scanner.scan(regexp) }
11 |   x.report('Lexer::lex')    { Regexp::Lexer.lex(regexp)    }
12 |   x.report('Parser::parse') { Regexp::Parser.parse(regexp) }
13 |   x.compare!
14 | end
15 | 


--------------------------------------------------------------------------------
/tasks/props.rake:
--------------------------------------------------------------------------------
 1 | namespace :props do
 2 |   desc 'Write new property value hashes for the properties scanner'
 3 |   task :update do
 4 |     require 'regexp_property_values'
 5 |     RegexpPropertyValues.update
 6 |     dir = File.join(__dir__, '../lib/regexp_parser/scanner/properties')
 7 | 
 8 |     write_hash_to_file = ->(hash, path) do
 9 |       File.open(path, 'w') do |f|
10 |         f.puts '# THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT',
11 |                *hash.sort.map { |pair| pair.join(',') }
12 |       end
13 |       puts "Wrote #{hash.count} aliases to `#{path}`"
14 |     end
15 | 
16 |     long_names_to_tokens = RegexpPropertyValues.all.map do |val|
17 |       [val.identifier, val.full_name.downcase]
18 |     end
19 |     write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.csv")
20 | 
21 |     short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
22 |       [k.identifier, v.full_name.downcase]
23 |     end
24 |     write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.csv")
25 |   end
26 | end
27 | 


--------------------------------------------------------------------------------
/tasks/ragel.rake:
--------------------------------------------------------------------------------
 1 | RAGEL_SOURCE_DIR = File.join(__dir__, '../lib/regexp_parser/scanner')
 2 | RAGEL_OUTPUT_DIR = File.join(__dir__, '../lib/regexp_parser')
 3 | RAGEL_SOURCE_FILES = %w[scanner] # scanner.rl imports the other files
 4 | 
 5 | namespace :ragel do
 6 |   desc 'Process the ragel source files and output ruby code'
 7 |   task rb: :install do |task|
 8 |     RAGEL_SOURCE_FILES.each do |source_file|
 9 |       source_path = "#{RAGEL_SOURCE_DIR}/#{source_file}.rl"
10 |       output_path = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
11 |       # -L = omit line hint comments
12 |       flags = ENV['DEBUG_RAGEL'].to_i == 1 ? ['-p'] : ['-L']
13 |       # using faster flat table driven FSM, about 25% larger code, but about 30% faster
14 |       flags << '-F1'
15 |       sh "ragel -R #{source_path} -o #{output_path} #{flags.join(' ')}"
16 | 
17 |       contents = File
18 |         .read(output_path)
19 |         .gsub(/[ \t]+$/, '') # remove trailing whitespace emitted by ragel
20 |         .gsub(/(?<=\d,)[ \t]+|^[ \t]+(?=-?\d)/, '') # compact FSM tables (saves ~6KB)
21 |         .gsub(/\n(?:[ \t]*\n){2,}/, "\n\n") # compact blank lines
22 | 
23 |       File.open(output_path, 'w') do |file|
24 |         file.puts <<~RUBY
25 |           # -*- warn-indent:false;  -*-
26 |           #
27 |           # THIS IS A GENERATED FILE, DO NOT EDIT DIRECTLY
28 |           #
29 |           # This file was generated from #{source_path.split('/').last}
30 |           # by running `bundle exec rake #{task.name}`
31 |         RUBY
32 | 
33 |         file.write(contents)
34 |       end
35 |     end
36 |   end
37 | 
38 |   desc 'Delete the ragel generated source file(s)'
39 |   task :clean do
40 |     RAGEL_SOURCE_FILES.each do |file|
41 |       sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
42 |     end
43 |   end
44 | 
45 |   desc 'Make sure that ragel is installed'
46 |   task :install do
47 |     next if ENV['CI']
48 | 
49 |     if system('command -v ragel')
50 |       # already installed
51 |     elsif system('command -v brew')
52 |       puts 'ragel not found, installing with homebrew ...'
53 |       `brew install ragel`
54 |     elsif system('command -v apt-get')
55 |       puts 'ragel not found, installing with apt-get ...'
56 |       `sudo apt-get install -y ragel`
57 |     else
58 |       raise 'Could not install ragel. Please install it manually.'
59 |     end
60 |   end
61 | end
62 | 


--------------------------------------------------------------------------------