├── .dir-locals.el ├── .document ├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ └── release.yml ├── .gitignore ├── .rdoc_options ├── COPYING ├── Gemfile ├── LICENSE.txt ├── NEWS.md ├── README.md ├── Rakefile ├── benchmark ├── check.yaml ├── check_until.yaml ├── scan.yaml └── search.yaml ├── bin ├── console └── setup ├── doc └── strscan │ ├── helper_methods.md │ ├── link_refs.txt │ ├── methods │ ├── get_byte.md │ ├── get_charpos.md │ ├── get_pos.md │ ├── getch.md │ ├── scan.md │ ├── scan_until.md │ ├── set_pos.md │ ├── skip.md │ ├── skip_until.md │ └── terminate.md │ └── strscan.md ├── ext ├── jruby │ ├── lib │ │ └── strscan.rb │ └── org │ │ └── jruby │ │ └── ext │ │ └── strscan │ │ ├── RubyStringScanner.java │ │ └── StringScannerLibrary.java └── strscan │ ├── extconf.rb │ └── strscan.c ├── lib └── strscan │ └── strscan.rb ├── run-test.rb ├── strscan.gemspec └── test ├── lib └── helper.rb └── strscan ├── test_ractor.rb └── test_stringscanner.rb /.dir-locals.el: -------------------------------------------------------------------------------- 1 | ((c-mode . ((c-file-style . 
"ruby")))) 2 | -------------------------------------------------------------------------------- /.document: -------------------------------------------------------------------------------- 1 | COPYING 2 | LICENSE.txt 3 | NEWS.md 4 | README.md 5 | ext/strscan/strscan.c 6 | doc/strscan/ 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: 'github-actions' 4 | directory: '/' 5 | schedule: 6 | interval: 'weekly' 7 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | env: 8 | JRUBY_OPTS: "-X+C" # temporarily force JRuby to compile, so Java exception trace will contain .rb lines 9 | 10 | jobs: 11 | ruby-versions: 12 | uses: ruby/actions/.github/workflows/ruby_versions.yml@master 13 | with: 14 | min_version: 2.4 15 | 16 | host: 17 | needs: ruby-versions 18 | name: ${{ matrix.os }} ${{ matrix.ruby }} 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }} 24 | os: 25 | - ubuntu-latest 26 | - macos-latest 27 | - windows-latest 28 | include: 29 | - { os: windows-latest , ruby: mingw } 30 | - { os: windows-latest , ruby: mswin } 31 | exclude: 32 | - { os: macos-latest , ruby: 2.4 } 33 | - { os: macos-latest , ruby: 2.5 } 34 | - { os: windows-latest , ruby: head } 35 | - { os: ubuntu-latest , ruby: truffleruby } 36 | - { os: macos-latest , ruby: truffleruby } 37 | - { os: windows-latest , ruby: truffleruby } 38 | - { os: windows-latest , ruby: truffleruby-head } 39 | 40 | steps: 41 | - uses: actions/checkout@v4 42 | 43 | - uses: actions/setup-java@v4 44 | with: 45 | distribution: zulu 46 | java-version: 
21 47 | if: >- 48 | startsWith(matrix.ruby, 'jruby') 49 | 50 | - name: Set up Ruby 51 | uses: ruby/setup-ruby@v1 52 | with: 53 | ruby-version: ${{ matrix.ruby }} 54 | bundler-cache: true 55 | 56 | - run: bundle exec rake compile 57 | 58 | # If strscan in Ruby's master has the same version of this strscan, 59 | # "gem install pkg/*.gem" fails. 60 | - run: bundle exec rake version:bump 61 | if: >- 62 | matrix.ruby == 'head' || 63 | matrix.ruby == 'mingw' || 64 | matrix.ruby == 'mswin' 65 | 66 | - run: bundle exec rake build 67 | 68 | - uses: actions/upload-artifact@v4 69 | if: >- 70 | matrix.os == 'ubuntu-latest' && 71 | (matrix.ruby == '3.3' || matrix.ruby == 'jruby') 72 | with: 73 | name: gem-${{ matrix.os }}-${{ matrix.ruby }} 74 | path: pkg/ 75 | 76 | - run: bundle exec rake test 77 | 78 | - run: gem install --verbose --backtrace pkg/*.gem 79 | 80 | - run: gem install test-unit-ruby-core test-unit 81 | 82 | - name: Run tests on the installed gem 83 | run: ruby run-test.rb 84 | if: matrix.ruby != '2.4' # strscan is a default gem from 2.5 85 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | tags: 5 | - "*" 6 | jobs: 7 | github: 8 | name: GitHub 9 | runs-on: ubuntu-latest 10 | timeout-minutes: 10 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Release 14 | run: | 15 | ruby \ 16 | -e 'print("## strscan "); \ 17 | puts(ARGF.read.split(/^## /)[1].gsub(/ {.+?}/, ""))' \ 18 | NEWS.md > release-note.md 19 | title="$(head -n 1 release-note.md | sed -e 's/^## //')" 20 | tail -n +2 release-note.md > release-note-without-version.md 21 | gh release create \ 22 | ${GITHUB_REF_NAME} \ 23 | --discussion-category Announcements \ 24 | --notes-file release-note-without-version.md \ 25 | --title "${title}" 26 | env: 27 | GH_TOKEN: ${{ github.token }} 28 | 29 | rubygems: 30 | name: RubyGems 
31 | runs-on: ubuntu-latest 32 | timeout-minutes: 10 33 | permissions: 34 | id-token: write 35 | environment: release 36 | strategy: 37 | matrix: 38 | ruby: 39 | - ruby 40 | - jruby 41 | steps: 42 | - uses: actions/checkout@v4 43 | - uses: ruby/setup-ruby@v1 44 | with: 45 | ruby-version: ${{ matrix.ruby }} 46 | bundler-cache: true 47 | - name: Build 48 | run: | 49 | bundle exec rake compile 50 | - uses: rubygems/configure-rubygems-credentials@v1.0.0 51 | - name: Push gems 52 | run: | 53 | bundle exec rake release:rubygem_push 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /html/ 7 | /lib/*.jar 8 | /lib/*.so 9 | /lib/*.dylib 10 | /lib/*.bundle 11 | /pkg/ 12 | /spec/reports/ 13 | /tmp/ 14 | -------------------------------------------------------------------------------- /.rdoc_options: -------------------------------------------------------------------------------- 1 | --- 2 | rdoc_include: 3 | - doc 4 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | StringScanner is copyrighted free software by Minero Aoki. 2 | You can redistribute it and/or modify it under either the terms of the 3 | 2-clause BSDL (see the file LICENSE.txt), or the conditions below: 4 | 5 | 1. You may make and give away verbatim copies of the source form of the 6 | software without restriction, provided that you duplicate all of the 7 | original copyright notices and associated disclaimers. 8 | 9 | 2. You may modify your copy of the software in any way, provided that 10 | you do at least ONE of the following: 11 | 12 | a. 
place your modifications in the Public Domain or otherwise 13 | make them Freely Available, such as by posting said 14 | modifications to Usenet or an equivalent medium, or by allowing 15 | the author to include your modifications in the software. 16 | 17 | b. use the modified software only within your corporation or 18 | organization. 19 | 20 | c. give non-standard binaries non-standard names, with 21 | instructions on where to get the original software distribution. 22 | 23 | d. make other distribution arrangements with the author. 24 | 25 | 3. You may distribute the software in object code or binary form, 26 | provided that you do at least ONE of the following: 27 | 28 | a. distribute the binaries and library files of the software, 29 | together with instructions (in the manual page or equivalent) 30 | on where to get the original distribution. 31 | 32 | b. accompany the distribution with the machine-readable source of 33 | the software. 34 | 35 | c. give non-standard binaries non-standard names, with 36 | instructions on where to get the original software distribution. 37 | 38 | d. make other distribution arrangements with the author. 39 | 40 | 4. You may modify and include the part of the software into any other 41 | software (possibly commercial). But some files in the distribution 42 | are not written by the author, so that they are not under these terms. 43 | 44 | For the list of those files and their copying conditions, see the 45 | file LEGAL. 46 | 47 | 5. The scripts and library files supplied as input to or produced as 48 | output from the software do not automatically fall under the 49 | copyright of the software, but belong to whomever generated them, 50 | and may be sold commercially, and may be aggregated with this 51 | software. 52 | 53 | 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR 54 | IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 55 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 56 | PURPOSE. 
57 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gemspec 4 | 5 | group :development do 6 | gem "benchmark-driver" 7 | gem "rake-compiler" 8 | gem "rdoc" 9 | gem "ruby-maven", :platforms => :jruby 10 | gem "test-unit" 11 | gem "test-unit-ruby-core" 12 | end 13 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 1999-2006 Minero Aoki. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 1. Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 13 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 16 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 17 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 18 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 19 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 21 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 22 | SUCH DAMAGE. 
23 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # News 2 | 3 | ## 3.1.5 - 2025-05-03 4 | 5 | ### Fixes 6 | 7 | * JRuby: `StringScanner#scan_integer`: Fixed an integer conversion 8 | problem. 9 | * GH-150 10 | * jruby/jruby#8823 11 | * Reported by Thomas Leitner 12 | 13 | * JRuby: `StringScanner#scan_integer`: Added missing length check. 14 | * GH-152 15 | * GH-153 16 | 17 | ### Thanks 18 | 19 | * Thomas Leitner 20 | 21 | ## 3.1.4 - 2025-04-29 22 | 23 | ### Fixes 24 | 25 | * `StringScanner#named_captures`: Fixed an incompatible behavior 26 | when there are multiple same name groups. 27 | * GH-145 28 | * GH-146 29 | * Reported by Linus Sellberg 30 | 31 | ### Thanks 32 | 33 | * Linus Sellberg 34 | 35 | ## 3.1.3 - 2025-04-13 36 | 37 | ### Fixes 38 | 39 | * `StringScanner#scan_integer`: Fixed a bug that matched data isn't 40 | updated. 41 | * GH-130 42 | * GH-133 43 | * Reported by Andrii Konchyn 44 | * Patch by Jean Boussier 45 | 46 | * `StringScanner#scan_until`: Fixed a bug that `String` pattern 47 | usage set incorrect match data. 48 | * GH-131 49 | * GH-138 50 | * Reported by Andrii Konchyn 51 | * Patch by NAITOH Jun 52 | 53 | * `StringScanner#scan_integer(base: 16)`: Fixed a bug that `0x` 54 | and `0x` isn't matched. 55 | * GH-140 56 | * GH-141 57 | * Reported by Andrii Konchyn 58 | * Patch by Jean Boussier 59 | 60 | * Fixed a bug that `String` pattern doesn't clear named captures. 61 | * GH-135 62 | * GH-142 63 | * Reported by Andrii Konchyn 64 | * Patch by NAITOH Jun 65 | 66 | * `StringScanner#[]`: Fixed a bug that `String` pattern and unknown 67 | capture group name returns `nil` instead of raising `IndexError` 68 | like `Regexp` pattern. 
69 | * GH-139 70 | * GH-143 71 | * Reported by Benoit Daloze 72 | * Patch by NAITOH Jun 73 | 74 | * `StringScanner#pos` and `StringScanner#pos=`: Fixed a bug that 75 | position is treated as `int` not `long`. 76 | * GH-147 77 | * Patch by Jean Boussier 78 | 79 | ### Thanks 80 | 81 | * Andrii Konchyn 82 | * Jean Boussier 83 | * NAITOH Jun 84 | 85 | ## 3.1.2 - 2024-12-15 86 | 87 | ### Fixes 88 | 89 | * JRuby: Fixed a bug that `StringScanner#scan_integer` may read 90 | out-of-bounds data. 91 | * GH-125 92 | * GH-127 93 | 94 | * JRuby: Fixed a wrong `StringScanner::Version` value bug. 95 | 96 | ## 3.1.1 - 2024-12-12 97 | 98 | ### Improvements 99 | 100 | * Added `StringScanner#scan_byte` 101 | * GH-89 102 | 103 | * Added `StringScanner#peek_byte` 104 | * GH-89 105 | 106 | * Added support for `String` pattern by the following methods: 107 | * `StringScanner#exist?` 108 | * `StringScanner#scan_until` 109 | * `StringScanner#skip_until` 110 | * `StringScanner#check_until` 111 | * `StringScanner#search_full` 112 | * GH-106 113 | * Patch by NAITOH Jun 114 | 115 | * Improved performance. 116 | * GH-108 117 | * GH-109 118 | * GH-110 119 | * Patch by NAITOH Jun 120 | 121 | * Improved performance. 122 | * GH-117 123 | 124 | * Added `StringScanner#scan_integer` 125 | * GH-113 126 | * GH-115 127 | * GH-116 128 | 129 | ### Thanks 130 | 131 | * NAITOH Jun 132 | 133 | ## 3.1.0 - 2024-02-04 134 | 135 | ### Fixes 136 | 137 | * jruby: Fixed a bug that substring can't be used. 138 | * GH-86 139 | * GH-87 140 | 141 | ## 3.0.9 - 2024-01-21 142 | 143 | ### Improvements 144 | 145 | * jruby: `StringScanner#rest`: Changed to use the source encoding instead of 146 | `US-ASCII` for empty case. 147 | * GH-78 148 | * GH-79 149 | * GH-80 150 | * Reported by NAITOH Jun 151 | 152 | * jruby: Dropped support for old Joni. 153 | * GH-76 154 | * Patch by Olle Jonsson 155 | 156 | * jruby: `StringScanner#scan`: Stopped to use shared string for result. 
157 | * GH-83 158 | * GH-84 159 | * Reported by NAITOH Jun 160 | 161 | ### Thanks 162 | 163 | * NAITOH Jun 164 | 165 | * Olle Jonsson 166 | 167 | ## 3.0.8 - 2024-01-13 168 | 169 | ### Improvements 170 | 171 | * `StringScanner#captures`: Changed to return `nil` not `""` for 172 | unmatched capture. Because `StringScanner#[]` and `MatchData#[]` 173 | do so. 174 | * GH-72 175 | * Patched by NAITOH Jun 176 | 177 | ### Thanks 178 | 179 | * NAITOH Jun 180 | 181 | ## 3.0.7 - 2023-10-11 182 | 183 | ### Improvements 184 | 185 | * jruby: Added support for fixed anchor. 186 | * GH-57 187 | 188 | ### Fixes 189 | 190 | * Fixed a crash bug of `StringScanner#named_captures` on not matched 191 | status. 192 | * GH-61 193 | * Patch by OKURA Masafumi 194 | 195 | ### Thanks 196 | 197 | * OKURA Masafumi 198 | 199 | ## 3.0.6 - 2023-02-08 200 | 201 | ### Improvements 202 | 203 | * doc: Improved `StringScanner#rest?`. 204 | [GH-49] 205 | [Patch by OKURA Masafumi] 206 | 207 | * jruby: Added support for joni 2.2. 208 | [GH-55] 209 | 210 | ### Thanks 211 | 212 | * OKURA Masafumi 213 | 214 | ## 3.0.5 - 2022-12-08 215 | 216 | ### Improvements 217 | 218 | * Added `StringScanner#named_captures` 219 | [GitHub#44](https://github.com/ruby/strscan/pull/44) 220 | [GitHub#43](https://github.com/ruby/strscan/issues/43) 221 | [Patch by Eriko Sugiyama] 222 | [Reported by Akim Demaille] 223 | 224 | ### Thanks 225 | 226 | * Eriko Sugiyama 227 | 228 | * Akim Demaille 229 | 230 | ## 3.0.4 - 2022-07-24 231 | 232 | ### Improvements 233 | 234 | * Added missing license files to gem. 235 | [GitHub#41](https://github.com/ruby/strscan/pull/41) 236 | [Patch by Martin Sander] 237 | 238 | ### Fixes 239 | 240 | * Fixed a `StringScanner#scan` bug that may not set match result on 241 | JRuby. 
242 | [GitHub#45](https://github.com/ruby/strscan/pull/45) 243 | [Patch by Kiichi Hiromasa] 244 | 245 | ### Thanks 246 | 247 | * Martin Sander 248 | 249 | * Kiichi Hiromasa 250 | 251 | ## 3.0.3 - 2022-05-11 252 | 253 | ### Improvements 254 | 255 | * Improved JRuby's release process integration. 256 | [GitHub#39][Patch by Charles Oliver Nutter] 257 | 258 | ### Thanks 259 | 260 | * Charles Oliver Nutter 261 | 262 | ## 3.0.2 - 2022-05-09 263 | 264 | ### Improvements 265 | 266 | * Improved documentation. 267 | [GitHub#32][Patch by David Crosby] 268 | 269 | * Added support for TruffleRuby. 270 | [GitHub#35][Patch by Benoit Daloze] 271 | 272 | ### Thanks 273 | 274 | * David Crosby 275 | 276 | * Benoit Daloze 277 | 278 | ## 3.0.2.pre1 - 2022-04-19 279 | 280 | ### Improvements 281 | 282 | * Added support for JRuby. 283 | [GitHub#25][Patch by Charles Oliver Nutter] 284 | 285 | ### Thanks 286 | 287 | * Charles Oliver Nutter 288 | 289 | ## 3.0.1 - 2021-10-23 290 | 291 | ### Fixes 292 | 293 | * Fixed a segmentation fault in `StringScanner#charpos` when 294 | `String#byteslice` returns non string value. 295 | [Bug #17756][GitHub#20][Patch by Kenichi Kamiya] 296 | 297 | ### Thanks 298 | 299 | * Kenichi Kamiya 300 | 301 | ## 1.0.3 - 2019-10-14 302 | 303 | ### Improvements 304 | 305 | * Stopped depending on `regint.h`. 306 | 307 | ### Fixes 308 | 309 | * Fixed a bug that a build flag is ignored when this is installed by 310 | `gem install`. 311 | [GitHub#7][Reported by Michael Camilleri] 312 | 313 | ### Thanks 314 | 315 | * Michael Camilleri 316 | 317 | ## 1.0.2 - 2019-10-13 318 | 319 | ### Improvements 320 | 321 | * Added support for `String` as a pattern. This improves performance. 322 | [GitHub#4] 323 | 324 | * Improved documentation. 325 | [GitHub#8][Patch by Espartaco Palma] 326 | 327 | * Added tests for anchors. 328 | [GitHub#9][Patch by Jeanine Adkisson] 329 | 330 | * Added support for fixed anchor mode. 
In this mode, `\A` matches to 331 | the beginning of source string instead of the current 332 | position. `^` matches to the beginning of line instead of the 333 | current position. 334 | 335 | You can use fixed anchor mode by passing `fixed_anchor: true` 336 | option to `StringScanner.new` such as `StringScanner.new(string, 337 | fixed_anchor: true)`. 338 | 339 | `StringScanner#fixed_anchor?` is also added to get whether fixed 340 | anchor mode is used or not. 341 | 342 | [GitHub#6][Patch by Michael Camilleri] 343 | [GitHub#10] 344 | 345 | ### Thanks 346 | 347 | * Espartaco Palma 348 | 349 | * Michael Camilleri 350 | 351 | * Jeanine Adkisson 352 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # StringScanner 2 | 3 | [![CI](https://github.com/ruby/strscan/actions/workflows/ci.yml/badge.svg)](https://github.com/ruby/strscan/actions/workflows/ci.yml) 4 | 5 | StringScanner provides for lexical scanning operations on a String. 6 | 7 | ## Installation 8 | 9 | Add this line to your application's Gemfile: 10 | 11 | ```ruby 12 | gem 'strscan' 13 | ``` 14 | 15 | And then execute: 16 | 17 | ```console 18 | $ bundle 19 | ``` 20 | 21 | Or install it yourself as: 22 | 23 | ```console 24 | $ gem install strscan 25 | ``` 26 | 27 | ## Usage 28 | 29 | ```ruby 30 | require 'strscan' 31 | 32 | s = StringScanner.new('This is an example string') 33 | s.eos? # -> false 34 | 35 | p s.scan(/\w+/) # -> "This" 36 | p s.scan(/\w+/) # -> nil 37 | p s.scan(/\s+/) # -> " " 38 | p s.scan(/\s+/) # -> nil 39 | p s.scan(/\w+/) # -> "is" 40 | s.eos? # -> false 41 | 42 | p s.scan(/\s+/) # -> " " 43 | p s.scan(/\w+/) # -> "an" 44 | p s.scan(/\s+/) # -> " " 45 | p s.scan(/\w+/) # -> "example" 46 | p s.scan(/\s+/) # -> " " 47 | p s.scan(/\w+/) # -> "string" 48 | s.eos? 
# -> true 49 | 50 | p s.scan(/\s+/) # -> nil 51 | p s.scan(/\w+/) # -> nil 52 | ``` 53 | 54 | ## Development 55 | 56 | After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake` to compile this and run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. 57 | 58 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). 59 | 60 | ## Contributing 61 | 62 | Bug reports and pull requests are welcome on GitHub at https://github.com/ruby/strscan. 63 | 64 | 65 | ## License 66 | 67 | The gem is available as open source under the terms of the [2-Clause BSD License](https://opensource.org/licenses/BSD-2-Clause). 68 | 69 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rake/testtask" 3 | require "rdoc/task" 4 | 5 | task :default => [:compile, :test] 6 | 7 | namespace :version do 8 | desc "Bump version" 9 | task :bump do 10 | strscan_c_path = "ext/strscan/strscan.c" 11 | strscan_c = File.read(strscan_c_path).gsub(/STRSCAN_VERSION "(.+?)"/) do 12 | version = $1 13 | "STRSCAN_VERSION \"#{version.succ}\"" 14 | end 15 | File.write(strscan_c_path, strscan_c) 16 | 17 | strscan_java_path = "ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java" 18 | strscan_java = File.read(strscan_java_path).gsub(/STRSCAN_VERSION = "(.+?)"/) do 19 | version = $1 20 | "STRSCAN_VERSION = \"#{version.succ}\"" 21 | end 22 | File.write(strscan_java_path, strscan_java) 23 | end 24 | end 25 | 26 | if RUBY_ENGINE == "jruby" 27 | require 'rake/javaextensiontask' 28 | 
Rake::JavaExtensionTask.new("strscan") do |ext| 29 | require 'maven/ruby/maven' 30 | ext.source_version = '1.8' 31 | ext.target_version = '1.8' 32 | ext.ext_dir = 'ext/jruby' 33 | end 34 | elsif RUBY_ENGINE == "ruby" 35 | require 'rake/extensiontask' 36 | Rake::ExtensionTask.new("strscan") 37 | else 38 | task :compile 39 | end 40 | 41 | desc "Run test" 42 | task :test do 43 | extra_require_path = RUBY_ENGINE == 'jruby' ? "ext/jruby/lib" : "lib" 44 | ENV["RUBYOPT"] = "-I#{extra_require_path} -rbundler/setup" 45 | ruby("run-test.rb") 46 | end 47 | 48 | desc "Run benchmark" 49 | task :benchmark do 50 | ruby("-S", 51 | "benchmark-driver", 52 | "benchmark/scan.yaml") 53 | end 54 | 55 | RDoc::Task.new 56 | 57 | release_task = Rake.application["release"] 58 | release_task.prerequisites.delete("build") 59 | release_task.prerequisites.delete("release:rubygem_push") 60 | release_task_comment = release_task.comment 61 | if release_task_comment 62 | release_task.clear_comments 63 | release_task.comment = release_task_comment.gsub(/ and build.*$/, "") 64 | end 65 | -------------------------------------------------------------------------------- /benchmark/check.yaml: -------------------------------------------------------------------------------- 1 | prelude: |- 2 | $LOAD_PATH.unshift(File.expand_path("lib")) 3 | require "strscan" 4 | scanner = StringScanner.new("test string") 5 | str = "test" 6 | reg = /test/ 7 | benchmark: 8 | regexp_pattern: | 9 | scanner.check(/\w/) 10 | regexp_literal: | 11 | scanner.check(/test/) 12 | regexp_literal_var: | 13 | scanner.check(reg) 14 | string: | 15 | scanner.check("test") 16 | string_var: | 17 | scanner.check(str) 18 | -------------------------------------------------------------------------------- /benchmark/check_until.yaml: -------------------------------------------------------------------------------- 1 | prelude: |- 2 | $LOAD_PATH.unshift(File.expand_path("lib")) 3 | require "strscan" 4 | scanner = StringScanner.new("test string") 5 
| str = "string" 6 | reg = /string/ 7 | benchmark: 8 | regexp: | 9 | scanner.check_until(/string/) 10 | regexp_var: | 11 | scanner.check_until(reg) 12 | string: | 13 | scanner.check_until("string") 14 | string_var: | 15 | scanner.check_until(str) 16 | -------------------------------------------------------------------------------- /benchmark/scan.yaml: -------------------------------------------------------------------------------- 1 | prelude: | 2 | $LOAD_PATH.unshift(File.expand_path("lib")) 3 | require "strscan" 4 | str = "test string" 5 | scanner = StringScanner.new(str) 6 | str = "test" 7 | reg = /test/ 8 | benchmark: 9 | check(reg): | 10 | scanner.check(reg) 11 | check(str): | 12 | scanner.check(str) 13 | match?(reg): | 14 | scanner.match?(reg) 15 | match?(str): | 16 | scanner.match?(str) 17 | -------------------------------------------------------------------------------- /benchmark/search.yaml: -------------------------------------------------------------------------------- 1 | prelude: | 2 | $LOAD_PATH.unshift(File.expand_path("lib")) 3 | require "strscan" 4 | str = "test string" 5 | scanner = StringScanner.new(str) 6 | str = "string" 7 | reg = /string/ 8 | benchmark: 9 | check_until(reg): | 10 | scanner.check_until(reg) 11 | check_until(str): | 12 | scanner.check_until(str) 13 | exist?(reg): | 14 | scanner.exist?(reg) 15 | exist?(str): | 16 | scanner.exist?(str) 17 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "bundler/setup" 4 | require "strscan" 5 | 6 | # You can add fixtures and/or initialization code here to make experimenting 7 | # with your gem easier. You can also use a different console, if you like. 8 | 9 | # (If you use this, don't forget to add pry to your Gemfile!) 
10 | # require "pry" 11 | # Pry.start 12 | 13 | require "irb" 14 | IRB.start(__FILE__) 15 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | bundle install 7 | 8 | # Do any other automated setup that you need to do here 9 | -------------------------------------------------------------------------------- /doc/strscan/helper_methods.md: -------------------------------------------------------------------------------- 1 | ## Helper Methods 2 | 3 | These helper methods display values returned by scanner's methods. 4 | 5 | ### `put_situation(scanner)` 6 | 7 | Display scanner's situation: 8 | 9 | - Byte position (`#pos`). 10 | - Character position (`#charpos`) 11 | - Target string (`#rest`) and size (`#rest_size`). 12 | 13 | ```rb 14 | scanner = StringScanner.new('foobarbaz') 15 | scanner.scan(/foo/) 16 | put_situation(scanner) 17 | # Situation: 18 | # pos: 3 19 | # charpos: 3 20 | # rest: "barbaz" 21 | # rest_size: 6 22 | ``` 23 | 24 | ### `put_match_values(scanner)` 25 | 26 | Display the scanner's match values: 27 | 28 | ```rb 29 | scanner = StringScanner.new('Fri Dec 12 1975 14:39') 30 | pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) / 31 | scanner.match?(pattern) 32 | put_match_values(scanner) 33 | # Basic match values: 34 | # matched?: true 35 | # matched_size: 11 36 | # pre_match: "" 37 | # matched : "Fri Dec 12 " 38 | # post_match: "1975 14:39" 39 | # Captured match values: 40 | # size: 4 41 | # captures: ["Fri", "Dec", "12"] 42 | # named_captures: {"wday"=>"Fri", "month"=>"Dec", "day"=>"12"} 43 | # values_at: ["Fri Dec 12 ", "Fri", "Dec", "12", nil] 44 | # []: 45 | # [0]: "Fri Dec 12 " 46 | # [1]: "Fri" 47 | # [2]: "Dec" 48 | # [3]: "12" 49 | # [4]: nil 50 | ``` 51 | 52 | ### `match_values_cleared?(scanner)` 53 | 54 | Returns whether the scanner's match values are all 
properly cleared: 55 | 56 | ```rb 57 | scanner = StringScanner.new('foobarbaz') 58 | match_values_cleared?(scanner) # => true 59 | put_match_values(scanner) 60 | # Basic match values: 61 | # matched?: false 62 | # matched_size: nil 63 | # pre_match: nil 64 | # matched : nil 65 | # post_match: nil 66 | # Captured match values: 67 | # size: nil 68 | # captures: nil 69 | # named_captures: {} 70 | # values_at: nil 71 | # [0]: nil 72 | scanner.scan(/foo/) 73 | match_values_cleared?(scanner) # => false 74 | ``` 75 | 76 | ## The Code 77 | 78 | ```rb 79 | def put_situation(scanner) 80 | puts '# Situation:' 81 | puts "# pos: #{scanner.pos}" 82 | puts "# charpos: #{scanner.charpos}" 83 | puts "# rest: #{scanner.rest.inspect}" 84 | puts "# rest_size: #{scanner.rest_size}" 85 | end 86 | 87 | def put_match_values(scanner) 88 | puts '# Basic match values:' 89 | puts "# matched?: #{scanner.matched?}" 90 | value = scanner.matched_size || 'nil' 91 | puts "# matched_size: #{value}" 92 | puts "# pre_match: #{scanner.pre_match.inspect}" 93 | puts "# matched : #{scanner.matched.inspect}" 94 | puts "# post_match: #{scanner.post_match.inspect}" 95 | puts '# Captured match values:' 96 | puts "# size: #{scanner.size}" 97 | puts "# captures: #{scanner.captures}" 98 | puts "# named_captures: #{scanner.named_captures}" 99 | if scanner.size.nil? 100 | puts "# values_at: #{scanner.values_at(0)}" 101 | puts "# [0]: #{scanner[0]}" 102 | else 103 | puts "# values_at: #{scanner.values_at(*(0..scanner.size))}" 104 | puts "# []:" 105 | scanner.size.times do |i| 106 | puts "# [#{i}]: #{scanner[i].inspect}" 107 | end 108 | end 109 | end 110 | 111 | def match_values_cleared?(scanner) 112 | scanner.matched? == false && 113 | scanner.matched_size.nil? && 114 | scanner.matched.nil? && 115 | scanner.pre_match.nil? && 116 | scanner.post_match.nil? && 117 | scanner.size.nil? && 118 | scanner[0].nil? && 119 | scanner.captures.nil? && 120 | scanner.values_at(0..1).nil? 
&& 121 | scanner.named_captures == {} 122 | end 123 | ``` 124 | 125 | -------------------------------------------------------------------------------- /doc/strscan/link_refs.txt: -------------------------------------------------------------------------------- 1 | [1]: rdoc-ref:StringScanner@Stored+String 2 | [2]: rdoc-ref:StringScanner@Byte+Position+-28Position-29 3 | [3]: rdoc-ref:StringScanner@Target+Substring 4 | [4]: rdoc-ref:StringScanner@Setting+the+Target+Substring 5 | [5]: rdoc-ref:StringScanner@Traversing+the+Target+Substring 6 | [6]: https://docs.ruby-lang.org/en/master/Regexp.html 7 | [7]: rdoc-ref:StringScanner@Character+Position 8 | [8]: https://docs.ruby-lang.org/en/master/String.html#method-i-5B-5D 9 | [9]: rdoc-ref:StringScanner@Match+Values 10 | [10]: rdoc-ref:StringScanner@Fixed-Anchor+Property 11 | [11]: rdoc-ref:StringScanner@Positions 12 | [13]: rdoc-ref:StringScanner@Captured+Match+Values 13 | [14]: rdoc-ref:StringScanner@Querying+the+Target+Substring 14 | [15]: rdoc-ref:StringScanner@Searching+the+Target+Substring 15 | [16]: https://docs.ruby-lang.org/en/master/Regexp.html#class-Regexp-label-Groups+and+Captures 16 | [17]: rdoc-ref:StringScanner@Matching 17 | [18]: rdoc-ref:StringScanner@Basic+Match+Values 18 | -------------------------------------------------------------------------------- /doc/strscan/methods/get_byte.md: -------------------------------------------------------------------------------- 1 | call-seq: 2 | get_byte -> byte_as_character or nil 3 | 4 | Returns the next byte, if available: 5 | 6 | - If the [position][2] 7 | is not at the end of the [stored string][1]: 8 | 9 | - Returns the next byte. 10 | - Increments the [byte position][2]. 11 | - Adjusts the [character position][7]. 
12 | 13 | ```rb 14 | scanner = StringScanner.new(HIRAGANA_TEXT) 15 | # => # 16 | scanner.string # => "こんにちは" 17 | [scanner.get_byte, scanner.pos, scanner.charpos] # => ["\xE3", 1, 1] 18 | [scanner.get_byte, scanner.pos, scanner.charpos] # => ["\x81", 2, 2] 19 | [scanner.get_byte, scanner.pos, scanner.charpos] # => ["\x93", 3, 1] 20 | [scanner.get_byte, scanner.pos, scanner.charpos] # => ["\xE3", 4, 2] 21 | [scanner.get_byte, scanner.pos, scanner.charpos] # => ["\x82", 5, 3] 22 | [scanner.get_byte, scanner.pos, scanner.charpos] # => ["\x93", 6, 2] 23 | ``` 24 | 25 | - Otherwise, returns `nil`, and does not change the positions. 26 | 27 | ```rb 28 | scanner.terminate 29 | [scanner.get_byte, scanner.pos, scanner.charpos] # => [nil, 15, 5] 30 | ``` 31 | -------------------------------------------------------------------------------- /doc/strscan/methods/get_charpos.md: -------------------------------------------------------------------------------- 1 | call-seq: 2 | charpos -> character_position 3 | 4 | Returns the [character position][7] (initially zero), 5 | which may be different from the [byte position][2] 6 | given by method #pos: 7 | 8 | ```rb 9 | scanner = StringScanner.new(HIRAGANA_TEXT) 10 | scanner.string # => "こんにちは" 11 | scanner.getch # => "こ" # 3-byte character. 12 | scanner.getch # => "ん" # 3-byte character. 13 | put_situation(scanner) 14 | # Situation: 15 | # pos: 6 16 | # charpos: 2 17 | # rest: "にちは" 18 | # rest_size: 9 19 | ``` 20 | -------------------------------------------------------------------------------- /doc/strscan/methods/get_pos.md: -------------------------------------------------------------------------------- 1 | call-seq: 2 | pos -> byte_position 3 | 4 | Returns the integer [byte position][2], 5 | which may be different from the [character position][7]: 6 | 7 | ```rb 8 | scanner = StringScanner.new(HIRAGANA_TEXT) 9 | scanner.string # => "こんにちは" 10 | scanner.pos # => 0 11 | scanner.getch # => "こ" # 3-byte character. 
12 | scanner.charpos # => 1 13 | scanner.pos # => 3 14 | ``` 15 | -------------------------------------------------------------------------------- /doc/strscan/methods/getch.md: -------------------------------------------------------------------------------- 1 | call-seq: 2 | getch -> character or nil 3 | 4 | Returns the next (possibly multibyte) character, 5 | if available: 6 | 7 | - If the [position][2] 8 | is at the beginning of a character: 9 | 10 | - Returns the character. 11 | - Increments the [character position][7] by 1. 12 | - Increments the [byte position][2] 13 | by the size (in bytes) of the character. 14 | 15 | ```rb 16 | scanner = StringScanner.new(HIRAGANA_TEXT) 17 | scanner.string # => "こんにちは" 18 | [scanner.getch, scanner.pos, scanner.charpos] # => ["こ", 3, 1] 19 | [scanner.getch, scanner.pos, scanner.charpos] # => ["ん", 6, 2] 20 | [scanner.getch, scanner.pos, scanner.charpos] # => ["に", 9, 3] 21 | [scanner.getch, scanner.pos, scanner.charpos] # => ["ち", 12, 4] 22 | [scanner.getch, scanner.pos, scanner.charpos] # => ["は", 15, 5] 23 | [scanner.getch, scanner.pos, scanner.charpos] # => [nil, 15, 5] 24 | ``` 25 | 26 | - If the [position][2] is within a multi-byte character 27 | (that is, not at its beginning), 28 | behaves like #get_byte (returns a 1-byte character): 29 | 30 | ```rb 31 | scanner.pos = 1 32 | [scanner.getch, scanner.pos, scanner.charpos] # => ["\x81", 2, 2] 33 | [scanner.getch, scanner.pos, scanner.charpos] # => ["\x93", 3, 1] 34 | [scanner.getch, scanner.pos, scanner.charpos] # => ["ん", 6, 2] 35 | ``` 36 | 37 | - If the [position][2] is at the end of the [stored string][1], 38 | returns `nil` and does not modify the positions: 39 | 40 | ```rb 41 | scanner.terminate 42 | [scanner.getch, scanner.pos, scanner.charpos] # => [nil, 15, 5] 43 | ``` 44 | -------------------------------------------------------------------------------- /doc/strscan/methods/scan.md: -------------------------------------------------------------------------------- 
1 | call-seq: 2 | scan(pattern) -> substring or nil 3 | 4 | Attempts to [match][17] the given `pattern` 5 | at the beginning of the [target substring][3]. 6 | 7 | If the match succeeds: 8 | 9 | - Returns the matched substring. 10 | - Increments the [byte position][2] by substring.bytesize, 11 | and may increment the [character position][7]. 12 | - Sets [match values][9]. 13 | 14 | ```rb 15 | scanner = StringScanner.new(HIRAGANA_TEXT) 16 | scanner.string # => "こんにちは" 17 | scanner.pos = 6 18 | scanner.scan(/に/) # => "に" 19 | put_match_values(scanner) 20 | # Basic match values: 21 | # matched?: true 22 | # matched_size: 3 23 | # pre_match: "こん" 24 | # matched : "に" 25 | # post_match: "ちは" 26 | # Captured match values: 27 | # size: 1 28 | # captures: [] 29 | # named_captures: {} 30 | # values_at: ["に", nil] 31 | # []: 32 | # [0]: "に" 33 | # [1]: nil 34 | put_situation(scanner) 35 | # Situation: 36 | # pos: 9 37 | # charpos: 3 38 | # rest: "ちは" 39 | # rest_size: 6 40 | ``` 41 | 42 | If the match fails: 43 | 44 | - Returns `nil`. 45 | - Does not increment byte and character positions. 46 | - Clears match values. 47 | 48 | ```rb 49 | scanner.scan(/nope/) # => nil 50 | match_values_cleared?(scanner) # => true 51 | ``` 52 | -------------------------------------------------------------------------------- /doc/strscan/methods/scan_until.md: -------------------------------------------------------------------------------- 1 | call-seq: 2 | scan_until(pattern) -> substring or nil 3 | 4 | Attempts to [match][17] the given `pattern` 5 | anywhere (at any [position][2]) in the [target substring][3]. 6 | 7 | If the match attempt succeeds: 8 | 9 | - Sets [match values][9]. 10 | - Sets the [byte position][2] to the end of the matched substring; 11 | may adjust the [character position][7]. 12 | - Returns the matched substring. 
13 | 14 | 15 | ```rb 16 | scanner = StringScanner.new(HIRAGANA_TEXT) 17 | scanner.string # => "こんにちは" 18 | scanner.pos = 6 19 | scanner.scan_until(/ち/) # => "にち" 20 | put_match_values(scanner) 21 | # Basic match values: 22 | # matched?: true 23 | # matched_size: 3 24 | # pre_match: "こんに" 25 | # matched : "ち" 26 | # post_match: "は" 27 | # Captured match values: 28 | # size: 1 29 | # captures: [] 30 | # named_captures: {} 31 | # values_at: ["ち", nil] 32 | # []: 33 | # [0]: "ち" 34 | # [1]: nil 35 | put_situation(scanner) 36 | # Situation: 37 | # pos: 12 38 | # charpos: 4 39 | # rest: "は" 40 | # rest_size: 3 41 | ``` 42 | 43 | If the match attempt fails: 44 | 45 | - Clears [match values][9]. 46 | - Returns `nil`. 47 | - Does not update positions. 48 | 49 | ```rb 50 | scanner.scan_until(/nope/) # => nil 51 | match_values_cleared?(scanner) # => true 52 | ``` 53 | -------------------------------------------------------------------------------- /doc/strscan/methods/set_pos.md: -------------------------------------------------------------------------------- 1 | call-seq: 2 | pos = n -> n 3 | pointer = n -> n 4 | 5 | Sets the [byte position][2] and the [character position][7]; 6 | returns `n`. 7 | 8 | Does not affect [match values][9]. 
9 | 10 | For non-negative `n`, sets the position to `n`: 11 | 12 | ```rb 13 | scanner = StringScanner.new(HIRAGANA_TEXT) 14 | scanner.string # => "こんにちは" 15 | scanner.pos = 3 # => 3 16 | scanner.rest # => "んにちは" 17 | scanner.charpos # => 1 18 | ``` 19 | 20 | For negative `n`, counts from the end of the [stored string][1]: 21 | 22 | ```rb 23 | scanner.pos = -9 # => -9 24 | scanner.pos # => 6 25 | scanner.rest # => "にちは" 26 | scanner.charpos # => 2 27 | ``` 28 | -------------------------------------------------------------------------------- /doc/strscan/methods/skip.md: -------------------------------------------------------------------------------- 1 | call-seq: 2 | skip(pattern) -> match_size or nil 3 | 4 | Attempts to [match][17] the given `pattern` 5 | at the beginning of the [target substring][3]. 6 | 7 | If the match succeeds: 8 | 9 | - Increments the [byte position][2] by substring.bytesize, 10 | and may increment the [character position][7]. 11 | - Sets [match values][9]. 12 | - Returns the size (bytes) of the matched substring. 
13 | 14 | ```rb 15 | scanner = StringScanner.new(HIRAGANA_TEXT) 16 | scanner.string # => "こんにちは" 17 | scanner.pos = 6 18 | scanner.skip(/に/) # => 3 19 | put_match_values(scanner) 20 | # Basic match values: 21 | # matched?: true 22 | # matched_size: 3 23 | # pre_match: "こん" 24 | # matched : "に" 25 | # post_match: "ちは" 26 | # Captured match values: 27 | # size: 1 28 | # captures: [] 29 | # named_captures: {} 30 | # values_at: ["に", nil] 31 | # []: 32 | # [0]: "に" 33 | # [1]: nil 34 | put_situation(scanner) 35 | # Situation: 36 | # pos: 9 37 | # charpos: 3 38 | # rest: "ちは" 39 | # rest_size: 6 40 | 41 | scanner.skip(/nope/) # => nil 42 | match_values_cleared?(scanner) # => true 43 | ``` 44 | -------------------------------------------------------------------------------- /doc/strscan/methods/skip_until.md: -------------------------------------------------------------------------------- 1 | call-seq: 2 | skip_until(pattern) -> matched_substring_size or nil 3 | 4 | Attempts to [match][17] the given `pattern` 5 | anywhere (at any [position][2]) in the [target substring][3]; 6 | modifies the positions only if the match attempt succeeds. 7 | 8 | If the match attempt succeeds: 9 | 10 | - Sets [match values][9]. 11 | - Sets the [byte position][2] to the end of the matched substring (may adjust the [character position][7]), and returns the number of bytes advanced (which includes any bytes skipped before the matched substring). 12 | 13 | ```rb 14 | scanner = StringScanner.new(HIRAGANA_TEXT) 15 | scanner.string # => "こんにちは" 16 | scanner.pos = 6 17 | scanner.skip_until(/ち/) # => 6 18 | put_match_values(scanner) 19 | # Basic match values: 20 | # matched?: true 21 | # matched_size: 3 22 | # pre_match: "こんに" 23 | # matched : "ち" 24 | # post_match: "は" 25 | # Captured match values: 26 | # size: 1 27 | # captures: [] 28 | # named_captures: {} 29 | # values_at: ["ち", nil] 30 | # []: 31 | # [0]: "ち" 32 | # [1]: nil 33 | put_situation(scanner) 34 | # Situation: 35 | # pos: 12 36 | # charpos: 4 37 | # rest: "は" 38 | # rest_size: 3 39 | ``` 40 | 41 | If the match attempt fails: 42 | 43 | - Clears match values. 44 | - Returns `nil`. 
45 | 46 | ```rb 47 | scanner.skip_until(/nope/) # => nil 48 | match_values_cleared?(scanner) # => true 49 | ``` 50 | -------------------------------------------------------------------------------- /doc/strscan/methods/terminate.md: -------------------------------------------------------------------------------- 1 | call-seq: 2 | terminate -> self 3 | 4 | Sets the scanner to end-of-string; 5 | returns +self+: 6 | 7 | - Sets both [positions][11] to end-of-string. 8 | - Clears [match values][9]. 9 | 10 | ```rb 11 | scanner = StringScanner.new(HIRAGANA_TEXT) 12 | scanner.string # => "こんにちは" 13 | scanner.scan_until(/に/) 14 | put_situation(scanner) 15 | # Situation: 16 | # pos: 9 17 | # charpos: 3 18 | # rest: "ちは" 19 | # rest_size: 6 20 | match_values_cleared?(scanner) # => false 21 | 22 | scanner.terminate # => #<StringScanner fin> 23 | put_situation(scanner) 24 | # Situation: 25 | # pos: 15 26 | # charpos: 5 27 | # rest: "" 28 | # rest_size: 0 29 | match_values_cleared?(scanner) # => true 30 | ``` 31 | -------------------------------------------------------------------------------- /doc/strscan/strscan.md: -------------------------------------------------------------------------------- 1 | \Class `StringScanner` supports processing a stored string as a stream; 2 | this code creates a new `StringScanner` object with string `'foobarbaz'`: 3 | 4 | ```rb 5 | require 'strscan' 6 | scanner = StringScanner.new('foobarbaz') 7 | ``` 8 | 9 | ## About the Examples 10 | 11 | All examples here assume that `StringScanner` has been required: 12 | 13 | ```rb 14 | require 'strscan' 15 | ``` 16 | 17 | Some examples here assume that these constants are defined: 18 | 19 | ```rb 20 | MULTILINE_TEXT = <<~EOT 21 | Go placidly amid the noise and haste, 22 | and remember what peace there may be in silence. 
23 | EOT 24 | 25 | HIRAGANA_TEXT = 'こんにちは' 26 | 27 | ENGLISH_TEXT = 'Hello' 28 | ``` 29 | 30 | Some examples here assume that certain helper methods are defined: 31 | 32 | - `put_situation(scanner)`: 33 | Displays the values of the scanner's 34 | methods #pos, #charpos, #rest, and #rest_size. 35 | - `put_match_values(scanner)`: 36 | Displays the scanner's [match values][9]. 37 | - `match_values_cleared?(scanner)`: 38 | Returns whether the scanner's [match values][9] are cleared. 39 | 40 | See examples [here][ext/strscan/helper_methods_md.html]. 41 | 42 | ## The `StringScanner` \Object 43 | 44 | This code creates a `StringScanner` object 45 | (we'll call it simply a _scanner_), 46 | and shows some of its basic properties: 47 | 48 | ```rb 49 | scanner = StringScanner.new('foobarbaz') 50 | scanner.string # => "foobarbaz" 51 | put_situation(scanner) 52 | # Situation: 53 | # pos: 0 54 | # charpos: 0 55 | # rest: "foobarbaz" 56 | # rest_size: 9 57 | ``` 58 | 59 | The scanner has: 60 | 61 | * A stored string, which is: 62 | 63 | * Initially set by StringScanner.new(string) to the given `string` 64 | (`'foobarbaz'` in the example above). 65 | * Modifiable by methods #string=(new_string) and #concat(more_string). 66 | * Returned by method #string. 67 | 68 | More at [Stored String][1] below. 69 | 70 | * A _position_; 71 | a zero-based index into the bytes of the stored string (_not_ into its characters): 72 | 73 | * Initially set by StringScanner.new to `0`. 74 | * Returned by method #pos. 75 | * Modifiable explicitly by methods #reset, #terminate, and #pos=(new_pos). 76 | * Modifiable implicitly (various traversing methods, among others). 77 | 78 | More at [Byte Position][2] below. 79 | 80 | * A target substring, 81 | which is a trailing substring of the stored string; 82 | it extends from the current position to the end of the stored string: 83 | 84 | * Initially set by StringScanner.new(string) to the given `string` 85 | (`'foobarbaz'` in the example above). 
86 | * Returned by method #rest. 87 | * Modified by any modification to either the stored string or the position. 88 | 89 | Most importantly: 90 | the searching and traversing methods operate on the target substring, 91 | which may be (and often is) less than the entire stored string. 92 | 93 | More at [Target Substring][3] below. 94 | 95 | ## Stored \String 96 | 97 | The stored string is the string stored in the `StringScanner` object. 98 | 99 | Each of these methods sets, modifies, or returns the stored string: 100 | 101 | | Method | Effect | 102 | |----------------------|-------------------------------------------------| 103 | | ::new(string) | Creates a new scanner for the given string. | 104 | | #string=(new_string) | Replaces the existing stored string. | 105 | | #concat(more_string) | Appends a string to the existing stored string. | 106 | | #string | Returns the stored string. | 107 | 108 | ## Positions 109 | 110 | A `StringScanner` object maintains a zero-based byte position 111 | and a zero-based character position. 112 | 113 | Each of these methods explicitly sets positions: 114 | 115 | | Method | Effect | 116 | |--------------------------|-----------------------------------------------------------| 117 | | #reset | Sets both positions to zero (beginning of stored string). | 118 | | #terminate | Sets both positions to the end of the stored string. | 119 | | #pos=(new_byte_position) | Sets byte position; adjusts character position. | 120 | 121 | ### Byte Position (Position) 122 | 123 | The byte position (or simply _position_) 124 | is a zero-based index into the bytes in the scanner's stored string; 125 | for a new `StringScanner` object, the byte position is zero. 126 | 127 | When the byte position is: 128 | 129 | * Zero (at the beginning), the target substring is the entire stored string. 130 | * Equal to the size of the stored string (at the end), 131 | the target substring is the empty string `''`. 
132 | 133 | To get or set the byte position: 134 | 135 | * \#pos: returns the byte position. 136 | * \#pos=(new_pos): sets the byte position. 137 | 138 | Many methods use the byte position as the basis for finding matches; 139 | many others set, increment, or decrement the byte position: 140 | 141 | ```rb 142 | scanner = StringScanner.new('foobar') 143 | scanner.pos # => 0 144 | scanner.scan(/foo/) # => "foo" # Match found. 145 | scanner.pos # => 3 # Byte position incremented. 146 | scanner.scan(/foo/) # => nil # Match not found. 147 | scanner.pos # => 3 # Byte position not changed. 148 | ``` 149 | 150 | Some methods implicitly modify the byte position; 151 | see: 152 | 153 | * [Setting the Target Substring][4]. 154 | * [Traversing the Target Substring][5]. 155 | 156 | The values of these methods are derived directly from the values of #pos and #string: 157 | 158 | - \#charpos: the [character position][7]. 159 | - \#rest: the [target substring][3]. 160 | - \#rest_size: `rest.bytesize`. 161 | 162 | ### Character Position 163 | 164 | The character position is a zero-based index into the _characters_ 165 | in the stored string; 166 | for a new `StringScanner` object, the character position is zero. 167 | 168 | \Method #charpos returns the character position; 169 | its value may not be reset explicitly. 170 | 171 | Some methods change (increment or reset) the character position; 172 | see: 173 | 174 | * [Setting the Target Substring][4]. 175 | * [Traversing the Target Substring][5]. 176 | 177 | Example (string includes multi-byte characters): 178 | 179 | ```rb 180 | scanner = StringScanner.new(ENGLISH_TEXT) # Five 1-byte characters. 181 | scanner.concat(HIRAGANA_TEXT) # Five 3-byte characters. 182 | scanner.string # => "Helloこんにちは" # Twenty bytes in all. 183 | put_situation(scanner) 184 | # Situation: 185 | # pos: 0 186 | # charpos: 0 187 | # rest: "Helloこんにちは" 188 | # rest_size: 20 189 | scanner.scan(/Hello/) # => "Hello" # Five 1-byte characters. 
190 | put_situation(scanner) 191 | # Situation: 192 | # pos: 5 193 | # charpos: 5 194 | # rest: "こんにちは" 195 | # rest_size: 15 196 | scanner.getch # => "こ" # One 3-byte character. 197 | put_situation(scanner) 198 | # Situation: 199 | # pos: 8 200 | # charpos: 6 201 | # rest: "んにちは" 202 | # rest_size: 12 203 | ``` 204 | 205 | ## Target Substring 206 | 207 | The target substring is the part of the [stored string][1] 208 | that extends from the current [byte position][2] to the end of the stored string; 209 | it is always either: 210 | 211 | - The entire stored string (byte position is zero). 212 | - A trailing substring of the stored string (byte position is positive). 213 | 214 | The target substring is returned by method #rest, 215 | and its size (in bytes) is returned by method #rest_size. 216 | 217 | Examples: 218 | 219 | ```rb 220 | scanner = StringScanner.new('foobarbaz') 221 | put_situation(scanner) 222 | # Situation: 223 | # pos: 0 224 | # charpos: 0 225 | # rest: "foobarbaz" 226 | # rest_size: 9 227 | scanner.pos = 3 228 | put_situation(scanner) 229 | # Situation: 230 | # pos: 3 231 | # charpos: 3 232 | # rest: "barbaz" 233 | # rest_size: 6 234 | scanner.pos = 9 235 | put_situation(scanner) 236 | # Situation: 237 | # pos: 9 238 | # charpos: 9 239 | # rest: "" 240 | # rest_size: 0 241 | ``` 242 | 243 | ### Setting the Target Substring 244 | 245 | The target substring is set whenever: 246 | 247 | * The [stored string][1] is set (position reset to zero; target substring set to stored string). 248 | * The [byte position][2] is set (target substring adjusted accordingly). 249 | 250 | ### Querying the Target Substring 251 | 252 | This table summarizes (details and examples at the links): 253 | 254 | | Method | Returns | 255 | |------------|-----------------------------------| 256 | | #rest | Target substring. | 257 | | #rest_size | Size (bytes) of target substring. 
| 258 | 259 | ### Searching the Target Substring 260 | 261 | A _search_ method examines the target substring, 262 | but does not advance the [positions][11] 263 | or (by implication) shorten the target substring. 264 | 265 | This table summarizes (details and examples at the links): 266 | 267 | | Method | Returns | Sets Match Values? | 268 | |-----------------------|-----------------------------------------------|--------------------| 269 | | #check(pattern) | Matched leading substring or +nil+. | Yes. | 270 | | #check_until(pattern) | Matched substring (anywhere) or +nil+. | Yes. | 271 | | #exist?(pattern) | Matched substring (anywhere) end index. | Yes. | 272 | | #match?(pattern) | Size of matched leading substring or +nil+. | Yes. | 273 | | #peek(size) | Leading substring of given length (bytes). | No. | 274 | | #peek_byte | Integer leading byte or +nil+. | No. | 275 | | #rest | Target substring (from byte position to end). | No. | 276 | 277 | ### Traversing the Target Substring 278 | 279 | A _traversal_ method examines the target substring, 280 | and, if successful: 281 | 282 | - Advances the [positions][11]. 283 | - Shortens the target substring. 284 | 285 | 286 | This table summarizes (details and examples at links): 287 | 288 | | Method | Returns | Sets Match Values? | 289 | |----------------------|------------------------------------------------------|--------------------| 290 | | #get_byte | Leading byte or +nil+. | No. | 291 | | #getch | Leading character or +nil+. | No. | 292 | | #scan(pattern) | Matched leading substring or +nil+. | Yes. | 293 | | #scan_byte | Integer leading byte or +nil+. | No. | 294 | | #scan_until(pattern) | Matched substring (anywhere) or +nil+. | Yes. | 295 | | #skip(pattern) | Matched leading substring size or +nil+. | Yes. | 296 | | #skip_until(pattern) | Position delta to end-of-matched-substring or +nil+. | Yes. | 297 | | #unscan | +self+. | No. 
| 298 | 299 | ## Querying the Scanner 300 | 301 | Each of these methods queries the scanner object 302 | without modifying it (details and examples at links) 303 | 304 | | Method | Returns | 305 | |---------------------|----------------------------------| 306 | | #beginning_of_line? | +true+ or +false+. | 307 | | #charpos | Character position. | 308 | | #eos? | +true+ or +false+. | 309 | | #fixed_anchor? | +true+ or +false+. | 310 | | #inspect | String representation of +self+. | 311 | | #pos | Byte position. | 312 | | #rest | Target substring. | 313 | | #rest_size | Size of target substring. | 314 | | #string | Stored string. | 315 | 316 | ## Matching 317 | 318 | `StringScanner` implements pattern matching via Ruby class [Regexp][6], 319 | and its matching behaviors are the same as Ruby's 320 | except for the [fixed-anchor property][10]. 321 | 322 | ### Matcher Methods 323 | 324 | Each matcher method takes a single argument `pattern`, 325 | and attempts to find a matching substring in the [target substring][3]. 326 | 327 | | Method | Pattern Type | Matches Target Substring | Success Return | May Update Positions? | 328 | |--------------|-------------------|--------------------------|--------------------|-----------------------| 329 | | #check | Regexp or String. | At beginning. | Matched substring. | No. | 330 | | #check_until | Regexp or String. | Anywhere. | Substring. | No. | 331 | | #match? | Regexp or String. | At beginning. | Match size. | No. | 332 | | #exist? | Regexp or String. | Anywhere. | Substring size. | No. | 333 | | #scan | Regexp or String. | At beginning. | Matched substring. | Yes. | 334 | | #scan_until | Regexp or String. | Anywhere. | Substring. | Yes. | 335 | | #skip | Regexp or String. | At beginning. | Match size. | Yes. | 336 | | #skip_until | Regexp or String. | Anywhere. | Substring size. | Yes. | 337 | 338 |
339 | 340 | Which matcher you choose will depend on: 341 | 342 | - Where you want to find a match: 343 | 344 | - Only at the beginning of the target substring: 345 | #check, #match?, #scan, #skip. 346 | - Anywhere in the target substring: 347 | #check_until, #exist?, #scan_until, #skip_until. 348 | 349 | - Whether you want to: 350 | 351 | - Traverse, by advancing the positions: 352 | #scan, #scan_until, #skip, #skip_until. 353 | - Keep the positions unchanged: 354 | #check, #check_until, #match?, #exist?. 355 | 356 | - What you want for the return value: 357 | 358 | - The matched substring: #check, #scan. 359 | - The substring: #check_until, #scan_until. 360 | - The match size: #match?, #skip. 361 | - The substring size: #exist?, #skip_until. 362 | 363 | ### Match Values 364 | 365 | The match values in a `StringScanner` object 366 | generally contain the results of the most recent attempted match. 367 | 368 | Each match value may be thought of as: 369 | 370 | * _Clear_: Initially, or after an unsuccessful match attempt: 371 | usually, `false`, `nil`, or `{}`. 372 | * _Set_: After a successful match attempt: 373 | `true`, string, array, or hash. 374 | 375 | Each of these methods clears match values: 376 | 377 | - ::new(string). 378 | - \#reset. 379 | - \#terminate. 380 | 381 | Each of these methods attempts a match based on a pattern, 382 | and either sets match values (if successful) or clears them (if not); 383 | 384 | - \#check(pattern) 385 | - \#check_until(pattern) 386 | - \#exist?(pattern) 387 | - \#match?(pattern) 388 | - \#scan(pattern) 389 | - \#scan_until(pattern) 390 | - \#skip(pattern) 391 | - \#skip_until(pattern) 392 | 393 | #### Basic Match Values 394 | 395 | Basic match values are those not related to captures. 396 | 397 | Each of these methods returns a basic match value: 398 | 399 | | Method | Return After Match | Return After No Match | 400 | |-----------------|----------------------------------------|-----------------------| 401 | | #matched? 
| +true+. | +false+. | 402 | | #matched_size | Size of matched substring. | +nil+. | 403 | | #matched | Matched substring. | +nil+. | 404 | | #pre_match | Substring preceding matched substring. | +nil+. | 405 | | #post_match | Substring following matched substring. | +nil+. | 406 | 407 |
408 | 409 | See examples below. 410 | 411 | #### Captured Match Values 412 | 413 | Captured match values are those related to [captures][16]. 414 | 415 | Each of these methods returns a captured match value: 416 | 417 | | Method | Return After Match | Return After No Match | 418 | |-----------------|-----------------------------------------|-----------------------| 419 | | #size | Count of captured substrings. | +nil+. | 420 | | #[](n) | nth captured substring. | +nil+. | 421 | | #captures | Array of all captured substrings. | +nil+. | 422 | | #values_at(*n) | Array of specified captured substrings. | +nil+. | 423 | | #named_captures | Hash of named captures. | {}. | 424 | 425 |
426 | 427 | See examples below. 428 | 429 | #### Match Values Examples 430 | 431 | Successful basic match attempt (no captures): 432 | 433 | ```rb 434 | scanner = StringScanner.new('foobarbaz') 435 | scanner.exist?(/bar/) 436 | put_match_values(scanner) 437 | # Basic match values: 438 | # matched?: true 439 | # matched_size: 3 440 | # pre_match: "foo" 441 | # matched : "bar" 442 | # post_match: "baz" 443 | # Captured match values: 444 | # size: 1 445 | # captures: [] 446 | # named_captures: {} 447 | # values_at: ["bar", nil] 448 | # []: 449 | # [0]: "bar" 450 | # [1]: nil 451 | ``` 452 | 453 | Failed basic match attempt (no captures): 454 | 455 | ```rb 456 | scanner = StringScanner.new('foobarbaz') 457 | scanner.exist?(/nope/) 458 | match_values_cleared?(scanner) # => true 459 | ``` 460 | 461 | Successful unnamed capture match attempt: 462 | 463 | ```rb 464 | scanner = StringScanner.new('foobarbazbatbam') 465 | scanner.exist?(/(foo)bar(baz)bat(bam)/) 466 | put_match_values(scanner) 467 | # Basic match values: 468 | # matched?: true 469 | # matched_size: 15 470 | # pre_match: "" 471 | # matched : "foobarbazbatbam" 472 | # post_match: "" 473 | # Captured match values: 474 | # size: 4 475 | # captures: ["foo", "baz", "bam"] 476 | # named_captures: {} 477 | # values_at: ["foobarbazbatbam", "foo", "baz", "bam", nil] 478 | # []: 479 | # [0]: "foobarbazbatbam" 480 | # [1]: "foo" 481 | # [2]: "baz" 482 | # [3]: "bam" 483 | # [4]: nil 484 | ``` 485 | 486 | Successful named capture match attempt; 487 | same as unnamed above, except for #named_captures: 488 | 489 | ```rb 490 | scanner = StringScanner.new('foobarbazbatbam') 491 | scanner.exist?(/(?<x>foo)bar(?<y>baz)bat(?<z>bam)/) 492 | scanner.named_captures # => {"x"=>"foo", "y"=>"baz", "z"=>"bam"} 493 | ``` 494 | 495 | Failed unnamed capture match attempt: 496 | 497 | ```rb 498 | scanner = StringScanner.new('somestring') 499 | scanner.exist?(/(foo)bar(baz)bat(bam)/) 500 | match_values_cleared?(scanner) # => true 501 | ``` 502 | 503 
| Failed named capture match attempt; 504 | same as unnamed above, except for #named_captures: 505 | 506 | ```rb 507 | scanner = StringScanner.new('somestring') 508 | scanner.exist?(/(?<x>foo)bar(?<y>baz)bat(?<z>bam)/) 509 | match_values_cleared?(scanner) # => false 510 | scanner.named_captures # => {"x"=>nil, "y"=>nil, "z"=>nil} 511 | ``` 512 | 513 | ## Fixed-Anchor Property 514 | 515 | Pattern matching in `StringScanner` is the same as Ruby's, 516 | except for its fixed-anchor property, 517 | which determines the meaning of `'\A'`: 518 | 519 | * `false` (the default): matches the current byte position. 520 | 521 | ```rb 522 | scanner = StringScanner.new('foobar') 523 | scanner.scan(/\A./) # => "f" 524 | scanner.scan(/\A./) # => "o" 525 | scanner.scan(/\A./) # => "o" 526 | scanner.scan(/\A./) # => "b" 527 | ``` 528 | 529 | * `true`: matches the beginning of the stored string; 530 | never matches unless the byte position is zero: 531 | 532 | ```rb 533 | scanner = StringScanner.new('foobar', fixed_anchor: true) 534 | scanner.scan(/\A./) # => "f" 535 | scanner.scan(/\A./) # => nil 536 | scanner.reset 537 | scanner.scan(/\A./) # => "f" 538 | ``` 539 | 540 | The fixed-anchor property is set when the `StringScanner` object is created, 541 | and may not be modified 542 | (see StringScanner.new); 543 | method #fixed_anchor? returns the setting. 
544 | 545 | -------------------------------------------------------------------------------- /ext/jruby/lib/strscan.rb: -------------------------------------------------------------------------------- 1 | require 'strscan.jar' 2 | JRuby::Util.load_ext("org.jruby.ext.strscan.StringScannerLibrary") 3 | require "strscan/strscan" 4 | -------------------------------------------------------------------------------- /ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java: -------------------------------------------------------------------------------- 1 | /* 2 | ***** BEGIN LICENSE BLOCK ***** 3 | * Version: EPL 2.0/GPL 2.0/LGPL 2.1 4 | * 5 | * The contents of this file are subject to the Eclipse Public 6 | * License Version 2.0 (the "License"); you may not use this file 7 | * except in compliance with the License. You may obtain a copy of 8 | * the License at http://www.eclipse.org/legal/epl-v20.html 9 | * 10 | * Software distributed under the License is distributed on an "AS 11 | * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or 12 | * implied. See the License for the specific language governing 13 | * rights and limitations under the License. 14 | * 15 | * Alternatively, the contents of this file may be used under the terms of 16 | * either of the GNU General Public License Version 2 or later (the "GPL"), 17 | * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 18 | * in which case the provisions of the GPL or the LGPL are applicable instead 19 | * of those above. If you wish to allow use of your version of this file only 20 | * under the terms of either the GPL or the LGPL, and not to allow others to 21 | * use your version of this file under the terms of the EPL, indicate your 22 | * decision by deleting the provisions above and replace them with the notice 23 | * and other provisions required by the GPL or the LGPL. 
If you do not delete 24 | * the provisions above, a recipient may use your version of this file under 25 | * the terms of any one of the EPL, the GPL or the LGPL. 26 | ***** END LICENSE BLOCK *****/ 27 | 28 | package org.jruby.ext.strscan; 29 | 30 | import org.jcodings.Encoding; 31 | import org.joni.Matcher; 32 | import org.joni.NameEntry; 33 | import org.joni.Option; 34 | import org.joni.Regex; 35 | import org.joni.Region; 36 | import org.jruby.Ruby; 37 | import org.jruby.RubyArray; 38 | import org.jruby.RubyBoolean; 39 | import org.jruby.RubyClass; 40 | import org.jruby.RubyFixnum; 41 | import org.jruby.RubyHash; 42 | import org.jruby.RubyInteger; 43 | import org.jruby.RubyMatchData; 44 | import org.jruby.RubyNumeric; 45 | import org.jruby.RubyObject; 46 | import org.jruby.RubyRegexp; 47 | import org.jruby.RubyString; 48 | import org.jruby.RubySymbol; 49 | import org.jruby.anno.JRubyClass; 50 | import org.jruby.anno.JRubyMethod; 51 | import org.jruby.ast.util.ArgsUtil; 52 | import org.jruby.common.IRubyWarnings.ID; 53 | import org.jruby.exceptions.RaiseException; 54 | import org.jruby.runtime.ThreadContext; 55 | import org.jruby.runtime.builtin.IRubyObject; 56 | import org.jruby.util.ByteList; 57 | import org.jruby.util.StringSupport; 58 | import org.jruby.util.ConvertBytes; 59 | 60 | import java.util.Iterator; 61 | 62 | import static org.jruby.runtime.Visibility.PRIVATE; 63 | 64 | /** 65 | * JRuby implementation of the strscan library from Ruby. 66 | * 67 | * Original implementation by Kelly Nawrocke. Currently a loose port of the C implementation from CRuby. 
68 | */ 69 | @JRubyClass(name = "StringScanner") 70 | public class RubyStringScanner extends RubyObject { 71 | private static final long serialVersionUID = -3722138049229128675L; 72 | 73 | private static final String STRSCAN_VERSION = "3.1.6"; 74 | 75 | private RubyString str; 76 | private int curr = 0; 77 | private int prev = -1; 78 | 79 | private transient Region regs; 80 | private transient Regex pattern; 81 | private boolean matched; 82 | private boolean fixedAnchor; 83 | 84 | public static RubyClass createScannerClass(final Ruby runtime) { 85 | RubyClass Object = runtime.getObject(); 86 | 87 | RubyClass scannerClass = runtime.defineClass("StringScanner", Object, RubyStringScanner::new); 88 | 89 | RubyClass standardError = runtime.getStandardError(); 90 | RubyClass error = scannerClass.defineClassUnder("Error", standardError, standardError.getAllocator()); 91 | if (!Object.isConstantDefined("ScanError")) { 92 | Object.defineConstant("ScanError", error); 93 | } 94 | 95 | RubyString version = runtime.newString(STRSCAN_VERSION); 96 | version.setFrozen(true); 97 | scannerClass.setConstant("Version", version); 98 | RubyString id = runtime.newString("$Id$"); 99 | id.setFrozen(true); 100 | scannerClass.setConstant("Id", id); 101 | 102 | scannerClass.defineAnnotatedMethods(RubyStringScanner.class); 103 | 104 | return scannerClass; 105 | } 106 | 107 | private void clearMatched() { 108 | matched = false; 109 | } 110 | 111 | private void clearNamedCaptures() { 112 | pattern = null; 113 | } 114 | 115 | private void clearMatchStatus() { 116 | clearMatched(); 117 | clearNamedCaptures(); 118 | } 119 | 120 | private void setMatched() { 121 | matched = true; 122 | } 123 | 124 | private boolean isMatched() { 125 | return matched; 126 | } 127 | 128 | private void check(ThreadContext context) { 129 | if (str == null) throw context.runtime.newArgumentError("uninitialized StringScanner object"); 130 | } 131 | 132 | protected RubyStringScanner(Ruby runtime, RubyClass type) { 133 | 
super(runtime, type); 134 | } 135 | 136 | @JRubyMethod(visibility = PRIVATE) 137 | public IRubyObject initialize(ThreadContext context, IRubyObject string) { 138 | return initialize(context, string, context.nil); 139 | } 140 | 141 | @JRubyMethod(visibility = PRIVATE) 142 | public IRubyObject initialize(ThreadContext context, IRubyObject string, IRubyObject dupOrOpts) { 143 | this.str = string.convertToString(); 144 | this.fixedAnchor = ArgsUtil.extractKeywordArg(context, "fixed_anchor", dupOrOpts).isTrue(); 145 | this.regs = Region.newRegion(0, 0); 146 | 147 | return this; 148 | } 149 | 150 | @JRubyMethod(visibility = PRIVATE) 151 | public IRubyObject initialize(ThreadContext context, IRubyObject string, IRubyObject dup, IRubyObject opts) { 152 | return initialize(context, string, opts); 153 | } 154 | 155 | @JRubyMethod(visibility = PRIVATE) 156 | public IRubyObject initialize_copy(ThreadContext context, IRubyObject other) { 157 | if (this == other) return this; 158 | if (!(other instanceof RubyStringScanner)) { 159 | throw context.runtime.newTypeError("wrong argument type " + other.getMetaClass() + " (expected StringScanner)"); 160 | } 161 | 162 | RubyStringScanner otherScanner = (RubyStringScanner) other; 163 | str = otherScanner.str; 164 | curr = otherScanner.curr; 165 | prev = otherScanner.prev; 166 | matched = otherScanner.matched; 167 | 168 | regs = otherScanner.regs.clone(); 169 | pattern = otherScanner.pattern; 170 | fixedAnchor = otherScanner.fixedAnchor; 171 | 172 | return this; 173 | } 174 | 175 | @JRubyMethod(name = "reset") 176 | public IRubyObject reset(ThreadContext context) { 177 | check(context); 178 | curr = 0; 179 | clearMatchStatus(); 180 | return this; 181 | } 182 | 183 | @JRubyMethod(name = "terminate") 184 | public IRubyObject terminate(ThreadContext context) { 185 | check(context); 186 | curr = str.getByteList().getRealSize(); 187 | clearMatchStatus(); 188 | return this; 189 | } 190 | 191 | @JRubyMethod(name = "clear") 192 | public 
IRubyObject clear(ThreadContext context) { 193 | check(context); 194 | Ruby runtime = context.runtime; 195 | if (runtime.isVerbose()) { 196 | runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#clear is obsolete; use #terminate instead"); 197 | } 198 | return terminate(context); 199 | } 200 | 201 | @JRubyMethod(name = "string") 202 | public RubyString string() { 203 | return str; 204 | } 205 | 206 | @JRubyMethod(name = "string=") 207 | public IRubyObject set_string(ThreadContext context, IRubyObject str) { 208 | this.str = RubyString.stringValue(str); 209 | curr = 0; 210 | clearMatchStatus(); 211 | return str; 212 | } 213 | 214 | @JRubyMethod(name = {"concat", "<<"}) 215 | public IRubyObject concat(ThreadContext context, IRubyObject obj) { 216 | check(context); 217 | str.append(obj.convertToString()); 218 | return this; 219 | } 220 | 221 | @JRubyMethod(name = {"pos", "pointer"}) 222 | public RubyFixnum pos(ThreadContext context) { 223 | check(context); 224 | return RubyFixnum.newFixnum(context.runtime, curr); 225 | } 226 | 227 | @JRubyMethod(name = {"pos=", "pointer="}) 228 | public IRubyObject set_pos(ThreadContext context, IRubyObject pos) { 229 | check(context); 230 | 231 | Ruby runtime = context.runtime; 232 | 233 | int i = RubyNumeric.num2int(pos); 234 | int size = str.getByteList().getRealSize(); 235 | if (i < 0) i += size; 236 | if (i < 0 || i > size) throw runtime.newRangeError("index out of range."); 237 | this.curr = i; 238 | 239 | return RubyFixnum.newFixnum(runtime, i); 240 | } 241 | 242 | @JRubyMethod(name = "charpos") 243 | public IRubyObject charpos(ThreadContext context) { 244 | Ruby runtime = context.runtime; 245 | 246 | ByteList strBL = str.getByteList(); 247 | int strBeg = strBL.begin(); 248 | 249 | return runtime.newFixnum(StringSupport.strLength(strBL.getEncoding(), strBL.unsafeBytes(), strBeg, strBeg + curr)); 250 | } 251 | 252 | private IRubyObject extractRange(Ruby runtime, int beg, int end) { 253 | ByteList byteList = 
str.getByteList(); 254 | int size = byteList.getRealSize(); 255 | 256 | if (beg > size) return runtime.getNil(); 257 | if (end > size) end = size; 258 | 259 | return newString(runtime, beg, end - beg); 260 | } 261 | 262 | private IRubyObject extractBegLen(Ruby runtime, int beg, int len) { 263 | assert len >= 0; 264 | 265 | int size = str.getByteList().getRealSize(); 266 | 267 | if (beg > size) return runtime.getNil(); 268 | len = Math.min(len, size - beg); 269 | 270 | return newString(runtime, beg, len); 271 | } 272 | 273 | // MRI: strscan_do_scan 274 | private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succptr, boolean getstr, boolean headonly) { 275 | final Ruby runtime = context.runtime; 276 | check(context); 277 | clearMatchStatus(); 278 | 279 | int restLen = restLen(); 280 | if (restLen < 0) { 281 | return context.nil; 282 | } 283 | 284 | ByteList strBL = str.getByteList(); 285 | int currPtr = strBL.getBegin() + curr; 286 | 287 | if (regex instanceof RubyRegexp) { 288 | pattern = ((RubyRegexp) regex).preparePattern(str); 289 | 290 | int range = currPtr + restLen; 291 | 292 | Matcher matcher = pattern.matcher(strBL.getUnsafeBytes(), matchTarget(), range); 293 | final int ret; 294 | if (headonly) { 295 | ret = RubyRegexp.matcherMatch(context, matcher, currPtr, range, Option.NONE); 296 | } else { 297 | ret = RubyRegexp.matcherSearch(context, matcher, currPtr, range, Option.NONE); 298 | } 299 | 300 | Region matchRegion = matcher.getRegion(); 301 | if (matchRegion == null) { 302 | regs = Region.newRegion(matcher.getBegin(), matcher.getEnd()); 303 | } else { 304 | regs = matchRegion; 305 | } 306 | 307 | if (ret == -2) { 308 | throw runtime.newRaiseException((RubyClass) getMetaClass().getConstant("ScanError"), "regexp buffer overflow"); 309 | } 310 | if (ret < 0) return context.nil; 311 | } else { 312 | RubyString pattern = regex.convertToString(); 313 | Encoding patternEnc = str.checkEncoding(pattern); 314 | ByteList patternBL = 
pattern.getByteList(); 315 | int patternSize = patternBL.realSize(); 316 | 317 | if (headonly) { 318 | if (restLen < pattern.size()) { 319 | return context.nil; 320 | } 321 | if (ByteList.memcmp(strBL.unsafeBytes(), currPtr, patternBL.unsafeBytes(), patternBL.begin(), patternSize) != 0) { 322 | return context.nil; 323 | } 324 | setRegisters(0, patternSize); 325 | } else { 326 | int pos = StringSupport.index(strBL, patternBL, currPtr, patternEnc); 327 | if (pos == -1) { 328 | return context.nil; 329 | } 330 | setRegisters(pos - curr, patternSize); 331 | } 332 | } 333 | 334 | setMatched(); 335 | prev = curr; 336 | 337 | if (succptr) { 338 | succ(); 339 | } 340 | 341 | int length = lastMatchLength(); 342 | 343 | if (getstr) { 344 | return extractBegLen(runtime, prev, length); 345 | } 346 | 347 | return RubyFixnum.newFixnum(runtime, length); 348 | } 349 | 350 | private int lastMatchLength() { 351 | if (fixedAnchor) { 352 | return regs.getEnd(0) - prev; 353 | } else { 354 | return regs.getEnd(0); 355 | } 356 | } 357 | 358 | private void succ() { 359 | if (fixedAnchor) { 360 | this.curr = regs.getEnd(0); 361 | } else { 362 | this.curr += regs.getEnd(0); 363 | } 364 | } 365 | 366 | private int currPtr() { 367 | return str.getByteList().getBegin() + curr; 368 | } 369 | 370 | private int matchTarget() { 371 | if (fixedAnchor) { 372 | return str.getByteList().getBegin(); 373 | } else { 374 | return str.getByteList().getBegin() + curr; 375 | } 376 | } 377 | 378 | private int restLen() { 379 | return str.size() - curr; 380 | } 381 | 382 | // MRI: set_registers 383 | private void setRegisters(int pos, int length) { 384 | if (fixedAnchor) { 385 | regs = Region.newRegion(pos + curr, pos + curr + length); 386 | } else { 387 | regs = Region.newRegion(pos, pos + length); 388 | } 389 | } 390 | 391 | @JRubyMethod(name = "scan") 392 | public IRubyObject scan(ThreadContext context, IRubyObject regex) { 393 | return scan(context, regex, true, true, true); 394 | } 395 | 396 | 
@JRubyMethod(name = "match?") 397 | public IRubyObject match_p(ThreadContext context, IRubyObject regex) { 398 | return scan(context, regex, false, false, true); 399 | } 400 | 401 | @JRubyMethod(name = "skip") 402 | public IRubyObject skip(ThreadContext context, IRubyObject regex) { 403 | return scan(context, regex, true, false, true); 404 | } 405 | 406 | @JRubyMethod(name = "check") 407 | public IRubyObject check(ThreadContext context, IRubyObject regex) { 408 | return scan(context, regex, false, true, true); 409 | } 410 | 411 | @JRubyMethod(name = "scan_full") 412 | public IRubyObject scan_full(ThreadContext context, IRubyObject regex, IRubyObject s, IRubyObject f) { 413 | return scan(context, regex, s.isTrue(), f.isTrue(), true); 414 | } 415 | 416 | @JRubyMethod(name = "scan_until") 417 | public IRubyObject scan_until(ThreadContext context, IRubyObject regex) { 418 | return scan(context, regex, true, true, false); 419 | } 420 | 421 | @JRubyMethod(name = "exist?") 422 | public IRubyObject exist_p(ThreadContext context, IRubyObject regex) { 423 | return scan(context, regex, false, false, false); 424 | } 425 | 426 | @JRubyMethod(name = "skip_until") 427 | public IRubyObject skip_until(ThreadContext context, IRubyObject regex) { 428 | return scan(context, regex, true, false, false); 429 | } 430 | 431 | @JRubyMethod(name = "check_until") 432 | public IRubyObject check_until(ThreadContext context, IRubyObject regex) { 433 | return scan(context, regex, false, true, false); 434 | } 435 | 436 | @JRubyMethod(name = "search_full") 437 | public IRubyObject search_full(ThreadContext context, IRubyObject regex, IRubyObject s, IRubyObject f) { 438 | return scan(context, regex, s.isTrue(), f.isTrue(), false); 439 | } 440 | 441 | // MRI: adjust_register_to_matched 442 | private void adjustRegisters() { 443 | if (fixedAnchor) { 444 | regs = Region.newRegion(prev, curr); 445 | } else { 446 | regs = Region.newRegion(0, curr - prev); 447 | } 448 | } 449 | 450 | private int 
adjustRegisterPosition(int position) { 451 | if (fixedAnchor) { 452 | return position; 453 | } else { 454 | return prev + position; 455 | } 456 | } 457 | 458 | @JRubyMethod(name = "getch") 459 | public IRubyObject getch(ThreadContext context) { 460 | return getchCommon(context); 461 | } 462 | 463 | public IRubyObject getchCommon(ThreadContext context) { 464 | check(context); 465 | clearMatchStatus(); 466 | ByteList strBL = str.getByteList(); 467 | int strSize = strBL.getRealSize(); 468 | 469 | if (curr >= strSize) return context.nil; 470 | 471 | Ruby runtime = context.runtime; 472 | 473 | Encoding strEnc = strBL.getEncoding(); 474 | int setBeg = strBL.getBegin(); 475 | 476 | int len = strEnc.isSingleByte() ? 1 : StringSupport.length(strEnc, strBL.getUnsafeBytes(), setBeg + curr, setBeg + strSize); 477 | len = Math.min(len, restLen()); 478 | 479 | prev = curr; 480 | curr += len; 481 | 482 | setMatched(); 483 | adjustRegisters(); 484 | 485 | return extractRange(runtime, 486 | adjustRegisterPosition(regs.getBeg(0)), 487 | adjustRegisterPosition(regs.getEnd(0))); 488 | } 489 | 490 | @JRubyMethod(name = "get_byte") 491 | public IRubyObject get_byte(ThreadContext context) { 492 | check(context); 493 | clearMatchStatus(); 494 | if (curr >= str.getByteList().getRealSize()) return context.nil; 495 | 496 | prev = curr; 497 | curr++; 498 | 499 | setMatched(); 500 | adjustRegisters(); 501 | 502 | return extractRange(context.runtime, 503 | adjustRegisterPosition(regs.getBeg(0)), 504 | adjustRegisterPosition(regs.getEnd(0))); 505 | } 506 | 507 | @JRubyMethod(name = "getbyte") 508 | public IRubyObject getbyte(ThreadContext context) { 509 | Ruby runtime = context.runtime; 510 | if (runtime.isVerbose()) { 511 | runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#getbyte is obsolete; use #get_byte instead"); 512 | } 513 | return get_byte(context); 514 | } 515 | 516 | @JRubyMethod(name = "scan_byte") 517 | public IRubyObject scan_byte(ThreadContext context) { 518 | 
Ruby runtime = context.runtime; 519 | check(context); 520 | clearMatchStatus(); 521 | ByteList byteList = str.getByteList(); 522 | int curr = this.curr; 523 | if (curr >= byteList.getRealSize()) return context.nil; 524 | 525 | int bite = byteList.get(curr); 526 | prev = curr; 527 | this.curr++; 528 | 529 | setMatched(); 530 | adjustRegisters(); 531 | return RubyFixnum.newFixnum(runtime, bite); 532 | } 533 | 534 | @JRubyMethod(name = "peek_byte") 535 | public IRubyObject peek_byte(ThreadContext context) { 536 | check(context); 537 | ByteList byteList = str.getByteList(); 538 | int curr = this.curr; 539 | if (curr >= byteList.getRealSize()) return context.nil; 540 | 541 | return RubyFixnum.newFixnum(context.runtime, byteList.get(curr)); 542 | } 543 | 544 | @JRubyMethod(name = "peek") 545 | public IRubyObject peek(ThreadContext context, IRubyObject length) { 546 | check(context); 547 | 548 | int len = RubyNumeric.num2int(length); 549 | if (len < 0) { 550 | throw context.runtime.newArgumentError("negative string size (or size too big)"); 551 | } 552 | 553 | ByteList value = str.getByteList(); 554 | int realSize = value.getRealSize(); 555 | if (curr >= realSize) return RubyString.newEmptyString(context.runtime); 556 | if (curr + len > realSize) len = realSize - curr; 557 | 558 | return extractBegLen(context.runtime, curr, len); 559 | } 560 | 561 | @JRubyMethod(name = "peep") 562 | public IRubyObject peep(ThreadContext context, IRubyObject length) { 563 | Ruby runtime = context.runtime; 564 | if (runtime.isVerbose()) { 565 | runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#peep is obsolete; use #peek instead"); 566 | } 567 | return peek(context, length); 568 | } 569 | 570 | @JRubyMethod(name = "scan_base10_integer", visibility = PRIVATE) 571 | public IRubyObject scan_base10_integer(ThreadContext context) { 572 | final Ruby runtime = context.runtime; 573 | check(context); 574 | clearMatchStatus(); 575 | 576 | strscanMustAsciiCompat(runtime); 577 | 578 | 
ByteList bytes = str.getByteList(); 579 | int ptr = curr; 580 | int len = 0; 581 | 582 | int remaining_len = bytes.realSize() - curr; 583 | 584 | if (remaining_len <= 0) { 585 | return context.nil; 586 | } 587 | 588 | if (bytes.get(ptr + len) == '-' || bytes.get(ptr + len) == '+') { 589 | len++; 590 | } 591 | 592 | if (len >= remaining_len || !Character.isDigit(bytes.get(ptr + len))) { 593 | return context.nil; 594 | } 595 | 596 | prev = ptr; 597 | 598 | while (len < remaining_len && Character.isDigit(bytes.get(ptr + len))) { 599 | len++; 600 | } 601 | 602 | return strscanParseInteger(runtime, bytes, ptr, len, 10); 603 | } 604 | 605 | @JRubyMethod(name = "scan_base16_integer", visibility = PRIVATE) 606 | public IRubyObject scan_base16_integer(ThreadContext context) { 607 | final Ruby runtime = context.runtime; 608 | check(context); 609 | clearMatchStatus(); 610 | 611 | strscanMustAsciiCompat(runtime); 612 | 613 | ByteList bytes = str.getByteList(); 614 | int ptr = this.curr; 615 | 616 | int remaining_len = bytes.realSize() - ptr; 617 | 618 | if (remaining_len <= 0) { 619 | return context.nil; 620 | } 621 | 622 | int len = 0; 623 | 624 | if (bytes.get(ptr + len) == '-' || bytes.get(ptr + len) == '+') { 625 | len++; 626 | } 627 | 628 | if ((remaining_len >= (len + 3)) && 629 | bytes.get(ptr + len) == '0' && 630 | bytes.get(ptr + len + 1) == 'x' && 631 | isHexChar(bytes.get(ptr + len + 2))) { 632 | len += 2; 633 | } 634 | 635 | if (len >= remaining_len || !isHexChar(bytes.get(ptr + len))) { 636 | return context.nil; 637 | } 638 | 639 | prev = ptr; 640 | 641 | while (len < remaining_len && isHexChar(bytes.get(ptr + len))) { 642 | len++; 643 | } 644 | 645 | return strscanParseInteger(runtime, bytes, ptr, len, 16); 646 | } 647 | 648 | private RubyInteger strscanParseInteger(Ruby runtime, ByteList bytes, int ptr, int len, int base) { 649 | this.curr = ptr + len; 650 | 651 | setMatched(); 652 | adjustRegisters(); 653 | 654 | return ConvertBytes.byteListToInum(runtime, 
bytes, ptr, ptr + len, base, true); 655 | } 656 | 657 | private void strscanMustAsciiCompat(Ruby runtime) { 658 | if (!str.getEncoding().isAsciiCompatible()) { 659 | throw runtime.newEncodingCompatibilityError("ASCII incompatible encoding: " + str.getEncoding()); 660 | } 661 | } 662 | 663 | private static boolean isHexChar(int c) { 664 | return Character.isDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); 665 | } 666 | 667 | @JRubyMethod(name = "unscan") 668 | public IRubyObject unscan(ThreadContext context) { 669 | check(context); 670 | 671 | if (!isMatched()) { 672 | Ruby runtime = context.runtime; 673 | 674 | RubyClass errorClass = (RubyClass) runtime.getClassFromPath("StringScanner::Error"); 675 | throw RaiseException.from(runtime, errorClass, "unscan failed: previous match had failed"); 676 | } 677 | 678 | curr = prev; 679 | clearMatchStatus(); 680 | 681 | return this; 682 | } 683 | 684 | @JRubyMethod(name = "beginning_of_line?", alias = "bol?") 685 | public IRubyObject bol_p(ThreadContext context) { 686 | check(context); 687 | 688 | ByteList value = str.getByteList(); 689 | if (curr > value.getRealSize()) return context.nil; 690 | if (curr == 0) return context.tru; 691 | return value.getUnsafeBytes()[(value.getBegin() + curr) - 1] == (byte) '\n' ? context.tru : context.fals; 692 | } 693 | 694 | @JRubyMethod(name = "eos?") 695 | public RubyBoolean eos_p(ThreadContext context) { 696 | check(context); 697 | return curr >= str.getByteList().getRealSize() ? context.tru : context.fals; 698 | } 699 | 700 | @JRubyMethod(name = "empty?") 701 | public RubyBoolean empty_p(ThreadContext context) { 702 | Ruby runtime = context.runtime; 703 | if (runtime.isVerbose()) { 704 | runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#empty? is obsolete; use #eos? 
instead"); 705 | } 706 | return eos_p(context); 707 | } 708 | 709 | @JRubyMethod(name = "rest?") 710 | public RubyBoolean rest_p(ThreadContext context) { 711 | check(context); 712 | return curr >= str.getByteList().getRealSize() ? context.fals : context.tru; 713 | } 714 | 715 | @JRubyMethod(name = "matched?") 716 | public RubyBoolean matched_p(ThreadContext context) { 717 | check(context); 718 | return isMatched() ? context.tru : context.fals; 719 | } 720 | 721 | @JRubyMethod(name = "matched") 722 | public IRubyObject matched(ThreadContext context) { 723 | check(context); 724 | if (!isMatched()) return context.nil; 725 | return extractRange(context.runtime, 726 | adjustRegisterPosition(regs.getBeg(0)), 727 | adjustRegisterPosition(regs.getEnd(0))); 728 | } 729 | 730 | @JRubyMethod(name = "matched_size") 731 | public IRubyObject matched_size(ThreadContext context) { 732 | check(context); 733 | if (!isMatched()) return context.nil; 734 | return RubyFixnum.newFixnum(context.runtime, regs.getEnd(0) - regs.getBeg(0)); 735 | } 736 | 737 | @JRubyMethod(name = "matchedsize") 738 | public IRubyObject matchedsize(ThreadContext context) { 739 | Ruby runtime = context.runtime; 740 | if (runtime.isVerbose()) { 741 | runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#matchedsize is obsolete; use #matched_size instead"); 742 | } 743 | return matched_size(context); 744 | } 745 | 746 | @JRubyMethod(name = "[]") 747 | public IRubyObject op_aref(ThreadContext context, IRubyObject idx) { 748 | check(context); 749 | 750 | if (!isMatched()) { 751 | return context.nil; 752 | } 753 | 754 | Ruby runtime = context.runtime; 755 | 756 | if (idx instanceof RubySymbol || idx instanceof RubyString) { 757 | if (pattern == null) { 758 | throw runtime.newRaiseException((RubyClass) getMetaClass().getConstant("IndexError"), "undefined group name reference: " + idx); 759 | } 760 | } 761 | 762 | int i = RubyMatchData.backrefNumber(runtime, pattern, regs, idx); 763 | 764 | return 
extractRegion(context, i); 765 | } 766 | 767 | private IRubyObject extractRegion(ThreadContext context, int i) { 768 | int numRegs = regs.getNumRegs(); 769 | 770 | if (i < 0) i += numRegs; 771 | if (i < 0 || i >= numRegs || regs.getBeg(i) == -1) { 772 | return context.nil; 773 | } 774 | 775 | return extractRange(context.runtime, 776 | adjustRegisterPosition(regs.getBeg(i)), 777 | adjustRegisterPosition(regs.getEnd(i))); 778 | } 779 | 780 | @JRubyMethod(name = "pre_match") 781 | public IRubyObject pre_match(ThreadContext context) { 782 | check(context); 783 | if (!isMatched()) { 784 | return context.nil; 785 | } 786 | return extractRange(context.runtime, 0, adjustRegisterPosition(regs.getBeg(0))); 787 | } 788 | 789 | @JRubyMethod(name = "post_match") 790 | public IRubyObject post_match(ThreadContext context) { 791 | check(context); 792 | 793 | if (!isMatched()) { 794 | return context.nil; 795 | } 796 | 797 | return extractRange(context.runtime, 798 | adjustRegisterPosition(regs.getEnd(0)), 799 | str.getByteList().getRealSize()); 800 | } 801 | 802 | @JRubyMethod(name = "rest") 803 | public IRubyObject rest(ThreadContext context) { 804 | check(context); 805 | Ruby runtime = context.runtime; 806 | 807 | ByteList value = str.getByteList(); 808 | int realSize = value.getRealSize(); 809 | 810 | if (curr >= realSize) { 811 | return RubyString.newEmptyString(runtime, str.getEncoding()); 812 | } 813 | 814 | return extractRange(runtime, curr, realSize); 815 | } 816 | 817 | @JRubyMethod(name = "rest_size") 818 | public RubyFixnum rest_size(ThreadContext context) { 819 | check(context); 820 | Ruby runtime = context.runtime; 821 | 822 | ByteList value = str.getByteList(); 823 | int realSize = value.getRealSize(); 824 | 825 | if (curr >= realSize) return RubyFixnum.zero(runtime); 826 | 827 | return RubyFixnum.newFixnum(runtime, realSize - curr); 828 | } 829 | 830 | @JRubyMethod(name = "restsize") 831 | public RubyFixnum restsize(ThreadContext context) { 832 | Ruby runtime = 
context.runtime; 833 | if (runtime.isVerbose()) { 834 | runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#restsize is obsolete; use #rest_size instead"); 835 | } 836 | return rest_size(context); 837 | } 838 | 839 | @JRubyMethod(name = "inspect") 840 | @Override 841 | public IRubyObject inspect() { 842 | if (str == null) return inspect("(uninitialized)"); 843 | 844 | ByteList byteList = str.getByteList(); 845 | int realSize = byteList.getRealSize(); 846 | 847 | if (curr >= realSize) return inspect("fin"); 848 | 849 | if (curr == 0) return inspect(curr + "/" + realSize + " @ " + inspect2()); 850 | 851 | return inspect(curr + "/" + realSize + " " + inspect1() + " @ " + inspect2()); 852 | } 853 | 854 | @JRubyMethod(name = "fixed_anchor?") 855 | public IRubyObject fixed_anchor_p(ThreadContext context) { 856 | return RubyBoolean.newBoolean(context, fixedAnchor); 857 | } 858 | 859 | @JRubyMethod(name = "named_captures") 860 | public IRubyObject named_captures(ThreadContext context) { 861 | Ruby runtime = context.runtime; 862 | IRubyObject nil = context.nil; 863 | 864 | RubyHash captures = RubyHash.newHash(runtime); 865 | 866 | if (pattern == null) return captures; 867 | 868 | Iterator nameEntryIterator = pattern.namedBackrefIterator(); 869 | 870 | while (nameEntryIterator.hasNext()) { 871 | NameEntry nameEntry = nameEntryIterator.next(); 872 | IRubyObject value = nil; 873 | 874 | for (int i : nameEntry.getBackRefs()) { 875 | IRubyObject v = extractRegion(context, i); 876 | if (v != nil) { 877 | value = v; 878 | } 879 | } 880 | 881 | int nameP = nameEntry.nameP; 882 | captures.op_aset(context, RubyString.newStringShared(runtime, nameEntry.name, nameP, nameEntry.nameEnd - nameP), value); 883 | } 884 | 885 | return captures; 886 | } 887 | 888 | private IRubyObject inspect(String msg) { 889 | RubyString result = getRuntime().newString("#<" + getMetaClass() + " " + msg + ">"); 890 | return result; 891 | } 892 | 893 | private static final int INSPECT_LENGTH = 
5; 894 | 895 | private static final byte[] DOT_BYTES = "...".getBytes(); 896 | 897 | private IRubyObject inspect1() { 898 | final Ruby runtime = getRuntime(); 899 | 900 | if (curr == 0) return RubyString.newEmptyString(runtime); 901 | 902 | if (curr > INSPECT_LENGTH) { 903 | return RubyString.newStringNoCopy(runtime, DOT_BYTES).append(str.substr(runtime, curr - INSPECT_LENGTH, INSPECT_LENGTH)).inspect(); 904 | } 905 | 906 | return str.substr(runtime, 0, curr).inspect(); 907 | } 908 | 909 | private IRubyObject inspect2() { 910 | final Ruby runtime = getRuntime(); 911 | 912 | ByteList byteList = str.getByteList(); 913 | int realSize = byteList.getRealSize(); 914 | 915 | if (curr >= realSize) return RubyString.newEmptyString(runtime); 916 | 917 | int len = realSize - curr; 918 | 919 | if (len > INSPECT_LENGTH) { 920 | return ((RubyString) str.substr(runtime, curr, INSPECT_LENGTH)).cat(DOT_BYTES).inspect(); 921 | } 922 | 923 | return str.substr(runtime, curr, len).inspect(); 924 | } 925 | 926 | @JRubyMethod(name = "size") 927 | public IRubyObject size(ThreadContext context) { 928 | if (!isMatched()) return context.nil; 929 | return context.runtime.newFixnum(regs.getNumRegs()); 930 | } 931 | 932 | @JRubyMethod(name = "captures") 933 | public IRubyObject captures(ThreadContext context) { 934 | int i, numRegs; 935 | RubyArray newAry; 936 | 937 | if (!isMatched()) return context.nil; 938 | 939 | Ruby runtime = context.runtime; 940 | 941 | numRegs = regs.getNumRegs(); 942 | newAry = RubyArray.newArray(runtime, numRegs); 943 | 944 | for (i = 1; i < numRegs; i++) { 945 | IRubyObject str; 946 | if (regs.getBeg(i) == -1) { 947 | str = context.nil; 948 | } else { 949 | str = extractRange(runtime, 950 | adjustRegisterPosition(regs.getBeg(i)), 951 | adjustRegisterPosition(regs.getEnd(i))); 952 | } 953 | newAry.push(str); 954 | } 955 | 956 | return newAry; 957 | } 958 | 959 | @JRubyMethod(name = "values_at", rest = true) 960 | public IRubyObject values_at(ThreadContext context, 
IRubyObject[] args) { 961 | int i; 962 | RubyArray newAry; 963 | 964 | if (!isMatched()) return context.nil; 965 | 966 | Ruby runtime = context.runtime; 967 | 968 | newAry = RubyArray.newArray(runtime, args.length); 969 | for (i = 0; i < args.length; i++) { 970 | newAry.push(op_aref(context, args[i])); 971 | } 972 | 973 | return newAry; 974 | } 975 | 976 | @JRubyMethod(name = "values_at") 977 | public IRubyObject values_at(ThreadContext context) { 978 | if (!isMatched()) return context.nil; 979 | 980 | return RubyArray.newEmptyArray(context.runtime); 981 | } 982 | 983 | @JRubyMethod(name = "values_at") 984 | public IRubyObject values_at(ThreadContext context, IRubyObject index) { 985 | if (!isMatched()) return context.nil; 986 | 987 | return RubyArray.newArray(context.runtime, op_aref(context, index)); 988 | } 989 | 990 | @JRubyMethod(name = "values_at") 991 | public IRubyObject values_at(ThreadContext context, IRubyObject index1, IRubyObject index2) { 992 | if (!isMatched()) return context.nil; 993 | 994 | return RubyArray.newArray(context.runtime, op_aref(context, index1), op_aref(context, index2)); 995 | } 996 | 997 | @JRubyMethod(name = "values_at") 998 | public IRubyObject values_at(ThreadContext context, IRubyObject index1, IRubyObject index2, IRubyObject index3) { 999 | if (!isMatched()) return context.nil; 1000 | 1001 | return RubyArray.newArray(context.runtime, op_aref(context, index1), op_aref(context, index2), op_aref(context, index3)); 1002 | } 1003 | 1004 | // MRI: str_new 1005 | private RubyString newString(Ruby runtime, int start, int length) { 1006 | ByteList byteList = str.getByteList(); 1007 | int begin = byteList.begin(); 1008 | 1009 | ByteList newByteList = new ByteList(byteList.unsafeBytes(), begin + start, length, byteList.getEncoding(), true); 1010 | 1011 | return RubyString.newString(runtime, newByteList); 1012 | } 1013 | 1014 | /** 1015 | * @deprecated Only defined for backward compatibility in CRuby. 
1016 | */ 1017 | @Deprecated 1018 | @JRubyMethod(name = "must_C_version", meta = true) 1019 | public static IRubyObject mustCversion(IRubyObject recv) { 1020 | return recv; 1021 | } 1022 | } 1023 | -------------------------------------------------------------------------------- /ext/jruby/org/jruby/ext/strscan/StringScannerLibrary.java: -------------------------------------------------------------------------------- 1 | package org.jruby.ext.strscan; 2 | 3 | import java.io.IOException; 4 | 5 | import org.jruby.Ruby; 6 | import org.jruby.ext.strscan.RubyStringScanner; 7 | import org.jruby.runtime.load.Library; 8 | 9 | /** 10 | * @author kscott 11 | * 12 | */ 13 | public class StringScannerLibrary implements Library { 14 | 15 | /** 16 | * @see org.jruby.runtime.load.Library#load(org.jruby.Ruby) 17 | */ 18 | public void load(Ruby runtime, boolean wrap) throws IOException { 19 | RubyStringScanner.createScannerClass(runtime); 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /ext/strscan/extconf.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | require 'mkmf' 3 | if RUBY_ENGINE == 'ruby' 4 | $INCFLAGS << " -I$(top_srcdir)" if $extmk 5 | have_func("onig_region_memsize", "ruby.h") 6 | have_func("rb_reg_onig_match", "ruby.h") 7 | create_makefile 'strscan' 8 | else 9 | File.write('Makefile', dummy_makefile("").join) 10 | end 11 | -------------------------------------------------------------------------------- /ext/strscan/strscan.c: -------------------------------------------------------------------------------- 1 | /* 2 | $Id$ 3 | 4 | Copyright (c) 1999-2006 Minero Aoki 5 | 6 | This program is free software. 7 | You can redistribute this program under the terms of the Ruby's or 2-clause 8 | BSD License. For details, see the COPYING and LICENSE.txt files. 
9 | */ 10 | 11 | #include "ruby/ruby.h" 12 | #include "ruby/re.h" 13 | #include "ruby/encoding.h" 14 | 15 | #ifdef RUBY_EXTCONF_H 16 | # include RUBY_EXTCONF_H 17 | #endif 18 | 19 | #ifdef HAVE_ONIG_REGION_MEMSIZE 20 | extern size_t onig_region_memsize(const struct re_registers *regs); 21 | #endif 22 | 23 | #include 24 | 25 | #define STRSCAN_VERSION "3.1.6" 26 | 27 | /* ======================================================================= 28 | Data Type Definitions 29 | ======================================================================= */ 30 | 31 | static VALUE StringScanner; 32 | static VALUE ScanError; 33 | static ID id_byteslice; 34 | 35 | static int usascii_encindex, utf8_encindex, binary_encindex; 36 | 37 | struct strscanner 38 | { 39 | /* multi-purpose flags */ 40 | unsigned long flags; 41 | #define FLAG_MATCHED (1 << 0) 42 | 43 | /* the string to scan */ 44 | VALUE str; 45 | 46 | /* scan pointers */ 47 | long prev; /* legal only when MATCHED_P(s) */ 48 | long curr; /* always legal */ 49 | 50 | /* the regexp register; legal only when MATCHED_P(s) */ 51 | struct re_registers regs; 52 | 53 | /* regexp used for last scan */ 54 | VALUE regex; 55 | 56 | /* anchor mode */ 57 | bool fixed_anchor_p; 58 | }; 59 | 60 | #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED) 61 | #define MATCHED(s) ((s)->flags |= FLAG_MATCHED) 62 | #define CLEAR_MATCHED(s) ((s)->flags &= ~FLAG_MATCHED) 63 | #define CLEAR_NAMED_CAPTURES(s) ((s)->regex = Qnil) 64 | #define CLEAR_MATCH_STATUS(s) do {\ 65 | CLEAR_MATCHED(s);\ 66 | CLEAR_NAMED_CAPTURES(s);\ 67 | } while (0) 68 | 69 | #define S_PBEG(s) (RSTRING_PTR((s)->str)) 70 | #define S_LEN(s) (RSTRING_LEN((s)->str)) 71 | #define S_PEND(s) (S_PBEG(s) + S_LEN(s)) 72 | #define CURPTR(s) (S_PBEG(s) + (s)->curr) 73 | #define S_RESTLEN(s) (S_LEN(s) - (s)->curr) 74 | 75 | #define EOS_P(s) ((s)->curr >= RSTRING_LEN(p->str)) 76 | 77 | #define GET_SCANNER(obj,var) do {\ 78 | (var) = check_strscan(obj);\ 79 | if (NIL_P((var)->str)) 
rb_raise(rb_eArgError, "uninitialized StringScanner object");\ 80 | } while (0) 81 | 82 | /* ======================================================================= 83 | Function Prototypes 84 | ======================================================================= */ 85 | 86 | static inline long minl _((const long n, const long x)); 87 | static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i)); 88 | static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len)); 89 | 90 | static struct strscanner *check_strscan _((VALUE obj)); 91 | static void strscan_mark _((void *p)); 92 | static void strscan_free _((void *p)); 93 | static size_t strscan_memsize _((const void *p)); 94 | static VALUE strscan_s_allocate _((VALUE klass)); 95 | static VALUE strscan_initialize _((int argc, VALUE *argv, VALUE self)); 96 | static VALUE strscan_init_copy _((VALUE vself, VALUE vorig)); 97 | 98 | static VALUE strscan_s_mustc _((VALUE self)); 99 | static VALUE strscan_terminate _((VALUE self)); 100 | static VALUE strscan_clear _((VALUE self)); 101 | static VALUE strscan_get_string _((VALUE self)); 102 | static VALUE strscan_set_string _((VALUE self, VALUE str)); 103 | static VALUE strscan_concat _((VALUE self, VALUE str)); 104 | static VALUE strscan_get_pos _((VALUE self)); 105 | static VALUE strscan_set_pos _((VALUE self, VALUE pos)); 106 | static VALUE strscan_do_scan _((VALUE self, VALUE regex, 107 | int succptr, int getstr, int headonly)); 108 | static VALUE strscan_scan _((VALUE self, VALUE re)); 109 | static VALUE strscan_match_p _((VALUE self, VALUE re)); 110 | static VALUE strscan_skip _((VALUE self, VALUE re)); 111 | static VALUE strscan_check _((VALUE self, VALUE re)); 112 | static VALUE strscan_scan_full _((VALUE self, VALUE re, 113 | VALUE succp, VALUE getp)); 114 | static VALUE strscan_scan_until _((VALUE self, VALUE re)); 115 | static VALUE strscan_skip_until _((VALUE self, VALUE re)); 116 | static VALUE strscan_check_until _((VALUE self, 
VALUE re)); 117 | static VALUE strscan_search_full _((VALUE self, VALUE re, 118 | VALUE succp, VALUE getp)); 119 | static void adjust_registers_to_matched _((struct strscanner *p)); 120 | static VALUE strscan_getch _((VALUE self)); 121 | static VALUE strscan_get_byte _((VALUE self)); 122 | static VALUE strscan_getbyte _((VALUE self)); 123 | static VALUE strscan_peek _((VALUE self, VALUE len)); 124 | static VALUE strscan_peep _((VALUE self, VALUE len)); 125 | static VALUE strscan_scan_base10_integer _((VALUE self)); 126 | static VALUE strscan_unscan _((VALUE self)); 127 | static VALUE strscan_bol_p _((VALUE self)); 128 | static VALUE strscan_eos_p _((VALUE self)); 129 | static VALUE strscan_empty_p _((VALUE self)); 130 | static VALUE strscan_rest_p _((VALUE self)); 131 | static VALUE strscan_matched_p _((VALUE self)); 132 | static VALUE strscan_matched _((VALUE self)); 133 | static VALUE strscan_matched_size _((VALUE self)); 134 | static VALUE strscan_aref _((VALUE self, VALUE idx)); 135 | static VALUE strscan_pre_match _((VALUE self)); 136 | static VALUE strscan_post_match _((VALUE self)); 137 | static VALUE strscan_rest _((VALUE self)); 138 | static VALUE strscan_rest_size _((VALUE self)); 139 | 140 | static VALUE strscan_inspect _((VALUE self)); 141 | static VALUE inspect1 _((struct strscanner *p)); 142 | static VALUE inspect2 _((struct strscanner *p)); 143 | 144 | /* ======================================================================= 145 | Utils 146 | ======================================================================= */ 147 | 148 | static VALUE 149 | str_new(struct strscanner *p, const char *ptr, long len) 150 | { 151 | VALUE str = rb_str_new(ptr, len); 152 | rb_enc_copy(str, p->str); 153 | return str; 154 | } 155 | 156 | static inline long 157 | minl(const long x, const long y) 158 | { 159 | return (x < y) ? 
x : y; 160 | } 161 | 162 | static VALUE 163 | extract_range(struct strscanner *p, long beg_i, long end_i) 164 | { 165 | if (beg_i > S_LEN(p)) return Qnil; 166 | end_i = minl(end_i, S_LEN(p)); 167 | return str_new(p, S_PBEG(p) + beg_i, end_i - beg_i); 168 | } 169 | 170 | static VALUE 171 | extract_beg_len(struct strscanner *p, long beg_i, long len) 172 | { 173 | if (beg_i > S_LEN(p)) return Qnil; 174 | len = minl(len, S_LEN(p) - beg_i); 175 | return str_new(p, S_PBEG(p) + beg_i, len); 176 | } 177 | 178 | /* ======================================================================= 179 | Constructor 180 | ======================================================================= */ 181 | 182 | static void 183 | strscan_mark(void *ptr) 184 | { 185 | struct strscanner *p = ptr; 186 | rb_gc_mark(p->str); 187 | rb_gc_mark(p->regex); 188 | } 189 | 190 | static void 191 | strscan_free(void *ptr) 192 | { 193 | struct strscanner *p = ptr; 194 | onig_region_free(&(p->regs), 0); 195 | ruby_xfree(p); 196 | } 197 | 198 | static size_t 199 | strscan_memsize(const void *ptr) 200 | { 201 | const struct strscanner *p = ptr; 202 | size_t size = sizeof(*p) - sizeof(p->regs); 203 | #ifdef HAVE_ONIG_REGION_MEMSIZE 204 | size += onig_region_memsize(&p->regs); 205 | #endif 206 | return size; 207 | } 208 | 209 | static const rb_data_type_t strscanner_type = { 210 | "StringScanner", 211 | {strscan_mark, strscan_free, strscan_memsize}, 212 | 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED 213 | }; 214 | 215 | static VALUE 216 | strscan_s_allocate(VALUE klass) 217 | { 218 | struct strscanner *p; 219 | VALUE obj = TypedData_Make_Struct(klass, struct strscanner, &strscanner_type, p); 220 | 221 | CLEAR_MATCH_STATUS(p); 222 | onig_region_init(&(p->regs)); 223 | p->str = Qnil; 224 | return obj; 225 | } 226 | 227 | /* 228 | * :markup: markdown 229 | * :include: strscan/link_refs.txt 230 | * 231 | * call-seq: 232 | * StringScanner.new(string, fixed_anchor: false) -> string_scanner 233 | * 234 
| * Returns a new `StringScanner` object whose [stored string][1] 235 | * is the given `string`; 236 | * sets the [fixed-anchor property][10]: 237 | * 238 | * ```rb 239 | * scanner = StringScanner.new('foobarbaz') 240 | * scanner.string # => "foobarbaz" 241 | * scanner.fixed_anchor? # => false 242 | * put_situation(scanner) 243 | * # Situation: 244 | * # pos: 0 245 | * # charpos: 0 246 | * # rest: "foobarbaz" 247 | * # rest_size: 9 248 | * ``` 249 | * 250 | */ 251 | static VALUE 252 | strscan_initialize(int argc, VALUE *argv, VALUE self) 253 | { 254 | struct strscanner *p; 255 | VALUE str, options; 256 | 257 | p = check_strscan(self); 258 | rb_scan_args(argc, argv, "11", &str, &options); 259 | options = rb_check_hash_type(options); 260 | if (!NIL_P(options)) { 261 | VALUE fixed_anchor; 262 | ID keyword_ids[1]; 263 | keyword_ids[0] = rb_intern("fixed_anchor"); 264 | rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor); 265 | if (fixed_anchor == Qundef) { 266 | p->fixed_anchor_p = false; 267 | } 268 | else { 269 | p->fixed_anchor_p = RTEST(fixed_anchor); 270 | } 271 | } 272 | else { 273 | p->fixed_anchor_p = false; 274 | } 275 | StringValue(str); 276 | RB_OBJ_WRITE(self, &p->str, str); 277 | 278 | return self; 279 | } 280 | 281 | static struct strscanner * 282 | check_strscan(VALUE obj) 283 | { 284 | return rb_check_typeddata(obj, &strscanner_type); 285 | } 286 | 287 | /* 288 | * :markup: markdown 289 | * :include: strscan/link_refs.txt 290 | * 291 | * call-seq: 292 | * dup -> shallow_copy 293 | * 294 | * Returns a shallow copy of `self`; 295 | * the [stored string][1] in the copy is the same string as in `self`. 
296 | */ 297 | static VALUE 298 | strscan_init_copy(VALUE vself, VALUE vorig) 299 | { 300 | struct strscanner *self, *orig; 301 | 302 | self = check_strscan(vself); 303 | orig = check_strscan(vorig); 304 | if (self != orig) { 305 | self->flags = orig->flags; 306 | RB_OBJ_WRITE(vself, &self->str, orig->str); 307 | self->prev = orig->prev; 308 | self->curr = orig->curr; 309 | if (rb_reg_region_copy(&self->regs, &orig->regs)) 310 | rb_memerror(); 311 | RB_GC_GUARD(vorig); 312 | } 313 | 314 | return vself; 315 | } 316 | 317 | /* ======================================================================= 318 | Instance Methods 319 | ======================================================================= */ 320 | 321 | /* 322 | * call-seq: 323 | * StringScanner.must_C_version -> self 324 | * 325 | * Returns +self+; defined for backward compatibility. 326 | */ 327 | 328 | /* :nodoc: */ 329 | static VALUE 330 | strscan_s_mustc(VALUE self) 331 | { 332 | return self; 333 | } 334 | 335 | /* 336 | * :markup: markdown 337 | * :include: strscan/link_refs.txt 338 | * 339 | * call-seq: 340 | * reset -> self 341 | * 342 | * Sets both [byte position][2] and [character position][7] to zero, 343 | * and clears [match values][9]; 344 | * returns +self+: 345 | * 346 | * ```rb 347 | * scanner = StringScanner.new('foobarbaz') 348 | * scanner.exist?(/bar/) # => 6 349 | * scanner.reset # => # 350 | * put_situation(scanner) 351 | * # Situation: 352 | * # pos: 0 353 | * # charpos: 0 354 | * # rest: "foobarbaz" 355 | * # rest_size: 9 356 | * # => nil 357 | * match_values_cleared?(scanner) # => true 358 | * ``` 359 | * 360 | */ 361 | static VALUE 362 | strscan_reset(VALUE self) 363 | { 364 | struct strscanner *p; 365 | 366 | GET_SCANNER(self, p); 367 | p->curr = 0; 368 | CLEAR_MATCH_STATUS(p); 369 | return self; 370 | } 371 | 372 | /* 373 | * :markup: markdown 374 | * :include: strscan/link_refs.txt 375 | * :include: strscan/methods/terminate.md 376 | */ 377 | static VALUE 378 | 
strscan_terminate(VALUE self) 379 | { 380 | struct strscanner *p; 381 | 382 | GET_SCANNER(self, p); 383 | p->curr = S_LEN(p); 384 | CLEAR_MATCH_STATUS(p); 385 | return self; 386 | } 387 | 388 | /* 389 | * call-seq: 390 | * clear -> self 391 | * 392 | * This method is obsolete; use the equivalent method StringScanner#terminate. 393 | */ 394 | 395 | /* :nodoc: */ 396 | static VALUE 397 | strscan_clear(VALUE self) 398 | { 399 | rb_warning("StringScanner#clear is obsolete; use #terminate instead"); 400 | return strscan_terminate(self); 401 | } 402 | 403 | /* 404 | * :markup: markdown 405 | * :include: strscan/link_refs.txt 406 | * 407 | * call-seq: 408 | * string -> stored_string 409 | * 410 | * Returns the [stored string][1]: 411 | * 412 | * ```rb 413 | * scanner = StringScanner.new('foobar') 414 | * scanner.string # => "foobar" 415 | * scanner.concat('baz') 416 | * scanner.string # => "foobarbaz" 417 | * ``` 418 | * 419 | */ 420 | static VALUE 421 | strscan_get_string(VALUE self) 422 | { 423 | struct strscanner *p; 424 | 425 | GET_SCANNER(self, p); 426 | return p->str; 427 | } 428 | 429 | /* 430 | * :markup: markdown 431 | * :include: strscan/link_refs.txt 432 | * 433 | * call-seq: 434 | * string = other_string -> other_string 435 | * 436 | * Replaces the [stored string][1] with the given `other_string`: 437 | * 438 | * - Sets both [positions][11] to zero. 439 | * - Clears [match values][9]. 440 | * - Returns `other_string`. 
441 | * 442 | * ```rb 443 | * scanner = StringScanner.new('foobar') 444 | * scanner.scan(/foo/) 445 | * put_situation(scanner) 446 | * # Situation: 447 | * # pos: 3 448 | * # charpos: 3 449 | * # rest: "bar" 450 | * # rest_size: 3 451 | * match_values_cleared?(scanner) # => false 452 | * 453 | * scanner.string = 'baz' # => "baz" 454 | * put_situation(scanner) 455 | * # Situation: 456 | * # pos: 0 457 | * # charpos: 0 458 | * # rest: "baz" 459 | * # rest_size: 3 460 | * match_values_cleared?(scanner) # => true 461 | * ``` 462 | * 463 | */ 464 | static VALUE 465 | strscan_set_string(VALUE self, VALUE str) 466 | { 467 | struct strscanner *p = check_strscan(self); 468 | 469 | StringValue(str); 470 | RB_OBJ_WRITE(self, &p->str, str); 471 | p->curr = 0; 472 | CLEAR_MATCH_STATUS(p); 473 | return str; 474 | } 475 | 476 | /* 477 | * :markup: markdown 478 | * :include: strscan/link_refs.txt 479 | * 480 | * call-seq: 481 | * concat(more_string) -> self 482 | * 483 | * - Appends the given `more_string` 484 | * to the [stored string][1]. 485 | * - Returns `self`. 486 | * - Does not affect the [positions][11] 487 | * or [match values][9]. 
488 | * 489 | * 490 | * ```rb 491 | * scanner = StringScanner.new('foo') 492 | * scanner.string # => "foo" 493 | * scanner.terminate 494 | * scanner.concat('barbaz') # => # 495 | * scanner.string # => "foobarbaz" 496 | * put_situation(scanner) 497 | * # Situation: 498 | * # pos: 3 499 | * # charpos: 3 500 | * # rest: "barbaz" 501 | * # rest_size: 6 502 | * ``` 503 | * 504 | */ 505 | static VALUE 506 | strscan_concat(VALUE self, VALUE str) 507 | { 508 | struct strscanner *p; 509 | 510 | GET_SCANNER(self, p); 511 | StringValue(str); 512 | rb_str_append(p->str, str); 513 | return self; 514 | } 515 | 516 | /* 517 | * :markup: markdown 518 | * :include: strscan/link_refs.txt 519 | * :include: strscan/methods/get_pos.md 520 | */ 521 | static VALUE 522 | strscan_get_pos(VALUE self) 523 | { 524 | struct strscanner *p; 525 | 526 | GET_SCANNER(self, p); 527 | return LONG2NUM(p->curr); 528 | } 529 | 530 | /* 531 | * :markup: markdown 532 | * :include: strscan/link_refs.txt 533 | * :include: strscan/methods/get_charpos.md 534 | */ 535 | static VALUE 536 | strscan_get_charpos(VALUE self) 537 | { 538 | struct strscanner *p; 539 | 540 | GET_SCANNER(self, p); 541 | 542 | return LONG2NUM(rb_enc_strlen(S_PBEG(p), CURPTR(p), rb_enc_get(p->str))); 543 | } 544 | 545 | /* 546 | * :markup: markdown 547 | * :include: strscan/link_refs.txt 548 | * :include: strscan/methods/set_pos.md 549 | */ 550 | static VALUE 551 | strscan_set_pos(VALUE self, VALUE v) 552 | { 553 | struct strscanner *p; 554 | long i; 555 | 556 | GET_SCANNER(self, p); 557 | i = NUM2LONG(v); 558 | if (i < 0) i += S_LEN(p); 559 | if (i < 0) rb_raise(rb_eRangeError, "index out of range"); 560 | if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range"); 561 | p->curr = i; 562 | return LONG2NUM(i); 563 | } 564 | 565 | static inline UChar * 566 | match_target(struct strscanner *p) 567 | { 568 | if (p->fixed_anchor_p) { 569 | return (UChar *)S_PBEG(p); 570 | } 571 | else 572 | { 573 | return (UChar *)CURPTR(p); 574 | } 
575 | } 576 | 577 | static inline void 578 | set_registers(struct strscanner *p, size_t pos, size_t length) 579 | { 580 | const int at = 0; 581 | OnigRegion *regs = &(p->regs); 582 | onig_region_clear(regs); 583 | if (onig_region_set(regs, at, 0, 0)) return; 584 | if (p->fixed_anchor_p) { 585 | regs->beg[at] = pos + p->curr; 586 | regs->end[at] = pos + p->curr + length; 587 | } 588 | else 589 | { 590 | regs->beg[at] = pos; 591 | regs->end[at] = pos + length; 592 | } 593 | } 594 | 595 | static inline void 596 | succ(struct strscanner *p) 597 | { 598 | if (p->fixed_anchor_p) { 599 | p->curr = p->regs.end[0]; 600 | } 601 | else 602 | { 603 | p->curr += p->regs.end[0]; 604 | } 605 | } 606 | 607 | static inline long 608 | last_match_length(struct strscanner *p) 609 | { 610 | if (p->fixed_anchor_p) { 611 | return p->regs.end[0] - p->prev; 612 | } 613 | else 614 | { 615 | return p->regs.end[0]; 616 | } 617 | } 618 | 619 | static inline long 620 | adjust_register_position(struct strscanner *p, long position) 621 | { 622 | if (p->fixed_anchor_p) { 623 | return position; 624 | } 625 | else { 626 | return p->prev + position; 627 | } 628 | } 629 | 630 | /* rb_reg_onig_match is available in Ruby 3.3 and later. 
*/ 631 | #ifndef HAVE_RB_REG_ONIG_MATCH 632 | static OnigPosition 633 | rb_reg_onig_match(VALUE re, VALUE str, 634 | OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), 635 | void *args, struct re_registers *regs) 636 | { 637 | OnigPosition result; 638 | regex_t *reg = rb_reg_prepare_re(re, str); 639 | 640 | bool tmpreg = reg != RREGEXP_PTR(re); 641 | if (!tmpreg) RREGEXP(re)->usecnt++; 642 | 643 | result = match(reg, str, regs, args); 644 | 645 | if (!tmpreg) RREGEXP(re)->usecnt--; 646 | if (tmpreg) { 647 | if (RREGEXP(re)->usecnt) { 648 | onig_free(reg); 649 | } 650 | else { 651 | onig_free(RREGEXP_PTR(re)); 652 | RREGEXP_PTR(re) = reg; 653 | } 654 | } 655 | 656 | if (result < 0) { 657 | if (result != ONIG_MISMATCH) { 658 | rb_raise(ScanError, "regexp buffer overflow"); 659 | } 660 | } 661 | 662 | return result; 663 | } 664 | #endif 665 | 666 | static OnigPosition 667 | strscan_match(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr) 668 | { 669 | struct strscanner *p = (struct strscanner *)args_ptr; 670 | 671 | return onig_match(reg, 672 | match_target(p), 673 | (UChar* )(CURPTR(p) + S_RESTLEN(p)), 674 | (UChar* )CURPTR(p), 675 | regs, 676 | ONIG_OPTION_NONE); 677 | } 678 | 679 | static OnigPosition 680 | strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr) 681 | { 682 | struct strscanner *p = (struct strscanner *)args_ptr; 683 | 684 | return onig_search(reg, 685 | match_target(p), 686 | (UChar *)(CURPTR(p) + S_RESTLEN(p)), 687 | (UChar *)CURPTR(p), 688 | (UChar *)(CURPTR(p) + S_RESTLEN(p)), 689 | regs, 690 | ONIG_OPTION_NONE); 691 | } 692 | 693 | static void 694 | strscan_enc_check(VALUE str1, VALUE str2) 695 | { 696 | if (RB_ENCODING_GET(str1) != RB_ENCODING_GET(str2)) { 697 | rb_enc_check(str1, str2); 698 | } 699 | } 700 | 701 | static VALUE 702 | strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly) 703 | { 704 | struct strscanner *p; 705 | 706 | 
GET_SCANNER(self, p); 707 | 708 | CLEAR_MATCH_STATUS(p); 709 | if (S_RESTLEN(p) < 0) { 710 | return Qnil; 711 | } 712 | 713 | if (RB_TYPE_P(pattern, T_REGEXP)) { 714 | OnigPosition ret; 715 | RB_OBJ_WRITE(self, &p->regex, pattern); 716 | ret = rb_reg_onig_match(p->regex, 717 | p->str, 718 | headonly ? strscan_match : strscan_search, 719 | (void *)p, 720 | &(p->regs)); 721 | 722 | if (ret == ONIG_MISMATCH) { 723 | return Qnil; 724 | } 725 | } 726 | else { 727 | StringValue(pattern); 728 | if (S_RESTLEN(p) < RSTRING_LEN(pattern)) { 729 | strscan_enc_check(p->str, pattern); 730 | return Qnil; 731 | } 732 | 733 | if (headonly) { 734 | strscan_enc_check(p->str, pattern); 735 | 736 | if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) { 737 | return Qnil; 738 | } 739 | set_registers(p, 0, RSTRING_LEN(pattern)); 740 | } 741 | else { 742 | rb_encoding *enc = rb_enc_check(p->str, pattern); 743 | long pos = rb_memsearch(RSTRING_PTR(pattern), RSTRING_LEN(pattern), 744 | CURPTR(p), S_RESTLEN(p), enc); 745 | if (pos == -1) { 746 | return Qnil; 747 | } 748 | set_registers(p, pos, RSTRING_LEN(pattern)); 749 | } 750 | } 751 | 752 | MATCHED(p); 753 | p->prev = p->curr; 754 | 755 | if (succptr) { 756 | succ(p); 757 | } 758 | { 759 | const long length = last_match_length(p); 760 | if (getstr) { 761 | return extract_beg_len(p, p->prev, length); 762 | } 763 | else { 764 | return INT2FIX(length); 765 | } 766 | } 767 | } 768 | 769 | /* 770 | * :markup: markdown 771 | * :include: strscan/link_refs.txt 772 | * :include: strscan/methods/scan.md 773 | */ 774 | static VALUE 775 | strscan_scan(VALUE self, VALUE re) 776 | { 777 | return strscan_do_scan(self, re, 1, 1, 1); 778 | } 779 | 780 | /* 781 | * :markup: markdown 782 | * :include: strscan/link_refs.txt 783 | * 784 | * call-seq: 785 | * match?(pattern) -> updated_position or nil 786 | * 787 | * Attempts to [match][17] the given `pattern` 788 | * at the beginning of the [target substring][3]; 789 | * does not modify the 
[positions][11]. 790 | * 791 | * If the match succeeds: 792 | * 793 | * - Sets [match values][9]. 794 | * - Returns the size in bytes of the matched substring. 795 | * 796 | * 797 | * ```rb 798 | * scanner = StringScanner.new('foobarbaz') 799 | * scanner.pos = 3 800 | * scanner.match?(/bar/) => 3 801 | * put_match_values(scanner) 802 | * # Basic match values: 803 | * # matched?: true 804 | * # matched_size: 3 805 | * # pre_match: "foo" 806 | * # matched : "bar" 807 | * # post_match: "baz" 808 | * # Captured match values: 809 | * # size: 1 810 | * # captures: [] 811 | * # named_captures: {} 812 | * # values_at: ["bar", nil] 813 | * # []: 814 | * # [0]: "bar" 815 | * # [1]: nil 816 | * put_situation(scanner) 817 | * # Situation: 818 | * # pos: 3 819 | * # charpos: 3 820 | * # rest: "barbaz" 821 | * # rest_size: 6 822 | * ``` 823 | * 824 | * If the match fails: 825 | * 826 | * - Clears match values. 827 | * - Returns `nil`. 828 | * - Does not increment positions. 829 | * 830 | * ```rb 831 | * scanner.match?(/nope/) # => nil 832 | * match_values_cleared?(scanner) # => true 833 | * ``` 834 | * 835 | */ 836 | static VALUE 837 | strscan_match_p(VALUE self, VALUE re) 838 | { 839 | return strscan_do_scan(self, re, 0, 0, 1); 840 | } 841 | 842 | /* 843 | * :markup: markdown 844 | * :include: strscan/link_refs.txt 845 | * :include: strscan/methods/skip.md 846 | */ 847 | static VALUE 848 | strscan_skip(VALUE self, VALUE re) 849 | { 850 | return strscan_do_scan(self, re, 1, 0, 1); 851 | } 852 | 853 | /* 854 | * :markup: markdown 855 | * :include: strscan/link_refs.txt 856 | * 857 | * call-seq: 858 | * check(pattern) -> matched_substring or nil 859 | * 860 | * Attempts to [match][17] the given `pattern` 861 | * at the beginning of the [target substring][3]; 862 | * does not modify the [positions][11]. 863 | * 864 | * If the match succeeds: 865 | * 866 | * - Returns the matched substring. 867 | * - Sets all [match values][9]. 
868 | * 869 | * ```rb 870 | * scanner = StringScanner.new('foobarbaz') 871 | * scanner.pos = 3 872 | * scanner.check('bar') # => "bar" 873 | * put_match_values(scanner) 874 | * # Basic match values: 875 | * # matched?: true 876 | * # matched_size: 3 877 | * # pre_match: "foo" 878 | * # matched : "bar" 879 | * # post_match: "baz" 880 | * # Captured match values: 881 | * # size: 1 882 | * # captures: [] 883 | * # named_captures: {} 884 | * # values_at: ["bar", nil] 885 | * # []: 886 | * # [0]: "bar" 887 | * # [1]: nil 888 | * # => 0..1 889 | * put_situation(scanner) 890 | * # Situation: 891 | * # pos: 3 892 | * # charpos: 3 893 | * # rest: "barbaz" 894 | * # rest_size: 6 895 | * ``` 896 | * 897 | * If the match fails: 898 | * 899 | * - Returns `nil`. 900 | * - Clears all [match values][9]. 901 | * 902 | * ```rb 903 | * scanner.check(/nope/) # => nil 904 | * match_values_cleared?(scanner) # => true 905 | * ``` 906 | * 907 | */ 908 | static VALUE 909 | strscan_check(VALUE self, VALUE re) 910 | { 911 | return strscan_do_scan(self, re, 0, 1, 1); 912 | } 913 | 914 | /* 915 | * call-seq: 916 | * scan_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or nil 917 | * 918 | * Equivalent to one of the following: 919 | * 920 | * - +advance_pointer_p+ +true+: 921 | * 922 | * - +return_string_p+ +true+: StringScanner#scan(pattern). 923 | * - +return_string_p+ +false+: StringScanner#skip(pattern). 924 | * 925 | * - +advance_pointer_p+ +false+: 926 | * 927 | * - +return_string_p+ +true+: StringScanner#check(pattern). 928 | * - +return_string_p+ +false+: StringScanner#match?(pattern). 
929 | * 930 | */ 931 | 932 | /* :nodoc: */ 933 | static VALUE 934 | strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f) 935 | { 936 | return strscan_do_scan(self, re, RTEST(s), RTEST(f), 1); 937 | } 938 | 939 | /* 940 | * :markup: markdown 941 | * :include: strscan/link_refs.txt 942 | * :include: strscan/methods/scan_until.md 943 | */ 944 | static VALUE 945 | strscan_scan_until(VALUE self, VALUE re) 946 | { 947 | return strscan_do_scan(self, re, 1, 1, 0); 948 | } 949 | 950 | /* 951 | * :markup: markdown 952 | * :include: strscan/link_refs.txt 953 | * 954 | * call-seq: 955 | * exist?(pattern) -> byte_offset or nil 956 | * 957 | * Attempts to [match][17] the given `pattern` 958 | * anywhere (at any [position][2]) 959 | * n the [target substring][3]; 960 | * does not modify the [positions][11]. 961 | * 962 | * If the match succeeds: 963 | * 964 | * - Returns a byte offset: 965 | * the distance in bytes between the current [position][2] 966 | * and the end of the matched substring. 967 | * - Sets all [match values][9]. 968 | * 969 | * ```rb 970 | * scanner = StringScanner.new('foobarbazbatbam') 971 | * scanner.pos = 6 972 | * scanner.exist?(/bat/) # => 6 973 | * put_match_values(scanner) 974 | * # Basic match values: 975 | * # matched?: true 976 | * # matched_size: 3 977 | * # pre_match: "foobarbaz" 978 | * # matched : "bat" 979 | * # post_match: "bam" 980 | * # Captured match values: 981 | * # size: 1 982 | * # captures: [] 983 | * # named_captures: {} 984 | * # values_at: ["bat", nil] 985 | * # []: 986 | * # [0]: "bat" 987 | * # [1]: nil 988 | * put_situation(scanner) 989 | * # Situation: 990 | * # pos: 6 991 | * # charpos: 6 992 | * # rest: "bazbatbam" 993 | * # rest_size: 9 994 | * ``` 995 | * 996 | * If the match fails: 997 | * 998 | * - Returns `nil`. 999 | * - Clears all [match values][9]. 
1000 | * 1001 | * ```rb 1002 | * scanner.exist?(/nope/) # => nil 1003 | * match_values_cleared?(scanner) # => true 1004 | * ``` 1005 | * 1006 | */ 1007 | static VALUE 1008 | strscan_exist_p(VALUE self, VALUE re) 1009 | { 1010 | return strscan_do_scan(self, re, 0, 0, 0); 1011 | } 1012 | 1013 | /* 1014 | * :markup: markdown 1015 | * :include: strscan/link_refs.txt 1016 | * :include: strscan/methods/skip_until.md 1017 | */ 1018 | static VALUE 1019 | strscan_skip_until(VALUE self, VALUE re) 1020 | { 1021 | return strscan_do_scan(self, re, 1, 0, 0); 1022 | } 1023 | 1024 | /* 1025 | * :markup: markdown 1026 | * :include: strscan/link_refs.txt 1027 | * 1028 | * call-seq: 1029 | * check_until(pattern) -> substring or nil 1030 | * 1031 | * Attempts to [match][17] the given `pattern` 1032 | * anywhere (at any [position][2]) 1033 | * in the [target substring][3]; 1034 | * does not modify the [positions][11]. 1035 | * 1036 | * If the match succeeds: 1037 | * 1038 | * - Sets all [match values][9]. 1039 | * - Returns the matched substring, 1040 | * which extends from the current [position][2] 1041 | * to the end of the matched substring. 1042 | * 1043 | * ```rb 1044 | * scanner = StringScanner.new('foobarbazbatbam') 1045 | * scanner.pos = 6 1046 | * scanner.check_until(/bat/) # => "bazbat" 1047 | * put_match_values(scanner) 1048 | * # Basic match values: 1049 | * # matched?: true 1050 | * # matched_size: 3 1051 | * # pre_match: "foobarbaz" 1052 | * # matched : "bat" 1053 | * # post_match: "bam" 1054 | * # Captured match values: 1055 | * # size: 1 1056 | * # captures: [] 1057 | * # named_captures: {} 1058 | * # values_at: ["bat", nil] 1059 | * # []: 1060 | * # [0]: "bat" 1061 | * # [1]: nil 1062 | * put_situation(scanner) 1063 | * # Situation: 1064 | * # pos: 6 1065 | * # charpos: 6 1066 | * # rest: "bazbatbam" 1067 | * # rest_size: 9 1068 | * ``` 1069 | * 1070 | * If the match fails: 1071 | * 1072 | * - Clears all [match values][9]. 1073 | * - Returns `nil`. 
1074 | * 1075 | * ```rb 1076 | * scanner.check_until(/nope/) # => nil 1077 | * match_values_cleared?(scanner) # => true 1078 | * ``` 1079 | * 1080 | */ 1081 | static VALUE 1082 | strscan_check_until(VALUE self, VALUE re) 1083 | { 1084 | return strscan_do_scan(self, re, 0, 1, 0); 1085 | } 1086 | 1087 | /* 1088 | * call-seq: 1089 | * search_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or position_delta or nil 1090 | * 1091 | * Equivalent to one of the following: 1092 | * 1093 | * - +advance_pointer_p+ +true+: 1094 | * 1095 | * - +return_string_p+ +true+: StringScanner#scan_until(pattern). 1096 | * - +return_string_p+ +false+: StringScanner#skip_until(pattern). 1097 | * 1098 | * - +advance_pointer_p+ +false+: 1099 | * 1100 | * - +return_string_p+ +true+: StringScanner#check_until(pattern). 1101 | * - +return_string_p+ +false+: StringScanner#exist?(pattern). 1102 | * 1103 | */ 1104 | 1105 | /* :nodoc: */ 1106 | static VALUE 1107 | strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f) 1108 | { 1109 | return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0); 1110 | } 1111 | 1112 | static void 1113 | adjust_registers_to_matched(struct strscanner *p) 1114 | { 1115 | onig_region_clear(&(p->regs)); 1116 | if (p->fixed_anchor_p) { 1117 | onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr); 1118 | } 1119 | else { 1120 | onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev)); 1121 | } 1122 | } 1123 | 1124 | /* 1125 | * :markup: markdown 1126 | * :include: strscan/link_refs.txt 1127 | * :include: strscan/methods/getch.md 1128 | */ 1129 | static VALUE 1130 | strscan_getch(VALUE self) 1131 | { 1132 | struct strscanner *p; 1133 | long len; 1134 | 1135 | GET_SCANNER(self, p); 1136 | CLEAR_MATCH_STATUS(p); 1137 | if (EOS_P(p)) 1138 | return Qnil; 1139 | 1140 | len = rb_enc_mbclen(CURPTR(p), S_PEND(p), rb_enc_get(p->str)); 1141 | len = minl(len, S_RESTLEN(p)); 1142 | p->prev = p->curr; 1143 | p->curr += len; 1144 | MATCHED(p); 1145 | 
adjust_registers_to_matched(p); 1146 | return extract_range(p, 1147 | adjust_register_position(p, p->regs.beg[0]), 1148 | adjust_register_position(p, p->regs.end[0])); 1149 | } 1150 | 1151 | /* 1152 | * call-seq: 1153 | * scan_byte -> integer_byte 1154 | * 1155 | * Scans one byte and returns it as an integer. 1156 | * This method is not multibyte character sensitive. 1157 | * See also: #getch. 1158 | * 1159 | */ 1160 | static VALUE 1161 | strscan_scan_byte(VALUE self) 1162 | { 1163 | struct strscanner *p; 1164 | VALUE byte; 1165 | 1166 | GET_SCANNER(self, p); 1167 | CLEAR_MATCH_STATUS(p); 1168 | if (EOS_P(p)) 1169 | return Qnil; 1170 | 1171 | byte = INT2FIX((unsigned char)*CURPTR(p)); 1172 | p->prev = p->curr; 1173 | p->curr++; 1174 | MATCHED(p); 1175 | adjust_registers_to_matched(p); 1176 | return byte; 1177 | } 1178 | 1179 | /* 1180 | * Peeks at the current byte and returns it as an integer. 1181 | * 1182 | * s = StringScanner.new('ab') 1183 | * s.peek_byte # => 97 1184 | */ 1185 | static VALUE 1186 | strscan_peek_byte(VALUE self) 1187 | { 1188 | struct strscanner *p; 1189 | 1190 | GET_SCANNER(self, p); 1191 | if (EOS_P(p)) 1192 | return Qnil; 1193 | 1194 | return INT2FIX((unsigned char)*CURPTR(p)); 1195 | } 1196 | 1197 | /* 1198 | * :markup: markdown 1199 | * :include: strscan/link_refs.txt 1200 | * :include: strscan/methods/get_byte.md 1201 | */ 1202 | static VALUE 1203 | strscan_get_byte(VALUE self) 1204 | { 1205 | struct strscanner *p; 1206 | 1207 | GET_SCANNER(self, p); 1208 | CLEAR_MATCH_STATUS(p); 1209 | if (EOS_P(p)) 1210 | return Qnil; 1211 | 1212 | p->prev = p->curr; 1213 | p->curr++; 1214 | MATCHED(p); 1215 | adjust_registers_to_matched(p); 1216 | return extract_range(p, 1217 | adjust_register_position(p, p->regs.beg[0]), 1218 | adjust_register_position(p, p->regs.end[0])); 1219 | } 1220 | 1221 | /* 1222 | * call-seq: 1223 | * getbyte 1224 | * 1225 | * Equivalent to #get_byte. 1226 | * This method is obsolete; use #get_byte instead. 
1227 | */ 1228 | 1229 | /* :nodoc: */ 1230 | static VALUE 1231 | strscan_getbyte(VALUE self) 1232 | { 1233 | rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead"); 1234 | return strscan_get_byte(self); 1235 | } 1236 | 1237 | /* 1238 | * :markup: markdown 1239 | * :include: strscan/link_refs.txt 1240 | * 1241 | * call-seq: 1242 | * peek(length) -> substring 1243 | * 1244 | * Returns the substring `string[pos, length]`; 1245 | * does not update [match values][9] or [positions][11]: 1246 | * 1247 | * ```rb 1248 | * scanner = StringScanner.new('foobarbaz') 1249 | * scanner.pos = 3 1250 | * scanner.peek(3) # => "bar" 1251 | * scanner.terminate 1252 | * scanner.peek(3) # => "" 1253 | * ``` 1254 | * 1255 | */ 1256 | static VALUE 1257 | strscan_peek(VALUE self, VALUE vlen) 1258 | { 1259 | struct strscanner *p; 1260 | long len; 1261 | 1262 | GET_SCANNER(self, p); 1263 | 1264 | len = NUM2LONG(vlen); 1265 | if (EOS_P(p)) 1266 | return str_new(p, "", 0); 1267 | 1268 | len = minl(len, S_RESTLEN(p)); 1269 | return extract_beg_len(p, p->curr, len); 1270 | } 1271 | 1272 | /* 1273 | * call-seq: 1274 | * peep 1275 | * 1276 | * Equivalent to #peek. 1277 | * This method is obsolete; use #peek instead. 
1278 | */ 1279 | 1280 | /* :nodoc: */ 1281 | static VALUE 1282 | strscan_peep(VALUE self, VALUE vlen) 1283 | { 1284 | rb_warning("StringScanner#peep is obsolete; use #peek instead"); 1285 | return strscan_peek(self, vlen); 1286 | } 1287 | 1288 | static VALUE 1289 | strscan_parse_integer(struct strscanner *p, int base, long len) 1290 | { 1291 | VALUE buffer_v, integer; 1292 | 1293 | char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1); 1294 | 1295 | MEMCPY(buffer, CURPTR(p), char, len); 1296 | buffer[len] = '\0'; 1297 | integer = rb_cstr2inum(buffer, base); 1298 | RB_ALLOCV_END(buffer_v); 1299 | p->curr += len; 1300 | 1301 | MATCHED(p); 1302 | adjust_registers_to_matched(p); 1303 | 1304 | return integer; 1305 | } 1306 | 1307 | static inline bool 1308 | strscan_ascii_compat_fastpath(VALUE str) { 1309 | int encindex = ENCODING_GET_INLINED(str); 1310 | // The overwhelming majority of strings are in one of these 3 encodings. 1311 | return encindex == utf8_encindex || encindex == binary_encindex || encindex == usascii_encindex; 1312 | } 1313 | 1314 | static inline void 1315 | strscan_must_ascii_compat(VALUE str) 1316 | { 1317 | // The overwhelming majority of strings are in one of these 3 encodings. 
1318 | if (RB_LIKELY(strscan_ascii_compat_fastpath(str))) { 1319 | return; 1320 | } 1321 | 1322 | rb_must_asciicompat(str); 1323 | } 1324 | 1325 | static VALUE 1326 | strscan_scan_base10_integer(VALUE self) 1327 | { 1328 | char *ptr; 1329 | long len = 0; 1330 | struct strscanner *p; 1331 | 1332 | GET_SCANNER(self, p); 1333 | CLEAR_MATCH_STATUS(p); 1334 | 1335 | strscan_must_ascii_compat(p->str); 1336 | 1337 | ptr = CURPTR(p); 1338 | 1339 | long remaining_len = S_RESTLEN(p); 1340 | 1341 | if (remaining_len <= 0) { 1342 | return Qnil; 1343 | } 1344 | 1345 | if (ptr[len] == '-' || ptr[len] == '+') { 1346 | len++; 1347 | } 1348 | 1349 | if (!rb_isdigit(ptr[len])) { 1350 | return Qnil; 1351 | } 1352 | 1353 | p->prev = p->curr; 1354 | 1355 | while (len < remaining_len && rb_isdigit(ptr[len])) { 1356 | len++; 1357 | } 1358 | 1359 | return strscan_parse_integer(p, 10, len); 1360 | } 1361 | 1362 | static VALUE 1363 | strscan_scan_base16_integer(VALUE self) 1364 | { 1365 | char *ptr; 1366 | long len = 0; 1367 | struct strscanner *p; 1368 | 1369 | GET_SCANNER(self, p); 1370 | CLEAR_MATCH_STATUS(p); 1371 | 1372 | strscan_must_ascii_compat(p->str); 1373 | 1374 | ptr = CURPTR(p); 1375 | 1376 | long remaining_len = S_RESTLEN(p); 1377 | 1378 | if (remaining_len <= 0) { 1379 | return Qnil; 1380 | } 1381 | 1382 | if (ptr[len] == '-' || ptr[len] == '+') { 1383 | len++; 1384 | } 1385 | 1386 | if ((remaining_len >= (len + 3)) && ptr[len] == '0' && ptr[len + 1] == 'x' && rb_isxdigit(ptr[len + 2])) { 1387 | len += 2; 1388 | } 1389 | 1390 | if (len >= remaining_len || !rb_isxdigit(ptr[len])) { 1391 | return Qnil; 1392 | } 1393 | 1394 | p->prev = p->curr; 1395 | 1396 | while (len < remaining_len && rb_isxdigit(ptr[len])) { 1397 | len++; 1398 | } 1399 | 1400 | return strscan_parse_integer(p, 16, len); 1401 | } 1402 | 1403 | /* 1404 | * :markup: markdown 1405 | * :include: strscan/link_refs.txt 1406 | * 1407 | * call-seq: 1408 | * unscan -> self 1409 | * 1410 | * Sets the [position][2] to 
its value previous to the recent successful 1411 | * [match][17] attempt: 1412 | * 1413 | * ```rb 1414 | * scanner = StringScanner.new('foobarbaz') 1415 | * scanner.scan(/foo/) 1416 | * put_situation(scanner) 1417 | * # Situation: 1418 | * # pos: 3 1419 | * # charpos: 3 1420 | * # rest: "barbaz" 1421 | * # rest_size: 6 1422 | * scanner.unscan 1423 | * # => # 1424 | * put_situation(scanner) 1425 | * # Situation: 1426 | * # pos: 0 1427 | * # charpos: 0 1428 | * # rest: "foobarbaz" 1429 | * # rest_size: 9 1430 | * ``` 1431 | * 1432 | * Raises an exception if match values are clear: 1433 | * 1434 | * ```rb 1435 | * scanner.scan(/nope/) # => nil 1436 | * match_values_cleared?(scanner) # => true 1437 | * scanner.unscan # Raises StringScanner::Error. 1438 | * ``` 1439 | * 1440 | */ 1441 | static VALUE 1442 | strscan_unscan(VALUE self) 1443 | { 1444 | struct strscanner *p; 1445 | 1446 | GET_SCANNER(self, p); 1447 | if (! MATCHED_P(p)) 1448 | rb_raise(ScanError, "unscan failed: previous match record not exist"); 1449 | p->curr = p->prev; 1450 | CLEAR_MATCH_STATUS(p); 1451 | return self; 1452 | } 1453 | 1454 | /* 1455 | * 1456 | * :markup: markdown 1457 | * :include: strscan/link_refs.txt 1458 | * 1459 | * call-seq: 1460 | * beginning_of_line? -> true or false 1461 | * 1462 | * Returns whether the [position][2] is at the beginning of a line; 1463 | * that is, at the beginning of the [stored string][1] 1464 | * or immediately after a newline: 1465 | * 1466 | * scanner = StringScanner.new(MULTILINE_TEXT) 1467 | * scanner.string 1468 | * # => "Go placidly amid the noise and haste,\nand remember what peace there may be in silence.\n" 1469 | * scanner.pos # => 0 1470 | * scanner.beginning_of_line? # => true 1471 | * 1472 | * scanner.scan_until(/,/) # => "Go placidly amid the noise and haste," 1473 | * scanner.beginning_of_line? # => false 1474 | * 1475 | * scanner.scan(/\n/) # => "\n" 1476 | * scanner.beginning_of_line? 
# => true 1477 | * 1478 | * scanner.terminate 1479 | * scanner.beginning_of_line? # => true 1480 | * 1481 | * scanner.concat('x') 1482 | * scanner.terminate 1483 | * scanner.beginning_of_line? # => false 1484 | * 1485 | * StringScanner#bol? is an alias for StringScanner#beginning_of_line?. 1486 | */ 1487 | static VALUE 1488 | strscan_bol_p(VALUE self) 1489 | { 1490 | struct strscanner *p; 1491 | 1492 | GET_SCANNER(self, p); 1493 | if (CURPTR(p) > S_PEND(p)) return Qnil; 1494 | if (p->curr == 0) return Qtrue; 1495 | return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse; 1496 | } 1497 | 1498 | /* 1499 | * :markup: markdown 1500 | * :include: strscan/link_refs.txt 1501 | * 1502 | * call-seq: 1503 | * eos? -> true or false 1504 | * 1505 | * Returns whether the [position][2] 1506 | * is at the end of the [stored string][1]: 1507 | * 1508 | * ```rb 1509 | * scanner = StringScanner.new('foobarbaz') 1510 | * scanner.eos? # => false 1511 | * pos = 3 1512 | * scanner.eos? # => false 1513 | * scanner.terminate 1514 | * scanner.eos? # => true 1515 | * ``` 1516 | * 1517 | */ 1518 | static VALUE 1519 | strscan_eos_p(VALUE self) 1520 | { 1521 | struct strscanner *p; 1522 | 1523 | GET_SCANNER(self, p); 1524 | return EOS_P(p) ? Qtrue : Qfalse; 1525 | } 1526 | 1527 | /* 1528 | * call-seq: 1529 | * empty? 1530 | * 1531 | * Equivalent to #eos?. 1532 | * This method is obsolete, use #eos? instead. 1533 | */ 1534 | 1535 | /* :nodoc: */ 1536 | static VALUE 1537 | strscan_empty_p(VALUE self) 1538 | { 1539 | rb_warning("StringScanner#empty? is obsolete; use #eos? instead"); 1540 | return strscan_eos_p(self); 1541 | } 1542 | 1543 | /* 1544 | * call-seq: 1545 | * rest? 1546 | * 1547 | * Returns true if and only if there is more data in the string. See #eos?. 1548 | * This method is obsolete; use #eos? instead. 1549 | * 1550 | * s = StringScanner.new('test string') 1551 | * # These two are opposites 1552 | * s.eos? # => false 1553 | * s.rest? 
# => true 1554 | */ 1555 | 1556 | /* :nodoc: */ 1557 | static VALUE 1558 | strscan_rest_p(VALUE self) 1559 | { 1560 | struct strscanner *p; 1561 | 1562 | GET_SCANNER(self, p); 1563 | return EOS_P(p) ? Qfalse : Qtrue; 1564 | } 1565 | 1566 | /* 1567 | * :markup: markdown 1568 | * :include: strscan/link_refs.txt 1569 | * 1570 | * call-seq: 1571 | * matched? -> true or false 1572 | * 1573 | * Returns `true` of the most recent [match attempt][17] was successful, 1574 | * `false` otherwise; 1575 | * see [Basic Matched Values][18]: 1576 | * 1577 | * ```rb 1578 | * scanner = StringScanner.new('foobarbaz') 1579 | * scanner.matched? # => false 1580 | * scanner.pos = 3 1581 | * scanner.exist?(/baz/) # => 6 1582 | * scanner.matched? # => true 1583 | * scanner.exist?(/nope/) # => nil 1584 | * scanner.matched? # => false 1585 | * ``` 1586 | * 1587 | */ 1588 | static VALUE 1589 | strscan_matched_p(VALUE self) 1590 | { 1591 | struct strscanner *p; 1592 | 1593 | GET_SCANNER(self, p); 1594 | return MATCHED_P(p) ? Qtrue : Qfalse; 1595 | } 1596 | 1597 | /* 1598 | * :markup: markdown 1599 | * :include: strscan/link_refs.txt 1600 | * 1601 | * call-seq: 1602 | * matched -> matched_substring or nil 1603 | * 1604 | * Returns the matched substring from the most recent [match][17] attempt 1605 | * if it was successful, 1606 | * or `nil` otherwise; 1607 | * see [Basic Matched Values][18]: 1608 | * 1609 | * ```rb 1610 | * scanner = StringScanner.new('foobarbaz') 1611 | * scanner.matched # => nil 1612 | * scanner.pos = 3 1613 | * scanner.match?(/bar/) # => 3 1614 | * scanner.matched # => "bar" 1615 | * scanner.match?(/nope/) # => nil 1616 | * scanner.matched # => nil 1617 | * ``` 1618 | * 1619 | */ 1620 | static VALUE 1621 | strscan_matched(VALUE self) 1622 | { 1623 | struct strscanner *p; 1624 | 1625 | GET_SCANNER(self, p); 1626 | if (! 
MATCHED_P(p)) return Qnil; 1627 | return extract_range(p, 1628 | adjust_register_position(p, p->regs.beg[0]), 1629 | adjust_register_position(p, p->regs.end[0])); 1630 | } 1631 | 1632 | /* 1633 | * :markup: markdown 1634 | * :include: strscan/link_refs.txt 1635 | * 1636 | * call-seq: 1637 | * matched_size -> substring_size or nil 1638 | * 1639 | * Returns the size (in bytes) of the matched substring 1640 | * from the most recent match [match attempt][17] if it was successful, 1641 | * or `nil` otherwise; 1642 | * see [Basic Matched Values][18]: 1643 | * 1644 | * ```rb 1645 | * scanner = StringScanner.new('foobarbaz') 1646 | * scanner.matched_size # => nil 1647 | * 1648 | * pos = 3 1649 | * scanner.exist?(/baz/) # => 9 1650 | * scanner.matched_size # => 3 1651 | * 1652 | * scanner.exist?(/nope/) # => nil 1653 | * scanner.matched_size # => nil 1654 | * ``` 1655 | * 1656 | */ 1657 | static VALUE 1658 | strscan_matched_size(VALUE self) 1659 | { 1660 | struct strscanner *p; 1661 | 1662 | GET_SCANNER(self, p); 1663 | if (! MATCHED_P(p)) return Qnil; 1664 | return LONG2NUM(p->regs.end[0] - p->regs.beg[0]); 1665 | } 1666 | 1667 | static int 1668 | name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end, rb_encoding *enc) 1669 | { 1670 | if (RTEST(regexp)) { 1671 | int num = onig_name_to_backref_number(RREGEXP_PTR(regexp), 1672 | (const unsigned char* )name, 1673 | (const unsigned char* )name_end, 1674 | regs); 1675 | if (num >= 1) { 1676 | return num; 1677 | } 1678 | } 1679 | rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s", 1680 | rb_long2int(name_end - name), name); 1681 | } 1682 | 1683 | /* 1684 | * 1685 | * :markup: markdown 1686 | * :include: strscan/link_refs.txt 1687 | * 1688 | * call-seq: 1689 | * [](specifier) -> substring or nil 1690 | * 1691 | * Returns a captured substring or `nil`; 1692 | * see [Captured Match Values][13]. 
1693 | * 1694 | * When there are captures: 1695 | * 1696 | * ```rb 1697 | * scanner = StringScanner.new('Fri Dec 12 1975 14:39') 1698 | * scanner.scan(/(?\w+) (?\w+) (?\d+) /) 1699 | * ``` 1700 | * 1701 | * - `specifier` zero: returns the entire matched substring: 1702 | * 1703 | * ```rb 1704 | * scanner[0] # => "Fri Dec 12 " 1705 | * scanner.pre_match # => "" 1706 | * scanner.post_match # => "1975 14:39" 1707 | * ``` 1708 | * 1709 | * - `specifier` positive integer. returns the `n`th capture, or `nil` if out of range: 1710 | * 1711 | * ```rb 1712 | * scanner[1] # => "Fri" 1713 | * scanner[2] # => "Dec" 1714 | * scanner[3] # => "12" 1715 | * scanner[4] # => nil 1716 | * ``` 1717 | * 1718 | * - `specifier` negative integer. counts backward from the last subgroup: 1719 | * 1720 | * ```rb 1721 | * scanner[-1] # => "12" 1722 | * scanner[-4] # => "Fri Dec 12 " 1723 | * scanner[-5] # => nil 1724 | * ``` 1725 | * 1726 | * - `specifier` symbol or string. returns the named subgroup, or `nil` if no such: 1727 | * 1728 | * ```rb 1729 | * scanner[:wday] # => "Fri" 1730 | * scanner['wday'] # => "Fri" 1731 | * scanner[:month] # => "Dec" 1732 | * scanner[:day] # => "12" 1733 | * scanner[:nope] # => nil 1734 | * ``` 1735 | * 1736 | * When there are no captures, only `[0]` returns non-`nil`: 1737 | * 1738 | * ```rb 1739 | * scanner = StringScanner.new('foobarbaz') 1740 | * scanner.exist?(/bar/) 1741 | * scanner[0] # => "bar" 1742 | * scanner[1] # => nil 1743 | * ``` 1744 | * 1745 | * For a failed match, even `[0]` returns `nil`: 1746 | * 1747 | * ```rb 1748 | * scanner.scan(/nope/) # => nil 1749 | * scanner[0] # => nil 1750 | * scanner[1] # => nil 1751 | * ``` 1752 | * 1753 | */ 1754 | static VALUE 1755 | strscan_aref(VALUE self, VALUE idx) 1756 | { 1757 | const char *name; 1758 | struct strscanner *p; 1759 | long i; 1760 | 1761 | GET_SCANNER(self, p); 1762 | if (! 
MATCHED_P(p)) return Qnil; 1763 | 1764 | switch (TYPE(idx)) { 1765 | case T_SYMBOL: 1766 | idx = rb_sym2str(idx); 1767 | /* fall through */ 1768 | case T_STRING: 1769 | RSTRING_GETMEM(idx, name, i); 1770 | i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx)); 1771 | break; 1772 | default: 1773 | i = NUM2LONG(idx); 1774 | } 1775 | 1776 | if (i < 0) 1777 | i += p->regs.num_regs; 1778 | if (i < 0) return Qnil; 1779 | if (i >= p->regs.num_regs) return Qnil; 1780 | if (p->regs.beg[i] == -1) return Qnil; 1781 | 1782 | return extract_range(p, 1783 | adjust_register_position(p, p->regs.beg[i]), 1784 | adjust_register_position(p, p->regs.end[i])); 1785 | } 1786 | 1787 | /* 1788 | * :markup: markdown 1789 | * :include: strscan/link_refs.txt 1790 | * 1791 | * call-seq: 1792 | * size -> captures_count 1793 | * 1794 | * Returns the count of captures if the most recent match attempt succeeded, `nil` otherwise; 1795 | * see [Captures Match Values][13]: 1796 | * 1797 | * ```rb 1798 | * scanner = StringScanner.new('Fri Dec 12 1975 14:39') 1799 | * scanner.size # => nil 1800 | * 1801 | * pattern = /(?\w+) (?\w+) (?\d+) / 1802 | * scanner.match?(pattern) 1803 | * scanner.values_at(*0..scanner.size) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil] 1804 | * scanner.size # => 4 1805 | * 1806 | * scanner.match?(/nope/) # => nil 1807 | * scanner.size # => nil 1808 | * ``` 1809 | * 1810 | */ 1811 | static VALUE 1812 | strscan_size(VALUE self) 1813 | { 1814 | struct strscanner *p; 1815 | 1816 | GET_SCANNER(self, p); 1817 | if (! 
MATCHED_P(p)) return Qnil; 1818 | return INT2FIX(p->regs.num_regs); 1819 | } 1820 | 1821 | /* 1822 | * :markup: markdown 1823 | * :include: strscan/link_refs.txt 1824 | * 1825 | * call-seq: 1826 | * captures -> substring_array or nil 1827 | * 1828 | * Returns the array of [captured match values][13] at indexes `(1..)` 1829 | * if the most recent match attempt succeeded, or `nil` otherwise: 1830 | * 1831 | * ```rb 1832 | * scanner = StringScanner.new('Fri Dec 12 1975 14:39') 1833 | * scanner.captures # => nil 1834 | * 1835 | * scanner.exist?(/(?\w+) (?\w+) (?\d+) /) 1836 | * scanner.captures # => ["Fri", "Dec", "12"] 1837 | * scanner.values_at(*0..4) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil] 1838 | * 1839 | * scanner.exist?(/Fri/) 1840 | * scanner.captures # => [] 1841 | * 1842 | * scanner.scan(/nope/) 1843 | * scanner.captures # => nil 1844 | * ``` 1845 | * 1846 | */ 1847 | static VALUE 1848 | strscan_captures(VALUE self) 1849 | { 1850 | struct strscanner *p; 1851 | int i, num_regs; 1852 | VALUE new_ary; 1853 | 1854 | GET_SCANNER(self, p); 1855 | if (! MATCHED_P(p)) return Qnil; 1856 | 1857 | num_regs = p->regs.num_regs; 1858 | new_ary = rb_ary_new2(num_regs); 1859 | 1860 | for (i = 1; i < num_regs; i++) { 1861 | VALUE str; 1862 | if (p->regs.beg[i] == -1) 1863 | str = Qnil; 1864 | else 1865 | str = extract_range(p, 1866 | adjust_register_position(p, p->regs.beg[i]), 1867 | adjust_register_position(p, p->regs.end[i])); 1868 | rb_ary_push(new_ary, str); 1869 | } 1870 | 1871 | return new_ary; 1872 | } 1873 | 1874 | /* 1875 | * :markup: markdown 1876 | * :include: strscan/link_refs.txt 1877 | * 1878 | * call-seq: 1879 | * values_at(*specifiers) -> array_of_captures or nil 1880 | * 1881 | * Returns an array of captured substrings, or `nil` of none. 1882 | * 1883 | * For each `specifier`, the returned substring is `[specifier]`; 1884 | * see #[]. 
1885 | * 1886 | * ```rb 1887 | * scanner = StringScanner.new('Fri Dec 12 1975 14:39') 1888 | * pattern = /(?\w+) (?\w+) (?\d+) / 1889 | * scanner.match?(pattern) 1890 | * scanner.values_at(*0..3) # => ["Fri Dec 12 ", "Fri", "Dec", "12"] 1891 | * scanner.values_at(*%i[wday month day]) # => ["Fri", "Dec", "12"] 1892 | * ``` 1893 | * 1894 | */ 1895 | 1896 | static VALUE 1897 | strscan_values_at(int argc, VALUE *argv, VALUE self) 1898 | { 1899 | struct strscanner *p; 1900 | long i; 1901 | VALUE new_ary; 1902 | 1903 | GET_SCANNER(self, p); 1904 | if (! MATCHED_P(p)) return Qnil; 1905 | 1906 | new_ary = rb_ary_new2(argc); 1907 | for (i = 0; i substring 1920 | * 1921 | * Returns the substring that precedes the matched substring 1922 | * from the most recent match attempt if it was successful, 1923 | * or `nil` otherwise; 1924 | * see [Basic Match Values][18]: 1925 | * 1926 | * ```rb 1927 | * scanner = StringScanner.new('foobarbaz') 1928 | * scanner.pre_match # => nil 1929 | * 1930 | * scanner.pos = 3 1931 | * scanner.exist?(/baz/) # => 6 1932 | * scanner.pre_match # => "foobar" # Substring of entire string, not just target string. 1933 | * 1934 | * scanner.exist?(/nope/) # => nil 1935 | * scanner.pre_match # => nil 1936 | * ``` 1937 | * 1938 | */ 1939 | static VALUE 1940 | strscan_pre_match(VALUE self) 1941 | { 1942 | struct strscanner *p; 1943 | 1944 | GET_SCANNER(self, p); 1945 | if (! 
MATCHED_P(p)) return Qnil; 1946 | return extract_range(p, 1947 | 0, 1948 | adjust_register_position(p, p->regs.beg[0])); 1949 | } 1950 | 1951 | /* 1952 | * :markup: markdown 1953 | * :include: strscan/link_refs.txt 1954 | * 1955 | * call-seq: 1956 | * post_match -> substring 1957 | * 1958 | * Returns the substring that follows the matched substring 1959 | * from the most recent match attempt if it was successful, 1960 | * or `nil` otherwise; 1961 | * see [Basic Match Values][18]: 1962 | * 1963 | * ```rb 1964 | * scanner = StringScanner.new('foobarbaz') 1965 | * scanner.post_match # => nil 1966 | * 1967 | * scanner.pos = 3 1968 | * scanner.match?(/bar/) # => 3 1969 | * scanner.post_match # => "baz" 1970 | * 1971 | * scanner.match?(/nope/) # => nil 1972 | * scanner.post_match # => nil 1973 | * ``` 1974 | * 1975 | */ 1976 | static VALUE 1977 | strscan_post_match(VALUE self) 1978 | { 1979 | struct strscanner *p; 1980 | 1981 | GET_SCANNER(self, p); 1982 | if (! MATCHED_P(p)) return Qnil; 1983 | return extract_range(p, 1984 | adjust_register_position(p, p->regs.end[0]), 1985 | S_LEN(p)); 1986 | } 1987 | 1988 | /* 1989 | * :markup: markdown 1990 | * :include: strscan/link_refs.txt 1991 | * 1992 | * call-seq: 1993 | * rest -> target_substring 1994 | * 1995 | * Returns the 'rest' of the [stored string][1] (all after the current [position][2]), 1996 | * which is the [target substring][3]: 1997 | * 1998 | * ```rb 1999 | * scanner = StringScanner.new('foobarbaz') 2000 | * scanner.rest # => "foobarbaz" 2001 | * scanner.pos = 3 2002 | * scanner.rest # => "barbaz" 2003 | * scanner.terminate 2004 | * scanner.rest # => "" 2005 | * ``` 2006 | * 2007 | */ 2008 | static VALUE 2009 | strscan_rest(VALUE self) 2010 | { 2011 | struct strscanner *p; 2012 | 2013 | GET_SCANNER(self, p); 2014 | if (EOS_P(p)) { 2015 | return str_new(p, "", 0); 2016 | } 2017 | return extract_range(p, p->curr, S_LEN(p)); 2018 | } 2019 | 2020 | /* 2021 | * :markup: markdown 2022 | * :include: 
strscan/link_refs.txt 2023 | * 2024 | * call-seq: 2025 | * rest_size -> integer 2026 | * 2027 | * Returns the size (in bytes) of the #rest of the [stored string][1]: 2028 | * 2029 | * ```rb 2030 | * scanner = StringScanner.new('foobarbaz') 2031 | * scanner.rest # => "foobarbaz" 2032 | * scanner.rest_size # => 9 2033 | * scanner.pos = 3 2034 | * scanner.rest # => "barbaz" 2035 | * scanner.rest_size # => 6 2036 | * scanner.terminate 2037 | * scanner.rest # => "" 2038 | * scanner.rest_size # => 0 2039 | * ``` 2040 | * 2041 | */ 2042 | static VALUE 2043 | strscan_rest_size(VALUE self) 2044 | { 2045 | struct strscanner *p; 2046 | long i; 2047 | 2048 | GET_SCANNER(self, p); 2049 | if (EOS_P(p)) { 2050 | return INT2FIX(0); 2051 | } 2052 | i = S_RESTLEN(p); 2053 | return INT2FIX(i); 2054 | } 2055 | 2056 | /* 2057 | * call-seq: 2058 | * restsize 2059 | * 2060 | * s.restsize is equivalent to s.rest_size. 2061 | * This method is obsolete; use #rest_size instead. 2062 | */ 2063 | 2064 | /* :nodoc: */ 2065 | static VALUE 2066 | strscan_restsize(VALUE self) 2067 | { 2068 | rb_warning("StringScanner#restsize is obsolete; use #rest_size instead"); 2069 | return strscan_rest_size(self); 2070 | } 2071 | 2072 | #define INSPECT_LENGTH 5 2073 | 2074 | /* 2075 | * :markup: markdown 2076 | * :include: strscan/link_refs.txt 2077 | * 2078 | * call-seq: 2079 | * inspect -> string 2080 | * 2081 | * Returns a string representation of `self` that may show: 2082 | * 2083 | * 1. The current [position][2]. 2084 | * 2. The size (in bytes) of the [stored string][1]. 2085 | * 3. The substring preceding the current position. 2086 | * 4. The substring following the current position (which is also the [target substring][3]). 
2087 | * 2088 | * ```rb 2089 | * scanner = StringScanner.new("Fri Dec 12 1975 14:39") 2090 | * scanner.pos = 11 2091 | * scanner.inspect # => "#" 2092 | * ``` 2093 | * 2094 | * If at beginning-of-string, item 4 above (following substring) is omitted: 2095 | * 2096 | * ```rb 2097 | * scanner.reset 2098 | * scanner.inspect # => "#" 2099 | * ``` 2100 | * 2101 | * If at end-of-string, all items above are omitted: 2102 | * 2103 | * ```rb 2104 | * scanner.terminate 2105 | * scanner.inspect # => "#" 2106 | * ``` 2107 | * 2108 | */ 2109 | static VALUE 2110 | strscan_inspect(VALUE self) 2111 | { 2112 | struct strscanner *p; 2113 | VALUE a, b; 2114 | 2115 | p = check_strscan(self); 2116 | if (NIL_P(p->str)) { 2117 | a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self)); 2118 | return a; 2119 | } 2120 | if (EOS_P(p)) { 2121 | a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self)); 2122 | return a; 2123 | } 2124 | if (p->curr == 0) { 2125 | b = inspect2(p); 2126 | a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld @ %"PRIsVALUE">", 2127 | rb_obj_class(self), 2128 | p->curr, S_LEN(p), 2129 | b); 2130 | return a; 2131 | } 2132 | a = inspect1(p); 2133 | b = inspect2(p); 2134 | a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld %"PRIsVALUE" @ %"PRIsVALUE">", 2135 | rb_obj_class(self), 2136 | p->curr, S_LEN(p), 2137 | a, b); 2138 | return a; 2139 | } 2140 | 2141 | static VALUE 2142 | inspect1(struct strscanner *p) 2143 | { 2144 | VALUE str; 2145 | long len; 2146 | 2147 | if (p->curr == 0) return rb_str_new2(""); 2148 | if (p->curr > INSPECT_LENGTH) { 2149 | str = rb_str_new_cstr("..."); 2150 | len = INSPECT_LENGTH; 2151 | } 2152 | else { 2153 | str = rb_str_new(0, 0); 2154 | len = p->curr; 2155 | } 2156 | rb_str_cat(str, CURPTR(p) - len, len); 2157 | return rb_str_dump(str); 2158 | } 2159 | 2160 | static VALUE 2161 | inspect2(struct strscanner *p) 2162 | { 2163 | VALUE str; 2164 | long len; 2165 | 2166 | if (EOS_P(p)) return rb_str_new2(""); 2167 | len = S_RESTLEN(p); 2168 | if (len > 
INSPECT_LENGTH) { 2169 | str = rb_str_new(CURPTR(p), INSPECT_LENGTH); 2170 | rb_str_cat2(str, "..."); 2171 | } 2172 | else { 2173 | str = rb_str_new(CURPTR(p), len); 2174 | } 2175 | return rb_str_dump(str); 2176 | } 2177 | 2178 | /* 2179 | * :markup: markdown 2180 | * :include: strscan/link_refs.txt 2181 | * 2182 | * call-seq: 2183 | * fixed_anchor? -> true or false 2184 | * 2185 | * Returns whether the [fixed-anchor property][10] is set. 2186 | */ 2187 | static VALUE 2188 | strscan_fixed_anchor_p(VALUE self) 2189 | { 2190 | struct strscanner *p; 2191 | p = check_strscan(self); 2192 | return p->fixed_anchor_p ? Qtrue : Qfalse; 2193 | } 2194 | 2195 | typedef struct { 2196 | VALUE self; 2197 | VALUE captures; 2198 | } named_captures_data; 2199 | 2200 | static int 2201 | named_captures_iter(const OnigUChar *name, 2202 | const OnigUChar *name_end, 2203 | int back_num, 2204 | int *back_refs, 2205 | OnigRegex regex, 2206 | void *arg) 2207 | { 2208 | named_captures_data *data = arg; 2209 | 2210 | VALUE key = rb_str_new((const char *)name, name_end - name); 2211 | VALUE value = RUBY_Qnil; 2212 | int i; 2213 | for (i = 0; i < back_num; i++) { 2214 | VALUE v = strscan_aref(data->self, INT2NUM(back_refs[i])); 2215 | if (!RB_NIL_P(v)) { 2216 | value = v; 2217 | } 2218 | } 2219 | rb_hash_aset(data->captures, key, value); 2220 | return 0; 2221 | } 2222 | 2223 | /* 2224 | * :markup: markdown 2225 | * :include: strscan/link_refs.txt 2226 | * 2227 | * call-seq: 2228 | * named_captures -> hash 2229 | * 2230 | * Returns the array of captured match values at indexes (1..) 
2231 | * if the most recent match attempt succeeded, or nil otherwise; 2232 | * see [Captured Match Values][13]: 2233 | * 2234 | * ```rb 2235 | * scanner = StringScanner.new('Fri Dec 12 1975 14:39') 2236 | * scanner.named_captures # => {} 2237 | * 2238 | * pattern = /(?\w+) (?\w+) (?\d+) / 2239 | * scanner.match?(pattern) 2240 | * scanner.named_captures # => {"wday"=>"Fri", "month"=>"Dec", "day"=>"12"} 2241 | * 2242 | * scanner.string = 'nope' 2243 | * scanner.match?(pattern) 2244 | * scanner.named_captures # => {"wday"=>nil, "month"=>nil, "day"=>nil} 2245 | * 2246 | * scanner.match?(/nosuch/) 2247 | * scanner.named_captures # => {} 2248 | * ``` 2249 | * 2250 | */ 2251 | static VALUE 2252 | strscan_named_captures(VALUE self) 2253 | { 2254 | struct strscanner *p; 2255 | named_captures_data data; 2256 | GET_SCANNER(self, p); 2257 | data.self = self; 2258 | data.captures = rb_hash_new(); 2259 | if (!RB_NIL_P(p->regex)) { 2260 | onig_foreach_name(RREGEXP_PTR(p->regex), named_captures_iter, &data); 2261 | } 2262 | 2263 | return data.captures; 2264 | } 2265 | 2266 | /* ======================================================================= 2267 | Ruby Interface 2268 | ======================================================================= */ 2269 | 2270 | /* 2271 | * Document-class: StringScanner 2272 | * 2273 | * :markup: markdown 2274 | * 2275 | * :include: strscan/link_refs.txt 2276 | * :include: strscan/strscan.md 2277 | * 2278 | */ 2279 | void 2280 | Init_strscan(void) 2281 | { 2282 | #ifdef HAVE_RB_EXT_RACTOR_SAFE 2283 | rb_ext_ractor_safe(true); 2284 | #endif 2285 | 2286 | #undef rb_intern 2287 | ID id_scanerr = rb_intern("ScanError"); 2288 | VALUE tmp; 2289 | 2290 | id_byteslice = rb_intern("byteslice"); 2291 | 2292 | usascii_encindex = rb_usascii_encindex(); 2293 | utf8_encindex = rb_utf8_encindex(); 2294 | binary_encindex = rb_ascii8bit_encindex(); 2295 | 2296 | StringScanner = rb_define_class("StringScanner", rb_cObject); 2297 | ScanError = 
rb_define_class_under(StringScanner, "Error", rb_eStandardError); 2298 | if (!rb_const_defined(rb_cObject, id_scanerr)) { 2299 | rb_const_set(rb_cObject, id_scanerr, ScanError); 2300 | } 2301 | tmp = rb_str_new2(STRSCAN_VERSION); 2302 | rb_obj_freeze(tmp); 2303 | rb_const_set(StringScanner, rb_intern("Version"), tmp); 2304 | tmp = rb_str_new2("$Id$"); 2305 | rb_obj_freeze(tmp); 2306 | rb_const_set(StringScanner, rb_intern("Id"), tmp); 2307 | 2308 | rb_define_alloc_func(StringScanner, strscan_s_allocate); 2309 | rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1); 2310 | rb_define_private_method(StringScanner, "initialize_copy", strscan_init_copy, 1); 2311 | rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0); 2312 | rb_define_method(StringScanner, "reset", strscan_reset, 0); 2313 | rb_define_method(StringScanner, "terminate", strscan_terminate, 0); 2314 | rb_define_method(StringScanner, "clear", strscan_clear, 0); 2315 | rb_define_method(StringScanner, "string", strscan_get_string, 0); 2316 | rb_define_method(StringScanner, "string=", strscan_set_string, 1); 2317 | rb_define_method(StringScanner, "concat", strscan_concat, 1); 2318 | rb_define_method(StringScanner, "<<", strscan_concat, 1); 2319 | rb_define_method(StringScanner, "pos", strscan_get_pos, 0); 2320 | rb_define_method(StringScanner, "pos=", strscan_set_pos, 1); 2321 | rb_define_method(StringScanner, "charpos", strscan_get_charpos, 0); 2322 | rb_define_method(StringScanner, "pointer", strscan_get_pos, 0); 2323 | rb_define_method(StringScanner, "pointer=", strscan_set_pos, 1); 2324 | 2325 | rb_define_method(StringScanner, "scan", strscan_scan, 1); 2326 | rb_define_method(StringScanner, "skip", strscan_skip, 1); 2327 | rb_define_method(StringScanner, "match?", strscan_match_p, 1); 2328 | rb_define_method(StringScanner, "check", strscan_check, 1); 2329 | rb_define_method(StringScanner, "scan_full", strscan_scan_full, 3); 2330 | 2331 | 
rb_define_method(StringScanner, "scan_until", strscan_scan_until, 1); 2332 | rb_define_method(StringScanner, "skip_until", strscan_skip_until, 1); 2333 | rb_define_method(StringScanner, "exist?", strscan_exist_p, 1); 2334 | rb_define_method(StringScanner, "check_until", strscan_check_until, 1); 2335 | rb_define_method(StringScanner, "search_full", strscan_search_full, 3); 2336 | 2337 | rb_define_method(StringScanner, "getch", strscan_getch, 0); 2338 | rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0); 2339 | rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0); 2340 | rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0); 2341 | rb_define_method(StringScanner, "peek", strscan_peek, 1); 2342 | rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0); 2343 | rb_define_method(StringScanner, "peep", strscan_peep, 1); 2344 | 2345 | rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0); 2346 | rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0); 2347 | 2348 | rb_define_method(StringScanner, "unscan", strscan_unscan, 0); 2349 | 2350 | rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0); 2351 | rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?")); 2352 | rb_define_method(StringScanner, "eos?", strscan_eos_p, 0); 2353 | rb_define_method(StringScanner, "empty?", strscan_empty_p, 0); 2354 | rb_define_method(StringScanner, "rest?", strscan_rest_p, 0); 2355 | 2356 | rb_define_method(StringScanner, "matched?", strscan_matched_p, 0); 2357 | rb_define_method(StringScanner, "matched", strscan_matched, 0); 2358 | rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0); 2359 | rb_define_method(StringScanner, "[]", strscan_aref, 1); 2360 | rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0); 2361 | rb_define_method(StringScanner, "post_match", strscan_post_match, 0); 2362 | 
rb_define_method(StringScanner, "size", strscan_size, 0); 2363 | rb_define_method(StringScanner, "captures", strscan_captures, 0); 2364 | rb_define_method(StringScanner, "values_at", strscan_values_at, -1); 2365 | 2366 | rb_define_method(StringScanner, "rest", strscan_rest, 0); 2367 | rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0); 2368 | rb_define_method(StringScanner, "restsize", strscan_restsize, 0); 2369 | 2370 | rb_define_method(StringScanner, "inspect", strscan_inspect, 0); 2371 | 2372 | rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0); 2373 | 2374 | rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0); 2375 | 2376 | rb_require("strscan/strscan"); 2377 | } 2378 | -------------------------------------------------------------------------------- /lib/strscan/strscan.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class StringScanner 4 | # call-seq: 5 | # scan_integer(base: 10) 6 | # 7 | # If `base` isn't provided or is `10`, then it is equivalent to calling `#scan` with a `[+-]?\d+` pattern, 8 | # and returns an Integer or nil. 9 | # 10 | # If `base` is `16`, then it is equivalent to calling `#scan` with a `[+-]?(0x)?[0-9a-fA-F]+` pattern, 11 | # and returns an Integer or nil. 12 | # 13 | # The scanned string must be encoded with an ASCII compatible encoding, otherwise 14 | # Encoding::CompatibilityError will be raised. 
15 | def scan_integer(base: 10) 16 | case base 17 | when 10 18 | scan_base10_integer 19 | when 16 20 | scan_base16_integer 21 | else 22 | raise ArgumentError, "Unsupported integer base: #{base.inspect}, expected 10 or 16" 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /run-test.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | $VERBOSE = true 4 | 5 | gem 'strscan' 6 | require 'strscan' 7 | puts "Loaded strscan from #{$".grep(/\/strscan\./).join(', ')}" 8 | puts "Gem from #{Gem.loaded_specs["strscan"]&.full_gem_path}" 9 | 10 | require_relative 'test/lib/helper' 11 | 12 | Dir.glob("test/strscan/**/*test_*.rb") do |test_rb| 13 | require File.expand_path(test_rb) 14 | end 15 | -------------------------------------------------------------------------------- /strscan.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # 3 | source_version = ["", "ext/strscan/"].find do |dir| 4 | begin 5 | break File.open(File.join(__dir__, "#{dir}strscan.c")) {|f| 6 | f.gets("\n#define STRSCAN_VERSION ") 7 | f.gets[/\s*"(.+)"/, 1] 8 | } 9 | rescue Errno::ENOENT 10 | end 11 | end 12 | 13 | Gem::Specification.new do |s| 14 | s.name = "strscan" 15 | s.version = source_version 16 | s.summary = "Provides lexical scanning operations on a String." 17 | s.description = "Provides lexical scanning operations on a String." 
18 | 19 | files = [ 20 | "COPYING", 21 | "LICENSE.txt", 22 | "lib/strscan/strscan.rb" 23 | ] 24 | 25 | s.require_paths = %w{lib} 26 | 27 | if RUBY_ENGINE == "jruby" 28 | files << "lib/strscan.jar" 29 | files << "ext/jruby/lib/strscan.rb" 30 | s.require_paths += %w{ext/jruby/lib} 31 | s.platform = "java" 32 | else 33 | files << "ext/strscan/extconf.rb" 34 | files << "ext/strscan/strscan.c" 35 | s.rdoc_options << "-idoc" 36 | s.extra_rdoc_files = [ 37 | ".rdoc_options", 38 | *Dir.glob("doc/strscan/**/*") 39 | ] 40 | s.extensions = %w{ext/strscan/extconf.rb} 41 | end 42 | s.files = files 43 | s.required_ruby_version = ">= 2.4.0" 44 | 45 | s.authors = ["Minero Aoki", "Sutou Kouhei", "Charles Oliver Nutter"] 46 | s.email = [nil, "kou@cozmixng.org", "headius@headius.com"] 47 | s.homepage = "https://github.com/ruby/strscan" 48 | s.licenses = ["Ruby", "BSD-2-Clause"] 49 | end 50 | -------------------------------------------------------------------------------- /test/lib/helper.rb: -------------------------------------------------------------------------------- 1 | require "test/unit" 2 | require "core_assertions" 3 | 4 | Test::Unit::TestCase.include Test::Unit::CoreAssertions 5 | -------------------------------------------------------------------------------- /test/strscan/test_ractor.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | require 'test/unit' 3 | 4 | class TestStringScannerRactor < Test::Unit::TestCase 5 | def setup 6 | omit("Ractor not defined") unless defined? Ractor 7 | end 8 | 9 | def test_ractor 10 | assert_in_out_err([], <<-"end;", ["stra", " ", "strb", " ", "strc"], []) 11 | class Ractor 12 | alias value take unless method_defined? 
:value # compat with Ruby 3.4 and olders 13 | end 14 | 15 | require "strscan" 16 | $VERBOSE = nil 17 | r = Ractor.new do 18 | s = StringScanner.new("stra strb strc", true) 19 | [ 20 | s.scan(/\\w+/), 21 | s.scan(/\\s+/), 22 | s.scan(/\\w+/), 23 | s.scan(/\\s+/), 24 | s.scan(/\\w+/), 25 | s.scan(/\\w+/), 26 | s.scan(/\\w+/) 27 | ] 28 | end 29 | puts r.value.compact 30 | end; 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /test/strscan/test_stringscanner.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # frozen_string_literal: true 3 | # 4 | # test/strscan/test_stringscanner.rb 5 | # 6 | 7 | require 'strscan' 8 | require 'test/unit' 9 | 10 | module StringScannerTests 11 | def test_peek_byte 12 | s = create_string_scanner('ab') 13 | assert_equal(97, s.peek_byte) 14 | assert_equal(97, s.scan_byte) 15 | assert_equal(98, s.peek_byte) 16 | assert_equal(98, s.scan_byte) 17 | assert_nil(s.peek_byte) 18 | assert_nil(s.scan_byte) 19 | end 20 | 21 | def test_scan_byte 22 | s = create_string_scanner('ab') 23 | assert_equal(2, s.match?(/(?ab)/)) # set named_captures 24 | assert_equal(97, s.scan_byte) 25 | assert_equal({}, s.named_captures) 26 | assert_equal(98, s.scan_byte) 27 | assert_nil(s.scan_byte) 28 | 29 | str = "\244\242".dup.force_encoding("euc-jp") 30 | s = StringScanner.new(str) 31 | assert_equal(str.getbyte(s.pos), s.scan_byte) 32 | assert_equal(str.getbyte(s.pos), s.scan_byte) 33 | assert_nil(s.scan_byte) 34 | end 35 | 36 | def test_s_new 37 | s = create_string_scanner('test string') 38 | assert_instance_of(StringScanner, s) 39 | assert_equal(false, s.eos?) 40 | 41 | str = 'test string'.dup 42 | s = create_string_scanner(str, false) 43 | assert_instance_of(StringScanner, s) 44 | assert_equal(false, s.eos?) 
45 | assert_same(str, s.string) 46 | end 47 | 48 | UNINIT_ERROR = ArgumentError 49 | 50 | def test_s_allocate 51 | s = StringScanner.allocate 52 | assert_equal('#', s.inspect.sub(/StringScanner_C/, 'StringScanner')) 53 | assert_raise(UNINIT_ERROR) { s.eos? } 54 | assert_raise(UNINIT_ERROR) { s.scan(/a/) } 55 | s.string = 'test' 56 | assert_equal('#', s.inspect.sub(/StringScanner_C/, 'StringScanner')) 57 | assert_nothing_raised(UNINIT_ERROR) { s.eos? } 58 | assert_equal(false, s.eos?) 59 | end 60 | 61 | def test_s_mustc 62 | assert_nothing_raised(NotImplementedError) { 63 | StringScanner.must_C_version 64 | } 65 | end 66 | 67 | def test_dup 68 | s = create_string_scanner('test string') 69 | d = s.dup 70 | assert_equal(s.inspect, d.inspect) 71 | assert_equal(s.string, d.string) 72 | assert_equal(s.pos, d.pos) 73 | assert_equal(s.matched?, d.matched?) 74 | assert_equal(s.eos?, d.eos?) 75 | 76 | s = create_string_scanner('test string') 77 | s.scan(/test/) 78 | d = s.dup 79 | assert_equal(s.inspect, d.inspect) 80 | assert_equal(s.string, d.string) 81 | assert_equal(s.pos, d.pos) 82 | assert_equal(s.matched?, d.matched?) 83 | assert_equal(s.eos?, d.eos?) 84 | 85 | s = create_string_scanner('test string') 86 | s.scan(/test/) 87 | s.scan(/NOT MATCH/) 88 | d = s.dup 89 | assert_equal(s.inspect, d.inspect) 90 | assert_equal(s.string, d.string) 91 | assert_equal(s.pos, d.pos) 92 | assert_equal(s.matched?, d.matched?) 93 | assert_equal(s.eos?, d.eos?) 94 | 95 | s = create_string_scanner('test string') 96 | s.terminate 97 | d = s.dup 98 | assert_equal(s.inspect, d.inspect) 99 | assert_equal(s.string, d.string) 100 | assert_equal(s.pos, d.pos) 101 | assert_equal(s.matched?, d.matched?) 102 | assert_equal(s.eos?, d.eos?) 103 | end 104 | 105 | def test_const_Version 106 | assert_instance_of(String, StringScanner::Version) 107 | assert_equal(true, StringScanner::Version.frozen?) 
108 | end 109 | 110 | def test_const_Id 111 | assert_instance_of(String, StringScanner::Id) 112 | assert_equal(true, StringScanner::Id.frozen?) 113 | end 114 | 115 | def test_inspect 116 | str = 'test string'.dup 117 | s = create_string_scanner(str, false) 118 | assert_instance_of(String, s.inspect) 119 | assert_equal(s.inspect, s.inspect) 120 | assert_equal('#', s.inspect.sub(/StringScanner_C/, 'StringScanner')) 121 | s.get_byte 122 | assert_equal('#', s.inspect.sub(/StringScanner_C/, 'StringScanner')) 123 | 124 | s = create_string_scanner("\n") 125 | assert_equal('#', s.inspect) 126 | end 127 | 128 | def test_eos? 129 | s = create_string_scanner('test string') 130 | assert_equal(false, s.eos?) 131 | assert_equal(false, s.eos?) 132 | s.scan(/\w+/) 133 | assert_equal(false, s.eos?) 134 | assert_equal(false, s.eos?) 135 | s.scan(/\s+/) 136 | s.scan(/\w+/) 137 | assert_equal(true, s.eos?) 138 | assert_equal(true, s.eos?) 139 | s.scan(/\w+/) 140 | assert_equal(true, s.eos?) 141 | 142 | s = create_string_scanner('test'.dup) 143 | s.scan(/te/) 144 | s.string.replace('') 145 | assert_equal(true, s.eos?) 146 | end 147 | 148 | def test_bol? 149 | s = create_string_scanner("a\nbbb\n\ncccc\nddd\r\neee") 150 | assert_equal(true, s.bol?) 151 | assert_equal(true, s.bol?) 152 | s.scan(/a/) 153 | assert_equal(false, s.bol?) 154 | assert_equal(false, s.bol?) 155 | s.scan(/\n/) 156 | assert_equal(true, s.bol?) 157 | s.scan(/b/) 158 | assert_equal(false, s.bol?) 159 | s.scan(/b/) 160 | assert_equal(false, s.bol?) 161 | s.scan(/b/) 162 | assert_equal(false, s.bol?) 163 | s.scan(/\n/) 164 | assert_equal(true, s.bol?) 165 | s.unscan 166 | assert_equal(false, s.bol?) 167 | s.scan(/\n/) 168 | s.scan(/\n/) 169 | assert_equal(true, s.bol?) 170 | s.scan(/c+\n/) 171 | assert_equal(true, s.bol?) 172 | s.scan(/d+\r\n/) 173 | assert_equal(true, s.bol?) 174 | s.scan(/e+/) 175 | assert_equal(false, s.bol?) 
176 | end 177 | 178 | def test_string 179 | s = create_string_scanner('test string') 180 | assert_equal('test string', s.string) 181 | s.scan(/(?test)/) # set named_captures 182 | assert_equal('test string', s.string) 183 | s.string = 'a' 184 | assert_equal({}, s.named_captures) 185 | assert_equal('a', s.string) 186 | s.scan(/a/) 187 | s.string = 'b' 188 | assert_equal(0, s.pos) 189 | end 190 | 191 | def test_string_set_is_equal 192 | name = 'tenderlove' 193 | 194 | s = create_string_scanner(name) 195 | assert_equal(name.object_id, s.string.object_id) 196 | 197 | s.string = name 198 | assert_equal(name.object_id, s.string.object_id) 199 | end 200 | 201 | def test_string_append 202 | s = create_string_scanner('tender'.dup) 203 | s << 'love' 204 | assert_equal('tenderlove', s.string) 205 | 206 | s.string = 'tender'.dup 207 | s << 'love' 208 | assert_equal('tenderlove', s.string) 209 | end 210 | 211 | def test_pos 212 | s = create_string_scanner('test string') 213 | assert_equal(0, s.pos) 214 | s.get_byte 215 | assert_equal(1, s.pos) 216 | s.get_byte 217 | assert_equal(2, s.pos) 218 | s.terminate 219 | assert_equal(11, s.pos) 220 | end 221 | 222 | def test_pos_unicode 223 | s = create_string_scanner("abcädeföghi") 224 | assert_equal(0, s.charpos) 225 | assert_equal("abcä", s.scan_until(/ä/)) 226 | assert_equal(4, s.charpos) 227 | assert_equal("defö", s.scan_until(/ö/)) 228 | assert_equal(8, s.charpos) 229 | s.terminate 230 | assert_equal(11, s.charpos) 231 | end 232 | 233 | def test_charpos_not_use_string_methods 234 | omit("not supported on TruffleRuby") if RUBY_ENGINE == "truffleruby" 235 | 236 | string = +'abcädeföghi' 237 | scanner = create_string_scanner(string) 238 | 239 | class << string 240 | EnvUtil.suppress_warning do 241 | undef_method(*instance_methods) 242 | end 243 | end 244 | 245 | assert_equal(0, scanner.charpos) 246 | assert_equal("abcä", scanner.scan_until(/ä/)) 247 | assert_equal(4, scanner.charpos) 248 | assert_equal("defö", 
scanner.scan_until(/ö/)) 249 | assert_equal(8, scanner.charpos) 250 | end 251 | 252 | def test_concat 253 | s = create_string_scanner('a'.dup) 254 | s.scan(/a/) 255 | s.concat('b') 256 | assert_equal(false, s.eos?) 257 | assert_equal('b', s.scan(/b/)) 258 | assert_equal(true, s.eos?) 259 | s.concat('c') 260 | assert_equal(false, s.eos?) 261 | assert_equal('c', s.scan(/c/)) 262 | assert_equal(true, s.eos?) 263 | end 264 | 265 | def test_scan 266 | s = create_string_scanner("stra strb\0strc", true) 267 | tmp = s.scan(/\w+/) 268 | assert_equal('stra', tmp) 269 | 270 | tmp = s.scan(/\s+/) 271 | assert_equal(' ', tmp) 272 | 273 | assert_equal('strb', s.scan(/\w+/)) 274 | assert_equal("\u0000", s.scan(/\0/)) 275 | 276 | tmp = s.scan(/\w+/) 277 | assert_equal('strc', tmp) 278 | 279 | assert_nil(s.scan(/\w+/)) 280 | assert_nil(s.scan(/\w+/)) 281 | 282 | 283 | str = 'stra strb strc'.dup 284 | s = create_string_scanner(str, false) 285 | tmp = s.scan(/\w+/) 286 | assert_equal('stra', tmp) 287 | 288 | tmp = s.scan(/\s+/) 289 | assert_equal(' ', tmp) 290 | 291 | assert_equal('strb', s.scan(/\w+/)) 292 | assert_equal(' ', s.scan(/\s+/)) 293 | 294 | tmp = s.scan(/\w+/) 295 | assert_equal('strc', tmp) 296 | 297 | assert_nil(s.scan(/\w+/)) 298 | assert_nil(s.scan(/\w+/)) 299 | 300 | s = create_string_scanner('test'.dup) 301 | s.scan(/te/) 302 | # This assumes #string does not duplicate string, 303 | # but it is implementation specific issue. 304 | # DO NOT RELY ON THIS FEATURE. 
305 | s.string.replace('') 306 | # unspecified: assert_equal(2, s.pos 307 | assert_equal(nil, s.scan(/test/)) 308 | 309 | # [ruby-bugs:4361] 310 | s = create_string_scanner("") 311 | assert_equal("", s.scan(//)) 312 | assert_equal("", s.scan(//)) 313 | end 314 | 315 | def test_scan_string 316 | s = create_string_scanner("stra strb\0strc") 317 | assert_equal('str', s.scan('str')) 318 | assert_equal('str', s[0]) 319 | assert_equal(3, s.pos) 320 | assert_equal('a ', s.scan('a ')) 321 | assert_equal('strb', s.scan('strb')) 322 | assert_equal("\u0000", s.scan("\0")) 323 | assert_equal('strc', s.scan('strc')) 324 | 325 | str = 'stra strb strc'.dup 326 | s = create_string_scanner(str, false) 327 | matched = s.scan('str') 328 | assert_equal('str', matched) 329 | 330 | s = create_string_scanner("str") 331 | assert_equal(nil, s.scan("str\0\0")) 332 | end 333 | 334 | def test_skip 335 | s = create_string_scanner('stra strb strc', true) 336 | assert_equal(4, s.skip(/\w+/)) 337 | assert_equal(1, s.skip(/\s+/)) 338 | assert_equal(4, s.skip(/\w+/)) 339 | assert_equal(1, s.skip(/\s+/)) 340 | assert_equal(4, s.skip(/\w+/)) 341 | assert_nil( s.skip(/\w+/)) 342 | assert_nil( s.skip(/\s+/)) 343 | assert_equal(true, s.eos?) 
344 | 345 | s = create_string_scanner('test'.dup) 346 | s.scan(/te/) 347 | s.string.replace('') 348 | assert_equal(nil, s.skip(/./)) 349 | 350 | # [ruby-bugs:4361] 351 | s = create_string_scanner("") 352 | assert_equal(0, s.skip(//)) 353 | assert_equal(0, s.skip(//)) 354 | end 355 | 356 | def test_skip_with_begenning_of_string_anchor_match 357 | s = create_string_scanner("a\nb") 358 | assert_equal(2, s.skip(/a\n/)) 359 | assert_equal(1, s.skip(/\Ab/)) 360 | end 361 | 362 | def test_skip_with_begenning_of_line_anchor_match 363 | s = create_string_scanner("a\nbc") 364 | assert_equal(2, s.skip(/a\n/)) 365 | assert_equal(1, s.skip(/^b/)) 366 | assert_equal(1, s.skip(/^c/)) 367 | end 368 | 369 | def test_getch 370 | s = create_string_scanner('abcde') 371 | assert_equal(3, s.match?(/(?abc)/)) # set named_captures 372 | assert_equal('a', s.getch) 373 | assert_equal({}, s.named_captures) 374 | assert_equal('b', s.getch) 375 | assert_equal('c', s.getch) 376 | assert_equal('d', s.getch) 377 | assert_equal('e', s.getch) 378 | assert_nil( s.getch) 379 | 380 | s = create_string_scanner("\244\242".dup.force_encoding("euc-jp")) 381 | assert_equal("\244\242".dup.force_encoding("euc-jp"), s.getch) 382 | assert_nil(s.getch) 383 | 384 | s = create_string_scanner('test'.dup) 385 | s.scan(/te/) 386 | s.string.replace('') 387 | assert_equal(nil, s.getch) 388 | end 389 | 390 | def test_get_byte 391 | s = create_string_scanner('abcde') 392 | assert_equal(3, s.match?(/(?abc)/)) # set named_captures 393 | assert_equal('a', s.get_byte) 394 | assert_equal({}, s.named_captures) 395 | assert_equal('b', s.get_byte) 396 | assert_equal('c', s.get_byte) 397 | assert_equal('d', s.get_byte) 398 | assert_equal('e', s.get_byte) 399 | assert_nil( s.get_byte) 400 | assert_nil( s.get_byte) 401 | 402 | s = create_string_scanner("\244\242".dup.force_encoding("euc-jp")) 403 | assert_equal("\244".dup.force_encoding("euc-jp"), s.get_byte) 404 | assert_equal("\242".dup.force_encoding("euc-jp"), s.get_byte) 405 
| assert_nil(s.get_byte) 406 | 407 | s = create_string_scanner('test'.dup) 408 | s.scan(/te/) 409 | s.string.replace('') 410 | assert_equal(nil, s.get_byte) 411 | end 412 | 413 | def test_matched 414 | s = create_string_scanner('stra strb strc') 415 | s.scan(/\w+/) 416 | assert_equal('stra', s.matched) 417 | s.scan_until(/\w+/) 418 | assert_equal('strb', s.matched) 419 | s.scan(/\s+/) 420 | assert_equal(' ', s.matched) 421 | s.scan(/\w+/) 422 | assert_equal('strc', s.matched) 423 | s.scan(/\w+/) 424 | assert_nil(s.matched) 425 | s.getch 426 | assert_nil(s.matched) 427 | 428 | s = create_string_scanner('stra strb strc') 429 | s.getch 430 | assert_equal('s', s.matched) 431 | s.get_byte 432 | assert_equal('t', s.matched) 433 | assert_equal('t', s.matched) 434 | end 435 | 436 | def test_matched_string 437 | s = create_string_scanner('stra strb strc') 438 | s.scan('stra') 439 | assert_equal('stra', s.matched) 440 | s.scan_until('strb') 441 | assert_equal('strb', s.matched) 442 | s.scan(' ') 443 | assert_equal(' ', s.matched) 444 | s.scan('strc') 445 | assert_equal('strc', s.matched) 446 | s.scan('c') 447 | assert_nil(s.matched) 448 | s.getch 449 | assert_nil(s.matched) 450 | end 451 | 452 | def test_AREF 453 | s = create_string_scanner('stra strb strc') 454 | 455 | s.scan(/\s+/) 456 | assert_nil( s[-2]) 457 | assert_nil( s[-1]) 458 | assert_nil( s[0]) 459 | assert_nil( s[1]) 460 | assert_nil( s[:c]) 461 | assert_nil( s['c']) 462 | 463 | s.scan("not match") 464 | assert_nil( s[-2]) 465 | assert_nil( s[-1]) 466 | assert_nil( s[0]) 467 | assert_nil( s[1]) 468 | assert_nil( s[:c]) 469 | assert_nil( s['c']) 470 | 471 | s.check(/\w+/) 472 | assert_nil( s[-2]) 473 | assert_equal('stra', s[-1]) 474 | assert_equal('stra', s[0]) 475 | assert_nil( s[1]) 476 | assert_raise(IndexError) { s[:c] } 477 | assert_raise(IndexError) { s['c'] } 478 | 479 | s.scan("stra") 480 | assert_nil( s[-2]) 481 | assert_equal('stra', s[-1]) 482 | assert_equal('stra', s[0]) 483 | assert_nil( s[1]) 484 | 
assert_raise(IndexError) { s[:c] } 485 | assert_raise(IndexError) { s['c'] } 486 | 487 | s.skip(/\s+/) 488 | assert_nil( s[-2]) 489 | assert_equal(' ', s[-1]) 490 | assert_equal(' ', s[0]) 491 | assert_nil( s[1]) 492 | 493 | s.scan(/(s)t(r)b/) 494 | assert_nil( s[-100]) 495 | assert_nil( s[-4]) 496 | assert_equal('strb', s[-3]) 497 | assert_equal('s', s[-2]) 498 | assert_equal('r', s[-1]) 499 | assert_equal('strb', s[0]) 500 | assert_equal('s', s[1]) 501 | assert_equal('r', s[2]) 502 | assert_nil( s[3]) 503 | assert_nil( s[100]) 504 | 505 | s.scan(/\s+/) 506 | 507 | s.getch 508 | assert_nil( s[-2]) 509 | assert_equal('s', s[-1]) 510 | assert_equal('s', s[0]) 511 | assert_nil( s[1]) 512 | 513 | s.get_byte 514 | assert_nil( s[-2]) 515 | assert_equal('t', s[-1]) 516 | assert_equal('t', s[0]) 517 | assert_nil( s[1]) 518 | 519 | s.scan(/.*/) 520 | s.scan(/./) 521 | assert_nil( s[0]) 522 | assert_nil( s[0]) 523 | 524 | 525 | s = create_string_scanner("\244\242".dup.force_encoding("euc-jp")) 526 | s.getch 527 | assert_equal("\244\242".dup.force_encoding("euc-jp"), s[0]) 528 | 529 | s = create_string_scanner("foo bar baz") 530 | s.scan(/(?\w+) (?\w+) (\w+)/) 531 | assert_equal('foo', s[1]) 532 | assert_equal('bar', s[2]) 533 | assert_nil(s[3]) 534 | assert_equal('foo', s[:a]) 535 | assert_equal('bar', s[:b]) 536 | assert_raise(IndexError) { s[:c] } 537 | assert_equal('foo', s['a']) 538 | assert_equal('bar', s['b']) 539 | assert_raise(IndexError) { s['c'] } 540 | # see https://github.com/jruby/jruby/issues/7644 541 | unless RUBY_ENGINE == "jruby" && RbConfig::CONFIG['host_os'] =~ /mswin|win32|mingw/ 542 | assert_raise_with_message(IndexError, /\u{30c6 30b9 30c8}/) { s["\u{30c6 30b9 30c8}"] } 543 | end 544 | end 545 | 546 | def test_pre_match 547 | s = create_string_scanner('a b c d e') 548 | s.scan(/\w/) 549 | assert_equal('', s.pre_match) 550 | s.skip(/\s/) 551 | assert_equal('a', s.pre_match) 552 | s.scan('b') 553 | assert_equal('a ', s.pre_match) 554 | s.scan_until(/c/) 
555 | assert_equal('a b ', s.pre_match) 556 | s.getch 557 | assert_equal('a b c', s.pre_match) 558 | s.get_byte 559 | assert_equal('a b c ', s.pre_match) 560 | s.get_byte 561 | assert_equal('a b c d', s.pre_match) 562 | s.scan(/never match/) 563 | assert_nil(s.pre_match) 564 | end 565 | 566 | def test_pre_match_string 567 | s = create_string_scanner('a b c d e') 568 | s.scan('a') 569 | assert_equal('', s.pre_match) 570 | s.skip(' ') 571 | assert_equal('a', s.pre_match) 572 | s.scan('b') 573 | assert_equal('a ', s.pre_match) 574 | s.scan_until('c') 575 | assert_equal('a b ', s.pre_match) 576 | s.getch 577 | assert_equal('a b c', s.pre_match) 578 | s.get_byte 579 | assert_equal('a b c ', s.pre_match) 580 | s.get_byte 581 | assert_equal('a b c d', s.pre_match) 582 | s.scan('never match') 583 | assert_nil(s.pre_match) 584 | end 585 | 586 | def test_post_match 587 | s = create_string_scanner('a b c d e') 588 | s.scan(/\w/) 589 | assert_equal(' b c d e', s.post_match) 590 | s.skip(/\s/) 591 | assert_equal('b c d e', s.post_match) 592 | s.scan('b') 593 | assert_equal(' c d e', s.post_match) 594 | s.scan_until(/c/) 595 | assert_equal(' d e', s.post_match) 596 | s.getch 597 | assert_equal('d e', s.post_match) 598 | s.get_byte 599 | assert_equal(' e', s.post_match) 600 | s.get_byte 601 | assert_equal('e', s.post_match) 602 | s.scan(/never match/) 603 | assert_nil(s.post_match) 604 | s.scan(/./) 605 | assert_equal('', s.post_match) 606 | s.scan(/./) 607 | assert_nil(s.post_match) 608 | end 609 | 610 | def test_post_match_string 611 | s = create_string_scanner('a b c d e') 612 | s.scan('a') 613 | assert_equal(' b c d e', s.post_match) 614 | s.skip(' ') 615 | assert_equal('b c d e', s.post_match) 616 | s.scan('b') 617 | assert_equal(' c d e', s.post_match) 618 | s.scan_until('c') 619 | assert_equal(' d e', s.post_match) 620 | s.getch 621 | assert_equal('d e', s.post_match) 622 | s.get_byte 623 | assert_equal(' e', s.post_match) 624 | s.get_byte 625 | assert_equal('e', 
s.post_match) 626 | s.scan('never match') 627 | assert_nil(s.post_match) 628 | end 629 | 630 | def test_terminate 631 | s = create_string_scanner('abcd') 632 | s.scan(/(?ab)/) # set named_captures 633 | s.terminate 634 | assert_equal({}, s.named_captures) 635 | assert_equal(true, s.eos?) 636 | s.terminate 637 | assert_equal(true, s.eos?) 638 | end 639 | 640 | def test_reset 641 | s = create_string_scanner('abcd') 642 | s.scan(/(?ab)/) # set named_captures 643 | s.reset 644 | assert_equal({}, s.named_captures) 645 | assert_equal(0, s.pos) 646 | s.scan(/\w+/) 647 | s.reset 648 | assert_equal(0, s.pos) 649 | s.reset 650 | assert_equal(0, s.pos) 651 | end 652 | 653 | def test_matched_size 654 | s = create_string_scanner('test string') 655 | assert_nil(s.matched_size) 656 | s.scan(/test/) 657 | assert_equal(4, s.matched_size) 658 | assert_equal(4, s.matched_size) 659 | s.scan(//) 660 | assert_equal(0, s.matched_size) 661 | s.scan(/x/) 662 | assert_nil(s.matched_size) 663 | assert_nil(s.matched_size) 664 | s.terminate 665 | assert_nil(s.matched_size) 666 | 667 | s = create_string_scanner('test string') 668 | assert_nil(s.matched_size) 669 | s.scan(/test/) 670 | assert_equal(4, s.matched_size) 671 | s.terminate 672 | assert_nil(s.matched_size) 673 | end 674 | 675 | def test_empty_encoding_utf8 676 | ss = create_string_scanner('') 677 | assert_equal(Encoding::UTF_8, ss.rest.encoding) 678 | end 679 | 680 | def test_empty_encoding_ascii_8bit 681 | ss = create_string_scanner(''.dup.force_encoding("ASCII-8BIT")) 682 | assert_equal(Encoding::ASCII_8BIT, ss.rest.encoding) 683 | end 684 | 685 | def test_encoding 686 | ss = create_string_scanner("\xA1\xA2".dup.force_encoding("euc-jp")) 687 | assert_equal(Encoding::EUC_JP, ss.scan(/./e).encoding) 688 | end 689 | 690 | def test_encoding_string 691 | str = "\xA1\xA2".dup.force_encoding("euc-jp") 692 | ss = create_string_scanner(str) 693 | assert_equal(str.dup, ss.scan(str.dup)) 694 | end 695 | 696 | def test_invalid_encoding_string 
697 | str = "\xA1\xA2".dup.force_encoding("euc-jp") 698 | ss = create_string_scanner(str) 699 | assert_raise(Encoding::CompatibilityError) do 700 | ss.scan(str.encode("UTF-8")) 701 | end 702 | end 703 | 704 | def test_generic_regexp 705 | ss = create_string_scanner("\xA1\xA2".dup.force_encoding("euc-jp")) 706 | t = ss.scan(/./) 707 | assert_equal("\xa1\xa2".dup.force_encoding("euc-jp"), t) 708 | end 709 | 710 | def test_set_pos 711 | s = create_string_scanner("test string") 712 | s.pos = 7 713 | assert_equal("ring", s.rest) 714 | end 715 | 716 | def test_match_p 717 | s = create_string_scanner("test string") 718 | assert_equal(4, s.match?(/\w+/)) 719 | assert_equal(4, s.match?(/\w+/)) 720 | assert_equal(nil, s.match?(/\s+/)) 721 | end 722 | 723 | def test_check 724 | s = create_string_scanner("Foo Bar Baz") 725 | assert_equal("Foo", s.check(/Foo/)) 726 | assert_equal(0, s.pos) 727 | assert_equal("Foo", s.matched) 728 | assert_equal(nil, s.check(/Bar/)) 729 | assert_equal(nil, s.matched) 730 | end 731 | 732 | def test_scan_full 733 | s = create_string_scanner("Foo Bar Baz") 734 | assert_equal(4, s.scan_full(/Foo /, false, false)) 735 | assert_equal(0, s.pos) 736 | assert_equal(nil, s.scan_full(/Baz/, false, false)) 737 | assert_equal("Foo ", s.scan_full(/Foo /, false, true)) 738 | assert_equal(0, s.pos) 739 | assert_equal(nil, s.scan_full(/Baz/, false, false)) 740 | assert_equal(4, s.scan_full(/Foo /, true, false)) 741 | assert_equal(4, s.pos) 742 | assert_equal(nil, s.scan_full(/Baz /, false, false)) 743 | assert_equal("Bar ", s.scan_full(/Bar /, true, true)) 744 | assert_equal(8, s.pos) 745 | assert_equal(nil, s.scan_full(/az/, false, false)) 746 | end 747 | 748 | def test_exist_p 749 | s = create_string_scanner("test string") 750 | assert_equal(3, s.exist?(/s/)) 751 | assert_equal(0, s.pos) 752 | s.scan(/test/) 753 | assert_equal(2, s.exist?(/s/)) 754 | assert_equal(4, s.pos) 755 | assert_equal(nil, s.exist?(/e/)) 756 | end 757 | 758 | def 
test_exist_p_invalid_argument 759 | s = create_string_scanner("test string") 760 | assert_raise(TypeError) do 761 | s.exist?(1) 762 | end 763 | end 764 | 765 | def test_exist_p_string 766 | s = create_string_scanner("test string") 767 | assert_equal(3, s.exist?("s")) 768 | assert_equal(0, s.pos) 769 | s.scan("test") 770 | assert_equal(2, s.exist?("s")) 771 | assert_equal(4, s.pos) 772 | assert_equal(nil, s.exist?("e")) 773 | end 774 | 775 | def test_scan_until 776 | s = create_string_scanner("Foo Bar\0Baz") 777 | assert_equal("Foo", s.scan_until(/Foo/)) 778 | assert_equal(3, s.pos) 779 | assert_equal(" Bar", s.scan_until(/Bar/)) 780 | assert_equal(7, s.pos) 781 | assert_equal(nil, s.skip_until(/Qux/)) 782 | assert_equal("\u0000Baz", s.scan_until(/Baz/)) 783 | assert_equal(11, s.pos) 784 | end 785 | 786 | def test_scan_until_string 787 | s = create_string_scanner("Foo Bar\0Baz") 788 | assert_equal("Foo", s.scan_until("Foo")) 789 | assert_equal(3, s.pos) 790 | assert_equal(" Bar", s.scan_until("Bar")) 791 | assert_equal(7, s.pos) 792 | assert_equal(nil, s.skip_until("Qux")) 793 | assert_equal("\u0000Baz", s.scan_until("Baz")) 794 | assert_equal(11, s.pos) 795 | 796 | s = create_string_scanner("str") 797 | assert_equal(nil, s.scan_until("str\0\0")) 798 | end 799 | 800 | def test_skip_until 801 | s = create_string_scanner("Foo Bar Baz") 802 | assert_equal(3, s.skip_until(/Foo/)) 803 | assert_equal(3, s.pos) 804 | assert_equal(4, s.skip_until(/Bar/)) 805 | assert_equal(7, s.pos) 806 | assert_equal(nil, s.skip_until(/Qux/)) 807 | end 808 | 809 | def test_skip_until_string 810 | s = create_string_scanner("Foo Bar Baz") 811 | assert_equal(3, s.skip_until("Foo")) 812 | assert_equal(3, s.pos) 813 | assert_equal(4, s.skip_until("Bar")) 814 | assert_equal(7, s.pos) 815 | assert_equal(nil, s.skip_until("Qux")) 816 | end 817 | 818 | def test_check_until 819 | s = create_string_scanner("Foo Bar Baz") 820 | assert_equal("Foo", s.check_until(/Foo/)) 821 | assert_equal(0, s.pos) 822 
| assert_equal("Foo Bar", s.check_until(/Bar/)) 823 | assert_equal(0, s.pos) 824 | assert_equal(nil, s.check_until(/Qux/)) 825 | end 826 | 827 | def test_check_until_string 828 | s = create_string_scanner("Foo Bar Baz") 829 | assert_equal("Foo", s.check_until("Foo")) 830 | assert_equal(0, s.pos) 831 | assert_equal("Foo Bar", s.check_until("Bar")) 832 | assert_equal(0, s.pos) 833 | assert_equal(nil, s.check_until("Qux")) 834 | end 835 | 836 | def test_search_full 837 | s = create_string_scanner("Foo Bar Baz") 838 | assert_equal(8, s.search_full(/Bar /, false, false)) 839 | assert_equal(0, s.pos) 840 | assert_equal("Foo Bar ", s.search_full(/Bar /, false, true)) 841 | assert_equal(0, s.pos) 842 | assert_equal(8, s.search_full(/Bar /, true, false)) 843 | assert_equal(8, s.pos) 844 | assert_equal("Baz", s.search_full(/az/, true, true)) 845 | assert_equal(11, s.pos) 846 | end 847 | 848 | def test_search_full_string 849 | s = create_string_scanner("Foo Bar Baz") 850 | assert_equal(8, s.search_full("Bar ", false, false)) 851 | assert_equal(0, s.pos) 852 | assert_equal("Foo Bar ", s.search_full("Bar ", false, true)) 853 | assert_equal(0, s.pos) 854 | assert_equal(8, s.search_full("Bar ", true, false)) 855 | assert_equal(8, s.pos) 856 | assert_equal("Baz", s.search_full("az", true, true)) 857 | assert_equal(11, s.pos) 858 | end 859 | 860 | def test_peek 861 | s = create_string_scanner("test string") 862 | assert_equal("test st", s.peek(7)) 863 | assert_equal("test st", s.peek(7)) 864 | s.scan(/test/) 865 | assert_equal(" stri", s.peek(5)) 866 | assert_equal(" string", s.peek(10)) 867 | s.scan(/ string/) 868 | assert_equal("", s.peek(10)) 869 | end 870 | 871 | def test_unscan 872 | s = create_string_scanner('test string') 873 | assert_equal(4, s.skip(/(?test)/)) # set named_captures 874 | s.unscan 875 | assert_equal({}, s.named_captures) 876 | assert_equal("te", s.scan(/../)) 877 | assert_equal(nil, s.scan(/\d/)) 878 | assert_raise(ScanError) { s.unscan } 879 | end 880 | 881 
| def test_rest 882 | s = create_string_scanner('test string') 883 | assert_equal("test string", s.rest) 884 | s.scan(/test/) 885 | assert_equal(" string", s.rest) 886 | s.scan(/ string/) 887 | assert_equal("", s.rest) 888 | s.scan(/ string/) 889 | end 890 | 891 | def test_rest_size 892 | s = create_string_scanner('test string') 893 | assert_equal(11, s.rest_size) 894 | s.scan(/test/) 895 | assert_equal(7, s.rest_size) 896 | s.scan(/ string/) 897 | assert_equal(0, s.rest_size) 898 | s.scan(/ string/) 899 | end 900 | 901 | def test_inspect2 902 | s = create_string_scanner('test string test') 903 | s.scan(/test strin/) 904 | assert_equal('#', s.inspect) 905 | end 906 | 907 | def test_aref_without_regex 908 | s = create_string_scanner('abc') 909 | s.get_byte 910 | assert_raise(IndexError) { s[:c] } 911 | assert_raise(IndexError) { s['c'] } 912 | s.getch 913 | assert_raise(IndexError) { s[:c] } 914 | assert_raise(IndexError) { s['c'] } 915 | end 916 | 917 | def test_size 918 | s = create_string_scanner("Fri Dec 12 1975 14:39") 919 | s.scan(/(\w+) (\w+) (\d+) /) 920 | assert_equal(4, s.size) 921 | end 922 | 923 | def test_captures 924 | s = create_string_scanner("Timestamp: Fri Dec 12 1975 14:39") 925 | s.scan("Timestamp: ") 926 | s.scan(/(\w+) (\w+) (\d+) (1980)?/) 927 | assert_equal(["Fri", "Dec", "12", nil], s.captures) 928 | s.scan(/(\w+) (\w+) (\d+) /) 929 | assert_nil(s.captures) 930 | end 931 | 932 | def test_values_at 933 | s = create_string_scanner("Timestamp: Fri Dec 12 1975 14:39") 934 | s.scan("Timestamp: ") 935 | s.scan(/(\w+) (\w+) (\d+) /) 936 | assert_equal(["Fri Dec 12 ", "12", nil, "Dec"], s.values_at(0, -1, 5, 2)) 937 | s.scan(/(\w+) (\w+) (\d+) /) 938 | assert_nil(s.values_at(0, -1, 5, 2)) 939 | end 940 | 941 | def test_scan_aref_repeatedly 942 | s = StringScanner.new('test string') 943 | assert_equal("test", s.scan(/\w(\w)(\w*)/)) 944 | assert_equal("test", s[0]) 945 | assert_equal("e", s[1]) 946 | assert_equal("st", s[2]) 947 | assert_nil( 
s.scan(/\w+/)) 948 | assert_nil( s[0]) 949 | assert_nil( s[1]) 950 | assert_nil( s[2]) 951 | assert_equal(" ", s.scan(/\s+/)) 952 | assert_equal(" ", s[0]) 953 | assert_nil( s[1]) 954 | assert_nil( s[2]) 955 | assert_equal("string", s.scan(/\w(\w)(\w*)/)) 956 | assert_equal("string", s[0]) 957 | assert_equal("t", s[1]) 958 | assert_equal("ring", s[2]) 959 | end 960 | 961 | def test_named_captures 962 | scan = StringScanner.new("foobarbaz") 963 | assert_equal({}, scan.named_captures) 964 | assert_equal(9, scan.match?(/(?foo)(?bar)(?baz)/)) 965 | assert_equal({"f" => "foo", "r" => "bar", "z" => "baz"}, scan.named_captures) 966 | assert_equal(9, scan.match?("foobarbaz")) 967 | assert_equal({}, scan.named_captures) 968 | end 969 | 970 | def test_named_captures_same_name_union 971 | scan = StringScanner.new("123") 972 | assert_equal(1, scan.match?(/(?0)|(?1)|(?2)/)) 973 | assert_equal({"number" => "1"}, scan.named_captures) 974 | end 975 | 976 | def test_scan_integer 977 | s = create_string_scanner('abc') 978 | assert_equal(3, s.match?(/(?abc)/)) # set named_captures 979 | assert_nil(s.scan_integer) 980 | assert_equal({}, s.named_captures) 981 | assert_equal(0, s.pos) 982 | refute_predicate(s, :matched?) 983 | 984 | s = create_string_scanner('123abc') 985 | assert_equal(123, s.scan_integer) 986 | assert_equal(3, s.pos) 987 | assert_predicate(s, :matched?) 988 | 989 | s = create_string_scanner('-123abc') 990 | assert_equal(-123, s.scan_integer) 991 | assert_equal(4, s.pos) 992 | assert_predicate(s, :matched?) 993 | 994 | s = create_string_scanner('+123') 995 | assert_equal(123, s.scan_integer) 996 | assert_equal(4, s.pos) 997 | assert_predicate(s, :matched?) 998 | 999 | s = create_string_scanner('-abc') 1000 | assert_nil(s.scan_integer) 1001 | assert_equal(0, s.pos) 1002 | refute_predicate(s, :matched?) 1003 | 1004 | s = create_string_scanner('-') 1005 | assert_nil(s.scan_integer) 1006 | assert_equal(0, s.pos) 1007 | refute_predicate(s, :matched?) 
1008 | 1009 | s = create_string_scanner('+') 1010 | assert_nil(s.scan_integer) 1011 | assert_equal(0, s.pos) 1012 | refute_predicate(s, :matched?) 1013 | 1014 | huge_integer = '1' * 2_000 1015 | s = create_string_scanner(huge_integer) 1016 | assert_equal(huge_integer.to_i, s.scan_integer) 1017 | assert_equal(2_000, s.pos) 1018 | assert_predicate(s, :matched?) 1019 | 1020 | s = create_string_scanner('abc1') 1021 | s.pos = 3 1022 | assert_equal(1, s.scan_integer) 1023 | assert_equal(4, s.pos) 1024 | assert_predicate(s, :matched?) 1025 | end 1026 | 1027 | def test_scan_integer_unmatch 1028 | s = create_string_scanner('123abc') 1029 | assert_equal(123, s.scan_integer) 1030 | assert_equal(3, s.pos) 1031 | 1032 | s.unscan 1033 | assert_equal(0, s.pos) 1034 | end 1035 | 1036 | def test_scan_integer_encoding 1037 | s = create_string_scanner('123abc'.encode(Encoding::UTF_32LE)) 1038 | assert_raise(Encoding::CompatibilityError) do 1039 | s.scan_integer 1040 | end 1041 | end 1042 | 1043 | def test_scan_integer_matched 1044 | s = create_string_scanner("42abc") 1045 | assert_equal(42, s.scan_integer) 1046 | assert_equal("42", s.matched) 1047 | 1048 | s = create_string_scanner("42abc") 1049 | assert_equal(0x42abc, s.scan_integer(base: 16)) 1050 | assert_equal("42abc", s.matched) 1051 | end 1052 | 1053 | def test_scan_integer_base_16 1054 | s = create_string_scanner('0') 1055 | assert_equal(0x0, s.scan_integer(base: 16)) 1056 | assert_equal(1, s.pos) 1057 | assert_predicate(s, :matched?) 1058 | 1059 | s = create_string_scanner('abc') 1060 | assert_equal(3, s.match?(/(?abc)/)) # set named_captures 1061 | assert_equal(0xabc, s.scan_integer(base: 16)) 1062 | assert_equal({}, s.named_captures) 1063 | assert_equal(3, s.pos) 1064 | assert_predicate(s, :matched?) 1065 | 1066 | s = create_string_scanner('123abc') 1067 | assert_equal(0x123abc, s.scan_integer(base: 16)) 1068 | assert_equal(6, s.pos) 1069 | assert_predicate(s, :matched?) 
1070 | 1071 | s = create_string_scanner('0x123abc') 1072 | assert_equal(0x123abc, s.scan_integer(base: 16)) 1073 | assert_equal(8, s.pos) 1074 | assert_predicate(s, :matched?) 1075 | 1076 | s = create_string_scanner('0x123ABC') 1077 | assert_equal(0x123abc, s.scan_integer(base: 16)) 1078 | assert_equal(8, s.pos) 1079 | assert_predicate(s, :matched?) 1080 | 1081 | s = create_string_scanner('-0x123ABC') 1082 | assert_equal(-0x123abc, s.scan_integer(base: 16)) 1083 | assert_equal(9, s.pos) 1084 | assert_predicate(s, :matched?) 1085 | 1086 | s = create_string_scanner('+0x123ABC') 1087 | assert_equal(+0x123abc, s.scan_integer(base: 16)) 1088 | assert_equal(9, s.pos) 1089 | assert_predicate(s, :matched?) 1090 | 1091 | s = create_string_scanner('0x') 1092 | assert_equal(0, s.scan_integer(base: 16)) 1093 | assert_equal(1, s.pos) 1094 | assert_predicate(s, :matched?) 1095 | 1096 | s = create_string_scanner('0xyz') 1097 | assert_equal(0, s.scan_integer(base: 16)) 1098 | assert_equal(1, s.pos) 1099 | assert_predicate(s, :matched?) 1100 | 1101 | s = create_string_scanner('-0x') 1102 | assert_equal(0, s.scan_integer(base: 16)) 1103 | assert_equal(2, s.pos) 1104 | assert_predicate(s, :matched?) 1105 | 1106 | s = create_string_scanner('+0x') 1107 | assert_equal(0, s.scan_integer(base: 16)) 1108 | assert_equal(2, s.pos) 1109 | assert_predicate(s, :matched?) 1110 | 1111 | s = create_string_scanner('-123abc') 1112 | assert_equal(-0x123abc, s.scan_integer(base: 16)) 1113 | assert_equal(7, s.pos) 1114 | assert_predicate(s, :matched?) 1115 | 1116 | s = create_string_scanner('+123') 1117 | assert_equal(0x123, s.scan_integer(base: 16)) 1118 | assert_equal(4, s.pos) 1119 | assert_predicate(s, :matched?) 1120 | 1121 | s = create_string_scanner('-abc') 1122 | assert_equal(-0xabc, s.scan_integer(base: 16)) 1123 | assert_equal(4, s.pos) 1124 | assert_predicate(s, :matched?) 
1125 | 1126 | huge_integer = 'F' * 2_000 1127 | s = create_string_scanner(huge_integer) 1128 | assert_equal(huge_integer.to_i(16), s.scan_integer(base: 16)) 1129 | assert_equal(2_000, s.pos) 1130 | assert_predicate(s, :matched?) 1131 | end 1132 | end 1133 | 1134 | class TestStringScanner < Test::Unit::TestCase 1135 | include StringScannerTests 1136 | 1137 | def create_string_scanner(string, *args) 1138 | StringScanner.new(string, *args) 1139 | end 1140 | 1141 | def test_fixed_anchor_true 1142 | assert_equal(true, StringScanner.new("a", fixed_anchor: true).fixed_anchor?) 1143 | end 1144 | 1145 | def test_fixed_anchor_false 1146 | assert_equal(false, StringScanner.new("a").fixed_anchor?) 1147 | assert_equal(false, StringScanner.new("a", true).fixed_anchor?) 1148 | assert_equal(false, StringScanner.new("a", false).fixed_anchor?) 1149 | assert_equal(false, StringScanner.new("a", {}).fixed_anchor?) 1150 | assert_equal(false, StringScanner.new("a", fixed_anchor: nil).fixed_anchor?) 1151 | assert_equal(false, StringScanner.new("a", fixed_anchor: false).fixed_anchor?) 
1152 | end 1153 | end 1154 | 1155 | class TestStringScannerFixedAnchor < Test::Unit::TestCase 1156 | include StringScannerTests 1157 | 1158 | def create_string_scanner(string, *args) 1159 | StringScanner.new(string, fixed_anchor: true) 1160 | end 1161 | 1162 | def test_skip_with_begenning_of_string_anchor_match 1163 | s = create_string_scanner("a") 1164 | assert_equal(1, s.skip(/\Aa/)) 1165 | end 1166 | 1167 | def test_skip_with_begenning_of_string_anchor_not_match 1168 | s = create_string_scanner("a\nb") 1169 | assert_equal(2, s.skip(/a\n/)) 1170 | assert_nil( s.skip(/\Ab/)) 1171 | end 1172 | 1173 | def test_skip_with_begenning_of_line_anchor_match 1174 | s = create_string_scanner("a\nb") 1175 | assert_equal(2, s.skip(/a\n/)) 1176 | assert_equal(1, s.skip(/^b/)) 1177 | end 1178 | 1179 | def test_skip_with_begenning_of_line_anchor_not_match 1180 | s = create_string_scanner("ab") 1181 | assert_equal(1, s.skip(/a/)) 1182 | assert_nil( s.skip(/^b/)) 1183 | end 1184 | 1185 | # ruby/strscan#86 1186 | def test_scan_shared_string 1187 | s = "hellohello"[5..-1] 1188 | ss = StringScanner.new(s).scan(/hello/) 1189 | 1190 | assert_equal("hello", ss) 1191 | end 1192 | end 1193 | --------------------------------------------------------------------------------