├── .gitignore ├── .travis.yml ├── AUTHORS ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── examples └── pcredemo.rs ├── libpcre-sys ├── Cargo.toml ├── build.rs ├── ext │ └── pcre-8.39.tar.bz2 └── src │ └── lib.rs ├── pkg.rs ├── src ├── detail │ └── mod.rs └── lib.rs └── tests └── test.rs /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | Cargo.lock 3 | /libpcre-sys/target 4 | /doc 5 | /target 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: rust 3 | rust: 4 | - stable 5 | - beta 6 | - nightly 7 | matrix: 8 | allow_failures: 9 | - rust: nightly 10 | notifications: 11 | email: 12 | - cadencemarseille@gmail.com 13 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Artem 2 | Cadence Marseille 3 | Damien Schoof 4 | James Rowe 5 | Mikhail Borisov 6 | Pascal Hertleif 7 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pcre" 3 | version = "0.2.3" 4 | authors = ["The rust-pcre authors"] 5 | license = "MIT/Apache-2.0" 6 | readme = "README.md" 7 | keywords = ["regexp", "regex", "regular-expressions", "pcre"] 8 | repository = "https://github.com/cadencemarseille/rust-pcre" 9 | documentation = "http://www.rust-ci.org/cadencemarseille/rust-pcre/doc/pcre/" 10 | homepage = "https://github.com/cadencemarseille/rust-pcre" 11 | description = """ 12 | Rust wrapper for libpcre. 
13 | """ 14 | 15 | [lib] 16 | name = "pcre" 17 | 18 | [dependencies] 19 | enum-set = ">= 0.0.5" 20 | libc = "0.2" 21 | 22 | [dependencies.libpcre-sys] 23 | path = "libpcre-sys" 24 | version = "0.2.2" 25 | 26 | [dev-dependencies] 27 | getopts = "0.2" 28 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 The rust-pcre authors. 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rust-pcre 2 | [Rust](https://github.com/rust-lang/rust) 1.x+ wrapper for [libpcre](http://pcre.org/) 8.20+. 3 | 4 | [![Build Status](https://travis-ci.org/cadencemarseille/rust-pcre.svg?branch=master)](https://travis-ci.org/cadencemarseille/rust-pcre) 5 | 6 | ## Quick Start 7 | 8 | To use rust-pcre, you can either install libpcre 8.20+ and register with pkg-config or you can let rust-pcre build libpcre from source. 9 | 10 | ### Debian 11 | 12 | Debian Squeeze's package for libpcre is for version 8.02 of the library, which is too old. You can either install a newer version of libpcre and register it with pkg-config or just let rust-pcre automatically build libpcre from source. 13 | 14 | On Debian Wheezy and newer, install the `libpcre3-dev` package: 15 | 16 | sudo apt-get install libpcre3-dev 17 | 18 | 19 | ### Fedora 20 | 21 | Install the `pcre-devel` package. 22 | 23 | ### Mac OS X 24 | 25 | Mac OS 10.7 ships with version 8.02 of libpcre. You can either install a newer version of libpcre and register it with pkg-config or just let rust-pcre automatically build libpcre from source. 26 | 27 | [Homebrew](http://brew.sh/) is highly recommended for installing libpcre. With Homebrew, installing the latest versions of Rust and libpcre is as simple as: 28 | 29 | brew install rust pcre 30 | 31 | To upgrade: 32 | 33 | brew update && brew upgrade rust pcre 34 | 35 | ### Ubuntu 36 | The libpcre packages for Ubuntu 10.04 LTS 'Lucid Lynx' and Ubuntu 12.04 LTS 'Precise Pangolin' are too old. 
You can either install a newer version of libpcre and register it with pkg-config or just let rust-pcre automatically build libpcre from source. 37 | 38 | On Ubuntu 12.10 'Quantal Quetzal' and newer, install the `libpcre3-dev` package: 39 | 40 | sudo apt-get install libpcre3-dev 41 | 42 | ## Usage 43 | The basic use of the library involves compiling a pattern regular expression: 44 | 45 | let mut re = match Pcre::compile(pattern) { 46 | Err(err) => { 47 | // compilation failed 48 | return; 49 | }, 50 | Ok(re) => re 51 | }; 52 | 53 | You can also pass options: 54 | 55 | let mut compile_options: EnumSet<CompileOption> = EnumSet::new(); 56 | compile_options.insert(CompileOption::Caseless); 57 | let mut re = Pcre::compile_with_options(pattern, &compile_options).unwrap(); 58 | 59 | To test against a subject string, use one of the exec(), exec_from(), or exec_from_with_options() methods. For example: 60 | 61 | let m = match re.exec(subject) { 62 | None => { println!("No match"); return; }, 63 | Some(m) => m 64 | }; 65 | 66 | See the [source of `pcredemo`](https://github.com/cadencemarseille/rust-pcre/blob/master/examples/pcredemo.rs) for a complete example. 67 | -------------------------------------------------------------------------------- /examples/pcredemo.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The rust-pcre authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | // This is a port of the pcre project's `pcredemo` sample using rust-pcre bindings. 
10 | 11 | extern crate enum_set; 12 | extern crate getopts; 13 | extern crate pcre; 14 | 15 | use enum_set::{EnumSet}; 16 | use getopts::{Options}; 17 | use pcre::{CompileOption, Match, Pcre, pcre_version}; 18 | use std::collections::{BTreeMap}; 19 | use std::env; 20 | use std::io::{stderr, Write}; 21 | use std::string::{String}; 22 | use std::vec::{Vec}; 23 | 24 | fn print_usage(program: &str, opts: &Options) { 25 | let brief = format!("Usage: {} [options] pattern subject", program); 26 | print!("{}", opts.usage(&brief)); 27 | } 28 | 29 | fn print_version_info() { 30 | println!("rust-pcre 0.2.3 compiled against libpcre {}", pcre_version()); 31 | } 32 | 33 | fn print_match(m: &Match, name_table: &BTreeMap>) { 34 | println!("Match succeeded at offset {}", m.group_start(0)); 35 | 36 | // Show captured substrings by number. 37 | let mut i = 0; 38 | while i < m.string_count() { 39 | println!("{}: {}", i, m.group(i)); 40 | i += 1; 41 | } 42 | 43 | let name_count = name_table.len(); 44 | if name_count <= 0 { 45 | println!("No named substrings"); 46 | } else { 47 | println!("Named substrings:"); 48 | for (name, n_vec) in name_table.iter() { 49 | for n in n_vec.iter() { 50 | println!("({}) {}: {}", *n, *name, m.group(*n)); 51 | } 52 | } 53 | } 54 | } 55 | 56 | fn main() { 57 | let args: Vec = env::args().collect(); 58 | let program = args[0].clone(); 59 | 60 | let mut opts = Options::new(); 61 | opts.optflag("g", "", "find all matches"); 62 | opts.optflag("h", "help", "print usage and exit"); 63 | opts.optflag("", "version", "print version information and exit"); 64 | let opt_matches = match opts.parse(&args[1..]) { 65 | Ok(m) => m, 66 | Err(f) => { 67 | writeln!(stderr(), "Error: {}", f).unwrap(); 68 | //env::set_exit_status(1); 69 | return; 70 | } 71 | }; 72 | 73 | if opt_matches.opt_present("h") || opt_matches.opt_present("help") { 74 | print_usage(&program, &opts); 75 | return; 76 | } 77 | 78 | if opt_matches.opt_present("version") { 79 | print_version_info(); 80 | 
return; 81 | } 82 | 83 | let find_all = opt_matches.opt_present("g"); 84 | if opt_matches.free.len() == 0 { 85 | writeln!(stderr(), "Error: No pattern").unwrap(); 86 | //env::set_exit_status(1); 87 | return; 88 | } else if opt_matches.free.len() == 1 { 89 | writeln!(stderr(), "Error: No subject").unwrap(); 90 | //env::set_exit_status(1); 91 | return; 92 | } else if opt_matches.free.len() > 2 { 93 | writeln!(stderr(), "Error: Too many command line arguments").unwrap(); 94 | //env::set_exit_status(1); 95 | return; 96 | } 97 | 98 | let pattern = opt_matches.free[0].clone(); 99 | let subject = opt_matches.free[1].clone(); 100 | 101 | let mut compile_options: EnumSet = EnumSet::new(); 102 | compile_options.insert(CompileOption::DupNames); 103 | let mut re = match Pcre::compile_with_options(&pattern, &compile_options) { 104 | Err(err) => { 105 | writeln!(stderr(), "Error: The pattern could not be compiled: {}", err).unwrap(); 106 | //env::set_exit_status(1); 107 | return; 108 | }, 109 | Ok(re) => re 110 | }; 111 | let name_table = re.name_table(); 112 | 113 | let opt_m = re.exec(&subject); 114 | let m = match opt_m { 115 | None => { 116 | println!("No match"); 117 | //env::set_exit_status(1); 118 | return; 119 | } 120 | Some(m) => m 121 | }; 122 | print_match(&m, &name_table); 123 | 124 | if find_all { 125 | let mut start_offset = m.group_end(0); 126 | loop { 127 | let opt_m = re.exec_from(&subject, start_offset); 128 | let m = match opt_m { 129 | None => { 130 | println!("\nNo more matches"); 131 | return; 132 | } 133 | Some(m) => m 134 | }; 135 | 136 | println!(""); 137 | print_match(&m, &name_table); 138 | 139 | start_offset = m.group_end(0); 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /libpcre-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "libpcre-sys" 3 | version = "0.2.2" 4 | authors = ["The rust-pcre authors"] 5 | license = 
"MIT/Apache-2.0" 6 | keywords = ["pcre"] 7 | repository = "https://github.com/cadencemarseille/rust-pcre" 8 | links = "pcre" 9 | build = "build.rs" 10 | description = "Native bindings to libpcre" 11 | 12 | [lib] 13 | name = "libpcre_sys" 14 | 15 | [dependencies] 16 | libc = "0.2" 17 | 18 | [build-dependencies] 19 | bzip2 = "0.3" 20 | pkg-config = "0.3" 21 | tar = "0.4" 22 | -------------------------------------------------------------------------------- /libpcre-sys/build.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The rust-pcre authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | extern crate bzip2; 10 | extern crate pkg_config; 11 | extern crate tar; 12 | 13 | use bzip2::read::{BzDecoder}; 14 | use std::env; 15 | use std::fs::{OpenOptions}; 16 | use std::io::{ErrorKind}; 17 | use std::path::{Path}; 18 | use std::process::{Command}; 19 | use tar::{Archive}; 20 | 21 | const BUNDLED_PCRE_VERSION: &'static str = "8.39"; 22 | 23 | fn main() { 24 | if pkg_config::Config::new().atleast_version("8.20").find("libpcre").is_ok() { 25 | return; 26 | } 27 | 28 | let cargo_manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); 29 | let out_dir = env::var("OUT_DIR").unwrap(); 30 | 31 | let ext_pathbuf = Path::new(&cargo_manifest_dir).join("ext"); 32 | 33 | let pcre_tbz2_pathbuf = ext_pathbuf.join(format!("pcre-{}.tar.bz2", BUNDLED_PCRE_VERSION)); 34 | let pcre_tbz2_f = OpenOptions::new().read(true).open(pcre_tbz2_pathbuf).unwrap(); 35 | let decompressor = BzDecoder::new(pcre_tbz2_f); 36 | 37 | let mut archive = Archive::new(decompressor); 38 | if !archive.unpack(&out_dir).is_ok() { 39 | panic!("failed to extract the tarball"); 40 | } 41 | 42 | let pcre_pathbuf = Path::new(&out_dir).join(format!("pcre-{}", BUNDLED_PCRE_VERSION)); 43 | 44 | if 
cfg!(unix) { 45 | let mut cmd = Command::new("autoreconf"); 46 | cmd.current_dir(&pcre_pathbuf); 47 | let status = match cmd.status() { 48 | Err(ref e) if e.kind() == ErrorKind::NotFound => { 49 | panic!("failed to execute `autoreconf`: {}. Are the Autotools installed?", e); 50 | }, 51 | Err(e) => { 52 | panic!("failed to execute `autoreconf`: {}", e); 53 | }, 54 | Ok(status) => status 55 | }; 56 | if !status.success() { 57 | panic!("`autoreconf` did not run successfully."); 58 | } 59 | 60 | let mut cmd = Command::new("./configure"); 61 | cmd.arg("--with-pic"); 62 | cmd.arg("--disable-shared"); 63 | cmd.arg("--disable-cpp"); 64 | cmd.arg("--enable-jit"); 65 | cmd.arg("--enable-utf"); 66 | cmd.arg("--enable-unicode-properties"); 67 | cmd.arg(format!("--prefix={}", Path::new(&out_dir).display())); 68 | cmd.current_dir(&pcre_pathbuf); 69 | let status = match cmd.status() { 70 | Err(e) => { 71 | panic!("failed to execute `./configure`: {}", e); 72 | }, 73 | Ok(status) => status 74 | }; 75 | if !status.success() { 76 | panic!("`./configure --with-pic ...` did not run successfully."); 77 | } 78 | 79 | let mut cmd = Command::new("make"); 80 | cmd.arg("install"); 81 | cmd.current_dir(&pcre_pathbuf); 82 | let status = match cmd.status() { 83 | Err(ref e) if e.kind() == ErrorKind::NotFound => { 84 | panic!("failed to execute `make`: {}. 
Is GNU Make installed?", e); 85 | }, 86 | Err(e) => { 87 | panic!("failed to execute `make`: {}", e); 88 | }, 89 | Ok(status) => status 90 | }; 91 | if !status.success() { 92 | panic!("`make install` did not run successfully."); 93 | } 94 | 95 | println!("cargo:rustc-link-search=native={}", Path::new(&out_dir).join("lib").as_path().display()); 96 | } else { 97 | let mut cmd = Command::new("cmake"); 98 | cmd.arg("."); 99 | cmd.arg("-DBUILD_SHARED_LIBS=OFF"); 100 | cmd.arg("-DPCRE_BUILD_PCRECPP=OFF"); 101 | cmd.arg("-DPCRE_BUILD_PCREGREP=OFF"); 102 | cmd.arg("-DPCRE_BUILD_TESTS=OFF"); 103 | cmd.arg("-DPCRE_BUILD_PCRE8=ON"); 104 | cmd.arg("-DPCRE_SUPPORT_JIT=ON"); 105 | cmd.arg("-DPCRE_SUPPORT_UTF=ON"); 106 | cmd.arg("-DPCRE_SUPPORT_UNICODE_PROPERTIES=ON"); 107 | cmd.current_dir(&pcre_pathbuf); 108 | let status = match cmd.status() { 109 | Err(ref e) if e.kind() == ErrorKind::NotFound => { 110 | panic!("failed to execute `cmake`: {}. Is CMake installed?", e); 111 | }, 112 | Err(e) => { 113 | panic!("failed to execute `cmake`: {}", e); 114 | }, 115 | Ok(status) => status 116 | }; 117 | if !status.success() { 118 | panic!("`cmake . -DBUILD_SHARED_LIBS=OFF ...` did not run successfully."); 119 | } 120 | 121 | let mut cmd = Command::new("cmake"); 122 | cmd.arg("--build").arg(".").current_dir(&pcre_pathbuf); 123 | let status = match cmd.status() { 124 | Err(ref e) if e.kind() == ErrorKind::NotFound => { 125 | panic!("failed to execute `cmake`: {}. 
Is CMake installed?", e); 126 | }, 127 | Err(e) => { 128 | panic!("failed to execute `cmake`: {}", e); 129 | }, 130 | Ok(status) => status 131 | }; 132 | if !status.success() { 133 | panic!("`cmake --build .` did not run successfully."); 134 | } 135 | 136 | println!("cargo:rustc-link-search=native={}", pcre_pathbuf.as_path().display()); 137 | } 138 | 139 | println!("cargo:rustc-link-lib=static=pcre"); 140 | } 141 | -------------------------------------------------------------------------------- /libpcre-sys/ext/pcre-8.39.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cadencemarseille/rust-pcre/68271ca8ca53224892178d362d53e3fc3825578f/libpcre-sys/ext/pcre-8.39.tar.bz2 -------------------------------------------------------------------------------- /libpcre-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The rust-pcre authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 
8 | 9 | extern crate libc; 10 | 11 | use libc::{c_char, c_int, c_uchar, c_ulong, c_void}; 12 | use std::option::{Option}; 13 | use std::ptr; 14 | 15 | #[allow(non_camel_case_types)] 16 | pub type compile_options = c_int; 17 | #[allow(non_camel_case_types)] 18 | pub type exec_options = c_int; 19 | #[allow(non_camel_case_types)] 20 | pub type fullinfo_field = c_int; 21 | #[allow(non_camel_case_types)] 22 | pub type study_options = c_int; 23 | 24 | pub const PCRE_UTF8: compile_options = 0x00000800; 25 | 26 | // PCRE_NO_UTF8_CHECK is both a compile and exec option 27 | pub const PCRE_NO_UTF8_CHECK: c_int = 0x00002000; 28 | 29 | pub const PCRE_ERROR_NOMATCH: c_int = -1; 30 | pub const PCRE_ERROR_NULL: c_int = -2; 31 | 32 | pub const PCRE_INFO_CAPTURECOUNT: fullinfo_field = 2; 33 | pub const PCRE_INFO_NAMEENTRYSIZE: fullinfo_field = 7; 34 | pub const PCRE_INFO_NAMECOUNT: fullinfo_field = 8; 35 | pub const PCRE_INFO_NAMETABLE: fullinfo_field = 9; 36 | 37 | //const PCRE_EXTRA_STUDY_DATA: c_ulong = 0x0001; 38 | const PCRE_EXTRA_MATCH_LIMIT: c_ulong = 0x0002; 39 | //const PCRE_EXTRA_CALLOUT_DATA: c_ulong = 0x0004; 40 | //const PCRE_EXTRA_TABLES: c_ulong = 0x0008; 41 | const PCRE_EXTRA_MATCH_LIMIT_RECURSION: c_ulong = 0x0010; 42 | const PCRE_EXTRA_MARK: c_ulong = 0x0020; 43 | //const PCRE_EXTRA_EXECUTABLE_JIT: c_ulong = 0x0040; 44 | 45 | #[allow(non_camel_case_types)] 46 | pub enum pcre {} 47 | 48 | #[allow(non_camel_case_types)] 49 | #[repr(C)] 50 | pub struct pcre_extra { 51 | flags: c_ulong, 52 | study_data: *mut c_void, 53 | match_limit_: c_ulong, 54 | callout_data: *mut c_void, 55 | tables: *const c_uchar, 56 | match_limit_recursion_: c_ulong, 57 | mark: *mut *mut c_uchar, 58 | executable_jit: *mut c_void 59 | } 60 | 61 | impl pcre_extra { 62 | /// Returns the match limit, if previously set by [set_match_limit()](#method.set_match_limit). 63 | /// 64 | /// The default value for this limit is set when PCRE is built. The default default is 10 million. 
65 | pub fn match_limit(&self) -> Option { 66 | if (self.flags & PCRE_EXTRA_MATCH_LIMIT) == 0 { 67 | None 68 | } else { 69 | Some(self.match_limit_ as usize) 70 | } 71 | } 72 | 73 | /// Sets the match limit to `limit` instead of using PCRE's default. 74 | pub fn set_match_limit(&mut self, limit: u32) { 75 | self.flags |= PCRE_EXTRA_MATCH_LIMIT; 76 | self.match_limit_ = limit as c_ulong; 77 | } 78 | 79 | /// Returns the recursion depth limit, if previously set by [set_match_limit_recursion()](#method.set_match_limit_recursion). 80 | /// 81 | /// The default value for this limit is set when PCRE is built. 82 | pub fn match_limit_recursion(&self) -> Option { 83 | if (self.flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) == 0 { 84 | None 85 | } else { 86 | Some(self.match_limit_recursion_ as usize) 87 | } 88 | } 89 | 90 | /// Sets the recursion depth limit to `limit` instead of using PCRE's default. 91 | pub fn set_match_limit_recursion(&mut self, limit: u32) { 92 | self.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; 93 | self.match_limit_ = limit as c_ulong; 94 | } 95 | 96 | /// Sets the mark field. 97 | pub unsafe fn set_mark(&mut self, mark: &mut *mut c_uchar) { 98 | self.flags |= PCRE_EXTRA_MARK; 99 | self.mark = mark as *mut *mut c_uchar; 100 | } 101 | 102 | /// Unsets the mark field. PCRE will not save mark names when matching the compiled regular expression. 
103 | pub fn unset_mark(&mut self) { 104 | self.flags &= !PCRE_EXTRA_MARK; 105 | self.mark = ptr::null_mut(); 106 | } 107 | } 108 | 109 | #[link(name = "pcre")] 110 | extern { 111 | pub static pcre_free: extern "C" fn(ptr: *mut c_void); 112 | 113 | pub fn pcre_compile(pattern: *const c_char, options: compile_options, errptr: *mut *const c_char, erroffset: *mut c_int, tableptr: *const c_uchar) -> *mut pcre; 114 | pub fn pcre_exec(code: *const pcre, extra: *const pcre_extra, subject: *const c_char, length: c_int, startoffset: c_int, options: exec_options, ovector: *mut c_int, ovecsize: c_int) -> c_int; 115 | pub fn pcre_free_study(extra: *mut pcre_extra); 116 | pub fn pcre_fullinfo(code: *const pcre, extra: *const pcre_extra, what: fullinfo_field, where_: *mut c_void) -> c_int; 117 | // Note: libpcre's pcre_refcount() function is not thread-safe. 118 | pub fn pcre_refcount(code: *mut pcre, adjust: c_int) -> c_int; 119 | pub fn pcre_study(code: *const pcre, options: study_options, errptr: *mut *const c_char) -> *mut pcre_extra; 120 | pub fn pcre_version() -> *const c_char; 121 | } 122 | -------------------------------------------------------------------------------- /pkg.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The rust-pcre authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 
8 | #![allow(unused_must_use)] 9 | #![feature(phase)] 10 | 11 | #[phase(plugin, link)] extern crate log; 12 | extern crate rustc; 13 | 14 | use rustc::driver::driver::host_triple; 15 | use std::from_str::{from_str}; 16 | use std::io; 17 | use std::io::{Command, FilePermission}; 18 | use std::io::fs::{mkdir, File, PathExtensions}; 19 | use std::option::{Option}; 20 | use std::os; 21 | use std::str; 22 | use std::string; 23 | 24 | #[deriving(Eq, PartialEq)] 25 | struct Version { 26 | major: uint, 27 | minor: uint 28 | } 29 | 30 | impl Version { 31 | pub fn parse(version_str: &str) -> Option { 32 | let mut it = version_str.split('.'); 33 | match (it.next().and_then(from_str::), it.next().and_then(from_str::)) { 34 | (Some(major), Some(minor)) => Some(Version { major: major, minor: minor }), 35 | _ => None 36 | } 37 | } 38 | } 39 | 40 | impl std::fmt::Show for Version { 41 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 42 | write!(f, "{:u}.{:u}", self.major, self.minor) 43 | } 44 | } 45 | 46 | impl PartialOrd for Version { 47 | fn partial_cmp(&self, other: &Version) -> Option { 48 | Some(self.cmp(other)) 49 | } 50 | } 51 | 52 | impl Ord for Version { 53 | fn cmp(&self, other: &Version) -> Ordering { 54 | (self.major, self.minor).cmp(&(other.major, other.minor)) 55 | } 56 | } 57 | 58 | fn cd(path: &Path) { 59 | if !os::change_dir(path) { 60 | fail!("Package script error: Failed to `cd` into `{}`", path.display()); 61 | } 62 | } 63 | 64 | fn main() { 65 | let pcre_libdir = match os::getenv("PCRE_LIBDIR") { 66 | None => { 67 | let pcre_config_output = match Command::new("pcre-config").arg("--prefix").output() { 68 | Err(e) => { 69 | match e.kind { 70 | io::FileNotFound => fail!("Package script error: Could not run `pcre-config` because no such executable is in the executable search PATH. 
Make sure that you have installed a dev package for libpcre and/or make sure that libpcre's bindir is added to your PATH (currently \"{}\").", os::getenv("PATH").unwrap_or(String::from_str(""))), 71 | _ => fail!("Package script error: Could not run `pcre-config`: {}", e) 72 | } 73 | }, 74 | Ok(pcre_config_output) => pcre_config_output 75 | }; 76 | if !pcre_config_output.status.success() { 77 | fail!("Package script error: `pcre-config --prefix` failed"); 78 | } 79 | let output_ptr = pcre_config_output.output.as_ptr(); 80 | let output_len = pcre_config_output.output.len(); 81 | let prefix_str = unsafe { string::raw::from_buf_len(output_ptr, output_len) }; 82 | // `pcre-config` adds a newline to the end, which we need to trim away. 83 | String::from_str(prefix_str.as_slice().trim()).push_str("/lib") 84 | }, 85 | Some(pcre_libdir) => pcre_libdir 86 | }; 87 | let pcre_lib_path = Path::new(pcre_libdir); 88 | 89 | let workspace_path = os::getcwd(); 90 | 91 | // Check the version 92 | let target_build_path = workspace_path.join("build").join(host_triple()); 93 | if !target_build_path.exists() { 94 | if mkdir(&target_build_path, FilePermission::from_bits_truncate(0x1FF)).is_err() { 95 | fail!("Package script error: Failed to create target build directory `{}`", target_build_path.display()); 96 | } 97 | } 98 | let out_path = target_build_path.join("pcre"); 99 | if !out_path.exists() { 100 | if mkdir(&out_path, FilePermission::from_bits_truncate(0x1FF)).is_err() { 101 | fail!("Package script error: Failed to create output directory `{}`", out_path.display()); 102 | } 103 | } 104 | 105 | let versioncheck_rs_path = out_path.join("versioncheck.rs"); 106 | { 107 | let mut f = match File::create(&versioncheck_rs_path) { 108 | Err(e) => fail!("Package script error: Failed to open `{}` for writing: {}", versioncheck_rs_path.display(), e), 109 | Ok(f) => f 110 | }; 111 | let contents = format!("\ 112 | extern crate libc; 113 | 114 | use std::c_str::{{CString}}; 115 | use 
libc::{{c_char, c_int, c_uchar, c_void}}; 116 | use std::ptr; 117 | use std::ptr::{{RawPtr}}; 118 | use std::slice; 119 | 120 | type options = c_int; 121 | struct pcre; 122 | struct pcre_extra; 123 | 124 | #[link(name = \"pcre\")] 125 | extern {{ 126 | static pcre_free: extern \"C\" fn(ptr: *const c_void); 127 | 128 | fn pcre_compile(pattern: *const c_char, options: options, errptr: *mut *const c_char, erroffset: *mut c_int, tableptr: *const c_uchar) -> *const pcre; 129 | fn pcre_exec(code: *const pcre, extra: *const pcre_extra, subject: *const c_char, length: c_int, startoffset: c_int, options: options, ovector: *mut c_int, ovecsize: c_int) -> c_int; 130 | fn pcre_version() -> *const c_char; 131 | }} 132 | 133 | fn main () {{ 134 | unsafe {{ 135 | let version_cstring = CString::new(pcre_version(), false); 136 | let version_str = version_cstring.as_str().unwrap().to_string(); 137 | 138 | let pattern = \"^\\\\d+\\\\.\\\\d+\"; 139 | pattern.with_c_str(|pattern_c_str| {{ 140 | let mut err: *const c_char = ptr::null(); 141 | let mut erroffset: c_int = 0; 142 | let code = pcre_compile(pattern_c_str, 0, &mut err, &mut erroffset, ptr::null()); 143 | if code.is_null() {{ 144 | if code.is_null() {{ 145 | let err_cstring = CString::new(err, false); 146 | match err_cstring.as_str() {{ 147 | None => fail!(\"pcre_compile() failed at offset {{}}\", erroffset as uint), 148 | Some(err_str) => fail!(\"pcre_compile() failed at offset {{}}: {{}}\", erroffset as uint, err_str) 149 | }} 150 | }} 151 | }} 152 | assert!(code.is_not_null()); 153 | 154 | let ovecsize = 1 * 3; 155 | let mut ovector = Vec::from_elem(ovecsize, 0 as c_int); 156 | version_str.with_c_str_unchecked(|version_c_str| {{ 157 | let rc = pcre_exec(code, ptr::null(), version_c_str, version_str.len() as c_int, 0, 0, ovector.as_mut_ptr(), ovecsize as c_int); 158 | if rc < 0 {{ 159 | fail!(\"pcre_exec() failed\"); 160 | }} 161 | 162 | print!(\"{{}}\", version_str.as_slice().slice_to(*ovector.get(1) as uint)); 163 | }}); 
164 | 165 | pcre_free(code as *const c_void); 166 | }}); 167 | }} 168 | }} 169 | "); 170 | f.write_str(contents.as_slice()).map_err(|e| -> () { 171 | fail!("Package script error: Failed to write to `{}`: {}", versioncheck_rs_path.display(), e); 172 | }); 173 | } 174 | 175 | // Compile and run `versioncheck.rs` 176 | cd(&out_path); 177 | let rustc_output = match Command::new("rustc").arg("versioncheck.rs").arg("-L").arg(pcre_lib_path.clone()).output() { 178 | Err(e) => fail!("Package script error: Failed to run `rustc`: {}", e), 179 | Ok(rustc_output) => rustc_output 180 | }; 181 | if !rustc_output.status.success() { 182 | println!("{}", str::from_utf8(rustc_output.output.as_slice())); 183 | println!("{}", str::from_utf8(rustc_output.error.as_slice())); 184 | fail!("Package script error: `rustc versioncheck.rs` failed: {}", rustc_output.status); 185 | } 186 | let versioncheck_output = match Command::new("./versioncheck").output() { 187 | Err(e) => fail!("Package script error: Failed to run `./versioncheck`: {}", e), 188 | Ok(versioncheck_output) => versioncheck_output 189 | }; 190 | if !versioncheck_output.status.success() { 191 | println!("{}", str::from_utf8(versioncheck_output.output.as_slice())); 192 | println!("{}", str::from_utf8(versioncheck_output.error.as_slice())); 193 | fail!("versioncheck error: {}", versioncheck_output.status); 194 | } 195 | cd(&workspace_path); 196 | 197 | let output_ptr = versioncheck_output.output.as_ptr(); 198 | let output_len = versioncheck_output.output.len(); 199 | let output_str = unsafe { string::raw::from_buf_len(output_ptr, output_len) }; 200 | debug!("output_str = `{}`", output_str); 201 | 202 | // The "no debug symbols in executable" warning may be present in the output. 
203 | // https://github.com/mozilla/rust/issues/3495 204 | let mut output_rsplit_iter = output_str.as_slice().split('\n').rev(); 205 | let version_str: String = match output_rsplit_iter.next() { 206 | None => output_str.clone(), 207 | Some(version_str) => version_str.to_string() 208 | }; 209 | 210 | debug!("libpcre version {:s}", version_str.as_slice()); 211 | 212 | let min_required_version = Version::parse("8.20").unwrap(); 213 | let pcre_version = match Version::parse(version_str.as_slice()) { 214 | None => fail!("Package script error: Failed to parse version string '{}'", version_str.as_slice()), 215 | Some(pcre_version) => pcre_version 216 | }; 217 | 218 | if pcre_version < min_required_version { 219 | fail!("Package script error: Found libpcre version {}, but at least version {} is required", version_str.as_slice(), min_required_version); 220 | } 221 | 222 | // Create directories `bin` and `lib` 223 | let bin_path = workspace_path.join("bin"); 224 | if !bin_path.exists() { 225 | if mkdir(&bin_path, FilePermission::from_bits_truncate(0x1FF)).is_err() { 226 | fail!("Package script error: Failed to create the `bin` directory"); 227 | } 228 | } 229 | let lib_path = workspace_path.join("lib"); 230 | if !lib_path.exists() { 231 | if mkdir(&lib_path, FilePermission::from_bits_truncate(0x1FF)).is_err() { 232 | fail!("Package script error: Failed to create the `lib` directory"); 233 | } 234 | } 235 | 236 | // Compile libpcre-*.rlib 237 | match Command::new("rustc").arg("--out-dir").arg(lib_path).arg("src/pcre/mod.rs").arg("-L").arg(pcre_lib_path.clone()).output() { 238 | Err(e) => fail!("Package script error: Failed to run `rustc`: {}", e), 239 | Ok(rustc_output) => { 240 | if !rustc_output.status.success() { 241 | println!("{}", str::from_utf8(rustc_output.output.as_slice())); 242 | println!("{}", str::from_utf8(rustc_output.error.as_slice())); 243 | fail!("Package script error: `rustc src/pcre/mod.rs` failed: {}", rustc_output.status); 244 | } 245 | } 246 | } 247 | 
248 | match Command::new("rustc").arg("-o").arg(bin_path.join("pcredemo")).arg("src/pcredemo/main.rs").arg("-L").arg("lib").arg("-L").arg(pcre_lib_path.clone()).output() { 249 | Err(e) => fail!("Package script error: Failed to run `rustc`: {}", e), 250 | Ok(rustc_output) => { 251 | if !rustc_output.status.success() { 252 | println!("{}", str::from_utf8(rustc_output.output.as_slice())); 253 | println!("{}", str::from_utf8(rustc_output.error.as_slice())); 254 | fail!("Package script error: `rustc src/pcredemo/main.rs` failed: {}", rustc_output.status); 255 | } 256 | } 257 | } 258 | } 259 | -------------------------------------------------------------------------------- /src/detail/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The rust-pcre authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 
8 | 9 | use enum_set::{EnumSet}; 10 | use libc::{c_char, c_int, c_uchar, c_void}; 11 | use libpcre_sys; 12 | pub use libpcre_sys::{pcre, compile_options, exec_options, fullinfo_field, study_options, PCRE_UTF8, PCRE_NO_UTF8_CHECK, PCRE_ERROR_NOMATCH, PCRE_ERROR_NULL}; 13 | use std::ffi::{CStr}; 14 | use std::ptr; 15 | use std::result::{Result}; 16 | use std::string::{String}; 17 | 18 | pub unsafe fn pcre_compile(pattern: *const c_char, options: &EnumSet<::CompileOption>, tableptr: *const c_uchar) -> Result<*mut pcre, (Option, c_int)> { 19 | assert!(!pattern.is_null()); 20 | let converted_options = options.iter().fold(0, |converted_options, option| converted_options | (option as compile_options)) | PCRE_UTF8 | PCRE_NO_UTF8_CHECK; 21 | let mut err: *const c_char = ptr::null(); 22 | let mut erroffset: c_int = 0; 23 | let code = libpcre_sys::pcre_compile(pattern, converted_options, &mut err, &mut erroffset, tableptr); 24 | 25 | if code.is_null() { 26 | // "Otherwise, if compilation of a pattern fails, pcre_compile() returns 27 | // NULL, and sets the variable pointed to by errptr to point to a textual 28 | // error message. This is a static string that is part of the library. You 29 | // must not try to free it." 30 | // http://pcre.org/pcre.txt 31 | let err_cstr = CStr::from_ptr(err); 32 | // http://illegalargumentexception.blogspot.com/2015/05/rust-utf-8-byte-array-to-string.html 33 | // TODO Investigate memory allocations and check for alternative solutions. 
34 | match String::from_utf8(Vec::from(err_cstr.to_bytes())) { 35 | Err(_) => Err((None, erroffset)), 36 | Ok(err_str) => Err((Some(err_str), erroffset)) 37 | } 38 | } else { 39 | assert!(!code.is_null()); 40 | assert_eq!(erroffset, 0); 41 | 42 | Ok(code) 43 | } 44 | } 45 | 46 | pub unsafe fn pcre_exec(code: *const pcre, extra: *const ::PcreExtra, subject: *const c_char, length: c_int, startoffset: c_int, options: &EnumSet<::ExecOption>, ovector: *mut c_int, ovecsize: c_int) -> c_int { 47 | assert!(!code.is_null()); 48 | assert!(ovecsize >= 0 && ovecsize % 3 == 0); 49 | let converted_options = options.iter().fold(0, |converted_options, option| converted_options | (option as compile_options)) | PCRE_NO_UTF8_CHECK; 50 | let rc = libpcre_sys::pcre_exec(code, extra, subject, length, startoffset, converted_options, ovector, ovecsize); 51 | if rc == PCRE_ERROR_NOMATCH { 52 | return -1; 53 | } else if rc < 0 && rc != PCRE_ERROR_NULL { 54 | panic!("pcre_exec"); 55 | } 56 | 57 | rc 58 | } 59 | 60 | pub unsafe fn pcre_free(ptr: *mut c_void) { 61 | libpcre_sys::pcre_free(ptr); 62 | } 63 | 64 | pub unsafe fn pcre_free_study(extra: *mut ::PcreExtra) { 65 | libpcre_sys::pcre_free_study(extra); 66 | } 67 | 68 | pub unsafe fn pcre_fullinfo(code: *const pcre, extra: *const ::PcreExtra, what: fullinfo_field, where_: *mut c_void) { 69 | assert!(!code.is_null()); 70 | let rc = libpcre_sys::pcre_fullinfo(code, extra, what, where_); 71 | if rc < 0 && rc != PCRE_ERROR_NULL { 72 | panic!("pcre_fullinfo"); 73 | } 74 | } 75 | 76 | pub unsafe fn pcre_refcount(code: *mut ::detail::pcre, adjust: c_int) -> c_int { 77 | assert!(!code.is_null()); 78 | let curr_refcount = libpcre_sys::pcre_refcount(code, 0); 79 | if curr_refcount + adjust < 0 { 80 | panic!("refcount underflow"); 81 | } else if curr_refcount + adjust > 65535 { 82 | panic!("refcount overflow"); 83 | } 84 | libpcre_sys::pcre_refcount(code, adjust) 85 | } 86 | 87 | pub unsafe fn pcre_study(code: *const ::detail::pcre, options: 
&EnumSet<::StudyOption>) -> *mut ::PcreExtra { 88 | assert!(!code.is_null()); 89 | let converted_options = options.iter().fold(0, |converted_options, option| converted_options | (option as study_options)); 90 | let mut err: *const c_char = ptr::null(); 91 | let extra = libpcre_sys::pcre_study(code, converted_options, &mut err); 92 | // "The third argument for pcre_study() is a pointer for an error message. If 93 | // studying succeeds (even if no data is returned), the variable it points to is 94 | // set to NULL. Otherwise it is set to point to a textual error message. This is 95 | // a static string that is part of the library. You must not try to free it." 96 | // http://pcre.org/pcre.txt 97 | if !err.is_null() { 98 | let err_cstr = CStr::from_ptr(err); 99 | match String::from_utf8(Vec::from(err_cstr.to_bytes())) { 100 | Err(_) => panic!("pcre_study() failed"), 101 | Ok(err_str) => panic!("pcre_study() failed: {}", err_str) 102 | } 103 | panic!("pcre_study"); 104 | } 105 | assert!(err.is_null()); 106 | 107 | extra 108 | } 109 | 110 | pub fn pcre_version() -> String { 111 | let version_cstr = unsafe { CStr::from_ptr(libpcre_sys::pcre_version()) }; 112 | String::from_utf8(Vec::from(version_cstr.to_bytes())).unwrap() 113 | } 114 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015 The rust-pcre authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 
8 | 9 | extern crate enum_set; 10 | extern crate libc; 11 | extern crate libpcre_sys; 12 | 13 | use enum_set::{CLike, EnumSet}; 14 | use libc::{c_char, c_int, c_uchar, c_void}; 15 | use std::collections::{BTreeMap}; 16 | use std::ffi::{CStr, CString}; 17 | use std::marker::{PhantomData}; 18 | use std::option::{Option}; 19 | use std::ptr; 20 | use std::result::{Result}; 21 | use std::string::{String}; 22 | use std::vec::{Vec}; 23 | 24 | mod detail; 25 | 26 | #[derive(Clone)] 27 | pub enum CompileOption { 28 | Caseless = 0x00000001, 29 | Multiline = 0x00000002, 30 | DotAll = 0x00000004, 31 | Extended = 0x00000008, 32 | Anchored = 0x00000010, 33 | DollarEndOnly = 0x00000020, 34 | Extra = 0x00000040, 35 | Ungreedy = 0x00000200, 36 | NoAutoCapture = 0x00001000, 37 | AutoCallout = 0x00004000, 38 | FirstLine = 0x00040000, 39 | DupNames = 0x00080000, 40 | NewlineCR = 0x00100000, 41 | NewlineLF = 0x00200000, 42 | NewlineCRLF = 0x00300000, 43 | NewlineAny = 0x00400000, 44 | NewlineAnyCRLF = 0x00500000, 45 | BsrAnyCRLF = 0x00800000, 46 | BsrUnicode = 0x01000000, 47 | JavaScriptCompat = 0x02000000, 48 | Ucp = 0x20000000 49 | } 50 | 51 | #[derive(Clone)] 52 | pub enum ExecOption { 53 | ExecAnchored = 0x00000010, 54 | ExecNotBol = 0x00000080, 55 | ExecNotEol = 0x00000100, 56 | ExecNotEmpty = 0x00000400, 57 | ExecPartialSoft = 0x00008000, 58 | ExecNewlineCR = 0x00100000, 59 | ExecNewlineLF = 0x00200000, 60 | ExecNewlineCRLF = 0x00300000, 61 | ExecNewlineAny = 0x00400000, 62 | ExecNewlineAnyCRLF = 0x00500000, 63 | ExecBsrAnyCRLF = 0x00800000, 64 | ExecBsrUnicode = 0x01000000, 65 | ExecNoStartOptimise = 0x04000000, 66 | ExecPartialHard = 0x08000000, 67 | ExecNotEmptyAtStart = 0x10000000 68 | } 69 | 70 | #[allow(non_upper_case_globals)] 71 | pub const ExecPartial: ExecOption = ExecOption::ExecPartialSoft; 72 | #[allow(non_upper_case_globals)] 73 | pub const ExecNoStartOptimize: ExecOption = ExecOption::ExecNoStartOptimise; 74 | 75 | #[derive(Clone)] 76 | pub enum StudyOption { 77 | 
StudyJitCompile = 0x0001, 78 | StudyJitPartialSoftCompile = 0x0002, 79 | StudyJitPartialHardCompile = 0x0004, 80 | 81 | /// Always create an extra block. Note: Requires PCRE version 8.32 or later. 82 | StudyExtraNeeded = 0x0008 83 | } 84 | 85 | #[derive(Debug)] 86 | pub struct CompilationError { 87 | 88 | opt_err: Option, 89 | 90 | erroffset: c_int 91 | 92 | } 93 | 94 | /// Wrapper for libpcre's `pcre` object (representing a compiled regular expression). 95 | #[derive(Debug)] 96 | pub struct Pcre { 97 | 98 | code: *const detail::pcre, 99 | 100 | extra: *mut PcreExtra, 101 | 102 | capture_count_: c_int, 103 | 104 | /// A spot to place a pointer-to-mark name string. 105 | mark_: *mut c_uchar 106 | 107 | } 108 | 109 | pub type PcreExtra = libpcre_sys::pcre_extra; 110 | 111 | /// Represents a match of a subject string against a regular expression. 112 | pub struct Match<'a> { 113 | 114 | subject: &'a str, 115 | 116 | partial_ovector: Vec, 117 | 118 | string_count_: c_int 119 | 120 | } 121 | 122 | /// Iterator type for iterating matches within a subject string. 
pub struct MatchIterator<'a, 'p> {

    // Compiled pattern being matched.
    code: *const detail::pcre,

    // Extra block of the pattern (may be null).
    extra: *const PcreExtra,

    // Number of capture groups in the pattern.
    capture_count: c_int,

    // Subject string being scanned.
    subject: &'a str,

    // Offset within `subject` at which the next search starts.
    offset: c_int,

    // Matching options applied to every search.
    options: EnumSet<ExecOption>,

    // Scratch output vector handed to `pcre_exec`.
    ovector: Vec<c_int>,

    // Ties the iterator's lifetime to a mutable borrow of the originating `Pcre`.
    _marker: PhantomData<&'p mut Pcre>

}

// `EnumSet` identifies a member by a small index, so every option is given a dense
// number (starting at 1) that is unrelated to its libpcre bit value.
impl CLike for CompileOption {
    unsafe fn from_u32(n: u32) -> CompileOption {
        match n {
            1 => CompileOption::Caseless,
            2 => CompileOption::Multiline,
            3 => CompileOption::DotAll,
            4 => CompileOption::Extended,
            5 => CompileOption::Anchored,
            6 => CompileOption::DollarEndOnly,
            7 => CompileOption::Extra,
            8 => CompileOption::Ungreedy,
            9 => CompileOption::NoAutoCapture,
            10 => CompileOption::AutoCallout,
            11 => CompileOption::FirstLine,
            12 => CompileOption::DupNames,
            13 => CompileOption::NewlineCR,
            14 => CompileOption::NewlineLF,
            15 => CompileOption::NewlineCRLF,
            16 => CompileOption::NewlineAny,
            17 => CompileOption::NewlineAnyCRLF,
            18 => CompileOption::BsrAnyCRLF,
            19 => CompileOption::BsrUnicode,
            20 => CompileOption::JavaScriptCompat,
            21 => CompileOption::Ucp,
            _ => panic!("unknown CompileOption number {}", n)
        }
    }

    fn to_u32(&self) -> u32 {
        match *self {
            CompileOption::Caseless => 1,
            CompileOption::Multiline => 2,
            CompileOption::DotAll => 3,
            CompileOption::Extended => 4,
            CompileOption::Anchored => 5,
            CompileOption::DollarEndOnly => 6,
            CompileOption::Extra => 7,
            CompileOption::Ungreedy => 8,
            CompileOption::NoAutoCapture => 9,
            CompileOption::AutoCallout => 10,
            CompileOption::FirstLine => 11,
            CompileOption::DupNames => 12,
            CompileOption::NewlineCR => 13,
            CompileOption::NewlineLF => 14,
            CompileOption::NewlineCRLF => 15,
            CompileOption::NewlineAny => 16,
            CompileOption::NewlineAnyCRLF => 17,
            CompileOption::BsrAnyCRLF => 18,
            CompileOption::BsrUnicode => 19,
            CompileOption::JavaScriptCompat => 20,
            CompileOption::Ucp => 21
        }
    }
}

impl CLike for ExecOption {
    unsafe fn from_u32(n: u32) -> ExecOption {
        match n {
            1 => ExecOption::ExecAnchored,
            2 => ExecOption::ExecNotBol,
            3 => ExecOption::ExecNotEol,
            4 => ExecOption::ExecNotEmpty,
            5 => ExecOption::ExecPartialSoft,
            6 => ExecOption::ExecNewlineCR,
            7 => ExecOption::ExecNewlineLF,
            8 => ExecOption::ExecNewlineCRLF,
            9 => ExecOption::ExecNewlineAny,
            10 => ExecOption::ExecNewlineAnyCRLF,
            11 => ExecOption::ExecBsrAnyCRLF,
            12 => ExecOption::ExecBsrUnicode,
            13 => ExecOption::ExecNoStartOptimise,
            14 => ExecOption::ExecPartialHard,
            15 => ExecOption::ExecNotEmptyAtStart,
            _ => panic!("unknown ExecOption number {}", n)
        }
    }

    fn to_u32(&self) -> u32 {
        match *self {
            ExecOption::ExecAnchored => 1,
            ExecOption::ExecNotBol => 2,
            ExecOption::ExecNotEol => 3,
            ExecOption::ExecNotEmpty => 4,
            ExecOption::ExecPartialSoft => 5,
            ExecOption::ExecNewlineCR => 6,
            ExecOption::ExecNewlineLF => 7,
            ExecOption::ExecNewlineCRLF => 8,
            ExecOption::ExecNewlineAny => 9,
            ExecOption::ExecNewlineAnyCRLF => 10,
            ExecOption::ExecBsrAnyCRLF => 11,
            ExecOption::ExecBsrUnicode => 12,
            ExecOption::ExecNoStartOptimise => 13,
            ExecOption::ExecPartialHard => 14,
            ExecOption::ExecNotEmptyAtStart => 15
        }
    }
}

impl CLike for StudyOption {
    unsafe fn from_u32(n: u32) -> StudyOption {
        match n {
            1 => StudyOption::StudyJitCompile,
            2 => StudyOption::StudyJitPartialSoftCompile,
            3 => StudyOption::StudyJitPartialHardCompile,
            4 => StudyOption::StudyExtraNeeded,
            _ => panic!("unknown StudyOption number {}", n)
        }
    }

    fn to_u32(&self) -> u32 {
        match *self {
            StudyOption::StudyJitCompile => 1,
            StudyOption::StudyJitPartialSoftCompile => 2,
            StudyOption::StudyJitPartialHardCompile => 3,
            StudyOption::StudyExtraNeeded => 4
        }
    }
}

impl CompilationError {
    /// Returns a copy of the error message, if there is one.
    pub fn message(&self) -> Option<String> {
        self.opt_err.as_ref().cloned()
    }

    /// Returns the offset at which compilation failed.
    pub fn offset(&self) -> usize {
        self.erroffset as usize
    }
}

impl std::fmt::Display for CompilationError {
    /// Writes the failure offset, followed by the error message when one is available.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let offset = self.erroffset as usize;
        if let Some(ref message) = self.opt_err {
            write!(f, "compilation failed at offset {}: {}", offset, message)
        } else {
            write!(f, "compilation failed at offset {}", offset)
        }
    }
}

impl Pcre {
    /// Compiles the given regular expression.
289 | /// 290 | /// # Argument 291 | /// * `pattern` - The regular expression. 292 | pub fn compile(pattern: &str) -> Result { 293 | let no_options: EnumSet = EnumSet::new(); 294 | Pcre::compile_with_options(pattern, &no_options) 295 | } 296 | 297 | /// Compiles a regular expression using the given bitwise-OR'd options `options`. 298 | /// 299 | /// # Arguments 300 | /// * `pattern` - The regular expression. 301 | /// * `options` - Bitwise-OR'd compilation options. See the libpcre manpages, 302 | /// `man 3 pcre_compile`, for more information. 303 | pub fn compile_with_options(pattern: &str, options: &EnumSet) -> Result { 304 | let pattern_cstring = CString::new(pattern).unwrap(); 305 | unsafe { 306 | // Use the default character tables. 307 | let tableptr: *const c_uchar = ptr::null(); 308 | match detail::pcre_compile(pattern_cstring.as_ptr(), options, tableptr) { 309 | Err((opt_err, erroffset)) => Err(CompilationError { 310 | opt_err: opt_err, 311 | erroffset: erroffset 312 | }), 313 | Ok(mut_code) => { 314 | let code = mut_code as *const detail::pcre; 315 | assert!(!code.is_null()); 316 | // Take a reference. 317 | detail::pcre_refcount(code as *mut detail::pcre, 1); 318 | 319 | let extra: *mut PcreExtra = ptr::null_mut(); 320 | 321 | let mut capture_count: c_int = 0; 322 | detail::pcre_fullinfo(code, extra as *const PcreExtra, libpcre_sys::PCRE_INFO_CAPTURECOUNT, 323 | &mut capture_count as *mut c_int as *mut c_void); 324 | 325 | Ok(Pcre { 326 | code: code, 327 | extra: extra, 328 | capture_count_: capture_count, 329 | mark_: ptr::null_mut() 330 | }) 331 | } 332 | } 333 | } 334 | } 335 | 336 | /// Returns the number of capture groups in the regular expression, including one for 337 | /// each named capture group. 338 | /// 339 | /// This count does not include "group 0", which is the full substring within a subject 340 | /// string that matches the regular expression. 
341 | /// 342 | /// # See also 343 | /// * [name_count()](#method.name_count) - Returns the number of named capture groups. 344 | pub fn capture_count(&self) -> usize { 345 | self.capture_count_ as usize 346 | } 347 | 348 | /// Enables the use of the mark field when matching the compiled regular expression. The 349 | /// pattern must have been previously studied and an extra block must have been created. 350 | /// 351 | /// To ensure that an extra block has been created, call [study_with_options()](#method.study_with_options) 352 | /// passing the [`StudyExtraNeeded`](enum.StudyOption.html#variant.StudyExtraNeeded) study option. 353 | /// 354 | /// # Return value 355 | /// `true` if the use of the mark field could be enabled. `false` otherwise, which signifies 356 | /// that an extra block needs to be created. 357 | pub fn enable_mark(&mut self) -> bool { 358 | unsafe { 359 | if self.extra.is_null() { 360 | false 361 | } else { 362 | (*self.extra).set_mark(&mut self.mark_); 363 | true 364 | } 365 | } 366 | } 367 | 368 | /// Returns the extra block, if one has been created. 369 | pub fn extra(&mut self) -> Option<&mut PcreExtra> { 370 | unsafe { 371 | if self.extra.is_null() { 372 | None 373 | } else { 374 | Some(&mut *(self.extra)) 375 | } 376 | } 377 | } 378 | 379 | /// Matches the compiled regular expression against a given subject string `subject`. 380 | /// If no match is found, then `None` is returned. Otherwise, a `Match` object is returned 381 | /// which provides access to the captured substrings as slices of the subject string. 382 | /// 383 | /// # Argument 384 | /// * `subject` - The subject string. 385 | /// 386 | /// # Performance notes 387 | /// This method is intended to be used to find individual matches. If multiple matches 388 | /// are desired, then a `MatchIterator` should be used because it is more efficient. 389 | /// 390 | /// If a regular expression will be used often, it might be worth studying it to possibly 391 | /// speed up matching. 
See the [study()](#method.study) method. 392 | #[inline] 393 | pub fn exec<'a, 'p>(&'p mut self, subject: &'a str) -> Option> { 394 | self.exec_from(subject, 0) 395 | } 396 | 397 | /// Matches the compiled regular expression against a given subject string `subject` 398 | /// starting at offset `startoffset` within the subject string. If no match is found, 399 | /// then `None` is returned. Otherwise, a `Match` object is returned which provides 400 | /// access to the captured substrings as slices of the subject string. 401 | /// 402 | /// # Arguments 403 | /// * `subject` - The subject string. 404 | /// * `startoffset` - Starting offset within `subject` at which to begin looking for 405 | /// a match. 406 | /// 407 | /// # Performance notes 408 | /// This method is intended to be used to find individual matches. If multiple matches 409 | /// are desired, then a `MatchIterator` should be used because it is more efficient. 410 | /// 411 | /// If a regular expression will be used often, it might be worth studying it to possibly 412 | /// speed up matching. See the [study()](#method.study) method. 413 | #[inline] 414 | pub fn exec_from<'a, 'p>(&'p mut self, subject: &'a str, startoffset: usize) -> Option> { 415 | let no_options: EnumSet = EnumSet::new(); 416 | self.exec_from_with_options(subject, startoffset, &no_options) 417 | } 418 | 419 | /// Matches the compiled regular expression against a given subject string `subject` 420 | /// starting at offset `startoffset` within the subject string and using the given 421 | /// bitwise-OR'd matching options `options`. If no match is found, then `None` is 422 | /// returned. Otherwise, a `Match` object is returned which provides access to the 423 | /// captured substrings as slices of the subject string. 424 | /// 425 | /// # Arguments 426 | /// * `subject` - The subject string. 427 | /// * `startoffset` - Starting offset within `subject` at which to begin looking for 428 | /// a match. 
429 | /// * `options` - Bitwise-OR'd matching options. See the libpcre manpages, `man 3 pcre_exec`, 430 | /// for more information. 431 | /// 432 | /// # Performance notes 433 | /// This method is intended to be used to find individual matches. If multiple matches 434 | /// are desired, then a `MatchIterator` should be used because it is more efficient. 435 | /// 436 | /// If a regular expression will be used often, it might be worth studying it to possibly 437 | /// speed up matching. See the [study()](#method.study) method. 438 | #[inline] 439 | pub fn exec_from_with_options<'a, 'p>(&'p mut self, subject: &'a str, startoffset: usize, options: &EnumSet) -> Option> { 440 | let ovecsize = (self.capture_count_ + 1) * 3; 441 | let mut ovector = vec![0 as c_int; ovecsize as usize]; 442 | 443 | unsafe { 444 | let rc = detail::pcre_exec(self.code, 445 | self.extra as *const PcreExtra, 446 | subject.as_ptr() as *const c_char, 447 | subject.len() as c_int, 448 | startoffset as c_int, 449 | options, 450 | ovector.as_mut_ptr(), 451 | ovecsize as c_int); 452 | if rc >= 0 { 453 | Some(Match { 454 | subject: subject, 455 | partial_ovector: ovector[..(((self.capture_count_ + 1) * 2) as usize)].to_vec(), 456 | string_count_: rc 457 | }) 458 | } else { 459 | None 460 | } 461 | } 462 | } 463 | 464 | /// Returns the mark name from PCRE if set. 465 | /// 466 | /// # Return value 467 | /// `Some(str)` if PCRE returned a value for the mark. 468 | /// `None` if either there was no mark set or [enable_mark()](#method.enable_mark) was not called, 469 | /// or was unsuccessful. 470 | #[inline] 471 | pub fn mark(&self) -> Option { 472 | self.mark_bytes().map (|bytes| String::from_utf8(Vec::from(bytes)).unwrap()) 473 | } 474 | 475 | /// Returns the mark name from PCRE if set. 476 | /// 477 | /// # Return value 478 | /// `Some(&[u8])` if PCRE returned a value for the mark. 
479 | /// `None` if either there was no mark set or [enable_mark()](#method.enable_mark) was not called, 480 | /// or was unsuccessful. 481 | #[inline] 482 | pub fn mark_bytes(&self) -> Option<&[u8]> { 483 | unsafe { 484 | if self.mark_.is_null() { 485 | None 486 | } else { 487 | let mark_cstr = CStr::from_ptr(self.mark_ as *const c_char); 488 | Some(mark_cstr.to_bytes()) 489 | } 490 | } 491 | } 492 | 493 | /// Creates a `MatchIterator` for iterating through matches within the given subject 494 | /// string `subject`. 495 | /// 496 | /// # Argument 497 | /// * `subject` - The subject string. 498 | #[inline] 499 | pub fn matches<'a, 'p>(&'p mut self, subject: &'a str) -> MatchIterator<'a, 'p> { 500 | let no_options: EnumSet = EnumSet::new(); 501 | self.matches_with_options(subject, &no_options) 502 | } 503 | 504 | /// Creates a `MatchIterator` for iterating through matches within the given subject 505 | /// string `subject` using the given bitwise-OR'd matching options `options`. 506 | /// 507 | /// # Arguments 508 | /// * `subject` - The subject string. 509 | /// * `options` - Bitwise-OR'd matching options. See the libpcre manpages, `man 3 pcre_exec`, 510 | /// for more information. 511 | #[inline] 512 | pub fn matches_with_options<'a, 'p>(&'p mut self, subject: &'a str, options: &EnumSet) -> MatchIterator<'a, 'p> { 513 | unsafe { 514 | let ovecsize = (self.capture_count_ + 1) * 3; 515 | MatchIterator { 516 | code: { detail::pcre_refcount(self.code as *mut detail::pcre, 1); self.code }, 517 | extra: self.extra as *const PcreExtra, 518 | capture_count: self.capture_count_, 519 | subject: subject, 520 | offset: 0, 521 | options: options.clone(), 522 | ovector: vec![0 as c_int; ovecsize as usize], 523 | _marker: PhantomData 524 | } 525 | } 526 | } 527 | 528 | /// Returns the number of named capture groups in the regular expression. 
pub fn name_count(&self) -> usize {
    let mut name_count: c_int = 0;
    // SAFETY: `name_count` is a live `c_int` and is passed as the output location for
    // `PCRE_INFO_NAMECOUNT`.
    unsafe {
        detail::pcre_fullinfo(self.code, self.extra as *const PcreExtra, libpcre_sys::PCRE_INFO_NAMECOUNT, &mut name_count as *mut c_int as *mut c_void);
    }
    name_count as usize
}

/// Creates a name-to-number translation table that maps the name of each named capture
/// group to the assigned group numbers.
///
/// The value type of the returned `BTreeMap` is a `usize` vector because there can be
/// more than one group number for a given name if the PCRE_DUPNAMES option is used
/// when compiling the regular expression.
///
/// # Panics
/// Panics if a group name is not valid UTF-8.
pub fn name_table(&self) -> BTreeMap<String, Vec<usize>> {
    let name_count = self.name_count();
    let mut name_table: BTreeMap<String, Vec<usize>> = BTreeMap::new();

    unsafe {
        let mut tabptr: *const c_uchar = ptr::null();
        detail::pcre_fullinfo(self.code, self.extra as *const PcreExtra, libpcre_sys::PCRE_INFO_NAMETABLE, &mut tabptr as *mut *const c_uchar as *mut c_void);
        let mut name_entry_size: c_int = 0;
        detail::pcre_fullinfo(self.code, self.extra as *const PcreExtra, libpcre_sys::PCRE_INFO_NAMEENTRYSIZE, &mut name_entry_size as *mut c_int as *mut c_void);

        // Never walk a table that libpcre did not hand back.
        if tabptr.is_null() || name_entry_size <= 0 {
            return name_table;
        }

        for _ in 0..name_count {
            // Each entry starts with the group number as two bytes, most significant
            // first, followed by the NUL-terminated group name.
            let n: usize = ((ptr::read(tabptr) as usize) << 8) | (ptr::read(tabptr.offset(1)) as usize);
            let name_cstr = CStr::from_ptr(tabptr.offset(2) as *const c_char);
            let name: String = String::from_utf8(name_cstr.to_bytes().to_vec()).unwrap();

            // Single lookup: create the vector on first sight of the name, then append.
            name_table.entry(name).or_insert_with(Vec::new).push(n);

            tabptr = tabptr.offset(name_entry_size as isize);
        }
    }

    name_table
}

/// Studies the regular expression to see if additional information can be extracted
/// which might speed up matching.
///
/// # Return value
/// `true` if additional information could be extracted. `false` otherwise.
pub fn study(&mut self) -> bool {
    let no_options: EnumSet<StudyOption> = EnumSet::new();
    self.study_with_options(&no_options)
}

/// Studies the regular expression using the given bitwise-OR'd study options `options`
/// to see if additional information can be extracted which might speed up matching.
///
/// # Argument
/// * `options` - Study options. See the libpcre manpages, `man 3 pcre_study`, for more
///   information about each option.
///
/// # Return value
/// `true` if additional information could be extracted or the [`StudyExtraNeeded`](enum.StudyOption.html#variant.StudyExtraNeeded)
/// option was passed. `false` otherwise.
pub fn study_with_options(&mut self, options: &EnumSet<StudyOption>) -> bool {
    unsafe {
        // If something else has a reference to `code` then it probably has a pointer to
        // the current study data (if any). Thus, we shouldn't free the current study data
        // in that case.
        if detail::pcre_refcount(self.code as *mut detail::pcre, 0) != 1 {
            return false;
        }

        // Free any current study data and forget the pointer before studying again, so
        // that `self.extra` never refers to freed memory.
        detail::pcre_free_study(self.extra as *mut PcreExtra);
        self.extra = ptr::null_mut();

        self.extra = detail::pcre_study(self.code, options);
        !self.extra.is_null()
    }
}
} // end of `impl Pcre`

impl Drop for Pcre {
    /// Releases this handle's reference to the compiled pattern. The pattern and its
    /// study data are freed only when the reference count drops to zero.
    fn drop(&mut self) {
        unsafe {
            if detail::pcre_refcount(self.code as *mut detail::pcre, -1) == 0 {
                detail::pcre_free_study(self.extra as *mut PcreExtra);
                detail::pcre_free(self.code as *mut detail::pcre as *mut c_void);
            }
            self.extra = ptr::null_mut();
            self.code = ptr::null();
        }
    }
}

impl<'a> Match<'a> {
    /// Returns the start index within the subject string of capture group `n`.
    ///
    /// If the capture group is present in the pattern but wasn't captured then the start of it will be `usize::max_value()`.
    /// Happens with the optional groups, `/(optional)?/`.
    pub fn group_start(&self, n: usize) -> usize {
        self.partial_ovector[n * 2] as usize
    }

    /// Returns the end index within the subject string of capture group `n`.
    ///
    /// If the capture group is present in the pattern but wasn't captured then the end of it will be `usize::max_value()`.
    /// Happens with the optional groups, `/(optional)?/`.
    pub fn group_end(&self, n: usize) -> usize {
        self.partial_ovector[n * 2 + 1] as usize
    }

    /// Returns the length of the substring for capture group `n`.
    pub fn group_len(&self, n: usize) -> usize {
        let start = self.partial_ovector[n * 2];
        let end = self.partial_ovector[n * 2 + 1];
        (end - start) as usize
    }

    /// Returns the substring for capture group `n` as a slice.
651 | #[inline] 652 | pub fn group(&self, n: usize) -> &'a str { 653 | let group_offsets = &self.partial_ovector[((n * 2) as usize)..]; 654 | let start = group_offsets[0]; 655 | let end = group_offsets[1]; 656 | &self.subject[(start as usize)..(end as usize)] 657 | } 658 | 659 | /// Returns the number of substrings captured. 660 | pub fn string_count(&self) -> usize { 661 | self.string_count_ as usize 662 | } 663 | } 664 | 665 | impl<'a, 'p> Drop for MatchIterator<'a, 'p> { 666 | fn drop(&mut self) { 667 | unsafe { 668 | if detail::pcre_refcount(self.code as *mut detail::pcre, -1) == 0 { 669 | detail::pcre_free_study(self.extra as *mut PcreExtra); 670 | detail::pcre_free(self.code as *mut detail::pcre as *mut c_void); 671 | } 672 | self.extra = ptr::null(); 673 | self.code = ptr::null(); 674 | } 675 | } 676 | } 677 | 678 | impl<'a, 'p> Iterator for MatchIterator<'a, 'p> { 679 | type Item = Match<'a>; 680 | 681 | /// Gets the next match. 682 | #[inline] 683 | fn next(&mut self) -> Option> { 684 | unsafe { 685 | let rc = detail::pcre_exec(self.code, 686 | self.extra, 687 | self.subject.as_ptr() as *const c_char, 688 | self.subject.len() as c_int, 689 | self.offset, 690 | &self.options, 691 | self.ovector.as_mut_ptr(), 692 | self.ovector.len() as c_int); 693 | if rc >= 0 { 694 | // Update the iterator state. 695 | self.offset = self.ovector[1]; 696 | 697 | Some(Match { 698 | subject: self.subject, 699 | partial_ovector: self.ovector[..(((self.capture_count + 1) * 2) as usize)].to_vec(), 700 | string_count_: rc 701 | }) 702 | } else { 703 | None 704 | } 705 | } 706 | } 707 | } 708 | 709 | /// Returns libpcre version information. 
710 | pub fn pcre_version() -> String { 711 | detail::pcre_version() 712 | } 713 | -------------------------------------------------------------------------------- /tests/test.rs: -------------------------------------------------------------------------------- 1 | extern crate enum_set; 2 | extern crate pcre; 3 | 4 | use enum_set::{EnumSet}; 5 | use pcre::{CompileOption, Pcre, StudyOption}; 6 | 7 | #[test] 8 | #[should_panic] 9 | fn test_compile_nul() { 10 | // Nul bytes are not allowed in the pattern string. 11 | drop(Pcre::compile("\0abc")); 12 | } 13 | 14 | #[test] 15 | fn test_compile_bad_pattern() { 16 | let err = Pcre::compile("[").unwrap_err(); 17 | assert_eq!(err.offset(), 1); 18 | } 19 | 20 | #[test] 21 | #[should_panic] 22 | fn test_compile_bad_pattern2() { 23 | Pcre::compile("[").unwrap(); // Should be Err, will fail. 24 | } 25 | 26 | #[test] 27 | fn test_compile_capture_count() { 28 | let re = Pcre::compile("(?:abc)(def)").unwrap(); 29 | assert_eq!(re.capture_count(), 1); 30 | } 31 | 32 | #[test] 33 | fn test_exec_basic() { 34 | let mut re = Pcre::compile("^...$").unwrap(); 35 | assert_eq!(re.capture_count(), 0); 36 | let m = re.exec("abc").unwrap(); 37 | assert_eq!(m.group(0), "abc"); 38 | } 39 | 40 | #[test] 41 | fn test_exec_no_match() { 42 | let mut re = Pcre::compile("abc").unwrap(); 43 | assert!(re.exec("def").is_none()); 44 | } 45 | 46 | #[test] 47 | fn test_exec_nul_byte() { 48 | // Nul bytes *are* allowed in subject strings, however. 
49 | let mut re = Pcre::compile("abc\\0def").unwrap(); 50 | let m = re.exec("abc\0def").unwrap(); 51 | assert_eq!(m.group(0), "abc\0def"); 52 | } 53 | 54 | #[test] 55 | fn test_exec_from_basic() { 56 | let mut re = Pcre::compile("abc").unwrap(); 57 | let subject = "abcabc"; 58 | let m1 = re.exec_from(subject, 1).unwrap(); 59 | assert_eq!(m1.group_start(0), 3); 60 | assert_eq!(m1.group_end(0), 6); 61 | assert_eq!(m1.group_len(0), 3); 62 | let m2 = re.exec(subject).unwrap(); 63 | assert_eq!(m2.group_start(0), 0); 64 | } 65 | 66 | #[test] 67 | fn test_study_basic() { 68 | let mut re = Pcre::compile("abc").unwrap(); 69 | let mut study_res = re.study(); 70 | assert!(study_res); 71 | // Re-study the pattern two more times (to check for leaks when the test program 72 | // is run through Valgrind). 73 | study_res = re.study(); 74 | assert!(study_res); 75 | study_res = re.study(); 76 | assert!(study_res); 77 | } 78 | 79 | #[test] 80 | fn test_matches_basic() { 81 | let subject = "\0abc1111abcabc___ababc+a"; 82 | let mut re = Pcre::compile("abc").unwrap(); 83 | let mut it = re.matches(subject); 84 | 85 | let mut opt_m = it.next(); 86 | assert!(opt_m.is_some()); 87 | let mut m = opt_m.unwrap(); 88 | assert_eq!(m.group_start(0), 1); 89 | assert_eq!(m.group_end(0), 4); 90 | 91 | let opt_m2 = it.next(); 92 | assert!(opt_m2.is_some()); 93 | let m2 = opt_m2.unwrap(); 94 | assert_eq!(m2.group_start(0), 8); 95 | assert_eq!(m2.group_end(0), 11); 96 | // Verify that getting the next match has not changed the first match data. 
97 | assert_eq!(m.group_start(0), 1); 98 | assert_eq!(m.group_end(0), 4); 99 | 100 | opt_m = it.next(); 101 | assert!(opt_m.is_some()); 102 | m = opt_m.unwrap(); 103 | assert_eq!(m.group_start(0), 11); 104 | assert_eq!(m.group_end(0), 14); 105 | 106 | opt_m = it.next(); 107 | assert!(opt_m.is_some()); 108 | m = opt_m.unwrap(); 109 | assert_eq!(m.group_start(0), 19); 110 | assert_eq!(m.group_end(0), 22); 111 | 112 | opt_m = it.next(); 113 | assert!(opt_m.is_none()); 114 | } 115 | 116 | #[test] 117 | fn test_extra_mark() { 118 | let pattern = "X(*MARK:A)Y|X(*MARK:B)Z"; 119 | let subject1 = "XY"; 120 | let subject2 = "XZ"; 121 | 122 | let mut compile_options: EnumSet = EnumSet::new(); 123 | compile_options.insert(CompileOption::Extra); 124 | 125 | let mut re = Pcre::compile_with_options(pattern, &compile_options).unwrap(); 126 | 127 | // first try to get the mark from the compile to make sure it fails 128 | assert_eq!(re.mark(), None); 129 | 130 | let mut study_options: EnumSet = EnumSet::new(); 131 | //study_options.add(StudyOption::StudyExtraNeeded); 132 | study_options.insert(StudyOption::StudyJitCompile); 133 | let study = re.study_with_options(&study_options); 134 | // Double check to make sure the study worked 135 | assert!(study); 136 | 137 | // Now after studying, we still should not be able to get the mark (since we still need 138 | // to set the option in the extra AND execute it) 139 | assert_eq!(re.mark(), None); 140 | 141 | // set that I am using the extra mark field 142 | let extra = re.enable_mark(); 143 | // This will fail only if I didn't study first 144 | assert!(extra); 145 | 146 | // We still haven't run the pcre_exec yet so get mark should be None still 147 | assert_eq!(re.mark(), None); 148 | 149 | // Now execute and we should be able to get the mark 150 | let opt_m1 = re.exec(subject1); 151 | assert!(opt_m1.is_some()); 152 | 153 | // It should match XY 154 | let m1 = opt_m1.unwrap(); 155 | assert_eq!(m1.group(0), "XY"); 156 | 157 | // and the 
marked value should be A 158 | let mark1 = re.mark(); 159 | assert!(mark1.is_some()); 160 | assert_eq!(mark1.unwrap(), "A"); 161 | 162 | let opt_m2 = re.exec(subject2); 163 | assert!(opt_m2.is_some()); 164 | 165 | let m2 = opt_m2.unwrap(); 166 | // It should match XZ 167 | assert_eq!(m2.group(0), "XZ"); 168 | 169 | // and the marked value should be B 170 | assert_eq!(re.mark().unwrap(), "B"); 171 | } 172 | 173 | #[test] 174 | fn test_optional_capture() { 175 | let mut re = Pcre::compile("(foo)?bar").unwrap(); 176 | let subject = "bar"; 177 | let m1 = re.exec(subject).unwrap(); 178 | assert!(m1.group_start(0) == 0 && m1.group_end(0) == 3 && m1.group_len(0) == 3); // bar 179 | assert_eq!(m1.group_len(1), 0); 180 | // That might come out as a surprise. 181 | assert_eq!(m1.group_start(1), usize::max_value()); // c_int -1 182 | assert_eq!(m1.group_end(1), usize::max_value()); // c_int -1 183 | } 184 | --------------------------------------------------------------------------------