├── codegen ├── LICENSE-MIT ├── LICENSE-APACHE ├── Cargo.toml └── src │ ├── parser.rs │ └── lib.rs ├── lib ├── LICENSE-MIT ├── LICENSE-APACHE ├── src │ ├── lib.rs │ ├── input │ │ ├── mod.rs │ │ ├── length.rs │ │ ├── input.rs │ │ ├── show.rs │ │ ├── string.rs │ │ ├── pear.rs │ │ ├── text_file.rs │ │ ├── text.rs │ │ └── cursor.rs │ ├── error.rs │ ├── result.rs │ ├── debug.rs │ ├── expected.rs │ ├── macros.rs │ ├── combinators.rs │ └── parsers.rs ├── Cargo.toml └── tests │ ├── contextualize.rs │ ├── rewind.rs │ ├── peek.rs │ ├── custom_expected.rs │ ├── marker.rs │ └── parsers.rs ├── examples ├── http │ ├── version.txt │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── uri │ ├── version.txt │ ├── fuzz │ │ ├── .gitignore │ │ ├── fuzz_targets │ │ │ ├── fuzz_uri.rs │ │ │ └── fuzz_uri_display.rs │ │ └── Cargo.toml │ ├── Cargo.toml │ └── src │ │ ├── utils.rs │ │ ├── tables.rs │ │ ├── indexed.rs │ │ └── old.rs ├── media_type │ ├── version.txt │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── group │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── ini │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── parens │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── json │ ├── Cargo.toml │ ├── src │ │ ├── main.rs │ │ └── lib.rs │ ├── benches │ │ └── json.rs │ └── assets │ │ └── simple.json └── old │ └── exprs │ └── src │ └── main.rs ├── README.md ├── .gitignore ├── Cargo.toml ├── scripts ├── bump_version.sh └── publish.sh ├── .github └── workflows │ └── test.yml ├── LICENSE-MIT └── LICENSE-APACHE /codegen/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /lib/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /examples/http/version.txt: -------------------------------------------------------------------------------- 1 
| HTTP/1.0 2 | -------------------------------------------------------------------------------- /examples/uri/version.txt: -------------------------------------------------------------------------------- 1 | HTTP/1.0 2 | -------------------------------------------------------------------------------- /lib/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /codegen/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /examples/media_type/version.txt: -------------------------------------------------------------------------------- 1 | HTTP/1.0 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Nothing to see here. Yet. 
(keep this a secret) 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | examples/json/assets/canada.json 4 | -------------------------------------------------------------------------------- /examples/uri/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target 3 | corpus 4 | artifacts 5 | -------------------------------------------------------------------------------- /examples/group/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "group" 3 | version = "0.0.0" 4 | workspace = "../../" 5 | edition = "2018" 6 | 7 | [dependencies] 8 | pear = { path = "../../lib" } 9 | -------------------------------------------------------------------------------- /examples/http/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "http_version" 3 | version = "0.0.0" 4 | workspace = "../../" 5 | publish = false 6 | 7 | [dependencies] 8 | pear = { path = "../../lib" } 9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "lib/", 4 | "codegen/", 5 | "examples/ini", 6 | "examples/group", 7 | "examples/http", 8 | "examples/parens", 9 | "examples/json", 10 | ] 11 | -------------------------------------------------------------------------------- /examples/uri/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "uri" 3 | version = "0.0.0" 4 | workspace = "../../" 5 | 6 | [dependencies] 7 | pear = { path = "../../lib" } 8 | pear_codegen = { path = "../../codegen" } 9 | 
-------------------------------------------------------------------------------- /examples/ini/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ini" 3 | version = "0.0.0" 4 | workspace = "../../" 5 | edition = "2018" 6 | 7 | [dependencies] 8 | pear = { path = "../../lib" } 9 | # time = "0.1" 10 | -------------------------------------------------------------------------------- /examples/uri/fuzz/fuzz_targets/fuzz_uri.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] extern crate libfuzzer_sys; 3 | extern crate uri; 4 | 5 | use uri::parse_bytes; 6 | 7 | fuzz_target!(|data: &[u8]| { 8 | let _ = parse_bytes(data); 9 | }); 10 | -------------------------------------------------------------------------------- /examples/media_type/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "media_type" 3 | version = "0.0.0" 4 | workspace = "../../" 5 | edition = "2018" 6 | 7 | [dependencies] 8 | pear = { path = "../../lib" } 9 | pear_codegen = { path = "../../codegen" } 10 | -------------------------------------------------------------------------------- /examples/parens/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "parens" 3 | version = "0.0.0" 4 | workspace = "../../" 5 | edition = "2018" 6 | 7 | [dependencies] 8 | pear = { path = "../../lib" } 9 | pear_codegen = { path = "../../codegen" } 10 | # time = "0.1" 11 | -------------------------------------------------------------------------------- /lib/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![warn(rust_2018_idioms)] 2 | 3 | #[doc(hidden)] pub use inlinable_string; 4 | 5 | #[macro_use] pub mod macros; 6 | pub mod input; 7 | pub mod result; 8 | pub mod error; 9 | pub mod parsers; 10 | pub mod combinators; 
11 | 12 | mod expected; 13 | 14 | #[doc(hidden)] pub mod debug; 15 | -------------------------------------------------------------------------------- /examples/json/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "json" 3 | version = "0.0.0" 4 | workspace = "../../" 5 | edition = "2018" 6 | 7 | [dependencies] 8 | pear = { path = "../../lib" } 9 | pear_codegen = { path = "../../codegen" } 10 | 11 | [dev-dependencies] 12 | criterion = "0.4" 13 | 14 | [[bench]] 15 | name = "json" 16 | harness = false 17 | -------------------------------------------------------------------------------- /scripts/bump_version.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | 3 | # 4 | # Bumps the version number from <old> to <new> on all libraries. 5 | # 6 | 7 | if [ -z ${1} ] || [ -z ${2} ]; then 8 | echo "Usage: $0 " 9 | echo "Example: $0 0.1.1 0.1.2" 10 | exit 1 11 | fi 12 | 13 | find . -name "*.toml" | xargs sed -i.bak "s/${1}/${2}/g" 14 | find . -name "*.bak" | xargs rm 15 | -------------------------------------------------------------------------------- /scripts/publish.sh: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env bash 2 | set -e 3 | 4 | # 5 | # Publishes the current versions of core and codegen to crates.io. 6 | # 7 | 8 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 9 | LIB_DIR="${SCRIPT_DIR}/../lib" 10 | CODEGEN_DIR="${SCRIPT_DIR}/../codegen" 11 | 12 | # Publish all the things. 13 | for dir in "${CODEGEN_DIR}" "${LIB_DIR}"; do 14 | pushd ${dir} 15 | echo ":::: Publishing '${dir}..." 
16 | cargo publish 17 | popd 18 | done 19 | -------------------------------------------------------------------------------- /lib/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pear" 3 | version = "0.2.9" 4 | authors = ["Sergio Benitez "] 5 | description = "A pear is a fruit." 6 | license = "MIT OR Apache-2.0" 7 | edition = "2018" 8 | repository = "https://github.com/SergioBenitez/Pear" 9 | 10 | [dependencies] 11 | yansi = { version = "1.0.0-rc.1", optional = true } 12 | pear_codegen = { version = "0.2.9", path = "../codegen" } 13 | inlinable_string = "0.1.12" 14 | 15 | [features] 16 | default = ["color"] 17 | color = ["yansi"] 18 | -------------------------------------------------------------------------------- /codegen/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pear_codegen" 3 | version = "0.2.9" 4 | authors = ["Sergio Benitez "] 5 | description = "A (codegen) pear is a fruit." 6 | license = "MIT OR Apache-2.0" 7 | edition = "2018" 8 | repository = "https://github.com/SergioBenitez/Pear" 9 | 10 | [lib] 11 | proc-macro = true 12 | 13 | [dependencies] 14 | quote = "1.0.30" 15 | proc-macro2 = { version = "1.0.50" } 16 | syn = { version = "2.0.30", features = ["full", "extra-traits", "visit-mut"] } 17 | proc-macro2-diagnostics = "0.10.1" 18 | 19 | [dev-dependencies] 20 | pear = { path = "../lib" } 21 | -------------------------------------------------------------------------------- /examples/uri/fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "uri-fuzz" 4 | version = "0.0.1" 5 | authors = ["Automatically generated"] 6 | publish = false 7 | 8 | [package.metadata] 9 | cargo-fuzz = true 10 | 11 | [dependencies.uri] 12 | path = ".." 
13 | 14 | [dependencies.libfuzzer-sys] 15 | git = "https://github.com/rust-fuzz/libfuzzer-sys.git" 16 | 17 | # Prevent this from interfering with workspaces 18 | [workspace] 19 | members = ["."] 20 | 21 | [[bin]] 22 | name = "fuzz_uri" 23 | path = "fuzz_targets/fuzz_uri.rs" 24 | 25 | [[bin]] 26 | name = "fuzz_uri_display" 27 | path = "fuzz_targets/fuzz_uri_display.rs" 28 | -------------------------------------------------------------------------------- /lib/tests/contextualize.rs: -------------------------------------------------------------------------------- 1 | use pear::input::{Pear, Text}; 2 | use pear::{macros::*, parsers::*}; 3 | 4 | type Result<'a, T> = pear::input::Result>; 5 | 6 | macro_rules! parse_me { 7 | ([$n:expr; $i:expr; $m:expr; $T:ty] $e:expr) => { 8 | (eat_slice($i, "a")?, $e, eat_slice($i, "c")?).1 9 | } 10 | } 11 | 12 | #[parser] 13 | fn combo<'a>(input: &mut Pear>) -> Result<'a, &'a str> { 14 | parse_me!(eat_slice("b")?) 15 | } 16 | 17 | #[test] 18 | fn text_contextualize() { 19 | let result = parse!(combo: Text::from("abc")); 20 | assert_eq!(result.unwrap(), "b"); 21 | } 22 | -------------------------------------------------------------------------------- /examples/uri/src/utils.rs: -------------------------------------------------------------------------------- 1 | // pub fn merge<'a, T>(a: &'a [T], b: &'a [T]) -> &'a [T] { 2 | // match (a.len(), b.len()) { 3 | // (_, 0) => a, 4 | // (0, _) => b, 5 | // (a_len, b_len) => unsafe { 6 | // let a_last: *const T = a.get_unchecked(a_len); 7 | // let b_first: *const T = b.get_unchecked(0);; 8 | // if a_last != b_first { 9 | // panic!("the two slices are not adjacent: {:?}, {:?}", a_last, b_first); 10 | // } 11 | 12 | // let a_first: *const T = a.get_unchecked(0); 13 | // ::std::slice::from_raw_parts(a_first, a_len + b_len) 14 | // } 15 | // } 16 | // } 17 | -------------------------------------------------------------------------------- /lib/src/input/mod.rs: 
-------------------------------------------------------------------------------- 1 | mod input; 2 | mod length; 3 | mod string; 4 | mod cursor; 5 | mod text; 6 | mod text_file; 7 | mod show; 8 | mod pear; 9 | 10 | pub use self::pear::{Pear, Debugger, Options}; 11 | pub use input::{Input, Rewind, Token, Slice, ParserInfo}; 12 | pub use cursor::{Cursor, Extent}; 13 | pub use text::{Text, Span}; 14 | pub use length::Length; 15 | pub use show::Show; 16 | 17 | use crate::error; 18 | 19 | pub type Expected = error::Expected<::Token, ::Slice>; 20 | pub type ParseError = error::ParseError<::Context, Expected>; 21 | pub type Result = std::result::Result>; 22 | 23 | // TODO: Implement new inputs: `Bytes` (akin to `Text`), `Cursor` but for 24 | // files/anything `Read`. 25 | -------------------------------------------------------------------------------- /examples/uri/fuzz/fuzz_targets/fuzz_uri_display.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] extern crate libfuzzer_sys; 3 | extern crate uri; 4 | 5 | use uri::{Uri, parse_bytes}; 6 | 7 | fuzz_target!(|data: &[u8]| { 8 | if let Ok(uri) = parse_bytes(data) { 9 | if let Uri::Authority(ref auth) = uri { 10 | if auth.port().is_some() { 11 | return; 12 | } 13 | } else if let Uri::Absolute(ref abs) = uri { 14 | if let Some(auth) = abs.authority() { 15 | if auth.port().is_some() { 16 | return; 17 | } 18 | } 19 | } 20 | 21 | let string = ::std::str::from_utf8(data).expect("parsed UTF-8"); 22 | assert_eq!(string.to_string(), uri.to_string()); 23 | } 24 | }); 25 | -------------------------------------------------------------------------------- /examples/json/src/main.rs: -------------------------------------------------------------------------------- 1 | use pear::macros::parse; 2 | 3 | use json::*; 4 | 5 | fn main() { 6 | let test = r#" 7 | { 8 | "Image": { 9 | "Width": 800, 10 | "Height": 600, 11 | "Title": "View from 15th Floor", 12 | "Thumbnail": { 13 | 
"Url": "http://www.example.com/image/481989943", 14 | "Height": 125, 15 | "Width": 100e10 16 | }, 17 | "Animated" : false, 18 | "IDs": [116, 943, 234, 38793) 19 | }, 20 | "escaped characters": "\u2192\uD83D\uDE00\"\t\uD834\uDD1E" 21 | }"#; 22 | 23 | let result = parse!(value: pear::input::Text::from(test)); 24 | match result { 25 | Ok(v) => println!("Value: {:#?}", v), 26 | Err(e) => println!("Error: {}", e) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /examples/parens/src/main.rs: -------------------------------------------------------------------------------- 1 | #![warn(rust_2018_idioms)] 2 | 3 | use pear::input::{Text, Pear, Result}; 4 | use pear::macros::{parser, parse, parse_declare}; 5 | use pear::parsers::*; 6 | 7 | parse_declare!(Input<'a>(Token = char, Slice = &'a str, Many = &'a str)); 8 | 9 | #[parser] 10 | fn parens<'a, I: Input<'a>>(input: &mut Pear) -> Result<(), I> { 11 | eat('(')?; 12 | 13 | // parse_try!(parens()); 14 | pear::macros::switch! { 15 | _ if true => parens()?, 16 | _ => parens()? 
17 | } 18 | 19 | eat(')')?; 20 | } 21 | 22 | fn main() { 23 | let result = parse!(parens: Text::from("((((()))))")); 24 | if let Err(e) = result { println!("Error 0: {}", e); } 25 | 26 | let result = parse!(parens: Text::from("((())))")); 27 | if let Err(e) = result { println!("Error 1: {}", e); } 28 | 29 | let result = parse!(parens: Text::from("(((()))")); 30 | if let Err(e) = result { println!("Error 2: {}", e); } 31 | } 32 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | name: "${{ matrix.os.name }} ${{ matrix.test.name }}" 8 | 9 | strategy: 10 | matrix: 11 | os: 12 | - name: Linux 13 | distro: ubuntu-latest 14 | - name: Windows 15 | distro: windows-latest 16 | - name: macOS 17 | distro: macOS-latest 18 | test: 19 | - name: Beta 20 | toolchain: beta 21 | flag: 22 | - name: Nightly 23 | toolchain: nightly 24 | flag: "--all-features" 25 | 26 | runs-on: ${{ matrix.os.distro }} 27 | 28 | steps: 29 | - name: Checkout Sources 30 | uses: actions/checkout@v2 31 | 32 | - name: Install Rust 33 | uses: actions-rs/toolchain@v1 34 | with: 35 | profile: minimal 36 | toolchain: ${{ matrix.test.toolchain }} 37 | override: true 38 | 39 | - name: Run Tests 40 | uses: actions-rs/cargo@v1 41 | with: 42 | command: test 43 | args: ${{ matrix.test.flag }} 44 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2017 Sergio Benitez 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of 5 | this software and associated documentation files (the "Software"), to deal in 6 | the Software without restriction, including without limitation the rights to 7 | use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell copies of 8 | the Software, and to permit persons to whom the Software is furnished to do so, 9 | subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /examples/json/benches/json.rs: -------------------------------------------------------------------------------- 1 | use pear::input::Result; 2 | use pear::macros::parse; 3 | 4 | use json::*; 5 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 6 | 7 | #[inline(always)] 8 | fn parse_json<'a, I: Input<'a>>(input: I) -> Result, I> { 9 | let result = parse!(value: input); 10 | assert!(result.is_ok()); 11 | result 12 | } 13 | 14 | // #[bench] 15 | // fn canada(b: &mut Bencher) { 16 | // let data = include_str!("../assets/canada.json"); 17 | // b.iter(|| parse_json(data)); 18 | // } 19 | 20 | // This is the benchmark from PEST. Unfortunately, our parser here is fully 21 | // fleshed out: it actually creates the `value`, while the PEST one just checks 22 | // if it parses. As a result, our parser will be much slower. You can imitate 23 | // the PEST parser's behavior by changing the parser so that it doesn't build 24 | // real values and instead returns dummy values. 
25 | pub fn simple_data(c: &mut Criterion) { 26 | let data = include_str!("../assets/simple.json"); 27 | c.bench_function("simple", |b| b.iter(|| black_box(parse_json(data)))); 28 | } 29 | 30 | criterion_group!(json, simple_data); 31 | criterion_main!(json); 32 | -------------------------------------------------------------------------------- /lib/tests/rewind.rs: -------------------------------------------------------------------------------- 1 | use pear::input::{Text, Pear}; 2 | use pear::{macros::*, parsers::*}; 3 | 4 | type Result<'a, T> = pear::input::Result>; 5 | 6 | #[parser(rewind)] 7 | fn ab<'a>(input: &mut Pear>) -> Result<'a, ()> { 8 | eat('a')?; 9 | eat('b')?; 10 | eof()?; 11 | } 12 | 13 | #[parser(rewind)] 14 | fn abc<'a>(input: &mut Pear>) -> Result<'a, ()> { 15 | eat('a')?; 16 | eat('b')?; 17 | eat('c')?; 18 | eof()?; 19 | } 20 | 21 | #[parser(rewind)] 22 | fn abcd<'a>(input: &mut Pear>) -> Result<'a, ()> { 23 | eat('a')?; 24 | eat('b')?; 25 | eat('c')?; 26 | eat('d')?; 27 | eof()?; 28 | } 29 | 30 | #[parser] 31 | fn combo<'a>(input: &mut Pear>) -> Result<'a, &'a str> { 32 | switch! { 33 | ab() => "ab", 34 | abc() => "abc", 35 | abcd() => "abcd", 36 | _ => parse_error!("not ab, abc, or abcd")? 
37 | } 38 | } 39 | 40 | #[test] 41 | fn test_rewinding_ab() { 42 | let result = parse!(combo: Text::from("ab")).unwrap(); 43 | assert_eq!(result, "ab") 44 | } 45 | 46 | #[test] 47 | fn test_rewinding_abc() { 48 | let result = parse!(combo: Text::from("abc")).unwrap(); 49 | assert_eq!(result, "abc") 50 | } 51 | 52 | #[test] 53 | fn test_rewinding_abcd() { 54 | let result = parse!(combo: Text::from("abcd")).unwrap(); 55 | assert_eq!(result, "abcd") 56 | } 57 | 58 | #[test] 59 | fn test_rewinding_fail() { 60 | let result = parse!(combo: Text::from("a")); 61 | assert!(result.is_err()); 62 | 63 | let result = parse!(combo: Text::from("abcdef")); 64 | assert!(result.is_err()); 65 | } 66 | -------------------------------------------------------------------------------- /lib/tests/peek.rs: -------------------------------------------------------------------------------- 1 | use pear::{macros::*, parsers::*}; 2 | 3 | type Input<'a> = pear::input::Pear>; 4 | type Result<'a, T> = pear::input::Result>; 5 | 6 | #[parser(rewind, peek)] 7 | fn ab<'a>(input: &mut Input<'a>) -> Result<'a, ()> { 8 | eat('a')?; 9 | eat('b')?; 10 | eof()?; 11 | } 12 | 13 | #[parser(rewind, peek)] 14 | fn abc<'a>(input: &mut Input<'a>) -> Result<'a, ()> { 15 | eat('a')?; 16 | eat('b')?; 17 | eat('c')?; 18 | eof()?; 19 | } 20 | 21 | #[parser(rewind, peek)] 22 | fn abcd<'a>(input: &mut Input<'a>) -> Result<'a, ()> { 23 | eat('a')?; 24 | eat('b')?; 25 | eat('c')?; 26 | eat('d')?; 27 | eof()?; 28 | } 29 | 30 | #[parser] 31 | fn combo<'a>(input: &mut Input<'a>) -> Result<'a, &'a str> { 32 | switch! { 33 | ab() => eat_slice("ab")?, 34 | abc() => eat_slice("abc")?, 35 | abcd() => eat_slice("abcd")?, 36 | _ => parse_error!("not ab, abc, or abcd")? 
37 | } 38 | } 39 | 40 | #[test] 41 | fn test_peeking_ab() { 42 | let result = parse!(combo: Input::new("ab")).unwrap(); 43 | assert_eq!(result, "ab") 44 | } 45 | 46 | #[test] 47 | fn test_peeking_abc() { 48 | let result = parse!(combo: Input::new("abc")).unwrap(); 49 | assert_eq!(result, "abc") 50 | } 51 | 52 | #[test] 53 | fn test_peeking_abcd() { 54 | let result = parse!(combo: Input::new("abcd")).unwrap(); 55 | assert_eq!(result, "abcd") 56 | } 57 | 58 | #[test] 59 | fn test_peeking_fail() { 60 | let result = parse!(combo: Input::new("a")); 61 | assert!(result.is_err()); 62 | 63 | let result = parse!(combo: Input::new("abcdef")); 64 | assert!(result.is_err()); 65 | } 66 | -------------------------------------------------------------------------------- /lib/src/error.rs: -------------------------------------------------------------------------------- 1 | use crate::input::{Show, ParserInfo}; 2 | 3 | pub use crate::expected::Expected; 4 | 5 | #[derive(Debug, Clone)] 6 | pub struct ParseError { 7 | pub error: E, 8 | pub info: ErrorInfo, 9 | pub stack: Vec>, 10 | } 11 | 12 | #[derive(Debug, Clone)] 13 | pub struct ErrorInfo { 14 | pub parser: ParserInfo, 15 | pub context: C, 16 | } 17 | 18 | impl ErrorInfo { 19 | pub fn new(parser: ParserInfo, context: C) -> Self { 20 | Self { parser, context } 21 | } 22 | } 23 | 24 | impl ParseError { 25 | pub fn new(parser: ParserInfo, error: E, context: C) -> ParseError { 26 | ParseError { error, info: ErrorInfo::new(parser, context), stack: vec![] } 27 | } 28 | 29 | pub fn push_info(&mut self, parser: ParserInfo, context: C) { 30 | self.stack.push(ErrorInfo::new(parser, context)); 31 | } 32 | 33 | #[inline(always)] 34 | pub fn into>(self) -> ParseError { 35 | ParseError { 36 | error: self.error.into(), 37 | info: self.info, 38 | stack: self.stack, 39 | } 40 | } 41 | } 42 | 43 | impl std::fmt::Display for ParseError { 44 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 45 | #[cfg(feature = "color")] 
yansi::disable(); 46 | write!(f, "{} ({})", self.error, &self.info.context as &dyn Show)?; 47 | #[cfg(feature = "color")] yansi::whenever(yansi::Condition::DEFAULT); 48 | 49 | for info in &self.stack { 50 | write!(f, "\n + {}", info.parser.name)?; 51 | write!(f, " {}", &info.context as &dyn Show)?; 52 | } 53 | 54 | Ok(()) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /lib/src/result.rs: -------------------------------------------------------------------------------- 1 | use crate::error::ParseError; 2 | 3 | /// An alias to a Result where: 4 | /// 5 | /// * `Ok` is `T`. 6 | /// * `Err` is a `ParseError` with context `C` and error `E` 7 | /// 8 | /// For a `Result` that is parameterized only by the input type, see 9 | /// [`input::Result`](crate::input::Result). 10 | pub type Result = std::result::Result>; 11 | 12 | #[doc(hidden)] 13 | pub trait IntoResult { 14 | fn into_result(self) -> Result; 15 | } 16 | 17 | impl IntoResult for T { 18 | #[inline(always)] 19 | fn into_result(self) -> Result { 20 | Ok(self) 21 | } 22 | } 23 | 24 | impl IntoResult for Result { 25 | #[inline(always)] 26 | fn into_result(self) -> Result { 27 | self 28 | } 29 | } 30 | 31 | // // This one will result in inference issues when `Ok(T)` is returned. 32 | // impl IntoResult for ::std::result::Result { 33 | // fn into_result(self) -> Result { 34 | // let name = unsafe { ::std::intrinsics::type_name::() }; 35 | // self.map_err(|e| ParseError::new(name, e.to_string())) 36 | // } 37 | // } 38 | 39 | // // This one won't but makes some things uglier to write. 40 | // impl> IntoResult for Result { 41 | // fn into_result(self) -> Result { 42 | // match self { 43 | // Ok(v) => Ok(v), 44 | // Err(e) => Err(ParseError { 45 | // error: e.error.into(), 46 | // contexts: e.contexts 47 | // }) 48 | // } 49 | // } 50 | // } 51 | 52 | // // This one won't but makes some things uglier to write. 
53 | // impl IntoResult for Result { 54 | // fn into_result(self) -> Result { 55 | // self 56 | // } 57 | // } 58 | -------------------------------------------------------------------------------- /lib/src/input/length.rs: -------------------------------------------------------------------------------- 1 | /// Trait implemented for types that have a length as required by the 2 | /// [`Input::Slice`](crate::input::Input::Slice) associated type. 3 | pub trait Length { 4 | /// Returns the length of `self`. 5 | /// 6 | /// While the units of length are unspecified, the returned value must be 7 | /// consistent with the use of `n` in the [`Input::slice()`] method. In 8 | /// particular, if [`Input::slice(n)`] returns `Some(x)`, then `x.len()` 9 | /// must return `n`. 10 | /// 11 | /// [`Input::slice()`]: crate::input::Input::slice() 12 | /// [`Input::slice(n)`]: crate::input::Input::slice() 13 | fn len(&self) -> usize; 14 | 15 | /// Returns true iff the length of `self` is equal to zero. 16 | fn is_empty(&self) -> bool { self.len() == 0 } 17 | } 18 | 19 | impl Length for str { 20 | #[inline(always)] 21 | fn len(&self) -> usize { 22 | str::len(self) 23 | } 24 | } 25 | 26 | impl<'a, T> Length for &'a [T] { 27 | #[inline(always)] 28 | fn len(&self) -> usize { 29 | <[T]>::len(self) 30 | } 31 | } 32 | 33 | macro_rules! impl_length_for_sized_slice { 34 | ($($size:expr),*) => ($( 35 | impl<'a, T> Length for &'a [T; $size] { 36 | #[inline(always)] fn len(&self) -> usize { $size } 37 | } 38 | )*) 39 | } 40 | 41 | impl_length_for_sized_slice! 
{ 42 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 43 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 44 | 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 45 | 30, 31, 32 46 | } 47 | 48 | impl Length for [T] { 49 | #[inline(always)] 50 | fn len(&self) -> usize { 51 | <[T]>::len(self) 52 | } 53 | } 54 | 55 | impl Length for Vec { 56 | #[inline(always)] 57 | fn len(&self) -> usize { 58 | >::len(self) 59 | } 60 | } 61 | 62 | impl<'a> Length for &'a str { 63 | #[inline(always)] 64 | fn len(&self) -> usize { 65 | str::len(self) 66 | } 67 | } 68 | 69 | impl Length for String { 70 | #[inline(always)] 71 | fn len(&self) -> usize { 72 | String::len(self) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /examples/group/src/main.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | #![warn(rust_2018_idioms)] 3 | 4 | use pear::input::{Pear, Text}; 5 | use pear::macros::{parser, switch, parse}; 6 | use pear::parsers::*; 7 | 8 | type Input<'a> = Pear>; 9 | type Result<'a, T> = pear::input::Result>; 10 | 11 | #[derive(Debug)] 12 | struct Tokens(Vec); 13 | 14 | #[derive(Debug)] 15 | enum Token { 16 | Group(Group), 17 | Ident(String) 18 | } 19 | 20 | #[derive(Debug)] 21 | struct Group { 22 | start: char, 23 | tokens: Tokens, 24 | end: char 25 | } 26 | 27 | #[inline] 28 | fn is_whitespace(&byte: &char) -> bool { 29 | byte == ' ' || byte == '\t' || byte == '\n' 30 | } 31 | 32 | #[inline] 33 | fn is_ident_char(&byte: &char) -> bool { 34 | matches!(byte, '0'..='9' | 'a'..='z' | 'A'..='Z') 35 | } 36 | 37 | #[inline] 38 | fn is_start_group_char(&c: &char) -> bool { 39 | c == '[' || c == '(' 40 | } 41 | 42 | #[inline] 43 | fn inverse(c: char) -> char { 44 | match c { 45 | '[' => ']', 46 | '(' => ')', 47 | _ => panic!("oh no!") 48 | } 49 | } 50 | 51 | #[parser] 52 | fn group<'a>(input: &mut Input<'a>, kind: char) -> Result<'a, Group> { 53 | let (start, tokens, end) = (eat(kind)?, tokens()?, 
eat(inverse(kind))?); 54 | Group { start, tokens, end } 55 | } 56 | 57 | #[parser] 58 | fn ident<'a>(input: &mut Input<'a>) -> Result<'a, String> { 59 | take_some_while(is_ident_char)?.to_string() 60 | } 61 | 62 | #[parser] 63 | fn tokens<'a>(input: &mut Input<'a>) -> Result<'a, Tokens> { 64 | let mut tokens = Vec::new(); 65 | loop { 66 | skip_while(is_whitespace)?; 67 | let token = switch! { 68 | c@peek_if_copy(is_start_group_char) => Token::Group(group(c)?), 69 | i@ident() => Token::Ident(i), 70 | _ => break, 71 | }; 72 | 73 | tokens.push(token); 74 | } 75 | 76 | Tokens(tokens) 77 | } 78 | 79 | const STRING: &str = "(( hi )) ([ (hey there ]) hi"; 80 | 81 | fn main() { 82 | let result = parse!(tokens: Text::from(STRING)); 83 | 84 | match result { 85 | Err(ref e) => println!("Error: {}", e), 86 | Ok(v) => println!("Got: {:#?}", v) 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /examples/old/exprs/src/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(plugin)] 2 | #![plugin(pear_codegen)] 3 | 4 | #[macro_use] extern crate pear; 5 | 6 | use pear::parsers::*; 7 | use pear::combinators::*; 8 | use pear::ParseResult; 9 | 10 | #[derive(Debug)] 11 | enum Op { 12 | Add, Sub, Mul, Div 13 | } 14 | 15 | #[derive(Debug)] 16 | enum Expr { 17 | Binary(Op, Box, Box), 18 | Int(isize) 19 | } 20 | 21 | impl Expr { 22 | fn eval(&self) -> isize { 23 | match *self { 24 | Expr::Binary(Op::Add, ref e1, ref e2) => e1.eval() + e2.eval(), 25 | Expr::Binary(Op::Sub, ref e1, ref e2) => e1.eval() - e2.eval(), 26 | Expr::Binary(Op::Mul, ref e1, ref e2) => e1.eval() * e2.eval(), 27 | Expr::Binary(Op::Div, ref e1, ref e2) => e1.eval() / e2.eval(), 28 | Expr::Int(val) => val 29 | } 30 | } 31 | } 32 | 33 | #[parser] 34 | fn int<'a>(string: &mut &'a str) -> ParseResult<&'a str, Expr> { 35 | let num = take_while(|c| c == '-' || c.is_numeric()); 36 | Expr::Int(from!(num.parse())) 37 | } 38 | 39 | 
#[parser] 40 | fn val<'a>(string: &mut &'a str) -> ParseResult<&'a str, Expr> { 41 | switch! { 42 | eat('(') => (surrounded(expr, char::is_whitespace), eat(')')).0, 43 | _ => int() 44 | } 45 | } 46 | 47 | #[parser] 48 | fn term<'a>(string: &mut &'a str) -> ParseResult<&'a str, Expr> { 49 | let left = surrounded(val, char::is_whitespace); 50 | 51 | switch! { 52 | any!(peek('*'), peek('/')) => { 53 | let op = switch! { 54 | eat('*') => Op::Mul, 55 | eat('/') => Op::Div 56 | }; 57 | 58 | let right = surrounded(term, char::is_whitespace); 59 | Expr::Binary(op, Box::new(left), Box::new(right)) 60 | }, 61 | _ => left 62 | } 63 | } 64 | 65 | #[parser] 66 | fn expr<'a>(ini_string: &mut &'a str) -> ParseResult<&'a str, Expr> { 67 | let left = surrounded(term, char::is_whitespace); 68 | 69 | switch! { 70 | any!(peek('+'), peek('-')) => { 71 | let op = switch! { 72 | eat('+') => Op::Add, 73 | eat('-') => Op::Sub 74 | }; 75 | 76 | let right = surrounded(expr, char::is_whitespace); 77 | Expr::Binary(op, Box::new(left), Box::new(right)) 78 | }, 79 | _ => left 80 | } 81 | } 82 | 83 | fn eval_expr(mut string: &str) -> Option { 84 | expr(&mut string).map(|e| e.eval()).ok() 85 | } 86 | 87 | fn main() { 88 | println!("Result: {:?}", eval_expr("(4 * (3 + 2)) * 2")); 89 | println!("Result: {:?}", eval_expr("-4 + -2 - 3")); 90 | println!("Result: {:?}", eval_expr("-1")); 91 | } 92 | -------------------------------------------------------------------------------- /lib/src/input/input.rs: -------------------------------------------------------------------------------- 1 | use crate::input::{Show, Length}; 2 | 3 | pub trait Token: Show + PartialEq { } 4 | 5 | pub trait Slice: Show + Length + PartialEq { } 6 | 7 | impl Token for T where T: Show + PartialEq { } 8 | 9 | impl Slice for S where S: Show + Length + PartialEq { } 10 | 11 | #[derive(Debug, Copy, Clone)] 12 | pub struct ParserInfo { 13 | pub name: &'static str, 14 | pub raw: bool, 15 | } 16 | 17 | pub trait Rewind: Sized + Input { 
18 | /// Resets `self` to the position identified by `marker`. 19 | fn rewind_to(&mut self, marker: Self::Marker); 20 | } 21 | 22 | pub trait Input: Sized { 23 | type Token: Token; 24 | type Slice: Slice; 25 | type Many: Length; 26 | 27 | type Marker: Copy; 28 | type Context: Show; 29 | 30 | /// Returns a copy of the current token, if there is one. 31 | fn token(&mut self) -> Option; 32 | 33 | /// Returns a copy of the current slice of size `n`, if there is one. 34 | fn slice(&mut self, n: usize) -> Option; 35 | 36 | /// Checks if the current token fulfills `cond`. 37 | fn peek(&mut self, cond: F) -> bool 38 | where F: FnMut(&Self::Token) -> bool; 39 | 40 | /// Checks if the current slice of size `n` (if any) fulfills `cond`. 41 | fn peek_slice(&mut self, n: usize, cond: F) -> bool 42 | where F: FnMut(&Self::Slice) -> bool; 43 | 44 | /// Checks if the current token fulfills `cond`. If so, the token is 45 | /// consumed and returned. Otherwise, returns `None`. 46 | fn eat(&mut self, cond: F) -> Option 47 | where F: FnMut(&Self::Token) -> bool; 48 | 49 | /// Checks if the current slice of size `n` (if any) fulfills `cond`. If so, 50 | /// the slice is consumed and returned. Otherwise, returns `None`. 51 | fn eat_slice(&mut self, n: usize, cond: F) -> Option 52 | where F: FnMut(&Self::Slice) -> bool; 53 | 54 | /// Takes tokens while `cond` returns true, collecting them into a 55 | /// `Self::Many` and returning it. 56 | fn take(&mut self, cond: F) -> Self::Many 57 | where F: FnMut(&Self::Token) -> bool; 58 | 59 | /// Skips tokens while `cond` returns true. Returns the number of skipped 60 | /// tokens. 61 | fn skip(&mut self, cond: F) -> usize 62 | where F: FnMut(&Self::Token) -> bool; 63 | 64 | /// Returns `true` if there are at least `n` tokens remaining. 65 | fn has(&mut self, n: usize) -> bool; 66 | 67 | /// Emits a marker that represents the current parse position. 
68 | #[allow(unused_variables)] 69 | fn mark(&mut self, info: &ParserInfo) -> Self::Marker; 70 | 71 | /// Returns a context to identify the input spanning from `mark` until but 72 | /// excluding the current position. 73 | fn context(&mut self, _mark: Self::Marker) -> Self::Context; 74 | } 75 | -------------------------------------------------------------------------------- /lib/src/input/show.rs: -------------------------------------------------------------------------------- 1 | // TODO: Print parser arguments in debug/error output. 2 | 3 | pub trait Show { 4 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result; 5 | } 6 | 7 | impl std::fmt::Display for &dyn Show { 8 | #[inline(always)] 9 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 10 | Show::fmt(*self, f) 11 | } 12 | } 13 | 14 | impl Show for &T { 15 | #[inline(always)] 16 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 17 | ::fmt(self, f) 18 | } 19 | } 20 | 21 | impl Show for Option { 22 | #[inline(always)] 23 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 24 | if let Some(val) = self { 25 | ::fmt(val, f)?; 26 | } 27 | 28 | Ok(()) 29 | } 30 | } 31 | 32 | impl Show for [T] { 33 | #[inline(always)] 34 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 35 | for (i, value) in self.iter().enumerate() { 36 | if i > 0 { write!(f, " ")?; } 37 | write!(f, "{}", value as &dyn Show)?; 38 | } 39 | 40 | Ok(()) 41 | } 42 | } 43 | 44 | impl Show for std::borrow::Cow<'_, T> { 45 | #[inline(always)] 46 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 47 | Show::fmt(self.as_ref(), f) 48 | } 49 | } 50 | 51 | macro_rules! 
impl_for_slice_len { 52 | ($($n:expr),*) => ($( 53 | impl Show for [T; $n] { 54 | #[inline(always)] 55 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 56 | Show::fmt(&self[..], f) 57 | } 58 | } 59 | )*) 60 | } 61 | 62 | impl_for_slice_len!( 63 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 64 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 65 | ); 66 | 67 | impl Show for Vec { 68 | #[inline(always)] 69 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 70 | Show::fmt(self.as_slice(), f) 71 | } 72 | } 73 | 74 | impl Show for u8 { 75 | #[inline(always)] 76 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 77 | if self.is_ascii() { 78 | write!(f, "'{}'", char::from(*self).escape_debug()) 79 | } else { 80 | write!(f, "byte {}", self) 81 | } 82 | } 83 | } 84 | 85 | impl_show_with! { Debug, 86 | u16, u32, u64, u128, usize, 87 | i8, i16, i32, i64, i128, isize 88 | } 89 | 90 | macro_rules! impl_with_tick_display { 91 | ($($T:ty,)*) => ($( 92 | impl Show for $T { 93 | #[inline(always)] 94 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 95 | write!(f, "{:?}", self) 96 | } 97 | } 98 | )*) 99 | } 100 | 101 | impl_with_tick_display! 
{ 102 | &str, String, char, std::borrow::Cow<'static, str>, 103 | } 104 | -------------------------------------------------------------------------------- /examples/json/assets/simple.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id":"5741cfe6bf9f447a509a269e", 4 | "index":0, 5 | "guid":"642f0c2a-3d87-43ac-8f82-25f004e0c96a", 6 | "isActive":false, 7 | "balance":"$3,666.68", 8 | "picture":"http://placehold.it/32x32", 9 | "age":39, 10 | "eyeColor":"blue", 11 | "name":"Leonor Herman", 12 | "gender":"female", 13 | "company":"RODEOMAD", 14 | "email":"leonorherman@rodeomad.com", 15 | "phone":"+1 (848) 456-2962", 16 | "address":"450 Seeley Street, Iberia, North Dakota, 7859", 17 | "about":"Reprehenderit in anim laboris labore sint occaecat labore proident ipsum exercitation. Ut ea aliqua duis occaecat consectetur aliqua anim id. Dolor ea fugiat excepteur reprehenderit eiusmod enim non sit nisi. Mollit consequat anim mollit et excepteur qui laborum qui eiusmod. Qui ea amet incididunt cillum quis occaecat excepteur qui duis nisi. Dolore labore eu sunt consequat magna.\r\n", 18 | "registered":"2015-03-06T02:49:06 -02:00", 19 | "latitude":-29.402032, 20 | "longitude":151.088135, 21 | "tags":[ 22 | "Lorem", 23 | "voluptate", 24 | "aute", 25 | "ullamco", 26 | "elit", 27 | "esse", 28 | "culpa" 29 | ], 30 | "friends":[ 31 | { 32 | "id":0, 33 | "name":"Millicent Norman" 34 | }, 35 | { 36 | "id":1, 37 | "name":"Vincent Cannon" 38 | }, 39 | { 40 | "id":2, 41 | "name":"Gray Berry" 42 | } 43 | ], 44 | "greeting":"Hello, Leonor Herman! 
You have 4 unread messages.", 45 | "favoriteFruit":"apple" 46 | }, 47 | { 48 | "_id":"5741cfe69424f42d4493caa2", 49 | "index":1, 50 | "guid":"40ec6b43-e6e6-44e1-92a8-dc80cd5d7179", 51 | "isActive":true, 52 | "balance":"$2,923.78", 53 | "picture":"http://placehold.it/32x32", 54 | "age":36, 55 | "eyeColor":"blue", 56 | "name":"Barton Barnes", 57 | "gender":"male", 58 | "company":"BRAINQUIL", 59 | "email":"bartonbarnes@brainquil.com", 60 | "phone":"+1 (907) 553-3739", 61 | "address":"644 Falmouth Street, Sedley, Michigan, 5602", 62 | "about":"Et nulla laboris consectetur laborum labore. Officia dolor sint do amet excepteur dolore eiusmod. Occaecat pariatur sunt velit sunt ullamco labore commodo mollit sint dolore occaecat.\r\n", 63 | "registered":"2014-08-28T01:07:22 -03:00", 64 | "latitude":14.056553, 65 | "longitude":-61.911624, 66 | "tags":[ 67 | "laboris", 68 | "sunt", 69 | "esse", 70 | "tempor", 71 | "pariatur", 72 | "occaecat", 73 | "et" 74 | ], 75 | "friends":[ 76 | { 77 | "id":0, 78 | "name":"Tillman Mckay" 79 | }, 80 | { 81 | "id":1, 82 | "name":"Rivera Berg" 83 | }, 84 | { 85 | "id":2, 86 | "name":"Rosetta Erickson" 87 | } 88 | ], 89 | "greeting":"Hello, Barton Barnes! 
You have 2 unread messages.", 90 | "favoriteFruit":"banana" 91 | } 92 | ] 93 | -------------------------------------------------------------------------------- /lib/tests/custom_expected.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | 3 | use pear::input::{Text, Pear, Span, Expected}; 4 | use pear::{macros::*, parsers::*}; 5 | 6 | type Result<'a, T> = pear::result::Result, Error<'a>>; 7 | 8 | #[derive(Debug)] 9 | enum Error<'a> { 10 | Expected(Expected>), 11 | Other { 12 | message: Cow<'static, str>, 13 | second: Option> 14 | } 15 | } 16 | 17 | impl<'a> From for Error<'a> { 18 | fn from(message: String) -> Error<'a> { 19 | Error::Other { message: message.into(), second: None } 20 | } 21 | } 22 | 23 | impl<'a> From<&'static str> for Error<'a> { 24 | fn from(message: &'static str) -> Error<'a> { 25 | Error::Other { message: message.into(), second: None } 26 | } 27 | } 28 | 29 | impl<'a> From>> for Error<'a> { 30 | fn from(other: Expected>) -> Error<'a> { 31 | Error::Expected(other) 32 | } 33 | } 34 | 35 | impl_show_with!(Debug, Error<'_>); 36 | 37 | #[parser] 38 | fn combo<'a>(input: &mut Pear>) -> Result<'a, ()> { 39 | let start = switch! { 40 | peek('a') => eat_slice("abc")?, 41 | peek('b') => eat_slice("bat")?, 42 | _ => parse_error!("either bat or abc, please")? 43 | }; 44 | 45 | match start { 46 | "abc" => { 47 | eat_slice("def").or_else(|e| parse_error!(Error::Other { 48 | message: "def needs to follow abc".into(), 49 | second: Some(e.to_string().into()) 50 | }))?; 51 | }, 52 | "bat" => { 53 | eof().or_else(|_| parse_error!(Error::Other { 54 | message: "whoah whoah, bat must be at end".into(), 55 | second: None 56 | }))?; 57 | }, 58 | _ => unreachable!("only two options") 59 | } 60 | } 61 | 62 | impl<'a> Error<'a> { 63 | fn assert_expected(self) { 64 | if let Error::Other { .. 
/// Returns `true` if `c` may appear in a media-type token (the top-level
/// type, the subtype, or an unquoted parameter name/value).
///
/// ASCII letters of either case are accepted: media types are matched
/// case-insensitively, so `Text/HTML` must tokenize just like `text/html`.
///
/// NOTE(review): the `'^'..='~'` range also admits `{` and `}`, which the
/// RFC 7230 `tchar` rule excludes. Left unchanged to avoid rejecting input
/// that was previously accepted -- confirm whether this is intended.
#[inline]
fn is_valid_token(&c: &char) -> bool {
    matches!(
        c,
        '0'..='9' | 'A'..='Z' | 'a'..='z' | '^'..='~' | '#'..='\''
            | '!' | '*' | '+' | '-' | '.'
    )
}
64 | } 65 | } 66 | } 67 | 68 | #[parser] 69 | fn weighted_media_type<'a, I: Input<'a>>(input: &mut Pear) -> Result<(MediaType<'a>, Option), I> { 70 | let media_type = media_type()?; 71 | let weight = match media_type.params.iter().next() { 72 | Some(&("q", value)) => match value.parse::().ok() { 73 | Some(q) if q > 1.0 => return Err(parse_error!("media-type weight >= 1.0")), 74 | Some(q) => Some(q), 75 | None => return Err(parse_error!("invalid media-type weight")) 76 | }, 77 | _ => None 78 | }; 79 | 80 | (media_type, weight) 81 | } 82 | 83 | #[parser] 84 | fn accept<'a, I: Input<'a>>(input: &mut Pear) -> Result, Option)>, I> { 85 | Ok(series(|i| surrounded(i, weighted_media_type, is_whitespace), ',')?) 86 | } 87 | 88 | fn main() { 89 | use pear::input::Text; 90 | 91 | println!("MEDIA TYPE: {:?}", parse!(media_type: Text::from("a/b; a=\"abc\"; c=d"))); 92 | println!("MEDIA TYPE: {:?}", parse!(media_type: "a/b; a=\"ab=\\\"c\\\"\"; c=d")); 93 | println!("MEDIA TYPE: {:?}", parse!(media_type: "a/b; a=b; c=d")); 94 | println!("MEDIA TYPE: {:?}", parse!(media_type: "a/b")); 95 | println!("ACCEPT: {:?}", parse!(accept: "a/b ; a=b , c/d")); 96 | println!("ACCEPT: {:?}", parse!(accept: "a/b, text/html")); 97 | println!("ACCEPT: {:?}", parse!(accept: "a/b; q=0.7 , c/d")); 98 | } 99 | -------------------------------------------------------------------------------- /lib/src/input/string.rs: -------------------------------------------------------------------------------- 1 | pub use crate::input::{Input, ParserInfo}; 2 | 3 | impl<'a> Input for &'a str { 4 | type Token = char; 5 | type Slice = &'a str; 6 | type Many = Self::Slice; 7 | 8 | type Marker = &'a str; 9 | type Context = &'a str; 10 | 11 | /// Returns a copy of the current token, if there is one. 12 | fn token(&mut self) -> Option { 13 | self.chars().next() 14 | } 15 | 16 | /// Returns a copy of the current slice of size `n`, if there is one. 
17 | fn slice(&mut self, n: usize) -> Option { 18 | self.get(..n) 19 | } 20 | 21 | /// Checks if the current token fulfills `cond`. 22 | fn peek(&mut self, mut cond: F) -> bool 23 | where F: FnMut(&Self::Token) -> bool 24 | { 25 | self.token().map(|t| cond(&t)).unwrap_or(false) 26 | } 27 | 28 | /// Checks if the current slice of size `n` (if any) fulfills `cond`. 29 | fn peek_slice(&mut self, n: usize, mut cond: F) -> bool 30 | where F: FnMut(&Self::Slice) -> bool 31 | { 32 | self.slice(n).map(|s| cond(&s)).unwrap_or(false) 33 | } 34 | 35 | /// Checks if the current token fulfills `cond`. If so, the token is 36 | /// consumed and returned. Otherwise, returns `None`. 37 | fn eat(&mut self, mut cond: F) -> Option 38 | where F: FnMut(&Self::Token) -> bool 39 | { 40 | if let Some(token) = self.token() { 41 | if cond(&token) { 42 | *self = &self[token.len_utf8()..]; 43 | return Some(token) 44 | } 45 | } 46 | 47 | None 48 | } 49 | 50 | /// Checks if the current slice of size `n` (if any) fulfills `cond`. If so, 51 | /// the slice is consumed and returned. Otherwise, returns `None`. 52 | fn eat_slice(&mut self, n: usize, mut cond: F) -> Option 53 | where F: FnMut(&Self::Slice) -> bool 54 | { 55 | if let Some(slice) = self.slice(n) { 56 | if cond(&slice) { 57 | *self = &self[slice.len()..]; 58 | return Some(slice) 59 | } 60 | } 61 | 62 | None 63 | } 64 | 65 | /// Takes tokens while `cond` returns true, collecting them into a 66 | /// `Self::Many` and returning it. 67 | fn take(&mut self, mut cond: F) -> Self::Many 68 | where F: FnMut(&Self::Token) -> bool 69 | { 70 | let mut consumed = 0; 71 | for c in self.chars() { 72 | if !cond(&c) { break; } 73 | consumed += c.len_utf8(); 74 | } 75 | 76 | let value = &self[..consumed]; 77 | *self = &self[consumed..]; 78 | value 79 | } 80 | 81 | /// Skips tokens while `cond` returns true. Returns the number of skipped 82 | /// tokens. 
83 | fn skip(&mut self, cond: F) -> usize 84 | where F: FnMut(&Self::Token) -> bool 85 | { 86 | self.take(cond).len() 87 | } 88 | 89 | /// Returns `true` if there are at least `n` tokens remaining. 90 | fn has(&mut self, n: usize) -> bool { 91 | self.len() >= n 92 | } 93 | 94 | fn mark(&mut self, _info: &ParserInfo) -> Self::Marker { 95 | *self 96 | } 97 | 98 | fn context(&mut self, mark: Self::Marker) -> Self::Context { 99 | let consumed = mark.len() - self.len(); 100 | &mark[..consumed] 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /examples/json/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![warn(rust_2018_idioms)] 2 | 3 | use std::collections::HashMap; 4 | 5 | use pear::input::{Pear, Result}; 6 | use pear::macros::{parser, switch, parse_declare, parse_error}; 7 | use pear::combinators::*; 8 | use pear::parsers::*; 9 | 10 | #[derive(Debug, PartialEq)] 11 | pub enum JsonValue<'a> { 12 | Null, 13 | Bool(bool), 14 | Number(f64), 15 | String(&'a str), 16 | Array(Vec>), 17 | Object(HashMap<&'a str, JsonValue<'a>>) 18 | } 19 | 20 | #[inline(always)] 21 | fn is_whitespace(&c: &char) -> bool { 22 | c.is_ascii_whitespace() 23 | } 24 | 25 | #[inline(always)] 26 | fn is_num(c: &char) -> bool { 27 | c.is_ascii_digit() 28 | } 29 | 30 | parse_declare!(pub Input<'a>(Token = char, Slice = &'a str, Many = &'a str)); 31 | 32 | #[parser] 33 | fn int<'a, I: Input<'a>>(input: &mut Pear) -> Result { 34 | take_some_while(is_num)?.parse().or_else(|e| parse_error!("{}", e)?) 35 | // take_some_while(|c| ('0'..='9').contains(c)); // BENCH 36 | // 1 // BENCH 37 | } 38 | 39 | #[parser] 40 | fn signed_int<'a, I: Input<'a>>(input: &mut Pear) -> Result { 41 | switch! { eat('-') => -int()?, _ => int()? } // NOT BENCH 42 | // (maybe!(eat('-')), int()).1 // BENCH 43 | } 44 | 45 | // This is terribly innefficient. 
46 | #[parser] 47 | fn number<'a, I: Input<'a>>(input: &mut Pear) -> Result { 48 | let whole_num = signed_int()?; 49 | let frac = switch! { eat('.') => take_some_while(is_num)?, _ => "" }; 50 | let exp = switch! { eat_if(|&c| "eE".contains(c)) => signed_int()?, _ => 0 }; 51 | 52 | // NOT BENCH 53 | format!("{}.{}e{}", whole_num, frac, exp).parse() 54 | .or_else(|e| parse_error!("{}", e)?) 55 | 56 | // 0.0 // BENCH 57 | } 58 | 59 | #[parser] 60 | fn string<'a, I: Input<'a>>(input: &mut Pear) -> Result<&'a str, I> { 61 | eat('"')?; 62 | 63 | let mut is_escaped = false; 64 | let inner = take_while(|&c| { 65 | if is_escaped { is_escaped = false; return true; } 66 | if c == '\\' { is_escaped = true; return true; } 67 | c != '"' 68 | })?; 69 | 70 | eat('"')?; 71 | inner 72 | } 73 | 74 | #[parser] 75 | fn object<'a, I: Input<'a>>(input: &mut Pear) -> Result>, I> { 76 | Ok(delimited_collect('{', |i| { 77 | let key = surrounded(i, string, is_whitespace)?; 78 | let value = (eat(i, ':')?, surrounded(i, value, is_whitespace)?).1; 79 | Ok((key, value)) 80 | }, ',', '}')?) 81 | } 82 | 83 | #[parser] 84 | fn array<'a, I: Input<'a>>(input: &mut Pear) -> Result>, I> { 85 | Ok(delimited_collect('[', value, ',', ']')?) 86 | } 87 | 88 | #[parser] 89 | pub fn value<'a, I: Input<'a>>(input: &mut Pear) -> Result, I> { 90 | skip_while(is_whitespace)?; 91 | let val = switch! 
{ 92 | eat_slice("null") => JsonValue::Null, 93 | eat_slice("true") => JsonValue::Bool(true), 94 | eat_slice("false") => JsonValue::Bool(false), 95 | peek('{') => JsonValue::Object(object()?), 96 | peek('[') => JsonValue::Array(array()?), 97 | peek('"') => JsonValue::String(string()?), 98 | peek_if(|c| *c == '-' || is_num(c)) => JsonValue::Number(number()?), 99 | token@peek_any() => parse_error!("unexpected input: {:?}", token)?, 100 | _ => parse_error!("unknown input")?, 101 | }; 102 | 103 | skip_while(is_whitespace)?; 104 | val 105 | } 106 | -------------------------------------------------------------------------------- /lib/tests/marker.rs: -------------------------------------------------------------------------------- 1 | use pear::input::Span; 2 | use pear::{macros::*, parsers::*}; 3 | 4 | type FourMarkers = (usize, usize, usize, usize); 5 | type Input<'a> = pear::input::Pear>; 6 | type Result<'a, T> = pear::input::Result>; 7 | 8 | #[parser] 9 | fn simple<'a>(input: &mut Input<'a>) -> Result<'a, FourMarkers> { 10 | let first = parse_last_marker!(); 11 | eat('.')?; 12 | let second = parse_last_marker!(); 13 | eat_slice("..")?; 14 | let third = parse_last_marker!(); 15 | eat_slice("..")?; 16 | let fourth = parse_last_marker!(); 17 | (first, second, third, fourth) 18 | } 19 | 20 | #[parser] 21 | fn simple_updating<'a>(input: &mut Input<'a>) -> Result<'a, FourMarkers> { 22 | let first = parse_current_marker!(); 23 | eat('.')?; 24 | let second = parse_current_marker!(); 25 | eat_slice("..")?; 26 | let third = parse_current_marker!(); 27 | eat_slice("..")?; 28 | let fourth = parse_current_marker!(); 29 | (first, second, third, fourth) 30 | } 31 | 32 | #[parser] 33 | fn resetting<'a>(input: &mut Input<'a>) -> Result<'a, FourMarkers> { 34 | let first = parse_last_marker!(); 35 | eat('.')?; 36 | parse_mark!(); 37 | let second = parse_last_marker!(); 38 | eat_slice("..")?; 39 | let third = parse_last_marker!(); 40 | eat_slice("..")?; 41 | parse_mark!(); 42 | let 
fourth = parse_last_marker!(); 43 | (first, second, third, fourth) 44 | } 45 | 46 | #[test] 47 | fn test_simple_marker() { 48 | let result = parse!(simple: Input::new(".....")).unwrap(); 49 | assert_eq!(result, (0, 0, 0, 0)); 50 | } 51 | 52 | #[test] 53 | fn test_updating_marker() { 54 | let result = parse!(simple_updating: Input::new(".....")).unwrap(); 55 | assert_eq!(result, (0, 1, 3, 5)); 56 | } 57 | 58 | #[test] 59 | fn test_resetting_marker() { 60 | let result = parse!(resetting: Input::new(".....")).unwrap(); 61 | assert_eq!(result, (0, 1, 1, 5)); 62 | } 63 | 64 | type TwoSpans<'a> = (Span<'a>, Span<'a>); 65 | 66 | #[parser] 67 | fn context<'a>(input: &mut Input<'a>) -> Result<'a, TwoSpans<'a>> { 68 | eat_slice("...")?; 69 | let first = parse_context!(); 70 | eat('\n')?; 71 | eat_slice("..")?; 72 | let second = parse_context!(); 73 | (first, second) 74 | } 75 | 76 | #[parser] 77 | fn resetting_context<'a>(input: &mut Input<'a>) -> Result<'a, TwoSpans<'a>> { 78 | eat_slice("...")?; 79 | let first = parse_context!(); 80 | eat('\n')?; 81 | parse_mark!(); 82 | eat_slice("..")?; 83 | let second = parse_context!(); 84 | (first, second) 85 | } 86 | 87 | #[test] 88 | fn test_context() { 89 | let (first, second) = parse!(context: Input::new("...\n..")).unwrap(); 90 | 91 | assert_eq!(first, Span { 92 | start: (1, 1, 0), 93 | end: (1, 4, 3), 94 | snippet: Some("..."), 95 | cursor: Some('\n'), 96 | }); 97 | 98 | assert_eq!(second, Span { 99 | start: (1, 1, 0), 100 | end: (2, 3, 6), 101 | snippet: Some("...\n.."), 102 | cursor: None, 103 | }); 104 | } 105 | 106 | #[test] 107 | fn test_resetting_context() { 108 | let (first, second) = parse!(resetting_context: Input::new("...\n..")).unwrap(); 109 | 110 | assert_eq!(first, Span { 111 | start: (1, 1, 0), 112 | end: (1, 4, 3), 113 | snippet: Some("..."), 114 | cursor: Some('\n'), 115 | }); 116 | 117 | assert_eq!(second, Span { 118 | start: (2, 1, 4), 119 | end: (2, 3, 6), 120 | snippet: Some(".."), 121 | cursor: None, 122 | 
}); 123 | } 124 | -------------------------------------------------------------------------------- /lib/src/input/pear.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use crate::input::{Input, Rewind, ParserInfo}; 4 | 5 | pub trait Debugger { 6 | fn on_entry(&mut self, info: &ParserInfo); 7 | fn on_exit(&mut self, info: &ParserInfo, ok: bool, ctxt: I::Context); 8 | } 9 | 10 | pub struct Options { 11 | pub stacked_context: bool, 12 | pub debugger: Option>>, 13 | } 14 | 15 | impl fmt::Debug for Options { 16 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 17 | f.debug_struct("Options") 18 | .field("stacked_context", &self.stacked_context) 19 | .field("debugger", &self.debugger.is_some()) 20 | .finish() 21 | } 22 | } 23 | 24 | impl Default for Options { 25 | #[cfg(debug_assertions)] 26 | fn default() -> Self { 27 | Options { 28 | stacked_context: true, 29 | debugger: Some(Box::::default()), 30 | } 31 | } 32 | 33 | #[cfg(not(debug_assertions))] 34 | fn default() -> Self { 35 | Options { 36 | stacked_context: false, 37 | debugger: None, 38 | } 39 | } 40 | } 41 | 42 | #[derive(Debug)] 43 | pub struct Pear { 44 | pub input: I, 45 | #[doc(hidden)] 46 | pub emit_error: bool, 47 | #[doc(hidden)] 48 | pub options: Options 49 | } 50 | 51 | impl Pear { 52 | pub fn new(input: A) -> Pear where I: From { 53 | Pear::from(I::from(input)) 54 | } 55 | } 56 | 57 | impl From for Pear { 58 | fn from(input: I) -> Pear { 59 | Pear { input, emit_error: true, options: Options::default() } 60 | } 61 | } 62 | 63 | impl std::ops::Deref for Pear { 64 | type Target = I; 65 | fn deref(&self) -> &Self::Target { 66 | &self.input 67 | } 68 | } 69 | 70 | impl std::ops::DerefMut for Pear { 71 | fn deref_mut(&mut self) -> &mut Self::Target { 72 | &mut self.input 73 | } 74 | } 75 | 76 | impl Input for Pear { 77 | type Token = I::Token; 78 | type Slice = I::Slice; 79 | type Many = I::Many; 80 | 81 | type Marker = I::Marker; 82 | 
type Context = I::Context; 83 | 84 | #[inline(always)] 85 | fn token(&mut self) -> Option { 86 | self.input.token() 87 | } 88 | 89 | #[inline(always)] 90 | fn slice(&mut self, n: usize) -> Option { 91 | self.input.slice(n) 92 | } 93 | 94 | #[inline(always)] 95 | fn has(&mut self, n: usize) -> bool { 96 | self.input.has(n) 97 | } 98 | 99 | #[inline(always)] 100 | fn peek(&mut self, cond: F) -> bool 101 | where F: FnMut(&Self::Token) -> bool 102 | { 103 | self.input.peek(cond) 104 | } 105 | 106 | #[inline(always)] 107 | fn peek_slice(&mut self, n: usize, cond: F) -> bool 108 | where F: FnMut(&Self::Slice) -> bool 109 | { 110 | self.input.peek_slice(n, cond) 111 | } 112 | 113 | #[inline(always)] 114 | fn eat(&mut self, cond: F) -> Option 115 | where F: FnMut(&Self::Token) -> bool 116 | { 117 | self.input.eat(cond) 118 | } 119 | 120 | #[inline(always)] 121 | fn eat_slice(&mut self, n: usize, cond: F) -> Option 122 | where F: FnMut(&Self::Slice) -> bool 123 | { 124 | self.input.eat_slice(n, cond) 125 | } 126 | 127 | #[inline(always)] 128 | fn take(&mut self, cond: F) -> Self::Many 129 | where F: FnMut(&Self::Token) -> bool 130 | { 131 | self.input.take(cond) 132 | } 133 | 134 | #[inline(always)] 135 | fn skip(&mut self, cond: F) -> usize 136 | where F: FnMut(&Self::Token) -> bool 137 | { 138 | self.input.skip(cond) 139 | } 140 | 141 | #[inline(always)] 142 | fn mark(&mut self, info: &ParserInfo) -> Self::Marker { 143 | self.input.mark(info) 144 | } 145 | 146 | #[inline(always)] 147 | fn context(&mut self, mark: Self::Marker) -> Self::Context { 148 | self.input.context(mark) 149 | } 150 | } 151 | 152 | impl Rewind for Pear { 153 | fn rewind_to(&mut self, marker: Self::Marker) { 154 | self.input.rewind_to(marker) 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /examples/ini/src/main.rs: -------------------------------------------------------------------------------- 1 | #![warn(rust_2018_idioms)] 2 | 3 | use std::fmt; 
/// The value of a single INI property.
#[derive(Debug, PartialEq)]
enum Value<'s> {
    Boolean(bool),
    String(&'s str),
    Number(f64)
}

/// A `name=value` entry.
#[derive(Debug, PartialEq)]
struct Property<'s> {
    name: &'s str,
    value: Value<'s>
}

/// A group of properties. `name` is `None` for properties that are not under
/// a `[heading]`.
#[derive(Debug, PartialEq)]
struct Section<'s> {
    name: Option<&'s str>,
    properties: Vec<Property<'s>>
}

/// A parsed INI document: its sections in source order.
#[derive(Debug, PartialEq)]
struct IniConfig<'s> {
    sections: Vec<Section<'s>>
}

impl fmt::Display for Value<'_> {
    /// Writes the inner value using its own `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Value::Boolean(b) => write!(f, "{}", b),
            Value::Number(n) => write!(f, "{}", n),
            Value::String(s) => write!(f, "{}", s),
        }
    }
}

impl fmt::Display for IniConfig<'_> {
    /// Writes one line per heading (`[(name)]`) and per property
    /// (`(name)=(value)`). The parentheses make the exact extent of each
    /// parsed piece visible in the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for section in &self.sections {
            if let Some(name) = section.name {
                writeln!(f, "[({})]", name)?;
            }

            for property in &section.properties {
                writeln!(f, "({})=({})", property.name, property.value)?;
            }
        }

        Ok(())
    }
}
79 | } 80 | 81 | #[parser] 82 | fn value<'a, I: Input<'a>>(input: &mut Pear) -> Result, I> { 83 | switch! { 84 | eat_slice("true") | eat_slice("yes") => Value::Boolean(true), 85 | eat_slice("false") | eat_slice("no") => Value::Boolean(false), 86 | peek_if(is_num_char) => Value::Number(float()?), 87 | _ => Value::String(take_some_while(|&c| !"\n;".contains(c))?.trim()), 88 | } 89 | } 90 | 91 | #[parser] 92 | fn heading<'a, I: Input<'a>>(input: &mut Pear) -> Result<&'a str, I> { 93 | delimited_some('[', |c| !is_whitespace(c), ']')? 94 | } 95 | 96 | #[parser] 97 | fn name<'a, I: Input<'a>>(input: &mut Pear) -> Result<&'a str, I> { 98 | take_some_while(|&c| !"=\n;".contains(c))?.trim_end() 99 | } 100 | 101 | #[parser] 102 | fn properties<'a, I: Input<'a>>(input: &mut Pear) -> Result>, I> { 103 | let mut properties = Vec::new(); 104 | loop { 105 | skip_while(is_whitespace)?; 106 | switch! { 107 | peek(';') | peek('[') | eof() => break, 108 | _ => { 109 | let (name, _, value) = (name()?, eat('=')?, value()?); 110 | skip_while(is_whitespace)?; 111 | properties.push(Property { name, value }); 112 | } 113 | } 114 | } 115 | 116 | properties 117 | } 118 | 119 | #[parser] 120 | fn ini<'a, I: Input<'a>>(input: &mut Pear) -> Result, I> { 121 | let mut sections = Vec::new(); 122 | loop { 123 | skip_while(is_whitespace)?; 124 | let (name, properties) = switch! 
{ 125 | eof() => break, 126 | comment() => continue, 127 | peek('[') => (Some(heading()?), properties()?), 128 | _ => (None, properties()?), 129 | }; 130 | 131 | sections.push(Section { name, properties }) 132 | } 133 | 134 | IniConfig { sections } 135 | } 136 | 137 | const INI_STRING: &str = "\ 138 | ; a section 139 | a=b 140 | ; c is very special 141 | ; and don't you know it 142 | c=2.0 143 | 144 | [section] 145 | a=3 146 | c=1 147 | 148 | [section1] 149 | a=2 ; comment 150 | b=c 151 | 152 | [section2] 153 | a=1 154 | 155 | [section10 156 | "; 157 | 158 | fn main() { 159 | // let start = time::precise_time_ns(); 160 | // let result = parse!(ini: &mut PearNI_STRING); 161 | let result = parse!(ini: pear::input::Text::from(INI_STRING)); 162 | // let result = parse!(ini: &mut PearNI_STRING); 163 | // let end = time::precise_time_ns(); 164 | 165 | match result { 166 | Err(ref e) => println!("Error: {}", e), 167 | Ok(v) => println!("Got: {}", v) 168 | } 169 | 170 | // TODO: Make sure we can use the same parser for files and strings. 
171 | // println!("Result (in {}us): {:?}", (end - start) / 1000, result); 172 | } 173 | -------------------------------------------------------------------------------- /examples/uri/src/tables.rs: -------------------------------------------------------------------------------- 1 | pub const PATH_CHARS: [u8; 256] = [ 2 | // 0 1 2 3 4 5 6 7 8 9 3 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x 4 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x 5 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x 6 | 0, 0, 0, b'!', 0, 0, b'$', b'%', b'&', b'\'', // 3x 7 | b'(', b')', b'*', b'+', b',', b'-', b'.', b'/', b'0', b'1', // 4x 8 | b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b':', b';', // 5x 9 | 0, b'=', 0, 0, b'@', b'A', b'B', b'C', b'D', b'E', // 6x 10 | b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', // 7x 11 | b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', // 8x 12 | b'Z', 0, 0, 0, 0, b'_', 0, b'a', b'b', b'c', // 9x 13 | b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', // 10x 14 | b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', // 11x 15 | b'x', b'y', b'z', 0, 0, 0, b'~', 0, 0, 0, // 12x 16 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 13x 17 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 14x 18 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 15x 19 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16x 20 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 17x 21 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 18x 22 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 19x 23 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20x 24 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 21x 25 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 22x 26 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 23x 27 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 24x 28 | 0, 0, 0, 0, 0, 0 // 25x 29 | ]; 30 | 31 | #[inline(always)] 32 | pub fn is_pchar(&c: &u8) -> bool { 33 | PATH_CHARS[c as usize] != 0 34 | } 35 | 36 | pub const REG_CHARS: [u8; 256] = [ 37 | // 0 1 2 3 4 5 6 7 8 9 38 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x 39 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x 40 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x 41 | 0, 0, 0, b'!', 0, 0, b'$', 0, 
b'&', b'\'', // 3x 42 | b'(', b')', b'*', b'+', b',', b'-', b'.', 0, b'0', b'1', // 4x 43 | b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', 0, b';', // 5x 44 | 0, b'=', 0, 0, 0, b'A', b'B', b'C', b'D', b'E', // 6x 45 | b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', // 7x 46 | b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', // 8x 47 | b'Z', 0, 0, 0, 0, b'_', 0, b'a', b'b', b'c', // 9x 48 | b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', // 10x 49 | b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', // 11x 50 | b'x', b'y', b'z', 0, 0, 0, b'~', 0, 0, 0, // 12x 51 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 13x 52 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 14x 53 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 15x 54 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16x 55 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 17x 56 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 18x 57 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 19x 58 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20x 59 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 21x 60 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 22x 61 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 23x 62 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 24x 63 | 0, 0, 0, 0, 0, 0 // 25x 64 | ]; 65 | 66 | #[inline(always)] 67 | pub fn is_reg_name_char(&c: &u8) -> bool { 68 | REG_CHARS[c as usize] != 0 69 | } 70 | 71 | #[cfg(test)] 72 | mod tests { 73 | fn test_char_table(table: &[u8]) { 74 | for (i, &v) in table.iter().enumerate() { 75 | if v != 0 { 76 | assert_eq!(i, v as usize); 77 | } 78 | } 79 | } 80 | 81 | #[test] 82 | fn check_tables() { 83 | test_char_table(&super::PATH_CHARS[..]); 84 | test_char_table(&super::REG_CHARS[..]); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /lib/src/debug.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use inlinable_string::InlinableString; 3 | 4 | use crate::input::{Show, Input, Debugger, ParserInfo}; 5 | 6 | type Index = usize; 7 | 8 | struct Tree { 9 | // All 
of the nodes in the tree live in this vector. 10 | nodes: Vec, 11 | // Maps from an index (`parent`) index `nodes` to a set of indexes in 12 | // `nodes` corresponding to the children of `key`. 13 | children: HashMap>, 14 | // This "tree" keeps track of which parent children are currently being 15 | // pushed to. A `push` adds to this stack while a `pop` removes from this 16 | // stack. If the stack is empty, the root is being pushed to. 17 | stack: Vec 18 | } 19 | 20 | impl Tree { 21 | fn new() -> Tree { 22 | Tree { 23 | nodes: vec![], 24 | children: HashMap::new(), 25 | stack: Vec::with_capacity(8) 26 | } 27 | } 28 | 29 | fn push(&mut self, node: T) -> Index { 30 | // Add the node to the tree and get its index. 31 | self.nodes.push(node); 32 | let index = self.nodes.len() - 1; 33 | 34 | // If the stack indicates we have a parent, add to its children. 35 | if !self.stack.is_empty() { 36 | let parent = self.stack[self.stack.len() - 1]; 37 | self.children.entry(parent).or_default().push(index); 38 | } 39 | 40 | // Make this the new parent. 
41 | self.stack.push(index); 42 | index 43 | } 44 | 45 | fn pop_level(&mut self) -> Option { 46 | self.stack.pop() 47 | } 48 | 49 | fn clear(&mut self) { 50 | *self = Self::new(); 51 | } 52 | 53 | fn get(&self, index: Index) -> &T { 54 | &self.nodes[index] 55 | } 56 | 57 | fn get_mut(&mut self, index: Index) -> &mut T { 58 | &mut self.nodes[index] 59 | } 60 | 61 | fn get_children(&self, index: Index) -> &[Index] { 62 | match self.children.get(&index) { 63 | Some(children) => &children[..], 64 | None => &[] 65 | } 66 | } 67 | } 68 | 69 | impl Tree { 70 | fn debug_print(&self, sibling_map: &mut Vec, node: Index) { 71 | let parent_count = sibling_map.len(); 72 | for (i, &has_siblings) in sibling_map.iter().enumerate() { 73 | if i < parent_count - 1 { 74 | match has_siblings { 75 | true => print!(" │ "), 76 | false => print!(" ") 77 | } 78 | } else { 79 | match has_siblings { 80 | true => print!(" ├── "), 81 | false => print!(" └── ") 82 | } 83 | } 84 | } 85 | 86 | let info = self.get(node); 87 | let success = match info.success { 88 | Some(true) => " ✓", 89 | Some(false) => " ✗", 90 | None => "" 91 | }; 92 | 93 | #[cfg(feature = "color")] 94 | use yansi::{Style, Paint, Color::*}; 95 | 96 | #[cfg(feature = "color")] 97 | let style = match info.success { 98 | Some(true) => Green.into(), 99 | Some(false) => Red.into(), 100 | None => Style::default(), 101 | }; 102 | 103 | #[cfg(feature = "color")] 104 | println!("{}{} ({})", info.parser.name.paint(style), success.paint(style), info.context); 105 | 106 | #[cfg(not(feature = "color"))] 107 | println!("{}{} ({})", info.parser.name, success, info.context); 108 | 109 | let children = self.get_children(node); 110 | let num_children = children.len(); 111 | for (i, &child) in children.iter().enumerate() { 112 | let have_siblings = i != (num_children - 1); 113 | sibling_map.push(have_siblings); 114 | self.debug_print(sibling_map, child); 115 | sibling_map.pop(); 116 | } 117 | } 118 | } 119 | 120 | struct Info { 121 | parser: 
ParserInfo, 122 | context: InlinableString, 123 | success: Option, 124 | } 125 | 126 | impl Info { 127 | fn new(parser: ParserInfo) -> Self { 128 | Info { parser, context: iformat!(), success: None } 129 | } 130 | } 131 | 132 | pub struct TreeDebugger { 133 | tree: Tree, 134 | } 135 | 136 | impl Default for TreeDebugger { 137 | fn default() -> Self { 138 | Self { tree: Tree::new() } 139 | } 140 | } 141 | 142 | impl Debugger for TreeDebugger { 143 | fn on_entry(&mut self, p: &ParserInfo) { 144 | if !((p.raw && is_parse_debug!("full")) || (!p.raw && is_parse_debug!())) { 145 | return; 146 | } 147 | 148 | self.tree.push(Info::new(*p)); 149 | } 150 | 151 | fn on_exit(&mut self, p: &ParserInfo, ok: bool, ctxt: I::Context) { 152 | if !((p.raw && is_parse_debug!("full")) || (!p.raw && is_parse_debug!())) { 153 | return; 154 | } 155 | 156 | let index = self.tree.pop_level(); 157 | if let Some(last_node) = index { 158 | let last = self.tree.get_mut(last_node); 159 | last.success = Some(ok); 160 | last.context = iformat!("{}", &ctxt as &dyn Show); 161 | } 162 | 163 | // We've reached the end. Print the whole thing and clear the tree. 164 | if let Some(0) = index { 165 | self.tree.debug_print(&mut vec![], 0); 166 | self.tree.clear(); 167 | } 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /lib/tests/parsers.rs: -------------------------------------------------------------------------------- 1 | use pear::input::{Pear, Cursor, Extent}; 2 | use pear::{macros::*, parsers::*}; 3 | 4 | type Result<'a, T> = pear::input::Result>; 5 | type Input<'a> = pear::input::Pear>; 6 | 7 | #[parser] 8 | fn take_until_str<'a>(input: &mut Input<'a>, s: &str) -> Result<'a, &'a str> { 9 | take_while_slice(|&slice| !slice.ends_with(s))? 
10 | } 11 | 12 | #[parser] 13 | fn test_until<'a>(input: &mut Input<'a>, s: &str, r: &str) -> Result<'a, &'a str> { 14 | (take_until_str(s)?, eat_slice(r)?).0 15 | } 16 | 17 | #[test] 18 | fn test_while_slice() { 19 | let result = parse!(test_until("]]", "]"): Input::new("[[ a ] b c ]]")); 20 | assert_eq!(result.unwrap(), "[[ a ] b c ]"); 21 | 22 | let r = parse!(test_until("]]]", "] hi"): Input::new("[[ a ]] b c ]]] hi")); 23 | assert_eq!(r.unwrap(), "[[ a ]] b c ]]"); 24 | 25 | let r = parse!(test_until("]", "]] b c ]]]"): Input::new("[[ a ]] b c ]]]")); 26 | assert_eq!(r.unwrap(), "[[ a "); 27 | } 28 | 29 | #[parser] 30 | fn take_until_and_str<'a>(input: &mut Input<'a>, s: &str) -> Result<'a, &'a str> { 31 | if s.is_empty() { 32 | parse_error!("what would that mean?")?; 33 | } 34 | 35 | let slice = take_while_slice(|&slice| !slice.ends_with(s))?; 36 | if slice.ends_with(&s[..s.len() - 1]) { 37 | parse_try!(skip_any()); 38 | &slice[..slice.len() - (s.len() - 1)] 39 | } else { 40 | slice 41 | } 42 | } 43 | 44 | #[parser] 45 | fn test_until_and<'a, 'b>(input: &mut Input<'a>, s: &str, r: &str) -> Result<'a, &'a str> { 46 | (take_until_and_str(s)?, eat_slice(r)?).0 47 | } 48 | 49 | #[test] 50 | fn test_while_slice_and() { 51 | let result = parse!(test_until_and("]]", ""): Input::new("[[ a ] b c ]]")); 52 | assert_eq!(result.unwrap(), "[[ a ] b c "); 53 | 54 | let r = parse!(test_until_and("]]]", " hi"): Input::new("[[ a ]] b c ]]] hi")); 55 | assert_eq!(r.unwrap(), "[[ a ]] b c "); 56 | 57 | let r = parse!(test_until_and("]", "] b c ]]]"): Input::new("[[ a ]] b c ]]]")); 58 | assert_eq!(r.unwrap(), "[[ a "); 59 | 60 | let r = parse!(test_until_and("]", ""): Input::new("hi")); 61 | assert_eq!(r.unwrap(), "hi"); 62 | 63 | let r = parse!(test_until_and("]", ""): Input::new("🐥hi")); 64 | assert_eq!(r.unwrap(), "🐥hi"); 65 | 66 | let r = parse!(test_until_and("]", "] b c ]]]"): Input::new("[[ 🐥 ]] b c ]]]")); 67 | assert_eq!(r.unwrap(), "[[ 🐥 "); 68 | } 69 | 70 | #[parser] 
71 | fn test_until_window<'a>(input: &mut Input<'a>, s: &str, r: &str) -> Result<'a, &'a str> { 72 | (take_until_slice(s)?, eat_slice(r)?).0 73 | } 74 | 75 | #[test] 76 | fn test_while_slice_window() { 77 | let result = parse!(test_until_window("]]", "]]"): Input::new("[[ a ] b c ]]")); 78 | assert_eq!(result.unwrap(), "[[ a ] b c "); 79 | 80 | let r = parse!(test_until_window("]]]", "]]] hi"): Input::new("[[ a ]] b c ]]] hi")); 81 | assert_eq!(r.unwrap(), "[[ a ]] b c "); 82 | 83 | let r = parse!(test_until_window("]", "]] b c ]]]"): Input::new("[[ a ]] b c ]]]")); 84 | assert_eq!(r.unwrap(), "[[ a "); 85 | 86 | let r = parse!(test_until_window("]", "]] b c ]]]"): Input::new("[[ 🐥 ]] b c ]]]")); 87 | assert_eq!(r.unwrap(), "[[ 🐥 "); 88 | 89 | let r = parse!(test_until_window("]", ""): Input::new("🐥hi")); 90 | assert_eq!(r.unwrap(), "🐥hi"); 91 | } 92 | 93 | #[test] 94 | fn test_window_termination() { 95 | let result = take_while_window(&mut Input::new("a"), 2, |_| false); 96 | assert_eq!(result.unwrap(), "a"); 97 | 98 | let result = take_while_window(&mut Input::new("aa"), 2, |_| false); 99 | assert_eq!(result.unwrap(), ""); 100 | 101 | let result = take_some_while_some_window(&mut Input::new("a"), 2, |_| false); 102 | assert!(result.is_err()); 103 | 104 | let result = take_some_while_window(&mut Input::new("aa"), 2, |_| false); 105 | assert!(result.is_err()); 106 | 107 | let result = take_while_window(&mut Input::new("aa"), 2, |_| true); 108 | assert_eq!(result.unwrap(), "a"); 109 | 110 | let result = take_some_while_window(&mut Input::new("aa"), 2, |_| true); 111 | assert_eq!(result.unwrap(), "a"); 112 | 113 | let result = take_while_window(&mut Input::new("aaab"), 2, |&s| s == "aa"); 114 | assert_eq!(result.unwrap(), "aa"); 115 | 116 | let result = take_some_while_window(&mut Input::new("aaab"), 2, |&s| s == "aa"); 117 | assert_eq!(result.unwrap(), "aa"); 118 | 119 | let result = take_while_some_window(&mut Input::new("aa"), 2, |_| false); 120 | 
assert_eq!(result.unwrap(), ""); 121 | } 122 | 123 | type CResult<'a, T> = pear::input::Result, Cursor<&'a str>>; 124 | 125 | #[parser] 126 | fn take_until_cursor_str<'a>( 127 | input: &mut Pear>, 128 | s: &str 129 | ) -> CResult<'a, &'a str> { 130 | take_while_slice(|&slice| !slice.ends_with(s))? 131 | } 132 | 133 | #[test] 134 | fn test_cursor() { 135 | let input = "abchello"; 136 | let result = take_until_cursor_str(&mut Pear::new(input), "hell"); 137 | let extent = result.unwrap(); 138 | assert_eq!(extent, "abchel"); 139 | assert_eq!(extent.start, 0); 140 | assert_eq!(extent.end, 6); 141 | assert_eq!(extent, &input[extent.start..extent.end]); 142 | 143 | let input = "hellothisishe"; 144 | let mut cursor = Pear::new(input); 145 | 146 | peek_slice(&mut cursor, "hello").unwrap(); 147 | 148 | let extent = eat_any(&mut cursor).unwrap(); 149 | assert_eq!(extent, 'h'); 150 | 151 | let extent = take_until_cursor_str(&mut cursor, "this").unwrap(); 152 | assert_eq!(extent, "ellothi"); 153 | assert_eq!(extent, &input[extent.start..extent.end]); 154 | 155 | let extent = take_until_cursor_str(&mut cursor, "is").unwrap(); 156 | assert_eq!(extent, "si"); 157 | assert_eq!(extent, &input[extent.start..extent.end]); 158 | 159 | println!("{:?}", cursor); 160 | let extent = take_while(&mut cursor, |_| true).unwrap(); 161 | assert_eq!(extent, "she"); 162 | assert_eq!(extent, &input[extent.start..extent.end]); 163 | } 164 | -------------------------------------------------------------------------------- /lib/src/input/text_file.rs: -------------------------------------------------------------------------------- 1 | // use std::fs::File; 2 | // use std::io::{self, Read, BufReader}; 3 | 4 | // use std::cmp::min; 5 | // use std::marker::PhantomData; 6 | 7 | // // Ideally, this would hold a `String` inside. But we need a lifetime parameter 8 | // // here so we can return an &'a str from `peek_slice`. 
The alternative is to 9 | // // give a lifetime to the `Input` trait and use it in the `peek_slice` method. 10 | // // But that lifetime will pollute everything. Finally, the _correct_ thing is 11 | // // for Rust to let us reference the lifetime of `self` in an associated type. 12 | // // That requires something like https://github.com/rust-lang/rfcs/pull/1598. 13 | // #[derive(Debug)] 14 | // pub struct StringFile<'s> { 15 | // buffer: Vec, 16 | // consumed: usize, 17 | // pos: usize, 18 | // reader: BufReader, 19 | // _string: PhantomData<&'s str> 20 | // } 21 | 22 | // impl<'s> StringFile<'s> { 23 | // #[inline(always)] 24 | // pub fn open(path: &str) -> io::Result> { 25 | // Ok(StringFile::new(File::open(path)?, 1024)) 26 | // } 27 | 28 | // #[inline(always)] 29 | // pub fn open_with_cap(path: &str, cap: usize) -> io::Result> { 30 | // Ok(StringFile::new(File::open(path)?, cap)) 31 | // } 32 | 33 | // #[inline(always)] 34 | // pub fn new(file: File, cap: usize) -> StringFile<'s> { 35 | // StringFile { 36 | // buffer: vec![0; cap], 37 | // consumed: 0, 38 | // pos: 0, 39 | // reader: BufReader::new(file), 40 | // _string: PhantomData 41 | // } 42 | // } 43 | 44 | // #[inline(always)] 45 | // pub fn available(&self) -> usize { 46 | // self.pos - self.consumed 47 | // } 48 | 49 | // fn read_into_peek(&mut self, num: usize) -> io::Result { 50 | // if self.available() >= num { 51 | // return Ok(num); 52 | // } 53 | 54 | // let needed = num - self.available(); 55 | // let to_read = min(self.buffer.len() - self.pos, needed); 56 | // let (i, j) = (self.pos, self.pos + to_read); 57 | // let read = self.reader.read(&mut self.buffer[i..j])?; 58 | 59 | // self.pos += read; 60 | // Ok(self.available()) 61 | // } 62 | 63 | // // Panics if at least `num` aren't available. 
64 | // #[inline(always)] 65 | // fn peek_bytes(&self, num: usize) -> &[u8] { 66 | // &self.buffer[self.consumed..(self.consumed + num)] 67 | // } 68 | 69 | // fn consume(&mut self, num: usize) { 70 | // if self.pos < num { 71 | // let left = (num - self.pos) as u64; 72 | // self.consumed = 0; 73 | // self.pos = 0; 74 | // // TODO: Probably don't ignore this? 75 | // let _ = io::copy(&mut self.reader.by_ref().take(left), &mut io::sink()); 76 | // } else { 77 | // self.consumed += num; 78 | // } 79 | // } 80 | 81 | // #[inline] 82 | // fn peek_char(&mut self) -> Option { 83 | // let available = match self.read_into_peek(4) { 84 | // Ok(n) => n, 85 | // Err(_) => return None 86 | // }; 87 | 88 | // let bytes = self.peek_bytes(available); 89 | // let string = match ::std::str::from_utf8(bytes) { 90 | // Ok(string) => string, 91 | // Err(e) => match ::std::str::from_utf8(&bytes[..e.valid_up_to()]) { 92 | // Ok(string) => string, 93 | // Err(_) => return None 94 | // } 95 | // }; 96 | 97 | // string.chars().next() 98 | // } 99 | // } 100 | 101 | // impl<'s> Input for StringFile<'s> { 102 | // type Token = char; 103 | // type InSlice = &'s str; 104 | // type Slice = &'s str; 105 | // type Many = String; 106 | // type Context = &'s str; 107 | 108 | // // If we took Self::Token here, we'd know the length of the character. 
109 | // #[inline(always)] 110 | // fn peek(&mut self) -> Option { 111 | // self.peek_char() 112 | // } 113 | 114 | // fn take_many bool>(&mut self, mut cond: F) -> Self::Many { 115 | // let mut result = String::new(); 116 | // while let Some(c) = self.peek_char() { 117 | // if cond(&c) { 118 | // result.push(c); 119 | // self.consume(c.len_utf8()); 120 | // } else { 121 | // break; 122 | // } 123 | // } 124 | 125 | // result 126 | // } 127 | 128 | // fn skip_many bool>(&mut self, mut cond: F) -> usize { 129 | // let mut taken = 0; 130 | // while let Some(c) = self.peek_char() { 131 | // if cond(&c) { 132 | // self.consume(c.len_utf8()); 133 | // taken += 1; 134 | // } else { 135 | // return taken; 136 | // } 137 | // } 138 | 139 | // taken 140 | // } 141 | 142 | // fn peek_slice(&mut self, slice: Self::InSlice) -> Option { 143 | // let available = match self.read_into_peek(slice.len()) { 144 | // Ok(n) => n, 145 | // Err(_) => return None 146 | // }; 147 | 148 | // let bytes = self.peek_bytes(available); 149 | // let string = match ::std::str::from_utf8(bytes) { 150 | // Ok(string) => string, 151 | // Err(e) => match ::std::str::from_utf8(&bytes[..e.valid_up_to()]) { 152 | // Ok(string) => string, 153 | // Err(_) => return None 154 | // } 155 | // }; 156 | 157 | // match string == slice { 158 | // true => Some(slice), 159 | // false => None 160 | // } 161 | // } 162 | 163 | // #[inline(always)] 164 | // fn advance(&mut self, count: usize) { 165 | // self.consume(count); 166 | // } 167 | 168 | // #[inline(always)] 169 | // fn is_empty(&mut self) -> bool { 170 | // match self.read_into_peek(1) { 171 | // Ok(0) | Err(_) => true, 172 | // Ok(_) => false, 173 | // } 174 | // } 175 | // } 176 | 177 | -------------------------------------------------------------------------------- /lib/src/expected.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use inlinable_string::InlinableString; 4 | 5 | use 
crate::input::Show; 6 | 7 | #[derive(Clone)] 8 | pub enum CowInlineString { 9 | Borrowed(&'static str), 10 | Inline(InlinableString) 11 | } 12 | 13 | impl std::ops::Deref for CowInlineString { 14 | type Target = str; 15 | fn deref(&self) -> &Self::Target { 16 | match self { 17 | CowInlineString::Borrowed(s) => s, 18 | CowInlineString::Inline(s) => s, 19 | } 20 | } 21 | } 22 | 23 | impl std::fmt::Display for CowInlineString { 24 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 25 | str::fmt(self, f) 26 | } 27 | } 28 | 29 | impl std::fmt::Debug for CowInlineString { 30 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 31 | str::fmt(self, f) 32 | } 33 | } 34 | 35 | pub enum Expected { 36 | Token(Option, Option), 37 | Slice(Option, Option), 38 | Eof(Option), 39 | Other(CowInlineString), 40 | Elided 41 | } 42 | 43 | impl Expected { 44 | pub fn token(expected: Option<&T>, found: Option) -> Self { 45 | let expected = expected.map(|t| iformat!("{}", t as &dyn Show)); 46 | Expected::Token(expected, found) 47 | } 48 | 49 | pub fn eof(found: Option) -> Self { 50 | Expected::Eof(found) 51 | } 52 | } 53 | 54 | impl Expected { 55 | pub fn slice(expected: Option<&S>, found: Option) -> Self { 56 | let expected = expected.map(|t| iformat!("{}", t as &dyn Show)); 57 | Expected::Slice(expected, found) 58 | } 59 | } 60 | 61 | impl Expected { 62 | pub fn map(self, t: FT, s: FS) -> Expected 63 | where FT: Fn(Token) -> T, FS: Fn(Slice) -> S 64 | { 65 | use Expected::*; 66 | 67 | match self { 68 | Token(e, v) => Token(e, v.map(t)), 69 | Slice(e, v) => Slice(e, v.map(s)), 70 | Eof(v) => Eof(v.map(t)), 71 | Other(v) => Other(v), 72 | Expected::Elided => Expected::Elided, 73 | } 74 | } 75 | } 76 | 77 | impl Expected { 78 | pub fn into_owned(self) -> Expected { 79 | self.map(|t| t.to_owned(), |s| s.to_owned()) 80 | } 81 | } 82 | 83 | impl From for Expected { 84 | #[inline(always)] 85 | fn from(string: String) -> Expected { 86 | 
Expected::Other(CowInlineString::Inline(InlinableString::from(string))) 87 | } 88 | } 89 | 90 | #[doc(hidden)] 91 | impl From for Expected { 92 | #[inline(always)] 93 | fn from(string: InlinableString) -> Expected { 94 | Expected::Other(CowInlineString::Inline(string)) 95 | } 96 | } 97 | 98 | impl From<&'static str> for Expected { 99 | #[inline(always)] 100 | fn from(string: &'static str) -> Expected { 101 | Expected::Other(CowInlineString::Borrowed(string)) 102 | } 103 | } 104 | 105 | impl fmt::Debug for Expected { 106 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 107 | match self { 108 | Expected::Token(e, v) => { 109 | f.debug_tuple("Expected::Token").field(&e).field(&v).finish() 110 | } 111 | Expected::Slice(e, v) => { 112 | f.debug_tuple("Expected::Slice").field(&e).field(&v).finish() 113 | } 114 | Expected::Eof(v) => { 115 | f.debug_tuple("Expected::Eof").field(&v).finish() 116 | } 117 | Expected::Other(v) => { 118 | f.debug_tuple("Expected::Other").field(&v).finish() 119 | } 120 | Expected::Elided => f.debug_tuple("Expected::Elided").finish() 121 | } 122 | } 123 | } 124 | 125 | impl Clone for Expected { 126 | fn clone(&self) -> Self { 127 | match self { 128 | Expected::Token(e, f) => Expected::Token(e.clone(), f.clone()), 129 | Expected::Slice(e, f) => Expected::Slice(e.clone(), f.clone()), 130 | Expected::Eof(f) => Expected::Eof(f.clone()), 131 | Expected::Other(v) => Expected::Other(v.clone()), 132 | Expected::Elided => Expected::Elided, 133 | } 134 | } 135 | } 136 | 137 | impl fmt::Display for Expected { 138 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 139 | match *self { 140 | Expected::Token(Some(ref expected), Some(ref found)) => { 141 | let found = found as &dyn Show; 142 | write!(f, "expected token {} but found {}", expected, found) 143 | } 144 | Expected::Token(None, Some(ref found)) => { 145 | let found = found as &dyn Show; 146 | write!(f, "unexpected token: {}", found) 147 | } 148 | Expected::Token(Some(ref 
expected), None) => { 149 | write!(f, "unexpected EOF: expected token {}", expected) 150 | } 151 | Expected::Token(None, None) => { 152 | write!(f, "unexpected EOF: expected some token") 153 | } 154 | Expected::Slice(Some(ref expected), Some(ref found)) => { 155 | let found = found as &dyn Show; 156 | write!(f, "expected slice {} but found {}", expected, found) 157 | } 158 | Expected::Slice(None, Some(ref found)) => { 159 | let found = found as &dyn Show; 160 | write!(f, "unexpected slice: {}", found) 161 | } 162 | Expected::Slice(Some(ref expected), None) => { 163 | write!(f, "unexpected EOF: expected slice {}", expected) 164 | } 165 | Expected::Slice(None, None) => { 166 | write!(f, "unexpected EOF: expected some slice") 167 | } 168 | Expected::Eof(None) => { 169 | write!(f, "expected EOF but input remains") 170 | } 171 | Expected::Eof(Some(ref found)) => { 172 | let found = found as &dyn Show; 173 | write!(f, "unexpected token {}", found) 174 | } 175 | Expected::Other(ref other) => write!(f, "{}", other), 176 | Expected::Elided => write!(f, "[ERROR ELIDED]") 177 | } 178 | } 179 | } 180 | 181 | #[cfg(test)] 182 | mod tests { 183 | use super::Expected; 184 | 185 | #[test] 186 | fn test_into_owned() { 187 | let expected: Expected = Expected::Slice(None, Some("hi")); 188 | let _owned: Expected = expected.into_owned(); 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /lib/src/input/text.rs: -------------------------------------------------------------------------------- 1 | pub use crate::input::{Input, Rewind, Show, ParserInfo}; 2 | 3 | #[cfg(feature = "color")] 4 | use yansi::Paint; 5 | 6 | #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] 7 | pub struct Span<'a> { 8 | /// Start line/column/offset. 9 | pub start: (usize, usize, usize), 10 | /// End line/column/offset. 11 | pub end: (usize, usize, usize), 12 | /// Where the parser was pointing. 13 | pub cursor: Option, 14 | /// Snippet between start and end. 
15 | pub snippet: Option<&'a str>, 16 | } 17 | 18 | const SNIPPET_LEN: usize = 30; 19 | 20 | impl<'a> Show for Span<'a> { 21 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 22 | let (a, b, _) = self.start; 23 | let (c, d, _) = self.end; 24 | 25 | if self.start == self.end { 26 | write!(f, "{}:{}", a, b)?; 27 | } else { 28 | write!(f, "{}:{} to {}:{}", a, b, c, d)?; 29 | } 30 | 31 | let write_snippet = |f: &mut std::fmt::Formatter<'_>, snippet: &str| { 32 | for c in snippet.escape_debug() { write!(f, "{}", c)?; } 33 | Ok(()) 34 | }; 35 | 36 | if let Some(snippet) = self.snippet { 37 | write!(f, " \"")?; 38 | if snippet.len() > SNIPPET_LEN + 6 { 39 | write_snippet(f, &snippet[..SNIPPET_LEN / 2])?; 40 | 41 | #[cfg(feature = "color")] 42 | write!(f, " {} ", "...".blue())?; 43 | 44 | #[cfg(not(feature = "color"))] 45 | write!(f, " ... ")?; 46 | 47 | let end_start = snippet.len() - SNIPPET_LEN / 2; 48 | write_snippet(f, &snippet[end_start..])?; 49 | } else { 50 | write_snippet(f, snippet)?; 51 | } 52 | 53 | if let Some(cursor) = self.cursor { 54 | #[cfg(feature = "color")] 55 | write!(f, "{}", cursor.escape_debug().blue())?; 56 | 57 | #[cfg(not(feature = "color"))] 58 | write!(f, "{}", cursor.escape_debug())?; 59 | } 60 | 61 | write!(f, "\"")?; 62 | } else { 63 | #[cfg(feature = "color")] 64 | write!(f, " {}", "[EOF]".blue())?; 65 | 66 | #[cfg(not(feature = "color"))] 67 | write!(f, " [EOF]")?; 68 | } 69 | 70 | Ok(()) 71 | } 72 | } 73 | 74 | #[derive(Debug)] 75 | pub struct Text<'a> { 76 | current: &'a str, 77 | start: &'a str, 78 | } 79 | 80 | impl<'a> From<&'a str> for Text<'a> { 81 | fn from(start: &'a str) -> Text<'a> { 82 | Text { start, current: start } 83 | } 84 | } 85 | 86 | impl Rewind for Text<'_> { 87 | fn rewind_to(&mut self, marker: Self::Marker) { 88 | self.current = &self.start[marker..]; 89 | } 90 | } 91 | 92 | impl<'a> Input for Text<'a> { 93 | type Token = char; 94 | type Slice = &'a str; 95 | type Many = Self::Slice; 96 | 97 | 
type Marker = usize; 98 | type Context = Span<'a>; 99 | 100 | /// Returns a copy of the current token, if there is one. 101 | fn token(&mut self) -> Option { 102 | self.current.token() 103 | } 104 | 105 | /// Returns a copy of the current slice of size `n`, if there is one. 106 | fn slice(&mut self, n: usize) -> Option { 107 | self.current.slice(n) 108 | } 109 | 110 | /// Checks if the current token fulfills `cond`. 111 | fn peek(&mut self, cond: F) -> bool 112 | where F: FnMut(&Self::Token) -> bool 113 | { 114 | self.current.peek(cond) 115 | } 116 | 117 | /// Checks if the current slice of size `n` (if any) fulfills `cond`. 118 | fn peek_slice(&mut self, n: usize, cond: F) -> bool 119 | where F: FnMut(&Self::Slice) -> bool 120 | { 121 | self.current.peek_slice(n, cond) 122 | } 123 | 124 | /// Checks if the current token fulfills `cond`. If so, the token is 125 | /// consumed and returned. Otherwise, returns `None`. 126 | fn eat(&mut self, cond: F) -> Option 127 | where F: FnMut(&Self::Token) -> bool 128 | { 129 | self.current.eat(cond) 130 | } 131 | 132 | /// Checks if the current slice of size `n` (if any) fulfills `cond`. If so, 133 | /// the slice is consumed and returned. Otherwise, returns `None`. 134 | fn eat_slice(&mut self, n: usize, cond: F) -> Option 135 | where F: FnMut(&Self::Slice) -> bool 136 | { 137 | self.current.eat_slice(n, cond) 138 | } 139 | 140 | /// Takes tokens while `cond` returns true, collecting them into a 141 | /// `Self::Many` and returning it. 142 | fn take(&mut self, cond: F) -> Self::Many 143 | where F: FnMut(&Self::Token) -> bool 144 | { 145 | self.current.take(cond) 146 | } 147 | 148 | /// Skips tokens while `cond` returns true. Returns the number of skipped 149 | /// tokens. 150 | fn skip(&mut self, cond: F) -> usize 151 | where F: FnMut(&Self::Token) -> bool 152 | { 153 | self.current.skip(cond) 154 | } 155 | 156 | /// Returns `true` if there are at least `n` tokens remaining. 
157 | fn has(&mut self, n: usize) -> bool { 158 | self.current.has(n) 159 | } 160 | 161 | #[inline(always)] 162 | fn mark(&mut self, _: &ParserInfo) -> Self::Marker { 163 | self.start.len() - self.current.len() 164 | } 165 | 166 | fn context(&mut self, mark: Self::Marker) -> Self::Context { 167 | let cursor = self.token(); 168 | let bytes_read = self.start.len() - self.current.len(); 169 | if bytes_read == 0 { 170 | Span { start: (1, 1, 0), end: (1, 1, 0), snippet: None, cursor } 171 | } else { 172 | let start_offset = mark; 173 | let end_offset = bytes_read; 174 | 175 | let to_start_str = &self.start[..start_offset]; 176 | let (start_line, start_col) = line_col(to_start_str); 177 | let start = (start_line, start_col, start_offset); 178 | 179 | let to_current_str = &self.start[..bytes_read]; 180 | let (end_line, end_col) = line_col(to_current_str); 181 | let end = (end_line, end_col, bytes_read); 182 | 183 | let snippet = if end_offset <= self.start.len() { 184 | Some(&self.start[start_offset..end_offset]) 185 | } else { 186 | None 187 | }; 188 | 189 | Span { start, end, cursor, snippet } 190 | } 191 | } 192 | } 193 | 194 | fn line_col(string: &str) -> (usize, usize) { 195 | if string.is_empty() { 196 | return (1, 1); 197 | } 198 | 199 | let (line_count, last_line) = string.lines().enumerate().last().unwrap(); 200 | if string.ends_with('\n') { 201 | (line_count + 2, 1) 202 | } else { 203 | (line_count + 1, last_line.len() + 1) 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /lib/src/macros.rs: -------------------------------------------------------------------------------- 1 | //! Macros. 2 | //! 3 | //! 4 | //! * [`parse_declare!`](#parse_declare) 5 | //! * [`parse_error!`](#parse_error) 6 | //! * [`impl_show_with!`](#impl_show_with) 7 | //! 8 | //! [`Input`]: crate::input::Input 9 | //! [`Result`]: crate::result::Result 10 | //! [`Input::mark()`]: crate::input::Input::mark() 11 | //! 
[`Input::unmark()`]: crate::input::Input::unmark() 12 | //! [`Input::context()`]: crate::input::Input::context() 13 | //! [`ParseError::push_context()`]: crate::error::ParseError::push_context() 14 | //! [`eof()`]: crate::parsers::eof() 15 | 16 | #[doc(inline)] 17 | pub use pear_codegen::{parser, switch}; 18 | #[doc(inline)] 19 | pub use crate::{parse, parse_declare, parse_error, parse_try, is_parse_debug}; 20 | #[doc(inline)] 21 | pub use crate::{parse_current_marker, parse_last_marker, parse_mark, parse_context}; 22 | #[doc(inline)] 23 | pub use crate::impl_show_with; 24 | 25 | /// Runs the parser with the given name and input, then [`parsers::eof()`]. 26 | /// 27 | /// Returns the combined result. 28 | /// 29 | /// Syntax: 30 | /// 31 | /// ```text 32 | /// parse := PARSER_NAME ( '(' (EXPR ',')* ')' )? ':' INPUT_EXPR 33 | /// 34 | /// PARSER_NAME := rust identifier to parser function 35 | /// INPUT_EXPR := any valid rust expression which resolves to a mutable 36 | /// reference to type that implements `Input` 37 | /// ``` 38 | #[macro_export] 39 | macro_rules! parse { 40 | ($parser:ident : &mut $e:expr) => ({ 41 | let input = &mut $e; 42 | (move || { 43 | let result = $parser(input)?; 44 | $crate::parsers::eof(input).map_err(|e| e.into())?; 45 | $crate::result::IntoResult::into_result(result) 46 | })() 47 | }); 48 | ($parser:ident : $e:expr) => (parse!($parser(): $e)); 49 | ($parser:ident ($($x:expr),*) : $e:expr) => ({ 50 | let mut input: $crate::input::Pear<_> = $e.into(); 51 | (move || { 52 | let result = $parser(&mut input $(, $x)*)?; 53 | $crate::parsers::eof(&mut input).map_err(|e| e.into())?; 54 | $crate::result::IntoResult::into_result(result) 55 | })() 56 | }) 57 | } 58 | 59 | #[doc(hidden)] 60 | #[macro_export(local_inner_macros)] 61 | macro_rules! 
parse_declare { 62 | (pub($($inner:tt)+) $($rest:tt)*) => { $crate::_parse_declare!([pub($($inner)+)] $($rest)*); }; 63 | (pub $($rest:tt)*) => { $crate::_parse_declare!([pub] $($rest)*); }; 64 | ($($rest:tt)*) => { $crate::_parse_declare!([] $($rest)*); } 65 | } 66 | 67 | #[doc(hidden)] 68 | #[macro_export(local_inner_macros)] 69 | macro_rules! _parse_declare { 70 | ([$($vis:tt)*] $input:ident $(<$($gen:tt),+>)* ($($T:ident = $t:ty),*)) => { 71 | $($vis)* trait $input $(<$($gen),+>)*: $crate::input::Input<$($T = $t),*> { } 72 | 73 | impl<$($($gen,)+)* T> $input $(<$($gen)+>)* for T 74 | where T: $crate::input::Input<$($T = $t),*> + $($($gen),+)* { } 75 | } 76 | } 77 | 78 | /// Like `format!` but tries to inline the string. 79 | #[doc(hidden)] 80 | #[macro_export] 81 | macro_rules! iformat { 82 | () => (iformat!("",)); 83 | ($fmt:expr) => (iformat!($fmt,)); 84 | ($fmt:expr, $($arg:tt)*) => ({ 85 | #[allow(unused_imports)] 86 | use std::fmt::Write; 87 | #[allow(unused_imports)] 88 | use $crate::inlinable_string::{InlinableString, StringExt}; 89 | let mut string = $crate::inlinable_string::InlinableString::new(); 90 | let _ = write!(string, $fmt, $($arg)*); 91 | string 92 | }) 93 | } 94 | 95 | /// Returns an `Err(ParseError)` built from `$err` and the current parse context. Can be used like `format!` as well. 96 | #[macro_export] 97 | macro_rules! parse_error { 98 | ([$info:expr; $input:expr; $marker:expr; $T:ty] $err:expr) => ({ 99 | let context = $crate::parse_context!([$info; $input; $marker; $T]); 100 | Err($crate::error::ParseError::new(*$info, $err, context)) 101 | }); 102 | ([$n:expr; $i:expr; $m:expr; $T:ty] $fmt:expr, $($arg:tt)*) => { 103 | parse_error!([$n; $i; $m; $T] $crate::iformat!($fmt, $($arg)*)) 104 | }; 105 | } 106 | 107 | /// Returns the last marker that was set. 108 | /// 109 | /// Invoked with no arguments: `parse_last_marker!()` 110 | #[macro_export] 111 | macro_rules! 
parse_last_marker { 112 | ([$n:expr; $i:expr; $marker:expr; $T:ty]) => (*$marker); 113 | } 114 | 115 | /// Returns the mark at the current parsing position. 116 | /// 117 | /// Invoked with no arguments: `parse_current_marker!()` 118 | #[macro_export] 119 | macro_rules! parse_current_marker { 120 | ([$info:expr; $input:expr; $marker:expr; $T:ty]) => ( 121 | $crate::input::Input::mark($input, $info) 122 | ) 123 | } 124 | 125 | /// Sets the marker to the current position. 126 | #[macro_export] 127 | macro_rules! parse_mark { 128 | ([$info:expr; $input:expr; $marker:expr; $T:ty]) => {{ 129 | *$marker = $crate::input::Input::mark($input, $info); 130 | }} 131 | } 132 | 133 | /// Returns the context from the current mark to the input position inclusive. 134 | /// 135 | /// Invoked with no arguments: `parse_context!()` 136 | #[macro_export] 137 | macro_rules! parse_context { 138 | ([$n:expr; $i:expr; $marker:expr; $T:ty]) => ( 139 | $crate::input::Input::context($i, *$marker) 140 | ); 141 | } 142 | 143 | /// Runs a parser returning `Some` if it succeeds or `None` otherwise. 144 | /// 145 | /// Takes a single parser expression as input. Without additional arguments, 146 | /// returns the output in `Some` on success. If called as `parse_try!(parse_expr 147 | /// => result_expr)`, returns `result_expr` in `Some` on success. The result of 148 | /// the parse expression can be pattern-bound as `parse_try!(pat@pexpr => 149 | /// rexpr)`. 150 | // FIXME: There is an issue with rustc here where, if `$input` is matched as `expr`, 151 | // everything fails. 152 | #[macro_export] 153 | macro_rules! parse_try { 154 | ([$n:expr; $input:ident; $m:expr; $T:ty] $e:expr) => {{ 155 | $crate::macros::switch! { [$n;$input;$m;$T] result@$e => { Some(result) }, _ => { None } } 156 | }}; 157 | ([$n:expr; $input:ident; $m:expr; $T:ty] $e:expr => $r:expr) => {{ 158 | $crate::macros::switch! 
{ [$n;$input;$m;$T] $e => { Some($r) }, _ => { None } } 159 | }}; 160 | ([$n:expr; $input:ident; $m:expr; $T:ty] $e:expr => $r:expr => || $f:expr) => {{ 161 | $crate::macros::switch! { [$n;$input;$m;$T] $e => { $r }, _ => { $f } } 162 | }}; 163 | ([$n:expr; $input:ident; $m:expr; $T:ty] $pat:ident@$e:expr => $r:expr) => {{ 164 | $crate::macros::switch! { [$n;$input;$m;$T] $pat@$e => { Some($r) }, _ => { None } } 165 | }} 166 | } 167 | 168 | #[doc(hidden)] 169 | #[macro_export] 170 | macro_rules! is_parse_debug { 171 | () => ({ 172 | #[cfg(not(debug_assertions))] { false } 173 | #[cfg(debug_assertions)] { ::std::env::var("PARSE_DEBUG").is_ok() } 174 | }); 175 | 176 | ($kind:expr) => ({ 177 | #[cfg(not(debug_assertions))] { false } 178 | #[cfg(debug_assertions)] { 179 | ::std::env::var("PARSE_DEBUG").map(|v| v == $kind).unwrap_or(false) 180 | } 181 | }) 182 | } 183 | 184 | /// Implements the `Show` trait for $($T)+ using the existing trait `$trait`. 185 | #[macro_export] 186 | macro_rules! impl_show_with { 187 | ($trait:ident, $($T:ty),+) => ( 188 | $(impl $crate::input::Show for $T { 189 | #[inline(always)] 190 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 191 | std::fmt::$trait::fmt(self, f) 192 | } 193 | })+ 194 | ) 195 | } 196 | -------------------------------------------------------------------------------- /lib/src/combinators.rs: -------------------------------------------------------------------------------- 1 | use crate::input::{Pear, Input, Rewind, Token, Result}; 2 | use crate::macros::parser; 3 | use crate::parsers::*; 4 | 5 | pub trait Collection: Default + Extend { 6 | #[inline(always)] 7 | fn push(&mut self, item: A) { 8 | self.extend(Some(item)) 9 | } 10 | } 11 | 12 | impl> Collection for T { } 13 | 14 | /// Parses `p`, returning `Some` if it succeeds and `None` if it fails. Discards 15 | /// the error message. 
16 | pub fn ok(input: &mut Pear, p: P) -> Option 17 | where I: Input, P: FnOnce(&mut Pear) -> Result 18 | { 19 | let save = input.emit_error; 20 | input.emit_error = false; 21 | let ok = p(input).ok(); 22 | input.emit_error = save; 23 | ok 24 | } 25 | 26 | /// Parses `p`, returning `true` if it succeeds and `false` if it fails. 27 | /// Discards the error message. 28 | pub fn succeeds(input: &mut Pear, p: P) -> bool 29 | where I: Input, P: FnOnce(&mut Pear) -> Result 30 | { 31 | ok(input, p).is_some() 32 | } 33 | 34 | /// Parses `p` repeatedly until EOF is reached, returning the output of the last `p`. Fails if `p` ever fails. 35 | #[parser(raw)] 36 | pub fn last_of_many(input: &mut Pear, mut p: P) -> Result 37 | where I: Input, P: FnMut(&mut Pear) -> Result 38 | { 39 | loop { 40 | let output = p()?; 41 | if succeeds(input, eof) { 42 | return Ok(output); 43 | } 44 | } 45 | } 46 | 47 | /// Skips all tokens that match `f` before and after a `p`, returning the output of `p`. 48 | #[parser(raw)] 49 | pub fn surrounded(input: &mut Pear, mut p: P, mut f: F) -> Result 50 | where I: Input, 51 | F: FnMut(&I::Token) -> bool, 52 | P: FnMut(&mut Pear) -> Result 53 | { 54 | skip_while(&mut f)?; 55 | let output = p()?; 56 | skip_while(&mut f)?; 57 | Ok(output) 58 | } 59 | 60 | /// Parses as many `p` as possible until EOF is reached, collecting them into a 61 | /// `C`. Fails if `p` ever fails. `C` may be empty. 62 | #[parser(raw)] 63 | pub fn collect(input: &mut Pear, mut p: P) -> Result 64 | where C: Collection, I: Input, P: FnMut(&mut Pear) -> Result 65 | { 66 | let mut collection = C::default(); 67 | loop { 68 | if succeeds(input, eof) { 69 | return Ok(collection); 70 | } 71 | 72 | collection.push(p()?); 73 | } 74 | } 75 | 76 | /// Parses as many `p` as possible until EOF is reached, collecting them into a 77 | /// `C`. Fails if `p` ever fails. `C` is not allowed to be empty. 
78 | #[parser(raw)] 79 | pub fn collect_some(input: &mut Pear, mut p: P) -> Result 80 | where C: Collection, I: Input, P: FnMut(&mut Pear) -> Result 81 | { 82 | let mut collection = C::default(); 83 | loop { 84 | collection.push(p()?); 85 | if succeeds(input, eof) { 86 | return Ok(collection); 87 | } 88 | } 89 | } 90 | 91 | /// Parses as many `p` as possible until EOF is reached or `p` fails, collecting 92 | /// them into a `C`. `C` may be empty. A failed `p` is rewound to where that attempt began and ends the collection. 93 | #[parser(raw)] 94 | pub fn try_collect(input: &mut Pear, mut p: P) -> Result 95 | where C: Collection, I: Input + Rewind, P: FnMut(&mut Pear) -> Result 96 | { 97 | let mut collection = C::default(); 98 | loop { 99 | if succeeds(input, eof) { 100 | return Ok(collection); 101 | } 102 | 103 | // FIXME: We should be able to call `parse_current_marker!` here. 104 | let start = input.mark(&crate::input::ParserInfo { 105 | name: "try_collect", 106 | raw: true 107 | }); 108 | 109 | match ok(input, |i| p(i)) { 110 | Some(val) => collection.push(val), 111 | None => { 112 | input.rewind_to(start); 113 | break; 114 | } 115 | } 116 | } 117 | 118 | Ok(collection) 119 | } 120 | 121 | /// Parses many `separator` delimited `item`s, the entire collection of which must 122 | /// start with `start` and end with `end`. 
Grammatically, this is: 123 | /// 124 | /// START (item SEPARATOR)* item? END 125 | #[parser(raw)] 126 | pub fn delimited_collect( 127 | input: &mut Pear, 128 | start: T, 129 | mut item: P, 130 | separator: S, 131 | end: T, 132 | ) -> Result 133 | where C: Collection, 134 | I: Input, 135 | T: Token + Clone, 136 | S: Into>, 137 | P: FnMut(&mut Pear) -> Result, 138 | { 139 | eat(start)?; 140 | 141 | let seperator = separator.into(); 142 | let mut collection = C::default(); 143 | loop { 144 | if succeeds(input, |i| eat(i, end.clone())) { 145 | break; 146 | } 147 | 148 | collection.push(item()?); 149 | 150 | if let Some(ref separator) = seperator { 151 | if !succeeds(input, |i| eat(i, separator.clone())) { 152 | eat(end.clone())?; 153 | break; 154 | } 155 | } 156 | } 157 | 158 | Ok(collection) 159 | } 160 | 161 | /// Parses many `separator` delimited `item`s. Grammatically, this is: 162 | /// 163 | /// item (SEPARATOR item)* 164 | #[parser(raw)] 165 | pub fn series( 166 | input: &mut Pear, 167 | mut item: P, 168 | seperator: S, 169 | ) -> Result 170 | where C: Collection, 171 | I: Input, 172 | S: Token + Clone, 173 | P: FnMut(&mut Pear) -> Result, 174 | { 175 | let mut collection = C::default(); 176 | loop { 177 | collection.push(item()?); 178 | if !succeeds(input, |i| eat(i, seperator.clone())) { 179 | break; 180 | } 181 | } 182 | 183 | Ok(collection) 184 | } 185 | 186 | /// Parses many `separator` delimited `item`s with an optional trailing separator. 187 | /// Grammatically, this is: 188 | /// 189 | /// item (SEPARATOR item)* SEPARATOR? 
190 | #[parser(raw)] 191 | pub fn trailing_series( 192 | input: &mut Pear, 193 | mut item: P, 194 | seperator: S, 195 | ) -> Result 196 | where C: Collection, 197 | I: Input, 198 | S: Token + Clone, 199 | P: FnMut(&mut Pear) -> Result, 200 | { 201 | let mut collection = C::default(); 202 | let mut have_some = false; 203 | loop { 204 | if have_some { 205 | if let Some(item) = ok(input, |i| item(i)) { 206 | collection.push(item); 207 | } else { 208 | break 209 | } 210 | } else { 211 | collection.push(item()?); 212 | have_some = true; 213 | } 214 | 215 | if !succeeds(input, |i| eat(i, seperator.clone())) { 216 | break; 217 | } 218 | } 219 | 220 | Ok(collection) 221 | } 222 | 223 | /// Parses many `separator` delimited `item`s that are collectively prefixed with 224 | /// `prefix`. If `prefix` is absent, the result is an empty collection. Grammatically, this is: 225 | /// 226 | /// (PREFIX item (SEPARATOR item)*)? 227 | #[parser(raw)] 228 | pub fn prefixed_series( 229 | input: &mut Pear, 230 | prefix: T, 231 | item: P, 232 | seperator: T, 233 | ) -> Result 234 | where C: Collection, 235 | I: Input, 236 | T: Token + Clone, 237 | P: FnMut(&mut Pear) -> Result, 238 | { 239 | if !succeeds(input, |i| eat(i, prefix)) { 240 | return Ok(C::default()); 241 | } 242 | 243 | series(input, item, seperator) 244 | } 245 | -------------------------------------------------------------------------------- /examples/http/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate pear; 2 | 3 | use pear::{parsers::*, combinators::*}; 4 | use pear::macros::{parser, switch, parse_error}; 5 | 6 | #[derive(Debug, PartialEq)] 7 | enum Method { 8 | Get, Head, Post, Put, Delete, Connect, Options, Trace, Patch 9 | } 10 | 11 | #[derive(Debug, PartialEq)] 12 | struct RequestLine<'a> { 13 | method: Method, 14 | uri: &'a str, 15 | version: (u8, u8) 16 | } 17 | 18 | #[derive(Debug, PartialEq)] 19 | struct Header<'a> { 20 | name: &'a str, 21 | value: &'a [u8], 22 | } 23 | 24 | #[derive(Debug, PartialEq)] 25 | struct 
Request<'a> { 26 | request_line: RequestLine<'a>, 27 | headers: Vec> 28 | } 29 | 30 | type Input<'a> = pear::input::Pear>; 31 | 32 | type Result<'a, T> = pear::input::Result>; 33 | 34 | #[parser] 35 | fn version<'a>(input: &mut Input<'a>) -> Result<'a, (u8, u8)> { 36 | eat_slice(b"HTTP/1.")?; 37 | let minor = eat_if(|&c| c == b'0' || c == b'1')?; 38 | (1, minor - b'0') 39 | } 40 | 41 | #[parser] 42 | fn method<'a>(input: &mut Input<'a>) -> Result<'a, Method> { 43 | switch! { 44 | eat_slice(b"GET") => Method::Get, 45 | eat_slice(b"HEAD") => Method::Head, 46 | eat_slice(b"POST") => Method::Post, 47 | eat_slice(b"PUT") => Method::Put, 48 | eat_slice(b"DELETE") => Method::Delete, 49 | eat_slice(b"CONNECT") => Method::Connect, 50 | eat_slice(b"OPTIONS") => Method::Options, 51 | eat_slice(b"PATCH") => Method::Patch, 52 | eat_slice(b"TRACE") => Method::Trace, 53 | _ => { 54 | let rogue = take_while(|&c| c != b' ')?; 55 | parse_error!("unknown method: {:?}", rogue)? 56 | } 57 | } 58 | } 59 | 60 | // This is incredibly permissive. 61 | #[parser] 62 | fn string<'a>(input: &mut Input<'a>) -> Result<'a, &'a str> { 63 | string_until(None)? 64 | } 65 | 66 | // This is incredibly permissive. 
67 | #[parser] 68 | fn string_until<'a>(input: &mut Input<'a>, c: Option) -> Result<'a, &'a str> { 69 | let value = match c { 70 | Some(c) => take_some_while_until(is_ascii_line_byte, c)?, 71 | None => take_some_while(is_ascii_line_byte)?, 72 | }; 73 | // SAFETY: presumably `value` holds only bytes accepted by `is_ascii_line_byte`, i.e. ASCII, which is valid UTF-8 - confirm against `take_some_while*`. 74 | unsafe { std::str::from_utf8_unchecked(&value) } 75 | } 76 | 77 | #[parser] 78 | fn request_line<'a>(input: &mut Input<'a>) -> Result<'a, RequestLine<'a>> { 79 | RequestLine { 80 | method: method()?, 81 | uri: (eat(b' ')?, string()?).1, 82 | version: (eat(b' ')?, version()?).1 83 | } 84 | } 85 | 86 | #[inline(always)] 87 | fn is_ascii_line_byte(&byte: &u8) -> bool { 88 | byte.is_ascii() && byte != b'\r' && byte != b'\n' && !is_whitespace(&byte) 89 | } 90 | 91 | #[inline(always)] 92 | fn is_line_byte(&byte: &u8) -> bool { 93 | byte != b'\r' && byte != b'\n' 94 | } 95 | 96 | #[inline(always)] 97 | fn is_whitespace(&byte: &u8) -> bool { 98 | byte == b' ' || byte == b'\t' 99 | } 100 | 101 | #[parser] 102 | fn line_end<'a>(input: &mut Input<'a>) -> Result<'a, ()> { 103 | eat_slice(b"\r\n")?; 104 | } 105 | 106 | // This is very, very liberal. 
107 | #[parser] 108 | fn header<'a>(input: &mut Input<'a>) -> Result<'a, Header<'a>> { 109 | let name = string_until(Some(b':'))?; 110 | (eat(b':')?, skip_while(is_whitespace)?); 111 | let value = take_some_while(is_line_byte)?.values; 112 | line_end()?; 113 | 114 | Header { name, value } 115 | } 116 | 117 | #[parser] 118 | fn request<'a>(input: &mut Input<'a>) -> Result<'a, Request<'a>> { 119 | Request { 120 | request_line: (request_line()?, line_end()?).0, 121 | headers: (try_collect(header)?, line_end()?).0 122 | } 123 | } 124 | 125 | pub fn main() { 126 | let request_str = &[ 127 | "GET http://localhost:8080 HTTP/1.1", 128 | "Content-Type: application/json", 129 | "Accept: application/json", 130 | "X-Real-IP: 12.12.12.12", 131 | "", "this is the body" 132 | ].join("\r\n"); 133 | 134 | let mut cursor = Input::new(request_str.as_bytes()); 135 | let result = request(&mut cursor); 136 | match result { 137 | Ok(request) => println!("Parsed: {:?}", request), 138 | Err(e) => eprint!("Error: {}", e) 139 | } 140 | 141 | println!("Cursor: {:?}", std::str::from_utf8(cursor.items)); 142 | } 143 | 144 | #[cfg(test)] 145 | mod test { 146 | use super::*; 147 | use pear::macros::parse; 148 | 149 | macro_rules! assert_parse_eq { 150 | ($name:ident, $($from:expr => $to:expr),+) => ( 151 | $( 152 | match parse!($name: Input::new($from)) { 153 | Ok(output) => assert_eq!(output, $to), 154 | Err(e) => { 155 | println!("{:?} failed to parse as '{}'!", $from, stringify!($name)); 156 | panic!("Error: {}", e); 157 | } 158 | } 159 | )+ 160 | ); 161 | 162 | ($name:ident, $($from:expr => $to:expr),+,) => (assert_parse_eq!($name, $($from => $to),+)) 163 | } 164 | 165 | macro_rules! 
assert_no_parse { 166 | ($name:ident, $($val:expr),+) => ($( 167 | if let Ok(v) = parse!($name: Input::new($val)) { 168 | panic!("{:?} unexpectedly parsed as '{}' {:?}!", $val, stringify!($name), v); 169 | } 170 | )+); 171 | 172 | ($name:ident, $($val:expr),+,) => (assert_no_parse!($name, $($val),+)) 173 | } 174 | 175 | #[test] 176 | fn test_http_version() { 177 | assert_parse_eq!(version, 178 | b"HTTP/1.1" as &[u8] => (1, 1), 179 | b"HTTP/1.0" as &[u8] => (1, 0) 180 | ); 181 | 182 | assert_no_parse!(version, 183 | b"HTTP/2.1" as &[u8], 184 | b"HTTP/1." as &[u8], 185 | b"HTTP/1" as &[u8], 186 | b"http/1.1" as &[u8], 187 | b"HTTP1.1" as &[u8], 188 | b".1" as &[u8], 189 | b"" as &[u8], 190 | ); 191 | } 192 | 193 | #[test] 194 | fn test_method() { 195 | assert_parse_eq!(method, 196 | b"GET" as &[u8] => Method::Get, 197 | b"PUT" as &[u8] => Method::Put, 198 | b"POST" as &[u8] => Method::Post, 199 | b"DELETE" as &[u8] => Method::Delete, 200 | b"HEAD" as &[u8] => Method::Head, 201 | b"OPTIONS" as &[u8] => Method::Options, 202 | b"TRACE" as &[u8] => Method::Trace, 203 | b"CONNECT" as &[u8] => Method::Connect, 204 | b"PATCH" as &[u8] => Method::Patch, 205 | ); 206 | 207 | assert_no_parse!(method, 208 | b"get" as &[u8], 209 | b"GeT" as &[u8], 210 | b"" as &[u8], 211 | b"GERT" as &[u8], 212 | ); 213 | } 214 | 215 | #[test] 216 | fn test_header() { 217 | assert_parse_eq!(header, 218 | b"Content-Type: application/json\r\n" as &[u8] => Header { 219 | name: "Content-Type", 220 | value: b"application/json" 221 | }, 222 | b"Content-Type:application/json\r\n" as &[u8] => Header { 223 | name: "Content-Type", 224 | value: b"application/json" 225 | }, 226 | b"Content-Type: application/json\r\n" as &[u8] => Header { 227 | name: "Content-Type", 228 | value: b"application/json" 229 | }, 230 | b"a:b\r\n" as &[u8] => Header { 231 | name: "a", 232 | value: b"b" 233 | }, 234 | ); 235 | 236 | assert_no_parse!(header, 237 | b"Content-Type application/json\r\n" as &[u8], 238 | b"Content-Type: 
application/json" as &[u8], 239 | b": application/json\r\n" as &[u8], 240 | b":\r\n" as &[u8], 241 | ); 242 | } 243 | 244 | #[test] 245 | fn test_request() { 246 | assert_parse_eq!(header, 247 | b"Content-Type: application/json\r\n" as &[u8] => Header { 248 | name: "Content-Type", 249 | value: b"application/json" 250 | }, 251 | b"Content-Type:application/json\r\n" as &[u8] => Header { 252 | name: "Content-Type", 253 | value: b"application/json" 254 | }, 255 | b"Content-Type: application/json\r\n" as &[u8] => Header { 256 | name: "Content-Type", 257 | value: b"application/json" 258 | }, 259 | b"a:b\r\n" as &[u8] => Header { 260 | name: "a", 261 | value: b"b" 262 | }, 263 | ); 264 | 265 | assert_no_parse!(header, 266 | b"Content-Type application/json\r\n" as &[u8], 267 | b"Content-Type: application/json" as &[u8], 268 | b": application/json\r\n" as &[u8], 269 | b":\r\n" as &[u8], 270 | ); 271 | } 272 | } 273 | -------------------------------------------------------------------------------- /lib/src/input/cursor.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use crate::input::{Input, Show, Rewind, ParserInfo, Length}; 4 | 5 | #[derive(Debug)] 6 | pub struct Cursor { 7 | pub start: T, 8 | pub items: T, 9 | } 10 | 11 | impl From for Cursor { 12 | fn from(items: T) -> Self { 13 | Cursor { start: items, items } 14 | } 15 | } 16 | 17 | #[derive(Debug, PartialEq, Eq, Copy, Clone)] 18 | pub struct Extent { 19 | pub start: usize, 20 | pub end: usize, 21 | pub values: T, 22 | } 23 | 24 | impl From for Extent { 25 | fn from(values: T) -> Self { 26 | Extent { start: 0, end: values.len(), values } 27 | } 28 | } 29 | 30 | impl std::ops::Deref for Extent { 31 | type Target = T; 32 | 33 | fn deref(&self) -> &Self::Target { 34 | &self.values 35 | } 36 | } 37 | 38 | impl PartialEq for Extent { 39 | fn eq(&self, other: &T) -> bool { 40 | &self.values == other 41 | } 42 | } 43 | 44 | impl PartialEq> for &str { 45 | 
fn eq(&self, other: &Extent<&str>) -> bool { 46 | other == self 47 | } 48 | } 49 | 50 | impl PartialEq> for &[T] { 51 | fn eq(&self, other: &Extent<&[T]>) -> bool { 52 | other == self 53 | } 54 | } 55 | 56 | macro_rules! impl_for_slice_len { 57 | ($($n:expr),*) => ($( 58 | impl PartialEq> for &[T; $n] { 59 | fn eq(&self, other: &Extent<&[T]>) -> bool { 60 | &other.values[..] == *self 61 | } 62 | } 63 | )*) 64 | } 65 | 66 | impl_for_slice_len!( 67 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 68 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 69 | ); 70 | 71 | impl Length for Extent { 72 | fn len(&self) -> usize { 73 | self.end - self.start 74 | } 75 | } 76 | 77 | impl Show for Extent { 78 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 79 | write!(f, "{}..{} {}", self.start, self.end, &self.values as &dyn Show) 80 | } 81 | } 82 | 83 | impl Extent<&T> { 84 | pub fn into_owned(self) -> Extent { 85 | Extent { 86 | start: self.start, 87 | end: self.end, 88 | values: self.values.to_owned(), 89 | } 90 | } 91 | } 92 | 93 | pub trait Indexable: Sized { 94 | type One: Clone; 95 | type Iter: Iterator; 96 | 97 | fn head(&self) -> Option; 98 | fn length_of(token: Self::One) -> usize; 99 | fn slice>(&self, range: R) -> Option; 100 | fn iter(&self) -> Self::Iter; 101 | } 102 | 103 | use std::ops::{Bound, RangeBounds, Range}; 104 | 105 | fn abs>(range: R, start: usize, end: usize) -> Range { 106 | let start = match range.start_bound() { 107 | Bound::Unbounded => start, 108 | Bound::Included(&n) => n, 109 | Bound::Excluded(&n) => n.saturating_add(1), 110 | }; 111 | 112 | let end = match range.end_bound() { 113 | Bound::Unbounded => end, 114 | Bound::Included(&n) => n.saturating_add(1), 115 | Bound::Excluded(&n) => n, 116 | }; 117 | 118 | Range { start, end } 119 | } 120 | 121 | impl<'a> Indexable for &'a str { 122 | type One = char; 123 | type Iter = std::str::Chars<'a>; 124 | 125 | fn head(&self) -> Option { 126 | 
self.chars().next() 127 | } 128 | 129 | fn length_of(token: Self::One) -> usize { 130 | token.len_utf8() 131 | } 132 | 133 | fn slice>(&self, range: R) -> Option { 134 | self.get(abs(range, 0, self.len())) 135 | } 136 | 137 | fn iter(&self) -> Self::Iter { 138 | self.chars() 139 | } 140 | } 141 | 142 | impl<'a, T: Clone> Indexable for &'a [T] { 143 | type One = T; 144 | type Iter = std::iter::Cloned>; 145 | 146 | fn head(&self) -> Option { 147 | self.first().cloned() 148 | } 149 | 150 | fn length_of(_: Self::One) -> usize { 151 | 1 152 | } 153 | 154 | fn slice>(&self, range: R) -> Option { 155 | self.get(abs(range, 0, self.len())) 156 | } 157 | 158 | fn iter(&self) -> Self::Iter { 159 | (*self as &[T]).iter().cloned() 160 | } 161 | } 162 | 163 | impl Cursor { 164 | fn offset(&self) -> usize { 165 | self.start.len() - self.items.len() 166 | } 167 | } 168 | 169 | impl Cursor { 170 | /// Returns an `Extent` that spans from `a` to `b` if `a..b` is in bounds. 171 | pub fn span(&self, a: Extent, b: Extent) -> Option> { 172 | let start = std::cmp::min(a.start, b.start); 173 | let end = std::cmp::max(a.end, b.end); 174 | let values = self.start.slice(start..end)?; 175 | Some(Extent { start, end, values }) 176 | } 177 | } 178 | 179 | impl Input for Cursor 180 | where T::One: Show + PartialEq 181 | { 182 | type Token = T::One; 183 | type Slice = Extent; 184 | type Many = Extent; 185 | 186 | type Marker = usize; 187 | type Context = Extent; 188 | 189 | /// Returns a copy of the current token, if there is one. 190 | fn token(&mut self) -> Option { 191 | self.items.head() 192 | } 193 | 194 | /// Returns a copy of the current slice of size `n`, if there is one. 195 | fn slice(&mut self, n: usize) -> Option { 196 | Some(Extent { 197 | start: self.offset(), 198 | end: self.offset() + n, 199 | values: self.items.slice(..n)? 200 | }) 201 | } 202 | 203 | /// Checks if the current token fulfills `cond`. 
204 | fn peek(&mut self, mut cond: F) -> bool 205 | where F: FnMut(&Self::Token) -> bool 206 | { 207 | self.token().map(|t| cond(&t)).unwrap_or(false) 208 | } 209 | 210 | /// Checks if the current slice of size `n` (if any) fulfills `cond`. 211 | fn peek_slice(&mut self, n: usize, mut cond: F) -> bool 212 | where F: FnMut(&Self::Slice) -> bool 213 | { 214 | self.slice(n).map(|s| cond(&s)).unwrap_or(false) 215 | } 216 | 217 | /// Checks if the current token fulfills `cond`. If so, the token is 218 | /// consumed and returned. Otherwise, returns `None`. 219 | fn eat(&mut self, mut cond: F) -> Option 220 | where F: FnMut(&Self::Token) -> bool 221 | { 222 | let token = self.token()?; 223 | if cond(&token) { 224 | self.items = self.items.slice(T::length_of(token.clone())..).unwrap(); 225 | Some(token) 226 | } else { 227 | None 228 | } 229 | } 230 | 231 | /// Checks if the current slice of size `n` (if any) fulfills `cond`. If so, 232 | /// the slice is consumed and returned. Otherwise, returns `None`. 233 | fn eat_slice(&mut self, n: usize, mut cond: F) -> Option 234 | where F: FnMut(&Self::Slice) -> bool 235 | { 236 | let slice = self.slice(n)?; 237 | if cond(&slice) { 238 | self.items = self.items.slice(n..).unwrap(); 239 | Some(slice) 240 | } else { 241 | None 242 | } 243 | } 244 | 245 | /// Takes tokens while `cond` returns true, collecting them into a 246 | /// `Self::Many` and returning it. 247 | fn take(&mut self, cond: F) -> Self::Many 248 | where F: FnMut(&Self::Token) -> bool 249 | { 250 | let start = self.offset(); 251 | let matches: usize = self.items.iter() 252 | .take_while(cond) 253 | .map(T::length_of) 254 | .sum(); 255 | 256 | let values = self.items.slice(..matches).unwrap(); 257 | self.items = self.items.slice(matches..).unwrap(); 258 | Extent { start, end: self.offset(), values } 259 | } 260 | 261 | /// Skips tokens while `cond` returns true. Returns the number of skipped 262 | /// tokens. 
263 | fn skip(&mut self, cond: F) -> usize 264 | where F: FnMut(&Self::Token) -> bool 265 | { 266 | self.take(cond).len() 267 | } 268 | 269 | /// Returns `true` if there are at least `n` tokens remaining. 270 | fn has(&mut self, n: usize) -> bool { 271 | self.items.len() >= n 272 | } 273 | 274 | fn mark(&mut self, _: &ParserInfo) -> Self::Marker { 275 | self.offset() 276 | } 277 | 278 | /// Returns the context spanning from `mark` to the current input position: an 279 | /// `Extent` over `start[mark..offset]`. Panics if that range cannot be sliced 280 | /// out of `start`. 281 | fn context(&mut self, mark: Self::Marker) -> Self::Context { 282 | let end = self.offset(); 283 | let values = self.start.slice(mark..end).unwrap(); 284 | Extent { start: mark, end, values } 285 | } 286 | } 287 | 288 | impl Rewind for Cursor 289 | where T::One: Show + PartialEq 290 | { 291 | fn rewind_to(&mut self, marker: Self::Marker) { 292 | self.items = self.start.slice(marker..).unwrap(); 293 | } 294 | } 295 | -------------------------------------------------------------------------------- /codegen/src/parser.rs: -------------------------------------------------------------------------------- 1 | use syn::spanned::Spanned; 2 | use syn::{punctuated::Punctuated, Token}; 3 | use syn::parse::{Parse as SynParse, ParseStream as SynParseStream}; 4 | use proc_macro2::{Span, Delimiter}; 5 | use proc_macro2_diagnostics::{Diagnostic, SpanDiagnosticExt}; 6 | 7 | pub type PResult = Result; 8 | 9 | pub trait Parse: Sized { 10 | fn parse(input: syn::parse::ParseStream) -> PResult; 11 | 12 | fn syn_parse(input: syn::parse::ParseStream) -> syn::parse::Result { 13 | Self::parse(input).map_err(|e| e.into()) 14 | } 15 | } 16 | 17 | trait ParseStreamExt { 18 | fn parse_group(self, delimiter: Delimiter, parser: F) -> syn::parse::Result 19 | where F: FnOnce(SynParseStream) -> syn::parse::Result; 20 | 21 | fn try_parse(self, parser: F) -> syn::parse::Result 22 | where F: Fn(SynParseStream) -> syn::parse::Result; 23 | } 
24 | 25 | impl<'a> ParseStreamExt for SynParseStream<'a> { 26 | fn parse_group(self, delimiter: Delimiter, parser: F) -> syn::parse::Result 27 | where F: FnOnce(SynParseStream) -> syn::parse::Result 28 | { 29 | let content; 30 | match delimiter { 31 | Delimiter::Brace => { syn::braced!(content in self); }, 32 | Delimiter::Bracket => { syn::bracketed!(content in self); }, 33 | Delimiter::Parenthesis => { syn::parenthesized!(content in self); }, 34 | Delimiter::None => return parser(self), 35 | } 36 | 37 | parser(&content) 38 | } 39 | 40 | fn try_parse(self, parser: F) -> syn::parse::Result 41 | where F: Fn(SynParseStream) -> syn::parse::Result 42 | { 43 | let input = self.fork(); 44 | parser(&input)?; 45 | parser(self) 46 | } 47 | } 48 | 49 | #[derive(Debug)] 50 | pub struct CallPattern { 51 | pub name: Option, 52 | pub at: Option, 53 | pub expr: syn::ExprCall, 54 | } 55 | 56 | impl syn::parse::Parse for CallPattern { 57 | fn parse(input: syn::parse::ParseStream) -> syn::parse::Result { 58 | Self::syn_parse(input) 59 | } 60 | } 61 | 62 | impl quote::ToTokens for CallPattern { 63 | fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { 64 | let (expr, at) = (&self.expr, &self.at); 65 | match self.name { 66 | Some(ref name) => quote!(#name #at #expr).to_tokens(tokens), 67 | None => expr.to_tokens(tokens) 68 | } 69 | } 70 | } 71 | 72 | impl quote::ToTokens for Guard { 73 | fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { 74 | self.expr.to_tokens(tokens) 75 | } 76 | } 77 | 78 | type CallPatterns = Punctuated; 79 | 80 | #[derive(Debug)] 81 | pub enum Pattern { 82 | Wild(Token![_]), 83 | Calls(CallPatterns), 84 | } 85 | 86 | #[derive(Debug)] 87 | pub struct Guard { 88 | pub _if: Token![if], 89 | pub expr: syn::Expr, 90 | } 91 | 92 | #[derive(Debug)] 93 | pub struct Case { 94 | pub pattern: Pattern, 95 | pub expr: syn::Expr, 96 | pub guard: Option, 97 | pub span: Span, 98 | } 99 | 100 | #[derive(Debug)] 101 | pub struct Switch { 102 | pub context: 
Context, 103 | pub cases: Punctuated 104 | } 105 | 106 | // FIXME(syn): Something like this should be in `syn` 107 | fn parse_expr_call(input: SynParseStream) -> syn::parse::Result { 108 | let path: syn::ExprPath = input.parse()?; 109 | let paren_span = input.cursor().span(); 110 | let args = input.parse_group(Delimiter::Parenthesis, |i| { 111 | i.parse_terminated(syn::Expr::parse, Token![,]) 112 | })?; 113 | 114 | Ok(syn::ExprCall { 115 | attrs: vec![], 116 | func: Box::new(syn::Expr::Path(path)), 117 | paren_token: syn::token::Paren(paren_span), 118 | args 119 | }) 120 | } 121 | 122 | impl Parse for CallPattern { 123 | fn parse(input: SynParseStream) -> PResult { 124 | let name_at = input.try_parse(|input| { 125 | let ident: syn::Ident = input.parse()?; 126 | let at = input.parse::()?; 127 | Ok((ident, at)) 128 | }).ok(); 129 | 130 | let (name, at) = match name_at { 131 | Some((name, at)) => (Some(name), Some(at)), 132 | None => (None, None) 133 | }; 134 | 135 | Ok(CallPattern { name, at, expr: parse_expr_call(input)? }) 136 | } 137 | } 138 | 139 | impl Parse for Guard { 140 | fn parse(input: SynParseStream) -> PResult { 141 | Ok(Guard { 142 | _if: input.parse()?, 143 | expr: input.parse()?, 144 | }) 145 | } 146 | } 147 | 148 | impl Parse for Pattern { 149 | fn parse(input: SynParseStream) -> PResult { 150 | type CallPatterns = Punctuated; 151 | 152 | // Parse the pattern. 153 | let pattern = match input.parse::() { 154 | Ok(wild) => Pattern::Wild(wild), 155 | Err(_) => Pattern::Calls(input.call(CallPatterns::parse_separated_nonempty)?) 156 | }; 157 | 158 | // Validate the pattern. 
159 | if let Pattern::Calls(ref calls) = pattern { 160 | let first_name = calls.first().and_then(|call| call.name.clone()); 161 | for call in calls.iter() { 162 | if first_name != call.name { 163 | let mut err = if let Some(ref ident) = call.name { 164 | ident.span() 165 | .error("captured name differs from declaration") 166 | } else { 167 | call.expr.span() 168 | .error("expected capture name due to previous declaration") 169 | }; 170 | 171 | err = match first_name { 172 | Some(p) => err.span_note(p.span(), "declared here"), 173 | None => err 174 | }; 175 | 176 | return Err(err); 177 | } 178 | } 179 | } 180 | 181 | Ok(pattern) 182 | } 183 | } 184 | 185 | impl Parse for Case { 186 | fn parse(input: SynParseStream) -> PResult { 187 | let case_span_start = input.cursor().span(); 188 | let pattern = Pattern::parse(input)?; 189 | let guard = match input.peek(Token![if]) { 190 | true => Some(Guard::parse(input)?), 191 | false => None, 192 | }; 193 | 194 | input.parse::]>()?; 195 | let expr: syn::Expr = input.parse()?; 196 | let span = case_span_start 197 | .join(input.cursor().span()) 198 | .unwrap_or(case_span_start); 199 | 200 | Ok(Case { pattern, expr, guard, span, }) 201 | } 202 | } 203 | 204 | #[derive(Debug)] 205 | pub struct Context { 206 | pub info: syn::Ident, 207 | pub input: syn::Expr, 208 | pub marker: syn::Expr, 209 | pub output: syn::Type, 210 | } 211 | 212 | impl Parse for Context { 213 | fn parse(stream: SynParseStream) -> PResult { 214 | let (info, input, marker, output) = stream.parse_group(Delimiter::Bracket, |inner| { 215 | let info: syn::Ident = inner.parse()?; 216 | inner.parse::()?; 217 | let input: syn::Expr = inner.parse()?; 218 | inner.parse::()?; 219 | let marker: syn::Expr = inner.parse()?; 220 | inner.parse::()?; 221 | let output: syn::Type = inner.parse()?; 222 | Ok((info, input, marker, output)) 223 | })?; 224 | 225 | Ok(Context { info, input, marker, output }) 226 | } 227 | } 228 | 229 | impl Parse for Switch { 230 | fn parse(stream: 
SynParseStream) -> PResult { 231 | let context = stream.try_parse(Context::syn_parse)?; 232 | let cases = stream.parse_terminated(Case::syn_parse, Token![,])?; 233 | if !stream.is_empty() { 234 | Err(stream.error("trailing characters; expected eof"))?; 235 | } 236 | 237 | if cases.is_empty() { 238 | Err(stream.error("switch cannot be empty"))?; 239 | } 240 | 241 | for case in cases.iter().take(cases.len() - 1) { 242 | if let Pattern::Wild(..) = case.pattern { 243 | if case.guard.is_none() { 244 | Err(case.span.error("unguarded `_` can only appear as the last case"))?; 245 | } 246 | } 247 | } 248 | 249 | Ok(Switch { context, cases }) 250 | } 251 | } 252 | 253 | #[derive(Debug, Clone)] 254 | pub struct AttrArgs { 255 | pub raw: Option, 256 | pub rewind: Option, 257 | pub peek: Option, 258 | } 259 | 260 | impl Parse for AttrArgs { 261 | fn parse(input: SynParseStream) -> PResult { 262 | let args = input.call(>::parse_terminated)?; 263 | let (mut raw, mut rewind, mut peek) = Default::default(); 264 | for case in args.iter() { 265 | if case == "raw" { 266 | raw = Some(case.span()); 267 | } else if case == "rewind" { 268 | rewind = Some(case.span()); 269 | } else if case == "peek" { 270 | peek = Some(case.span()); 271 | } else { 272 | return Err(case.span() 273 | .error(format!("unknown attribute argument `{}`", case)) 274 | .help("supported arguments are: `rewind`, `peek`")); 275 | } 276 | } 277 | 278 | Ok(AttrArgs { raw, rewind, peek }) 279 | } 280 | } 281 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2017 Sergio Benitez 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /examples/uri/src/indexed.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | use std::ops::{Index, Range}; 3 | use std::fmt::{self, Debug}; 4 | 5 | use pear::{Input, Slice, Position, Length}; 6 | 7 | pub trait AsPtr { 8 | fn as_ptr(&self) -> *const u8; 9 | // unsafe fn from_raw<'a>(raw: *const u8, length: usize) -> &T; 10 | } 11 | 12 | impl AsPtr for str { 13 | fn as_ptr(&self) -> *const u8 { 14 | str::as_ptr(self) 15 | } 16 | } 17 | 18 | impl AsPtr for [u8] { 19 | fn as_ptr(&self) -> *const u8 { 20 | <[u8]>::as_ptr(self) 21 | } 22 | } 23 | 24 | 25 | #[derive(PartialEq)] 26 | pub enum Indexed<'a, T: ?Sized + ToOwned + 'a> { 27 | Indexed(usize, usize), 28 | Concrete(Cow<'a, T>) 29 | } 30 | 31 | impl<'a, T: ?Sized + ToOwned + 'a, C: Into>> From for Indexed<'a, T> { 32 | #[inline(always)] 33 | fn from(value: C) -> Indexed<'a, T> { 34 | Indexed::Concrete(value.into()) 35 | } 36 | } 37 | 38 | impl<'a, T: ?Sized + ToOwned + 'a> Indexed<'a, T> { 39 | #[inline(always)] 40 | pub unsafe fn coerce(self) 
-> Indexed<'a, U> { 41 | match self { 42 | Indexed::Indexed(a, b) => Indexed::Indexed(a, b), 43 | _ => panic!("cannot convert indexed T to U unless indexed") 44 | } 45 | } 46 | } 47 | 48 | use std::ops::Add; 49 | 50 | impl<'a, T: ?Sized + ToOwned + 'a> Add for Indexed<'a, T> { 51 | type Output = Indexed<'a, T>; 52 | 53 | fn add(self, other: Indexed<'a, T>) -> Indexed<'a, T> { 54 | match self { 55 | Indexed::Indexed(a, b) => match other { 56 | Indexed::Indexed(c, d) if b == c && a < d => Indexed::Indexed(a, d), 57 | _ => panic!("+ requires indexed") 58 | } 59 | _ => panic!("+ requires indexed") 60 | } 61 | } 62 | } 63 | 64 | impl<'a, T: ?Sized + ToOwned + 'a> Indexed<'a, T> 65 | where T: Length + AsPtr + Index, Output = T> 66 | { 67 | // Returns `None` if `needle` is not a substring of `haystack`. 68 | pub fn checked_from(needle: &T, haystack: &T) -> Option> { 69 | let haystack_start = haystack.as_ptr() as usize; 70 | let needle_start = needle.as_ptr() as usize; 71 | 72 | if needle_start < haystack_start { 73 | return None; 74 | } 75 | 76 | if (needle_start + needle.len()) > (haystack_start + haystack.len()) { 77 | return None; 78 | } 79 | 80 | let start = needle_start - haystack_start; 81 | let end = start + needle.len(); 82 | Some(Indexed::Indexed(start, end)) 83 | } 84 | 85 | // Caller must ensure that `needle` is a substring of `haystack`. 86 | pub unsafe fn unchecked_from(needle: &T, haystack: &T) -> Indexed<'a, T> { 87 | let haystack_start = haystack.as_ptr() as usize; 88 | let needle_start = needle.as_ptr() as usize; 89 | 90 | let start = needle_start - haystack_start; 91 | let end = start + needle.len(); 92 | Indexed::Indexed(start, end) 93 | } 94 | 95 | /// Whether this string is derived from indexes or not. 96 | pub fn is_indexed(&self) -> bool { 97 | match *self { 98 | Indexed::Indexed(..) => true, 99 | Indexed::Concrete(..) => false, 100 | } 101 | } 102 | 103 | /// Whether this string is empty, i.e., has a length of zero.
104 | pub fn is_empty(&self) -> bool { 105 | self.len() == 0 106 | } 107 | 108 | /// Retrieves the string `self` corresponds to. If `self` is derived from 109 | /// indexes, the corresponding subslice of `source` is returned. Otherwise, 110 | /// the concrete string is returned. 111 | /// 112 | /// # Panics 113 | /// 114 | /// Panics if `self` is an indexed string and `source` is `None`. 115 | // pub fn to_source(&self, source: Option<&'a T>) -> &T { 116 | pub fn to_source<'s>(&'s self, source: &'s Option>) -> &'s T { 117 | use std::borrow::Borrow; 118 | if self.is_indexed() && source.is_none() { 119 | panic!("Cannot convert indexed str to str without base string!") 120 | } 121 | 122 | match *self { 123 | Indexed::Indexed(i, j) => &source.as_ref().unwrap()[i..j], 124 | Indexed::Concrete(ref mstr) => mstr.as_ref(), 125 | } 126 | } 127 | 128 | } 129 | 130 | impl<'a, T: ToOwned + ?Sized + 'a> Clone for Indexed<'a, T> { 131 | fn clone(&self) -> Self { 132 | match *self { 133 | Indexed::Indexed(a, b) => Indexed::Indexed(a, b), 134 | Indexed::Concrete(ref cow) => Indexed::Concrete(cow.clone()) 135 | } 136 | } 137 | } 138 | 139 | impl<'a, T: ?Sized + 'a> Debug for Indexed<'a, T> 140 | where T: ToOwned + Debug, T::Owned: Debug 141 | { 142 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 143 | match *self { 144 | Indexed::Indexed(a, b) => fmt::Debug::fmt(&(a, b), f), 145 | Indexed::Concrete(ref cow) => fmt::Debug::fmt(cow, f), 146 | } 147 | } 148 | } 149 | 150 | impl<'a, T: ?Sized + Length + ToOwned + 'a> Length for Indexed<'a, T> { 151 | #[inline(always)] 152 | fn len(&self) -> usize { 153 | match *self { 154 | Indexed::Indexed(a, b) => (b - a) as usize, 155 | Indexed::Concrete(ref cow) => cow.len() 156 | } 157 | } 158 | } 159 | 160 | #[derive(Debug)] 161 | pub struct IndexedInput<'a, T: ?Sized + 'a> { 162 | source: &'a T, 163 | current: &'a T 164 | } 165 | 166 | impl<'a, T: ?Sized + 'a> IndexedInput<'a, T> { 167 | pub fn source(&self) -> &T { 168 | self.source 169 |
} 170 | } 171 | 172 | impl<'a, T: ToOwned + ?Sized + 'a> IndexedInput<'a, T> { 173 | #[inline(always)] 174 | pub fn cow_source(&self) -> Cow<'a, T> { 175 | Cow::Borrowed(self.source) 176 | } 177 | } 178 | 179 | impl<'a> IndexedInput<'a, [u8]> { 180 | pub fn backtrack(&mut self, n: usize) -> ::pear::Result<(), Self> { 181 | let source_addr = self.source.as_ptr() as usize; 182 | let current_addr = self.current.as_ptr() as usize; 183 | if current_addr > n && (current_addr - n) >= source_addr { 184 | let size = self.current.len() + n; 185 | let addr = (current_addr - n) as *const u8; 186 | self.current = unsafe { ::std::slice::from_raw_parts(addr, size) }; 187 | Ok(()) 188 | } else { 189 | let diag = format!("({}, {:x} in {:x})", n, current_addr, source_addr); 190 | Err(parse_error!([backtrack; self] "internal error: {}", diag)) 191 | } 192 | } 193 | 194 | pub fn len(&self) -> usize { 195 | self.source.len() 196 | } 197 | } 198 | 199 | macro_rules! impl_indexed_input { 200 | ($T:ty, token = $token:ty) => ( 201 | impl<'a> From<&'a $T> for IndexedInput<'a, $T> { 202 | #[inline(always)] 203 | fn from(source: &'a $T) -> Self { 204 | IndexedInput { source: source, current: source } 205 | } 206 | } 207 | 208 | impl<'a, 'b: 'a> Slice> for &'b $T { 209 | fn eq_slice(&self, other: &Indexed<'a, $T>) -> bool { 210 | self == &other.to_source(&None) 211 | } 212 | 213 | fn into_slice(self) -> Indexed<'a, $T> { 214 | Indexed::Concrete(self.into()) 215 | } 216 | } 217 | 218 | impl<'a> Input for IndexedInput<'a, $T> { 219 | type Token = $token; 220 | type Slice = Indexed<'a, $T>; 221 | type Many = Indexed<'a, $T>; 222 | type Context = Context; 223 | 224 | /// Returns a copy of the current token, if there is one. 225 | fn token(&mut self) -> Option { 226 | self.current.token() 227 | } 228 | 229 | /// Returns a copy of the current slice of size `n`, if there is one. 
230 | fn slice(&mut self, n: usize) -> Option { 231 | self.current.slice(n) 232 | .map(|s| unsafe { Indexed::unchecked_from(s, self.source) }) 233 | } 234 | 235 | /// Checks if the current token fulfills `cond`. 236 | fn peek(&mut self, cond: F) -> bool 237 | where F: FnMut(&Self::Token) -> bool 238 | { 239 | self.current.peek(cond) 240 | } 241 | 242 | /// Checks if the current slice of size `n` (if any) fulfills `cond`. 243 | fn peek_slice(&mut self, n: usize, mut cond: F) -> bool 244 | where F: FnMut(&Self::Slice) -> bool 245 | { 246 | self.current.peek_slice(n, |&s| cond(&Indexed::Concrete(s.into()))) 247 | } 248 | 249 | /// Checks if the current token fulfills `cond`. If so, the token is 250 | /// consumed and returned. Otherwise, returns `None`. 251 | fn eat(&mut self, cond: F) -> Option 252 | where F: FnMut(&Self::Token) -> bool 253 | { 254 | self.current.eat(cond) 255 | } 256 | 257 | /// Checks if the current slice of size `n` (if any) fulfills `cond`. If so, 258 | /// the slice is consumed and returned. Otherwise, returns `None`. 259 | fn eat_slice(&mut self, n: usize, mut cond: F) -> Option 260 | where F: FnMut(&Self::Slice) -> bool 261 | { 262 | self.current 263 | .eat_slice(n, |&s| cond(&Indexed::Concrete(s.into()))) 264 | .map(|s| unsafe { Indexed::unchecked_from(s, self.source) }) 265 | } 266 | 267 | /// Takes tokens while `cond` returns true, collecting them into a 268 | /// `Self::Many` and returning it. 269 | fn take(&mut self, cond: F) -> Self::Many 270 | where F: FnMut(&Self::Token) -> bool 271 | { 272 | let many = self.current.take(cond); 273 | unsafe { Indexed::unchecked_from(many, self.source) } 274 | } 275 | 276 | /// Skips tokens while `cond` returns true. Returns the number of skipped 277 | /// tokens. 278 | fn skip(&mut self, cond: F) -> usize 279 | where F: FnMut(&Self::Token) -> bool 280 | { 281 | self.current.skip(cond) 282 | } 283 | 284 | /// Returns `true` if there are no more tokens.
285 | fn is_eof(&mut self) -> bool { 286 | self.current.is_eof() 287 | } 288 | 289 | #[inline(always)] 290 | fn context(&mut self) -> Option { 291 | let offset = self.source.len() - self.current.len(); 292 | let bytes: &[u8] = self.current.as_ref(); 293 | let string = String::from_utf8(bytes.into()).ok()?; 294 | Some(Context { offset, string }) 295 | } 296 | } 297 | ) 298 | } 299 | 300 | #[derive(Debug, PartialEq, Eq, Clone, Hash)] 301 | pub struct Context { 302 | pub offset: usize, 303 | pub string: String 304 | } 305 | 306 | impl ::std::fmt::Display for Context { 307 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 308 | const LIMIT: usize = 7; 309 | write!(f, "{}", self.offset)?; 310 | 311 | if self.string.len() > LIMIT { 312 | write!(f, " ({}..)", &self.string[..LIMIT]) 313 | } else if !self.string.is_empty() { 314 | write!(f, " ({})", &self.string) 315 | } else { 316 | Ok(()) 317 | } 318 | } 319 | } 320 | 321 | impl_indexed_input!([u8], token = u8); 322 | impl_indexed_input!(str, token = char); 323 | -------------------------------------------------------------------------------- /lib/src/parsers.rs: -------------------------------------------------------------------------------- 1 | use crate::error::Expected; 2 | use crate::input::{Input, Pear, Length, Token, Slice, Result, Rewind}; 3 | use crate::combinators::succeeds; 4 | use crate::macros::parser; 5 | 6 | // TODO: provide more basic parsers in pear 7 | // - [f32, f64, i8, i32, ..., bool, etc.]: one for all reasonable built-ins 8 | // - quoted_string(allowed): '"' allowed* '"' 9 | // - escaped string, with some way to configure escapes 10 | 11 | #[inline] 12 | fn expected_token( 13 | input: &mut Pear, 14 | token: Option 15 | ) -> Expected 16 | where T: Token, I: Input 17 | { 18 | // TODO: Have some way to test this is being called minimally. 
19 | if input.emit_error { 20 | Expected::token(token.as_ref(), input.token()) 21 | } else { 22 | Expected::Elided 23 | } 24 | } 25 | 26 | #[inline] 27 | fn expected_slice( 28 | input: &mut Pear, 29 | slice: S 30 | ) -> Expected 31 | where S: Slice, I: Input 32 | { 33 | // TODO: Have some way to test this is being called minimally. 34 | if input.emit_error { 35 | Expected::slice(Some(&slice), input.slice(slice.len())) 36 | } else { 37 | Expected::Elided 38 | } 39 | } 40 | 41 | /// Eats the current token if it is `token`. 42 | #[parser(raw)] 43 | pub fn eat(input: &mut Pear, token: T) -> Result 44 | where I: Input, T: Token 45 | { 46 | match input.eat(|t| &token == t) { 47 | Some(token) => Ok(token), 48 | None => return parse_error!(expected_token(input, Some(token))) 49 | } 50 | } 51 | 52 | /// Eats the current token if `cond` holds on it. 53 | #[parser(raw)] 54 | pub fn eat_if(input: &mut Pear, cond: F) -> Result 55 | where I: Input, F: FnMut(&I::Token) -> bool 56 | { 57 | match input.eat(cond) { 58 | Some(token) => Ok(token), 59 | None => parse_error!(expected_token::(input, None)) 60 | } 61 | } 62 | 63 | /// Eats the current token unconditionally. Fails if there are no tokens. 64 | #[parser(raw)] 65 | pub fn eat_any(input: &mut Pear) -> Result { 66 | match input.eat(|_| true) { 67 | Some(token) => Ok(token), 68 | None => return parse_error!(Expected::Token(None, None)) 69 | } 70 | } 71 | 72 | /// Skips the current token unconditionally. Fails if there are no tokens. 73 | #[parser(raw)] 74 | pub fn skip_any(input: &mut Pear) -> Result<(), I> { 75 | let mut skipped = false; 76 | input.skip(|_| { 77 | if !skipped { 78 | skipped = true; 79 | true 80 | } else { 81 | false 82 | } 83 | }); 84 | 85 | match skipped { 86 | true => Ok(()), 87 | false => return parse_error!(Expected::Token(None, None)), 88 | } 89 | } 90 | 91 | /// Eats the current slice if it is `slice`.
92 | #[parser(raw)] 93 | pub fn eat_slice(input: &mut Pear, slice: S) -> Result 94 | where I: Input, S: Slice 95 | { 96 | match input.eat_slice(slice.len(), |s| &slice == s) { 97 | Some(slice) => Ok(slice), 98 | None => return parse_error!(expected_slice(input, slice)) 99 | } 100 | } 101 | 102 | /// Succeeds if the current token is `token`. 103 | #[parser(raw)] 104 | pub fn peek(input: &mut Pear, token: T) -> Result<(), I> 105 | where I: Input, T: Token 106 | { 107 | match input.peek(|t| &token == t) { 108 | true => Ok(()), 109 | false => return parse_error!(expected_token(input, Some(token))) 110 | } 111 | } 112 | 113 | /// Succeeds if `cond` holds for the current token, returning that token. 114 | #[parser(raw)] 115 | pub fn peek_if_copy(input: &mut Pear, cond: F) -> Result 116 | where I: Input, F: FnMut(&I::Token) -> bool 117 | { 118 | match input.peek(cond) { 119 | true => Ok(input.token().unwrap()), 120 | false => parse_error!(expected_token::(input, None)) 121 | } 122 | } 123 | 124 | /// Succeeds if `cond` holds for the current token. 125 | #[parser(raw)] 126 | pub fn peek_if(input: &mut Pear, cond: F) -> Result<(), I> 127 | where I: Input, F: FnMut(&I::Token) -> bool 128 | { 129 | match input.peek(cond) { 130 | true => Ok(()), 131 | false => parse_error!(expected_token::(input, None)) 132 | } 133 | } 134 | 135 | /// Succeeds if the current slice is `slice`. 136 | #[parser(raw)] 137 | pub fn peek_slice(input: &mut Pear, slice: S) -> Result<(), I> 138 | where I: Input, S: Slice 139 | { 140 | match input.peek_slice(slice.len(), |s| &slice == s) { 141 | true => Ok(()), 142 | false => return parse_error!(expected_slice(input, slice)), 143 | } 144 | } 145 | 146 | /// Succeeds if `cond` holds for the current slice of length `len`.
147 | #[parser(raw)] 148 | pub fn peek_slice_if(input: &mut Pear, len: usize, cond: F) -> Result<(), I> 149 | where I: Input, F: FnMut(&I::Slice) -> bool 150 | { 151 | match input.peek_slice(len, cond) { 152 | true => Ok(()), 153 | false => return parse_error!(Expected::Slice(None, None)), 154 | } 155 | } 156 | 157 | /// Returns the current token. Fails if there is no current token. 158 | #[parser(raw)] 159 | pub fn peek_any(input: &mut Pear) -> Result { 160 | match input.token() { 161 | Some(peeked) => Ok(peeked), 162 | None => return parse_error!(Expected::Token(None, None)), 163 | } 164 | } 165 | 166 | /// Skips tokens while `cond` matches. 167 | #[parser(raw)] 168 | pub fn skip_while(input: &mut Pear, cond: F) -> Result 169 | where I: Input, F: FnMut(&I::Token) -> bool 170 | { 171 | Ok(input.skip(cond)) 172 | } 173 | 174 | /// Consumes tokens while `cond` matches and returns them. Succeeds even if no 175 | /// tokens match. 176 | #[parser(raw)] 177 | pub fn take_while(input: &mut Pear, cond: F) -> Result 178 | where I: Input, F: FnMut(&I::Token) -> bool 179 | { 180 | Ok(input.take(cond)) 181 | } 182 | 183 | /// Consumes no tokens. Always succeeds. Equivalent to `take_while(|_| false)`. 184 | #[parser(raw)] 185 | pub fn none(input: &mut Pear) -> Result { 186 | take_while(input, |_| false) 187 | } 188 | 189 | /// Consumes tokens while `cond` matches on a continuously growing slice 190 | /// beginning at a length of `0` and ending when `cond` fails. Returns the slice 191 | /// between `0` and `cond` failing. Errors if no such slice exists. 192 | #[parser(raw)] 193 | pub fn take_while_slice(input: &mut Pear, mut f: F) -> Result 194 | where I: Input, F: FnMut(&I::Slice) -> bool 195 | { 196 | let mut len = 0; 197 | let mut last_good = None; 198 | loop { 199 | match input.slice(len) { 200 | // There's a slice and it matches the condition, keep going!
201 | Some(ref slice) if f(slice) => { 202 | last_good = Some(len); 203 | len += 1; 204 | } 205 | // There's no slice of length `n`, but there _might_ be a slice of 206 | // length `n + 1`, so we need to keep trying. 207 | None if input.has(len + 1) => len += 1, 208 | // There are no more slices or the match failed. We're done. 209 | _ => break, 210 | } 211 | } 212 | 213 | match last_good { 214 | Some(len) => Ok(input.eat_slice(len, |_| true).expect("slice exists")), 215 | None => return parse_error!(Expected::Slice(None, None)), 216 | } 217 | } 218 | 219 | /// Consumes tokens while `cond` matches on a window of tokens of size `n` and 220 | /// returns all of the tokens prior to the first failure to match. For example, 221 | /// given a string of "aaab" and a size 2 window predicate of `window == "aa"`, 222 | /// the return value is `"aa"` as the first failure to match is at `"ab"`. 223 | /// 224 | /// Always succeeds. If no tokens match, the result will be empty. If there are 225 | /// fewer than `n` tokens, takes all tokens and returns them. 226 | #[parser(raw)] 227 | pub fn take_while_window(input: &mut Pear, n: usize, mut f: F) -> Result 228 | where I: Input + Rewind, F: FnMut(&I::Slice) -> bool 229 | { 230 | if !input.has(n) { 231 | return Ok(input.take(|_| true)); 232 | } 233 | 234 | let start = parse_current_marker!(); 235 | let mut tokens = 0; 236 | loop { 237 | // See `take_while_slice` for an explanation of these arms. 
238 | match input.slice(n) { 239 | Some(ref slice) if f(slice) => { 240 | if !succeeds(input, skip_any) { break; } 241 | tokens += 1; 242 | } 243 | None if input.has(n + 1) => { 244 | if !succeeds(input, skip_any) { break; } 245 | tokens += 1; 246 | } 247 | _ => break, 248 | } 249 | } 250 | 251 | input.rewind_to(start); 252 | Ok(input.take(|_| match tokens > 0 { 253 | true => { tokens -= 1; true }, 254 | false => false 255 | })) 256 | } 257 | 258 | /// Consumes tokens while `cond` matches on a window of tokens of size `n` and 259 | /// returns them. Fails if no tokens match, otherwise returns all of the 260 | /// tokens before the first failure. 261 | #[parser(raw)] 262 | pub fn take_some_while_window(input: &mut Pear, n: usize, f: F) -> Result 263 | where I: Input + Rewind, F: FnMut(&I::Slice) -> bool 264 | { 265 | let result = take_while_window(n, f)?; 266 | if result.is_empty() { 267 | return parse_error!(Expected::Slice(None, None)); 268 | } 269 | 270 | Ok(result) 271 | } 272 | 273 | /// Consumes tokens while `cond` matches on a window of tokens of size `n` and 274 | /// returns them. Fails if there aren't at least `n` tokens, otherwise 275 | /// always succeeds. If no tokens match, the result will be empty. 276 | #[parser(raw)] 277 | pub fn take_while_some_window(input: &mut Pear, n: usize, f: F) -> Result 278 | where I: Input + Rewind, F: FnMut(&I::Slice) -> bool 279 | { 280 | if !input.has(n) { 281 | return parse_error!(Expected::Slice(None, None)); 282 | } 283 | 284 | take_while_window(input, n, f) 285 | } 286 | 287 | /// Consumes tokens while `cond` matches on a window of tokens of size `n` and 288 | /// returns them. Fails if there aren't at least `n` tokens or if no tokens 289 | /// match, otherwise returns all of the tokens before the first failure.
290 | #[parser(raw)] 291 | pub fn take_some_while_some_window(input: &mut Pear, n: usize, f: F) -> Result 292 | where I: Input + Rewind, F: FnMut(&I::Slice) -> bool 293 | { 294 | if !input.has(n) { 295 | return parse_error!(Expected::Slice(None, None)); 296 | } 297 | 298 | take_some_while_window(input, n, f) 299 | } 300 | 301 | /// Consumes tokens until the upcoming window of `slice.len()` tokens equals 302 | /// `slice` and returns them. Succeeds even if no tokens are consumed. 303 | #[parser(raw)] 304 | pub fn take_until_slice(input: &mut Pear, slice: S) -> Result 305 | where I: Input + Rewind, S: Slice 306 | { 307 | take_while_window(input, slice.len(), |s| &slice != s) 308 | } 309 | 310 | /// Consumes tokens while `cond` matches and returns them. Succeeds only if at 311 | /// least one token matched `cond`. 312 | #[parser(raw)] 313 | pub fn take_some_while(input: &mut Pear, cond: F) -> Result 314 | where I: Input, F: FnMut(&I::Token) -> bool 315 | { 316 | let value = input.take(cond); 317 | if value.len() == 0 { 318 | return parse_error!(Expected::Token(None, None)); 319 | } 320 | 321 | Ok(value) 322 | } 323 | 324 | /// Consumes tokens while `cond` matches and the token is not `until`. Succeeds 325 | /// even if no tokens match. 326 | #[parser(raw)] 327 | pub fn take_while_until( 328 | input: &mut Pear, 329 | mut cond: F, 330 | until: T, 331 | ) -> Result 332 | where I: Input, 333 | T: Token, 334 | F: FnMut(&I::Token) -> bool 335 | { 336 | take_while(input, |t| cond(t) && (&until != t)) 337 | } 338 | 339 | /// Consumes tokens while `cond` matches and the token is not `until`. Succeeds 340 | /// only if at least one token matched `cond`. 341 | #[parser(raw)] 342 | pub fn take_some_while_until( 343 | input: &mut Pear, 344 | mut cond: F, 345 | until: T, 346 | ) -> Result 347 | where I: Input, 348 | T: Token, 349 | F: FnMut(&I::Token) -> bool 350 | { 351 | take_some_while(input, |t| cond(t) && (&until != t)) 352 | } 353 | 354 | /// Takes at most `n` tokens.
355 | #[parser(raw)] 356 | pub fn take_n(input: &mut Pear, n: usize) -> Result { 357 | let mut i = 0; 358 | Ok(input.take(|_| { let c = i < n; i += 1; c })) 359 | } 360 | 361 | /// Takes at most `n` tokens as long as `cond` holds. 362 | #[parser(raw)] 363 | pub fn take_n_while(input: &mut Pear, n: usize, mut cond: F) -> Result 364 | where I: Input, F: FnMut(&I::Token) -> bool 365 | { 366 | let mut i = 0; 367 | Ok(input.take(|c| { cond(c) && { let ok = i < n; i += 1; ok } })) 368 | } 369 | 370 | /// Take exactly `n` tokens, ensuring `cond` holds on all `n`. 371 | #[parser(raw)] 372 | pub fn take_n_if(input: &mut Pear, n: usize, mut cond: F) -> Result 373 | where I: Input, F: FnMut(&I::Token) -> bool 374 | { 375 | let mut i = 0; 376 | let v = input.take(|c| { cond(c) && { let ok = i < n; i += 1; ok } }); 377 | if v.len() != n { 378 | return parse_error!(Expected::Token(None, None)); 379 | } 380 | 381 | Ok(v) 382 | } 383 | 384 | /// Parse a token stream that starts with `start` and ends with `end`, returning 385 | /// all of the tokens in between. The tokens in between must match `cond`. 386 | /// Succeeds even if there are no tokens between `start` and `end`. 387 | #[parser(raw)] 388 | pub fn delimited( 389 | input: &mut Pear, 390 | start: T, 391 | mut cond: F, 392 | end: T, 393 | ) -> Result 394 | where I: Input, 395 | T: Token, 396 | F: FnMut(&I::Token) -> bool 397 | { 398 | eat(start)?; 399 | let output = input.take(|t| cond(t) && (&end != t)); 400 | eat(end)?; 401 | Ok(output) 402 | } 403 | 404 | /// Parse a token stream that starts with `start` and ends with `end`, returning 405 | /// all of the tokens in between. The tokens in between must match `cond`. There 406 | /// must be at least one token between `start` and `end`. 
407 | #[parser(raw)] 408 | pub fn delimited_some( 409 | input: &mut Pear, 410 | start: T, 411 | mut cond: F, 412 | end: T, 413 | ) -> Result 414 | where I: Input, 415 | T: Token, 416 | F: FnMut(&I::Token) -> bool 417 | { 418 | eat(start)?; 419 | let output = take_some_while(|t| cond(t) && (&end != t))?; 420 | eat(end)?; 421 | Ok(output) 422 | } 423 | 424 | /// Succeeds only if the input has reached EOF. 425 | #[parser(raw)] 426 | pub fn eof(input: &mut Pear) -> Result<(), I> { 427 | Ok(if input.has(1) { 428 | let next = input.token(); 429 | parse_error!(Expected::Eof(next))? 430 | }) 431 | } 432 | 433 | /// Like `delimited` but keeps the `start` and `end`. 434 | #[parser(raw)] 435 | pub fn enclosed( 436 | input: &mut Pear, 437 | start: T, 438 | mut cond: F, 439 | end: T, 440 | ) -> Result 441 | where I: Input, 442 | T: Token, 443 | F: FnMut(&I::Token) -> bool 444 | { 445 | enum State { 446 | Start, 447 | Inner, 448 | End 449 | } 450 | 451 | let mut state = State::Start; 452 | let value = input.take(|t| { 453 | match state { 454 | State::Start if &start == t => { state = State::Inner; true }, 455 | State::Start => false, 456 | State::Inner if cond(t) => true, 457 | State::Inner if &end == t => { state = State::End; true }, 458 | State::Inner => false, 459 | State::End => false, 460 | } 461 | }); 462 | 463 | match state { 464 | State::Start => parse_error!(expected_token(input, Some(start))), 465 | State::Inner => parse_error!(expected_token(input, Some(end))), 466 | State::End => Ok(value) 467 | } 468 | } 469 | -------------------------------------------------------------------------------- /codegen/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![recursion_limit="256"] 2 | 3 | extern crate proc_macro; 4 | extern crate proc_macro2; 5 | extern crate syn; 6 | #[macro_use] extern crate quote; 7 | 8 | mod parser; 9 | 10 | use syn::parse::Parser; 11 | use syn::visit_mut::{self, VisitMut}; 12 | use syn::spanned::Spanned; 
13 | 14 | use proc_macro2::TokenStream; 15 | use proc_macro2_diagnostics::{Diagnostic, SpanDiagnosticExt}; 16 | 17 | use crate::parser::*; 18 | 19 | fn parse_marker_ident(span: proc_macro2::Span) -> syn::Ident { 20 | const PARSE_MARKER_IDENT: &str = "____parse_parse_marker"; 21 | syn::Ident::new(PARSE_MARKER_IDENT, span) 22 | } 23 | 24 | fn parser_info_ident(span: proc_macro2::Span) -> syn::Ident { 25 | const PARSE_INFO_IDENT: &str = "____parse_parser_info"; 26 | syn::Ident::new(PARSE_INFO_IDENT, span) 27 | } 28 | 29 | #[derive(Copy, Clone)] 30 | enum State { 31 | Start, 32 | InTry 33 | } 34 | 35 | #[derive(Clone)] 36 | struct ParserTransformer { 37 | input: syn::Expr, 38 | output: syn::Type, 39 | state: State, 40 | } 41 | 42 | impl ParserTransformer { 43 | fn new(input: syn::Expr, output: syn::Type) -> ParserTransformer { 44 | ParserTransformer { input, output, state: State::Start } 45 | } 46 | } 47 | 48 | impl VisitMut for ParserTransformer { 49 | fn visit_expr_try_mut(&mut self, v: &mut syn::ExprTry) { 50 | let last_state = self.state; 51 | self.state = State::InTry; 52 | visit_mut::visit_expr_try_mut(self, v); 53 | self.state = last_state; 54 | 55 | let expr = &v.expr; 56 | let new_expr = quote_spanned!(expr.span() => #expr.map_err(|e| e.into())); 57 | let method_call: syn::Expr = syn::parse2(new_expr).expect("okay"); 58 | v.expr = Box::new(method_call); 59 | } 60 | 61 | fn visit_expr_call_mut(&mut self, call: &mut syn::ExprCall) { 62 | if let State::InTry = self.state { 63 | // TODO: Should we keep recursing? 64 | call.args.insert(0, self.input.clone()); 65 | 66 | // Only insert into the _first_ call. 
67 | self.state = State::Start; 68 | } else { 69 | visit_mut::visit_expr_call_mut(self, call); 70 | } 71 | } 72 | 73 | fn visit_macro_mut(&mut self, m: &mut syn::Macro) { 74 | if let Some(segment) = m.path.segments.last() { 75 | let name = segment.ident.to_string(); 76 | if name == "switch" || name.starts_with("parse_") { 77 | let (input, output) = (&self.input, &self.output); 78 | let tokens = match syn::parse2::(m.tokens.clone()) { 79 | Ok(mut expr) => { 80 | let mut transformer = self.clone(); 81 | transformer.state = State::Start; 82 | visit_mut::visit_expr_mut(&mut transformer, &mut expr); 83 | quote!(#expr) 84 | }, 85 | Err(_) => m.tokens.clone() 86 | }; 87 | 88 | let info = parser_info_ident(self.input.span()); 89 | let mark = parse_marker_ident(m.span()); 90 | 91 | let parser_info = quote!([#info; #input; #mark; #output]); 92 | m.tokens = quote_spanned!(m.span() => #parser_info #tokens); 93 | } 94 | } 95 | } 96 | } 97 | 98 | fn extract_input_ident_ty(f: &syn::ItemFn) -> PResult<(syn::Ident, syn::Type)> { 99 | use syn::{FnArg::Typed, PatType, Pat::Ident, Type::Reference}; 100 | 101 | let first = f.sig.inputs.first().ok_or_else(|| { 102 | let paren_span = f.sig.paren_token.span.join(); 103 | paren_span.error("parsing functions require at least one input") 104 | })?; 105 | 106 | let e = first.span().error("invalid type for parser input"); 107 | match first { 108 | Typed(PatType { pat, ty, .. 
}) => match **pat { 109 | Ident(ref p) => match **ty { 110 | Reference(ref r) => Ok((p.ident.clone(), *r.elem.clone())), 111 | _ => Err(e) 112 | } 113 | _ => Err(e) 114 | } 115 | _ => Err(first.span().error("invalid type for parser input")) 116 | } 117 | } 118 | 119 | fn wrapping_fn_block( 120 | function: &syn::ItemFn, 121 | scope: TokenStream, 122 | args: &AttrArgs, 123 | ret_ty: &syn::Type, 124 | ) -> PResult { 125 | let (input, input_ty) = extract_input_ident_ty(function)?; 126 | let fn_block = &function.block; 127 | 128 | let span = function.span(); 129 | let mark_ident = parse_marker_ident(input.span()); 130 | let info_ident = parser_info_ident(function.sig.ident.span()); 131 | let result_map = match args.raw.is_some() { 132 | true => quote_spanned!(span => ( 133 | |#info_ident: &#scope::input::ParserInfo, #mark_ident: &mut <#input_ty as #scope::input::Input>::Marker| { 134 | #fn_block 135 | }) 136 | ), 137 | false => quote_spanned!(span => ( 138 | |#info_ident: &#scope::input::ParserInfo, #mark_ident: &mut <#input_ty as #scope::input::Input>::Marker| { 139 | use #scope::result::IntoResult; 140 | IntoResult::into_result(#fn_block) 141 | } 142 | )) 143 | }; 144 | 145 | let rewind_expr = |span| quote_spanned! 
{ span => 146 | <#input_ty as #scope::input::Rewind>::rewind_to(#input, ___mark); 147 | }; 148 | 149 | let (rewind, peek) = (args.rewind.map(rewind_expr), args.peek.map(rewind_expr)); 150 | let new_block_tokens = { 151 | let (name, raw) = (&function.sig.ident, args.raw.is_some()); 152 | let name_str = name.to_string(); 153 | quote_spanned!(span => { 154 | let ___info = #scope::input::ParserInfo { name: #name_str, raw: #raw }; 155 | if let Some(ref mut ___debugger) = #input.options.debugger { 156 | ___debugger.on_entry(&___info); 157 | } 158 | 159 | let mut ___mark = #scope::input::Input::mark(#input, &___info); 160 | let mut ___res: #ret_ty = #result_map(&___info, &mut ___mark); 161 | match ___res { 162 | Ok(_) => { #peek }, 163 | Err(ref mut ___e) if #input.options.stacked_context => { 164 | let ___ctxt = #scope::input::Input::context(#input, ___mark); 165 | ___e.push_info(___info, ___ctxt); 166 | #rewind 167 | }, 168 | Err(_) => { #rewind }, 169 | } 170 | 171 | if #input.options.debugger.is_some() { 172 | let ___ctxt = #scope::input::Input::context(#input, ___mark); 173 | if let Some(ref mut ___debugger) = #input.options.debugger { 174 | ___debugger.on_exit(&___info, ___res.is_ok(), ___ctxt); 175 | } 176 | } 177 | 178 | ___res 179 | }) 180 | }; 181 | 182 | syn::parse(new_block_tokens.into()) 183 | .map_err(|e| function.span().error(format!("bad function: {}", e))) 184 | } 185 | 186 | fn parser_attribute(input: proc_macro::TokenStream, args: &AttrArgs) -> PResult { 187 | let input: proc_macro2::TokenStream = input.into(); 188 | let span = input.span(); 189 | let mut function: syn::ItemFn = syn::parse2(input).map_err(|_| { 190 | span.error("`parser` attribute only supports functions") 191 | })?; 192 | 193 | let ret_ty: syn::Type = match &function.sig.output { 194 | syn::ReturnType::Default => { 195 | return Err(function.sig.span().error("parse function requires return type")); 196 | }, 197 | syn::ReturnType::Type(_, ty) => (**ty).clone(), 198 | }; 199 | 200 | let 
(input_ident, _) = extract_input_ident_ty(&function)?; 201 | let input_expr: syn::Expr = syn::parse2(quote!(#input_ident)).unwrap(); 202 | let mut transformer = ParserTransformer::new(input_expr, ret_ty.clone()); 203 | visit_mut::visit_item_fn_mut(&mut transformer, &mut function); 204 | 205 | let scope = args.raw.map(|_| quote!(crate)).unwrap_or_else(|| quote!(pear)); 206 | let inline = syn::Attribute::parse_outer.parse2(quote!(#[inline])).unwrap(); 207 | function.block = Box::new(wrapping_fn_block(&function, scope, args, &ret_ty)?); 208 | function.attrs.extend(inline); 209 | 210 | Ok(quote! { 211 | #[allow(clippy::all, clippy::pedantic, clippy::nursery)] 212 | #function 213 | }) 214 | } 215 | 216 | impl Case { 217 | fn to_tokens<'a, I>(context: &Context, mut cases: I) -> TokenStream 218 | where I: Iterator 219 | { 220 | let this = match cases.next() { 221 | None => return quote!(), 222 | Some(case) => case 223 | }; 224 | 225 | let (input, output) = (&context.input, &context.output); 226 | let mut transformer = ParserTransformer::new(input.clone(), output.clone()); 227 | let mut case_expr = this.expr.clone(); 228 | visit_mut::visit_expr_mut(&mut transformer, &mut case_expr); 229 | 230 | match this.pattern { 231 | Pattern::Wild(..) => match this.guard.as_ref() { 232 | Some(guard) => { 233 | let rest_tokens = Case::to_tokens(context, cases); 234 | quote!(if #guard { #case_expr } else { #rest_tokens }) 235 | } 236 | None => quote!(#case_expr), 237 | } 238 | Pattern::Calls(ref calls) => { 239 | let case_branch = calls.iter().enumerate().map(|(i, call)| { 240 | let prefix = match i { 241 | 0 => quote!(if), 242 | _ => quote!(else if) 243 | }; 244 | 245 | let name = call.name.clone() 246 | .unwrap_or_else(|| syn::Ident::new("___", call.span())); 247 | 248 | // FIXME: We're repeating ourselves, aren't we? We alrady do 249 | // this in input insertion in the visitor. 
250 | let mut call_expr = call.expr.clone(); 251 | call_expr.args.insert(0, input.clone()); 252 | let call_expr = quote!({ 253 | let ___preserve_error = #input.emit_error; 254 | #input.emit_error = false; 255 | let ___call_result = #call_expr; 256 | #input.emit_error = ___preserve_error; 257 | ___call_result 258 | }); 259 | 260 | let guarded_call = this.guard.as_ref() 261 | .map(|guard| &guard.expr) 262 | .map(|guard| quote!({ 263 | match #call_expr { 264 | Ok(#name) if #guard => Some(#name), 265 | _ => None, 266 | } 267 | })) 268 | .unwrap_or_else(|| quote!(#call_expr.ok())); 269 | 270 | quote! { 271 | #prefix let Some(#name) = #guarded_call { 272 | #case_expr 273 | } 274 | } 275 | }); 276 | 277 | let rest_tokens = Case::to_tokens(context, cases); 278 | quote_spanned! { this.span => 279 | #(#case_branch)* 280 | else { #rest_tokens } 281 | } 282 | } 283 | } 284 | } 285 | } 286 | 287 | impl Switch { 288 | fn to_tokens(&self) -> TokenStream { 289 | Case::to_tokens(&self.context, self.cases.iter()) 290 | } 291 | } 292 | 293 | /// The core attribute macro. Can only be applied to free functions with at 294 | /// least one parameter and a return value. To typecheck, the free function must 295 | /// meet the following typing requirements: 296 | /// 297 | /// - The _first_ parameter's type must be a mutable reference to a [`Pear<I>`] 298 | /// where `I` implements [`Input`]. This is the _input_ parameter. 299 | /// - The return type must be [`Result<O, I>`] where `I` is the inner type 300 | /// of the input parameter and `O` can be any type. 301 | /// 302 | /// The following transformations are applied to the _contents_ of the 303 | /// attributed function: 304 | /// 305 | /// - The function's first parameter (of type `&mut Pear<I>`) is passed as the 306 | /// first parameter to every function call in the function with a postfix 307 | /// `?`.
That is, every function call of the form `foo(a, b, c, ...)?` is 308 | /// converted to `foo(input, a, b, c, ...)?` where `input` is the input 309 | /// parameter. 310 | /// - The inputs to every macro named `switch` or whose name starts with `parse_` are prefixed 311 | /// with `[PARSER_NAME; INPUT; MARKER; OUTPUT]` where `PARSER_NAME` is the 312 | /// raw string literal of the function's name, `INPUT` is the input 313 | /// parameter expression, `MARKER` is the marker expression, and `OUTPUT` 314 | /// is the output type. Additionally, if the input to the macro is a valid 315 | /// Rust expression, it is applied the same transformations as a function 316 | /// attributed with `#[parser]`. 317 | /// 318 | /// Declare a `parse_` macro as: 319 | /// 320 | /// ```rust,ignore 321 | /// macro_rules! parse_my_macro { 322 | /// ([$n:expr; $i:expr; $m:expr; $T:ty] ..) => { 323 | /// /* .. */ 324 | /// } 325 | /// } 326 | /// ``` 327 | /// 328 | /// The following transformations are applied _around_ the attributed 329 | /// function: 330 | /// 331 | /// - The [`Input::mark()`] method is called before the function executes. 332 | /// The returned mark, if any, is stored on the stack. 333 | /// - A return value of `O` is automatically converted (or "lifted") into a 334 | /// type of [`Result<O, I>`] by wrapping it in `Ok`. 335 | /// - If the function returns an `Err`, [`Input::context()`] is called with 336 | /// the current mark, and the returned context, if any, is pushed into the 337 | /// error via [`ParseError::push_context()`]. 338 | /// - The [`Input::unmark()`] method is called after the function executes, 339 | /// passing in the current mark.
340 | /// 341 | /// # Example 342 | /// 343 | /// ```rust 344 | /// use pear::input::{Pear, Text, Result}; 345 | /// use pear::macros::{parser, parse}; 346 | /// use pear::parsers::*; 347 | /// # 348 | /// # use pear::macros::parse_declare; 349 | /// # parse_declare!(Input<'a>(Token = char, Slice = &'a str, Many = &'a str)); 350 | /// 351 | /// #[parser] 352 | /// fn ab_in_dots<'a, I: Input<'a>>(input: &mut Pear) -> Result<&'a str, I> { 353 | /// eat('.')?; 354 | /// let inside = take_while(|&c| c == 'a' || c == 'b')?; 355 | /// eat('.')?; 356 | /// 357 | /// inside 358 | /// } 359 | /// 360 | /// # 361 | /// let x = parse!(ab_in_dots: Text::from(".abba.")); 362 | /// assert_eq!(x.unwrap(), "abba"); 363 | /// 364 | /// let x = parse!(ab_in_dots: Text::from(".ba.")); 365 | /// assert_eq!(x.unwrap(), "ba"); 366 | /// 367 | /// let x = parse!(ab_in_dots: Text::from("...")); 368 | /// assert!(x.is_err()); 369 | /// ``` 370 | #[proc_macro_attribute] 371 | pub fn parser( 372 | args: proc_macro::TokenStream, 373 | input: proc_macro::TokenStream 374 | ) -> proc_macro::TokenStream { 375 | let args = match AttrArgs::syn_parse.parse(args) { 376 | Ok(args) => args, 377 | Err(e) => return Diagnostic::from(e).emit_as_item_tokens().into(), 378 | }; 379 | 380 | match parser_attribute(input, &args) { 381 | Ok(tokens) => tokens.into(), 382 | Err(diag) => diag.emit_as_item_tokens().into(), 383 | } 384 | } 385 | 386 | /// Invoked much like match, except each condition must be a parser, which is 387 | /// executed, and the corresponding arm is executed only if the parser succeeds. 388 | /// Once a condition succeeds, no other condition is executed. 389 | /// 390 | /// ```rust,ignore 391 | /// switch! 
{ 392 | /// parser() => expr, 393 | /// x@parser1() | x@parser2(a, b, c) => expr(x), 394 | /// _ => last_expr 395 | /// } 396 | /// ``` 397 | #[proc_macro] 398 | pub fn switch(input: proc_macro::TokenStream) -> proc_macro::TokenStream { 399 | // TODO: We lose diagnostic information by using syn's thing here. We need a 400 | // way to get a SynParseStream from a TokenStream to not do that. 401 | match Switch::syn_parse.parse(input) { 402 | Ok(switch) => switch.to_tokens().into(), 403 | Err(e) => Diagnostic::from(e).emit_as_expr_tokens().into(), 404 | } 405 | } 406 | -------------------------------------------------------------------------------- /examples/uri/src/old.rs: -------------------------------------------------------------------------------- 1 | #![feature(proc_macro)] 2 | #![allow(unused_imports, dead_code)] 3 | 4 | #[macro_use] extern crate pear; 5 | 6 | mod tables; 7 | mod utils; 8 | mod indexed; 9 | 10 | use std::borrow::Cow; 11 | use std::str::{from_utf8, from_utf8_unchecked}; 12 | use std::fmt::{self, Display}; 13 | 14 | use pear::{Length, parser, switch}; 15 | use pear::parsers::*; 16 | use pear::combinators::*; 17 | 18 | // use utils::merge; 19 | use indexed::{Indexed, IndexedInput}; 20 | use self::tables::{is_reg_name_char, is_pchar}; 21 | 22 | /* 23 | * 24 | * request-target = origin-form / absolute-form / authority-form / asterisk-form 25 | * 26 | * ------------------------------------------------------------------------------- 27 | * 28 | * asterisk-form = "*" 29 | * 30 | * ------------------------------------------------------------------------------- 31 | * 32 | * origin-form = absolute-path [ "?" query ] 33 | * 34 | * absolute-path = 1*( "/" segment ) 35 | * 36 | * ------------------------------------------------------------------------------- 37 | * 38 | * authority-form = authority 39 | * 40 | * ------------------------------------------------------------------------------- 41 | * 42 | * 1. look for ':', '@', '?' 43 | * 2. 
if none is found, you have an authority, text is `host` 44 | * 3. if ':' is found, have either 'host', 'scheme', or 'userinfo' 45 | * * can only be host if: next four characters are port 46 | * * must be host if: text before ':' is empty, requires port 47 | * * if next (at most) four characters are numbers, then we have a host/port. 48 | * * if next character is '/' or there is none, then scheme 49 | * * otherwise try as scheme, fall back to userinfo if '@' is found 50 | * 4. if '?' is found, have either 'host', 'scheme', or 'userinfo' 51 | * 5. if '@' is found, have 'userinfo' 52 | * 53 | * Alternatively, don't support path-rootless or path-empty, then it's not 54 | * ambiguous: look for ':', '@', or '?': 55 | * * if none is found or found ':' but text before ':' is empty: authority 56 | * * if '@', must have authority, 57 | * * if '?', absolute 58 | * * if ':' followed by '/', must have absolute 59 | * * if ':' _not_ followed by '/', must have authority 60 | * 61 | * ------------------------------------------------------------------------------- 62 | * 63 | * absolute-form = absolute-URI 64 | * 65 | * absolute-URI = scheme ":" hier-part [ "?" query ] 66 | * 67 | * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 68 | * 69 | * hier-part = "//" authority path-abempty 70 | * / path-absolute 71 | * / path-rootless 72 | * / path-empty 73 | * 74 | * query = *( pchar / "/" / "?"
) 75 | * 76 | * authority = [ userinfo "@" ] host [ ":" port ] 77 | * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 78 | * host = IP-literal / IPv4address / reg-name 79 | * port = *DIGIT 80 | * 81 | * reg-name = *( unreserved / pct-encoded / sub-delims ) 82 | * 83 | * path-abempty = *( "/" segment ) 84 | * 85 | * path-absolute = "/" [ segment-nz *( "/" segment ) ] 86 | * path-noscheme = segment-nz-nc *( "/" segment ) 87 | * path-rootless = segment-nz *( "/" segment ) 88 | * path-empty = 0<pchar> 89 | * 90 | * segment = *pchar 91 | * segment-nz = 1*pchar 92 | * 93 | * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 94 | * 95 | * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 96 | * pct-encoded = "%" HEXDIG HEXDIG 97 | * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 98 | * / "*" / "+" / "," / ";" / "=" 99 | * 100 | * IP-literal = "[" ( IPv6address / IPvFuture ) "]" 101 | * 102 | * IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) 103 | * 104 | * IPv6address = 6( h16 ":" ) ls32 105 | * / "::" 5( h16 ":" ) ls32 106 | * / [ h16 ] "::" 4( h16 ":" ) ls32 107 | * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 108 | * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 109 | * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 110 | * / [ *4( h16 ":" ) h16 ] "::" ls32 111 | * / [ *5( h16 ":" ) h16 ] "::" h16 112 | * / [ *6( h16 ":" ) h16 ] "::" 113 | * 114 | * IPv4address = dec-octet "." dec-octet "." dec-octet "."
dec-octet 115 | * 116 | * dec-octet = DIGIT ; 0-9 117 | * / %x31-39 DIGIT ; 10-99 118 | * / "1" 2DIGIT ; 100-199 119 | * / "2" %x30-34 DIGIT ; 200-249 120 | * / "25" %x30-35 ; 250-255 121 | * 122 | * ALPHA = %x41-5A / %x61-7A ; A-Z / a-z 123 | * HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" 124 | * DIGIT = %x30-39 ; 0-9 125 | * 126 | * ------------------------------------------------------------------------------- 127 | **/ 128 | 129 | // type Input<'a> = IndexedInput<'a, [u8]>; 130 | parse_declare!(Input<'a>(Token = u8, Slice = &'a [u8], Many = &'a [u8])); 131 | 132 | // #[derive(Debug, PartialEq)] 133 | // pub enum Error { 134 | // Empty, 135 | // Parse(ParseError) 136 | // } 137 | 138 | // impl From> for Error { 139 | // #[inline(always)] 140 | // fn from(error: ParseError) -> Self { 141 | // Error::Parse(error) 142 | // } 143 | // } 144 | 145 | type ByteInput<'a> = IndexedInput<'a, [u8]>; 146 | type IndexedStr<'a> = Indexed<'a, str>; 147 | type IndexedBytes<'a> = Indexed<'a, [u8]>; 148 | 149 | #[derive(Debug, PartialEq)] 150 | pub enum Uri<'a> { 151 | Origin(Origin<'a>), 152 | Authority(Authority<'a>), 153 | Absolute(Absolute<'a>), 154 | Asterisk, 155 | } 156 | 157 | macro_rules! 
impl_uri_from { 158 | ($type:ident) => ( 159 | impl<'a> From<$type<'a>> for Uri<'a> { 160 | fn from(other: $type<'a>) -> Uri<'a> { 161 | Uri::$type(other) 162 | } 163 | } 164 | ) 165 | } 166 | 167 | impl_uri_from!(Origin); 168 | impl_uri_from!(Authority); 169 | impl_uri_from!(Absolute); 170 | 171 | impl<'a> Uri<'a> { 172 | fn origin(path: &'a str, query: Option<&'a str>) -> Uri<'a> { 173 | Uri::Origin(Origin::new(path, query)) 174 | } 175 | 176 | #[inline] 177 | unsafe fn raw_absolute( 178 | source: Cow<'a, [u8]>, 179 | scheme: Indexed<'a, [u8]>, 180 | path: Indexed<'a, [u8]>, 181 | query: Option>, 182 | ) -> Uri<'a> { 183 | let origin = Origin::raw(source.clone(), path, query); 184 | Uri::Absolute(Absolute::raw(source.clone(), scheme, None, Some(origin))) 185 | } 186 | } 187 | 188 | impl<'a> Display for Uri<'a> { 189 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 190 | match *self { 191 | Uri::Origin(ref origin) => write!(f, "{}", origin), 192 | Uri::Authority(ref authority) => write!(f, "{}", authority), 193 | Uri::Absolute(ref absolute) => write!(f, "{}", absolute), 194 | Uri::Asterisk => write!(f, "*") 195 | } 196 | } 197 | } 198 | 199 | #[derive(Debug)] 200 | pub struct Origin<'a> { 201 | source: Option>, 202 | path: IndexedStr<'a>, 203 | query: Option>, 204 | } 205 | 206 | impl<'a, 'b> PartialEq> for Origin<'a> { 207 | fn eq(&self, other: &Origin<'b>) -> bool { 208 | self.path() == other.path() && self.query() == other.query() 209 | } 210 | } 211 | 212 | pub unsafe fn as_utf8_unchecked(input: Cow<[u8]>) -> Cow { 213 | match input { 214 | Cow::Borrowed(bytes) => Cow::Borrowed(::std::str::from_utf8_unchecked(bytes)), 215 | Cow::Owned(bytes) => Cow::Owned(String::from_utf8_unchecked(bytes)) 216 | } 217 | } 218 | 219 | impl<'a> Origin<'a> { 220 | #[inline] 221 | unsafe fn raw(source: Cow<'a, [u8]>, path: Indexed<'a, [u8]>, query: Option>) -> Origin<'a> { 222 | Origin { 223 | source: Some(as_utf8_unchecked(source)), 224 | path: path.coerce(), 225 | 
query: query.map(|q| q.coerce()) 226 | } 227 | } 228 | 229 | fn new(path: P, query: Option) -> Origin<'a> 230 | where P: Into>, Q: Into> 231 | { 232 | Origin { 233 | source: None, 234 | path: Indexed::from(path), 235 | query: query.map(|q| Indexed::from(q)) 236 | } 237 | } 238 | 239 | #[inline] 240 | fn path(&self) -> &str { 241 | self.path.to_source(&self.source) 242 | } 243 | 244 | #[inline] 245 | fn query(&self) -> Option<&str> { 246 | self.query.as_ref().map(|q| q.to_source(&self.source)) 247 | } 248 | } 249 | 250 | impl<'a> Display for Origin<'a> { 251 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 252 | write!(f, "{}", self.path())?; 253 | if let Some(q) = self.query() { 254 | write!(f, "?{}", q)?; 255 | } 256 | 257 | Ok(()) 258 | } 259 | } 260 | 261 | #[derive(Debug)] 262 | pub struct Authority<'a> { 263 | source: Option>, 264 | user_info: Option>, 265 | host: IndexedStr<'a>, 266 | port: Option, 267 | } 268 | 269 | impl<'a> Authority<'a> { 270 | unsafe fn raw( 271 | source: Cow<'a, [u8]>, 272 | user_info: Option>, 273 | host: Indexed<'a, [u8]>, 274 | port: Option 275 | ) -> Authority<'a> { 276 | Authority { 277 | source: Some(as_utf8_unchecked(source)), 278 | user_info: user_info.map(|u| u.coerce()), 279 | host: host.coerce(), 280 | port: port 281 | } 282 | } 283 | 284 | fn new(user_info: Option<&'a str>, host: &'a str, port: Option) -> Authority<'a> { 285 | Authority { 286 | source: None, 287 | user_info: user_info.map(|u| u.into()), 288 | host: host.into(), 289 | port: port 290 | } 291 | } 292 | 293 | fn user_info(&self) -> Option<&str> { 294 | self.user_info.as_ref().map(|u| u.to_source(&self.source)) 295 | } 296 | 297 | #[inline(always)] 298 | fn host(&self) -> &str { 299 | self.host.to_source(&self.source) 300 | } 301 | 302 | #[inline(always)] 303 | fn port(&self) -> Option { 304 | self.port 305 | } 306 | } 307 | 308 | impl<'a, 'b> PartialEq> for Authority<'a> { 309 | fn eq(&self, other: &Authority<'b>) -> bool { 310 | self.user_info() == 
other.user_info() 311 | && self.host() == other.host() 312 | && self.port() == other.port() 313 | } 314 | } 315 | 316 | impl<'a> Display for Authority<'a> { 317 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 318 | if let Some(user_info) = self.user_info() { 319 | write!(f, "{}@", user_info)?; 320 | } 321 | 322 | write!(f, "{}", self.host())?; 323 | if let Some(port) = self.port { 324 | write!(f, ":{}", port)?; 325 | } 326 | 327 | Ok(()) 328 | } 329 | } 330 | 331 | #[derive(Debug)] 332 | pub struct Absolute<'a> { 333 | source: Option>, 334 | scheme: IndexedStr<'a>, 335 | authority: Option>, 336 | origin: Option>, 337 | } 338 | 339 | impl<'a> Absolute<'a> { 340 | #[inline] 341 | unsafe fn raw( 342 | source: Cow<'a, [u8]>, 343 | scheme: Indexed<'a, [u8]>, 344 | authority: Option>, 345 | origin: Option>, 346 | ) -> Absolute<'a> { 347 | Absolute { 348 | source: Some(as_utf8_unchecked(source)), 349 | scheme: scheme.coerce(), 350 | authority: authority, 351 | origin: origin, 352 | } 353 | } 354 | 355 | fn new( 356 | scheme: &'a str, 357 | authority: Option>, 358 | origin: Option> 359 | ) -> Absolute<'a> { 360 | Absolute { 361 | source: None, scheme: scheme.into(), authority, origin 362 | } 363 | } 364 | 365 | #[inline(always)] 366 | fn scheme(&self) -> &str { 367 | self.scheme.to_source(&self.source) 368 | } 369 | 370 | #[inline(always)] 371 | fn authority(&self) -> Option<&Authority<'a>> { 372 | self.authority.as_ref() 373 | } 374 | 375 | #[inline(always)] 376 | fn origin(&self) -> Option<&Origin<'a>> { 377 | self.origin.as_ref() 378 | } 379 | } 380 | 381 | impl<'a, 'b> PartialEq> for Absolute<'a> { 382 | fn eq(&self, other: &Absolute<'b>) -> bool { 383 | self.scheme() == other.scheme() 384 | && self.authority() == other.authority() 385 | && self.origin() == other.origin() 386 | } 387 | } 388 | 389 | impl<'a> Display for Absolute<'a> { 390 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 391 | write!(f, "{}", self.scheme())?; 392 | match self.authority 
{ 393 | Some(ref authority) => write!(f, "://{}", authority)?, 394 | None => write!(f, ":")? 395 | } 396 | 397 | if let Some(ref origin) = self.origin { 398 | write!(f, "{}", origin)?; 399 | } 400 | 401 | Ok(()) 402 | } 403 | } 404 | 405 | type RawInput<'a> = IndexedInput<'a, [u8]>; 406 | type Result<'a, T> = ::pear::Result>; 407 | 408 | #[parser] 409 | fn uri<'a>(input: &mut RawInput<'a>) -> Result<'a, Uri<'a>> { 410 | match input.len() { 411 | 0 => return Err(parse_error!("empty URI")), 412 | 1 => switch! { 413 | eat(b'*') => Uri::Asterisk, 414 | eat(b'/') => Uri::origin("/", None), 415 | _ => unsafe { 416 | // the `is_reg_name_char` guarantees ASCII 417 | let host = take_n_while(1, is_reg_name_char)?; 418 | Uri::Authority(Authority::raw(input.cow_source(), None, host, None)) 419 | } 420 | }, 421 | _ => switch! { 422 | peek(b'/') => Uri::Origin(origin()?), 423 | _ => absolute_or_authority()? 424 | } 425 | } 426 | } 427 | 428 | #[parser] 429 | fn origin<'a>(input: &mut RawInput<'a>) -> Result<'a, Origin<'a>> { 430 | (peek(b'/')?, path_and_query()?).1 431 | } 432 | 433 | #[parser] 434 | fn path_and_query<'a>(input: &mut RawInput<'a>) -> Result<'a, Origin<'a>> { 435 | let path = take_while(is_pchar)?; 436 | let query = switch! { 437 | eat(b'?') => Some(take_while(is_pchar)?), 438 | _ => None 439 | }; 440 | 441 | if path.is_empty() && query.is_none() { 442 | Err(parse_error!("expected path or query, found neither")) 443 | } else { 444 | // We know the string is ASCII because of the `is_pchar` checks above. 
445 | Ok(unsafe { Origin::raw(input.cow_source(), path, query) }) 446 | } 447 | } 448 | 449 | #[parser] 450 | fn port<'a>(input: &mut RawInput<'a>) -> Result<'a, u16> { 451 | let port_str = take_n_while(5, |c| c >= b'0' && c <= b'9')?; 452 | 453 | let mut port_num: u16 = 0; 454 | for (b, i) in port_str.to_source(&Some(input.cow_source())).iter().rev().zip(&[1, 10, 100, 1000, 10000]) { 455 | port_num += (*b - b'0') as u16 * i; 456 | } 457 | 458 | port_num 459 | } 460 | 461 | #[parser] 462 | fn authority<'a>( 463 | input: &mut RawInput<'a>, 464 | user_info: Option> 465 | ) -> Result<'a, Authority<'a>> { 466 | let host = switch! { 467 | peek(b'[') => delimited(b'[', is_pchar, b']')?, 468 | _ => take_while(is_reg_name_char)? 469 | }; 470 | 471 | let port = switch! { 472 | eat(b':') => Some(port()?), 473 | _ => None 474 | }; 475 | 476 | // The `is_pchar`,`is_reg_name_char`, and `port()` function ensure ASCII. 477 | unsafe { Authority::raw(input.cow_source(), user_info, host, port) } 478 | } 479 | 480 | // FIXME: Be able to write this. 481 | // How (when) do we pass inputs to macro invocations? 482 | // macro_rules! maybe { 483 | // ($e:ident ()) => ( 484 | // match $e(input) { 485 | // Ok(result) => Some(result), 486 | // Err(_) => None 487 | // } 488 | // ) 489 | // } 490 | 491 | // Callers must ensure that `scheme` is actually ASCII. 492 | #[parser] 493 | fn absolute<'a>( 494 | input: &mut RawInput<'a>, 495 | scheme: Indexed<'a, [u8]> 496 | ) -> Result<'a, Absolute<'a>> { 497 | let (authority, path_and_query) = switch! { 498 | eat_slice(b"://") => { 499 | let left = take_while(|c| is_reg_name_char(c) || c == b':')?; 500 | let authority = switch! { 501 | eat(b'@') => authority(Some(left))?, 502 | _ => { 503 | input.backtrack(left.len())?; 504 | authority(None)? 505 | } 506 | }; 507 | 508 | let path_and_query = switch! 
{ 509 | result@path_and_query() => Some(result), 510 | _ => None 511 | }; 512 | 513 | (Some(authority), path_and_query) 514 | }, 515 | eat(b':') => (None, Some(path_and_query()?)), 516 | _ => return Err(parse_error!("Something")) 517 | }; 518 | 519 | // `authority` and `path_and_query` parsers ensure ASCII. 520 | unsafe { Absolute::raw(input.cow_source(), scheme, authority, path_and_query) } 521 | } 522 | 523 | #[parser] 524 | fn absolute_or_authority<'a>( 525 | input: &mut RawInput<'a>, 526 | ) -> Result<'a, Uri<'a>> { 527 | let left = take_while(is_reg_name_char)?; 528 | switch! { 529 | peek_slice(b":/") => Uri::Absolute(absolute(left)?), 530 | eat(b'@') => Uri::Authority(authority(Some(left))?), 531 | colon@take_n_while(1, |b| b == b':') => { 532 | // could be authority or an IP with ':' in it 533 | let rest = take_while(|c| is_reg_name_char(c) || c == b':')?; 534 | switch! { 535 | eat(b'@') => Uri::Authority(authority(Some(left + colon + rest))?), 536 | peek(b'/') => { 537 | input.backtrack(rest.len() + 1)?; 538 | Uri::Absolute(absolute(left)?) 539 | }, 540 | _ => { 541 | let query = switch! { 542 | eat(b'?') => Some(take_while(is_pchar)?), 543 | _ => None 544 | }; 545 | 546 | // `left` and `rest` are reg_name, `query` is pchar. 547 | unsafe { Uri::raw_absolute(input.cow_source(), left, rest, query) } 548 | } 549 | } 550 | }, 551 | _ => Uri::Authority(authority(None)?) 
552 | } 553 | } 554 | 555 | pub fn parse_bytes<'a>(data: &'a [u8]) -> Result> { 556 | parse!(uri: &mut IndexedInput::from(data)) 557 | } 558 | 559 | pub fn main() { 560 | let uri = parse_bytes(&b"ftp:a/b?c=d"[..]); 561 | match uri { 562 | Ok(uri) => println!("{} ({:?})", uri, uri), 563 | Err(e) => println!("Error: {}", e) 564 | } 565 | 566 | // println!("{}", Origin { path: b"hi", query: Some(b"hey") }); 567 | // println!("{}", Origin { path: b"hi", query: None }); 568 | // println!("{}", Authority { userinfo: Some(b"user:pass"), host: b"sergio.bz", port: Some(10) }); 569 | // println!("{}", Absolute { 570 | // scheme: b"abc", 571 | // authority: Some(Authority { 572 | // userinfo: Some(b"u:p"), 573 | // host: b"foo.com", 574 | // port: Some(123) 575 | // }), 576 | // origin: Some(Origin { 577 | // path: b"/a/b", 578 | // query: Some(b"key=value&key2=value2") 579 | // }), 580 | // }); 581 | // let ab = parse_simple(b"dlfsjhklsdfakjfkljdfkb"); 582 | // println!("{}", ab); 583 | } 584 | 585 | #[cfg(test)] 586 | mod test { 587 | use super::*; 588 | 589 | fn parse_str(string: &str) -> ::pear::Result { 590 | parse!(uri: &mut IndexedInput::from(string.as_bytes())) 591 | } 592 | 593 | macro_rules! assert_parse_eq { 594 | ($($from:expr => $to:expr),+) => ( 595 | $( 596 | let expected = $to.into(); 597 | match parse_str($from) { 598 | Ok(output) => { 599 | if output != expected { 600 | println!("Failure on: {:?}", $from); 601 | assert_eq!(output, expected); 602 | } 603 | } 604 | Err(e) => { 605 | println!("{:?} failed to parse!", $from); 606 | panic!("Error: {}", e); 607 | } 608 | } 609 | )+ 610 | ); 611 | 612 | ($($from:expr => $to:expr),+,) => (assert_parse_eq!($($from => $to),+)) 613 | } 614 | 615 | macro_rules! 
assert_no_parse { 616 | ($($from:expr),+) => ( 617 | $( 618 | if let Ok(uri) = parse_str($from) { 619 | println!("{:?} parsed unexpectedly!", $from); 620 | panic!("Parsed as: {:?}", uri); 621 | } 622 | )+ 623 | ); 624 | 625 | ($($from:expr),+,) => (assert_no_parse!($($from),+)) 626 | } 627 | 628 | #[test] 629 | #[should_panic] 630 | fn test_assert_parse_eq() { 631 | assert_parse_eq!("*" => Uri::origin("*", None)); 632 | } 633 | 634 | #[test] 635 | #[should_panic] 636 | fn test_assert_parse_eq_consecutive() { 637 | assert_parse_eq!("/" => Uri::origin("/", None), "/" => Uri::Asterisk); 638 | } 639 | 640 | #[test] 641 | #[should_panic] 642 | fn test_assert_no_parse() { 643 | assert_no_parse!("/"); 644 | } 645 | 646 | #[test] 647 | fn single_byte() { 648 | assert_parse_eq!( 649 | "*" => Uri::Asterisk, 650 | "/" => Uri::origin("/", None), 651 | "." => Authority::new(None, ".", None), 652 | "_" => Authority::new(None, "_", None), 653 | "1" => Authority::new(None, "1", None), 654 | "b" => Authority::new(None, "b", None), 655 | ); 656 | 657 | assert_no_parse!("?", "#", "%"); 658 | } 659 | 660 | #[test] 661 | fn origin() { 662 | assert_parse_eq!( 663 | "/a/b/c" => Uri::origin("/a/b/c", None), 664 | "/a/b/c?" 
=> Uri::origin("/a/b/c", Some("")), 665 | "/a/b/c?abc" => Uri::origin("/a/b/c", Some("abc")), 666 | "/?abc" => Uri::origin("/", Some("abc")), 667 | "/hi%20there?a=b&c=d" => Uri::origin("/hi%20there", Some("a=b&c=d")), 668 | "/c/d/fa/b/c?abc" => Uri::origin("/c/d/fa/b/c", Some("abc")), 669 | "/xn--ls8h?emoji=poop" => Uri::origin("/xn--ls8h", Some("emoji=poop")), 670 | ); 671 | } 672 | 673 | #[test] 674 | fn authority() { 675 | assert_parse_eq!( 676 | "sergio:benitez@spark" => Authority::new(Some("sergio:benitez"), "spark", None), 677 | "a:b:c@1.2.3:12121" => Authority::new(Some("a:b:c"), "1.2.3", Some(12121)), 678 | "sergio@spark" => Authority::new(Some("sergio"), "spark", None), 679 | "sergio@spark:230" => Authority::new(Some("sergio"), "spark", Some(230)), 680 | "sergio@[1::]:230" => Authority::new(Some("sergio"), "1::", Some(230)), 681 | ); 682 | } 683 | 684 | #[test] 685 | fn absolute() { 686 | assert_parse_eq!( 687 | "http://foo.com:8000" => Absolute::new( 688 | "http", 689 | Some(Authority::new(None, "foo.com", Some(8000))), 690 | None 691 | ), 692 | "http://foo:8000" => Absolute::new( 693 | "http", 694 | Some(Authority::new(None, "foo", Some(8000))), 695 | None, 696 | ), 697 | "foo:bar" => Absolute::new( 698 | "foo", 699 | None, 700 | Some(Origin::new::<_, &str>("bar", None)), 701 | ), 702 | "http://sergio:pass@foo.com:8000" => Absolute::new( 703 | "http", 704 | Some(Authority::new(Some("sergio:pass"), "foo.com", Some(8000))), 705 | None, 706 | ), 707 | "foo:/sergio/pass?hi" => Absolute::new( 708 | "foo", 709 | None, 710 | Some(Origin::new("/sergio/pass", Some("hi"))), 711 | ), 712 | "bar:" => Absolute::new( 713 | "bar", 714 | None, 715 | Some(Origin::new::<_, &str>("", None)), 716 | ), 717 | "foo:?hi" => Absolute::new( 718 | "foo", 719 | None, 720 | Some(Origin::new("", Some("hi"))), 721 | ), 722 | "foo:a/b?hi" => Absolute::new( 723 | "foo", 724 | None, 725 | Some(Origin::new("a/b", Some("hi"))), 726 | ), 727 | "foo:a/b" => Absolute::new( 728 | "foo", 729 | 
None, 730 | Some(Origin::new::<_, &str>("a/b", None)), 731 | ), 732 | "foo:/a/b" => Absolute::new( 733 | "foo", 734 | None, 735 | Some(Origin::new::<_, &str>("/a/b", None)) 736 | ), 737 | "abc://u:p@foo.com:123/a/b?key=value&key2=value2" => Absolute::new( 738 | "abc", 739 | Some(Authority::new(Some("u:p"), "foo.com", Some(123))), 740 | Some(Origin::new("/a/b", Some("key=value&key2=value2"))), 741 | ), 742 | "ftp://foo.com:21/abc" => Absolute::new( 743 | "ftp", 744 | Some(Authority::new(None, "foo.com", Some(21))), 745 | Some(Origin::new::<_, &str>("/abc", None)), 746 | ), 747 | "http://google.com/abc" => Absolute::new( 748 | "http", 749 | Some(Authority::new(None, "google.com", None)), 750 | Some(Origin::new::<_, &str>("/abc", None)), 751 | ), 752 | "http://google.com" => Absolute::new( 753 | "http", 754 | Some(Authority::new(None, "google.com", None)), 755 | None 756 | ), 757 | "http://foo.com?test" => Absolute::new( 758 | "http", 759 | Some(Authority::new(None, "foo.com", None,)), 760 | Some(Origin::new("", Some("test"))), 761 | ), 762 | "http://google.com/abc?hi" => Absolute::new( 763 | "http", 764 | Some(Authority::new(None, "google.com", None,)), 765 | Some(Origin::new("/abc", Some("hi"))), 766 | ), 767 | ); 768 | } 769 | } 770 | --------------------------------------------------------------------------------