├── .editorconfig ├── .gitignore ├── .gitmodules ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-APACHE.txt ├── LICENSE-MIT.txt ├── Makefile.toml ├── README.md ├── examples ├── flamegraph-parse.rs ├── flamegraph-resolve.rs ├── normalize.rs ├── parse.rs └── resolve.rs ├── iri-string-benches ├── Cargo.toml ├── benches │ ├── resolve.rs │ └── validate.rs └── src │ └── lib.rs ├── iri-string-tests ├── Cargo.toml ├── src │ └── lib.rs └── tests │ └── uritemplate-test.rs ├── src ├── build.rs ├── components.rs ├── components │ └── authority.rs ├── convert.rs ├── format.rs ├── lib.rs ├── mask_password.rs ├── normalize.rs ├── normalize │ ├── error.rs │ ├── path.rs │ └── pct_case.rs ├── parser.rs ├── parser │ ├── char.rs │ ├── str.rs │ ├── str │ │ └── maybe_pct_encoded.rs │ ├── trusted.rs │ ├── trusted │ │ └── authority.rs │ ├── validate.rs │ └── validate │ │ ├── authority.rs │ │ └── path.rs ├── percent_encode.rs ├── raw.rs ├── resolve.rs ├── spec.rs ├── spec │ └── internal.rs ├── template.rs ├── template │ ├── components.rs │ ├── context.rs │ ├── error.rs │ ├── expand.rs │ ├── parser.rs │ ├── parser │ │ ├── char.rs │ │ └── validate.rs │ ├── simple_context.rs │ ├── string.rs │ └── string │ │ └── owned.rs ├── types.rs ├── types │ ├── generic.rs │ ├── generic │ │ ├── absolute.rs │ │ ├── error.rs │ │ ├── fragment.rs │ │ ├── macros.rs │ │ ├── normal.rs │ │ ├── query.rs │ │ ├── reference.rs │ │ └── relative.rs │ ├── iri.rs │ └── uri.rs └── validate.rs └── tests ├── build.rs ├── components └── mod.rs ├── gh-issues.rs ├── iri.rs ├── normalize.rs ├── percent_encode.rs ├── resolve.rs ├── resolve_refimpl └── mod.rs ├── serde.rs ├── string_types_interop.rs ├── template.rs └── utils └── mod.rs /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | # Use rustfmt (not editorconfig) to format Rust sources. 
4 | 5 | [*.{toml,yaml,yml}] 6 | charset = utf-8 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [*.toml] 12 | indent_size = 4 13 | 14 | [*.{yaml,yml}] 15 | indent_size = 2 16 | indent_style = space 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | 5 | # for `cargo flamegraph` 6 | /perf.data 7 | /perf.data.old 8 | /*.svg 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "iri-string-tests/uritemplate-test"] 2 | path = iri-string-tests/assets/uritemplate-test 3 | url = https://github.com/uri-templates/uritemplate-test.git 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | ".", 4 | "iri-string-tests", 5 | "iri-string-benches", 6 | ] 7 | 8 | [package] 9 | name = "iri-string" 10 | version = "0.7.8" 11 | authors = ["YOSHIOKA Takuma "] 12 | edition = "2021" 13 | rust-version = "1.60" 14 | license = "MIT OR Apache-2.0" 15 | readme = "README.md" 16 | description = "IRI as string types" 17 | repository = "https://github.com/lo48576/iri-string" 18 | keywords = ["IRI", "URI"] 19 | 20 | [package.metadata.docs.rs] 21 | all-features = true 22 | # Use unstable toolchain feature on document generation on docs.rs. 23 | # See . 24 | # 25 | # Didn't create `docsrs` feature, since Rustdoc feature `doc_auto_cfg` depends 26 | # on nightly toolchain and it prevents `cargo doc --all-features` from running 27 | # with stable Rust toolchain. 28 | # See 29 | # for unstable `doc_auto_cfg` feature. 
30 | rustdoc-args = ["--cfg", "docsrs"] 31 | 32 | [lib] 33 | bench = false 34 | 35 | [features] 36 | # Default features. 37 | default = ["std"] 38 | 39 | # Enable features that requires `alloc`. 40 | alloc = ["serde?/alloc"] 41 | # Enable features that requires `std`. 42 | std = ["alloc", "memchr?/std", "serde?/std"] 43 | 44 | [dependencies] 45 | memchr = { version = "2.4.1", default-features = false, optional = true } 46 | serde = { version = "1.0.103", default-features = false, features = ["derive"], optional = true } 47 | 48 | [dev-dependencies] 49 | serde_test = "1.0.104" 50 | 51 | [badges] 52 | maintenance = { status = "actively-developed" } 53 | 54 | [[example]] 55 | name = "flamegraph-resolve" 56 | required-features = ["alloc"] 57 | 58 | [[example]] 59 | name = "normalize" 60 | required-features = ["std"] 61 | 62 | [[example]] 63 | name = "parse" 64 | required-features = ["std"] 65 | 66 | [[example]] 67 | name = "resolve" 68 | required-features = ["std"] 69 | -------------------------------------------------------------------------------- /LICENSE-MIT.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019-2024 YOSHIOKA Takuma 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /Makefile.toml: -------------------------------------------------------------------------------- 1 | # # cargo-make config for CI. 2 | # 3 | # ## Variables to maintain 4 | # * `MSRV` 5 | # + Minimal supported rust version. 6 | # + This should be consistent with `package.rust-version` field of `Cargo.toml`. 7 | # 8 | # ## Controllable variables 9 | # * `VERBOSE` 10 | # + If set to a value other than `false` or `0`, `--verbose` options are added for build and test. 11 | # 12 | # ## Profiles 13 | # Profiles are mainly used to change features and dependencies. 14 | # 15 | # * `default` 16 | # + Default features. 17 | # + Up-to-date deps. 18 | # * `feat-none` 19 | # + No features. 20 | # + Up-to-date deps. 21 | # * `feat-alloc` 22 | # + `alloc` feature. 23 | # + Up-to-date deps. 24 | # * `feat-std` 25 | # + `std` feature. 26 | # + Up-to-date deps. 27 | # * `feat-memchr` 28 | # + `memchr` feature. 29 | # + Up-to-date deps. 30 | # * `feat-serde` 31 | # + `serde` feature. 32 | # + Up-to-date deps. 33 | # * `feat-serde-alloc` 34 | # + `serde` and `alloc` features. 35 | # + Up-to-date deps. 36 | # * `feat-serde-std` 37 | # + `serde` and `std` features. 38 | # + Up-to-date deps. 39 | # * `feat-all` 40 | # + All features. 41 | # + Up-to-date deps. 42 | # * `minimal-deps-feat-none` 43 | # + No features. 44 | # + Minimal versions deps. 45 | # * `minimal-deps-feat-default` 46 | # + Default features. 
47 | # + Minimal versions deps. 48 | # * `minimal-deps-feat-all` 49 | # + All features. 50 | # + Minimal versions deps. 51 | # 52 | # ## Tasks 53 | # 54 | # ### For CLI 55 | # * `manual-ci-all` 56 | # + Invokes all required CI tasks, with required rust versions. 57 | # * `ci-all-profiles` 58 | # + Invokes required CI tasks for all profiles. 59 | # * `ci-all-profiles` 60 | # + Invokes required CI tasks for all profiles. 61 | # 62 | # ### For automated CI 63 | # * `ci-once` 64 | # + CI task that should be run at least once for one commit. 65 | # + This does not need to be run with multiple tasks, or 66 | # with multiple rust versions. 67 | # * `ci` 68 | # + CI task that should (or recommended to) be run for each profiles and/or 69 | # for each toolchain. 70 | 71 | ################################################################ 72 | 73 | [env] 74 | # Minimal supported rust version. 75 | MSRV = "1.60.0" 76 | 77 | FEATURES = "" 78 | ALL_FEATURES = false 79 | # `FEATURES = "default,..."` is recommended to enable default features. 80 | # `ALL_FEATURES = true` does NOT require this to be `false`. 
81 | NO_DEFAULT_FEATURES = true 82 | USE_MINIMAL_DEPENDENCIES = false 83 | 84 | [env.default] 85 | FEATURES = "default" 86 | 87 | [env.feat-none] 88 | FEATURES = "" 89 | 90 | [env.feat-alloc] 91 | FEATURES = "alloc" 92 | 93 | [env.feat-std] 94 | FEATURES = "std" 95 | 96 | [env.feat-memchr] 97 | FEATURES = "memchr" 98 | 99 | [env.feat-serde] 100 | FEATURES = "serde" 101 | 102 | [env.feat-serde-alloc] 103 | FEATURES = "serde,alloc" 104 | 105 | [env.feat-serde-std] 106 | FEATURES = "serde,std" 107 | 108 | [env.feat-all] 109 | ALL_FEATURES = true 110 | 111 | [env.minimal-deps-feat-none] 112 | FEATURES = "" 113 | USE_MINIMAL_DEPENDENCIES = true 114 | 115 | [env.minimal-deps-feat-default] 116 | FEATURES = "default" 117 | USE_MINIMAL_DEPENDENCIES = true 118 | 119 | [env.minimal-deps-feat-all] 120 | ALL_FEATURES = true 121 | USE_MINIMAL_DEPENDENCIES = true 122 | 123 | ################################################################ 124 | 125 | # For manual invocation from CLI. 126 | [tasks.manual-ci-all] 127 | VERBOSE = { value = "false", condition = { env_not_set = ["VERBOSE"] } } 128 | script = [ 129 | ''' 130 | cargo +${MSRV} make ci-once 131 | cargo +${MSRV} make ci-all-profiles 132 | cargo +stable make --profile default ci 133 | cargo +stable make --profile feat-all ci 134 | cargo +stable make --profile minimal-deps-feat-all ci 135 | cargo +beta make --profile feat-all ci 136 | cargo +nightly make --profile feat-all ci 137 | cargo +nightly make --profile default check-miri 138 | cargo +nightly make --profile feat-none check-miri 139 | ''' 140 | ] 141 | 142 | # For manual invocation from CLI. 
143 | [tasks.ci-all-profiles] 144 | VERBOSE = { value = "false", condition = { env_not_set = ["VERBOSE"] } } 145 | script = [ 146 | ''' 147 | cargo make --profile default ci 148 | cargo make --profile feat-none ci 149 | cargo make --profile feat-alloc ci 150 | cargo make --profile feat-std ci 151 | cargo make --profile feat-memchr ci 152 | cargo make --profile feat-serde ci 153 | cargo make --profile feat-serde-alloc ci 154 | cargo make --profile feat-serde-std ci 155 | cargo make --profile feat-all ci 156 | cargo make --profile minimal-deps-feat-none ci 157 | cargo make --profile minimal-deps-feat-default ci 158 | cargo make --profile minimal-deps-feat-all ci 159 | ''' 160 | ] 161 | 162 | # CI tasks to run only once per commit. 163 | # Recommended to be run before `ci` task. 164 | [tasks.ci-once] 165 | dependencies = [ 166 | "print-makers-env", 167 | "ci-once-check", 168 | ] 169 | 170 | # CI tasks to run per profile. 171 | [tasks.ci] 172 | dependencies = [ 173 | "print-makers-env", 174 | "print-rust-version", 175 | "prepare-dependencies", 176 | "ci-check", 177 | "ci-build", 178 | "ci-test", 179 | { name = "check-miri", condition = { channels = ["nightly"] } }, 180 | ] 181 | 182 | [tasks.ci-once-check] 183 | dependencies = [ 184 | "check-rustfmt", 185 | ] 186 | 187 | [tasks.ci-check] 188 | dependencies = [ 189 | "check-clippy", 190 | ] 191 | 192 | [tasks.ci-build] 193 | dependencies = [ 194 | "build", 195 | ] 196 | 197 | [tasks.ci-test] 198 | dependencies = [ 199 | "test", 200 | ] 201 | 202 | [tasks.print-makers-env] 203 | script = [ 204 | ''' 205 | echo "Environment:" 206 | echo " PROFILE_NAME: ${CARGO_MAKE_PROFILE}" 207 | echo " ALL_FEATURES: ${ALL_FEATURES}" 208 | echo " NO_DEFAULT_FEATURES: ${NO_DEFAULT_FEATURES}" 209 | echo " FEATURES: ${FEATURES}" 210 | echo " VERBOSE: ${VERBOSE:-}" 211 | echo " Rust version: ${CARGO_MAKE_RUST_VERSION}" 212 | echo " Rust channel: ${CARGO_MAKE_RUST_CHANNEL}" 213 | ''' 214 | ] 215 | 216 | [tasks.prepare-dependencies] 217 | 
run_task = [ 218 | { name = "prepare-minimal-dependencies", condition = { env_true = ["USE_MINIMAL_DEPENDENCIES"] } }, 219 | { name = "prepare-latest-dependencies" }, 220 | ] 221 | 222 | [tasks.prepare-minimal-dependencies] 223 | condition = { env_true = ["USE_MINIMAL_DEPENDENCIES"] } 224 | toolchain = "nightly" 225 | command = "cargo" 226 | args = ["update", "-Z", "minimal-versions"] 227 | 228 | [tasks.prepare-latest-dependencies] 229 | command = "cargo" 230 | args = ["update"] 231 | 232 | [tasks.cargo-clean] 233 | command = "cargo" 234 | args = ["clean"] 235 | 236 | [tasks.check-rustfmt] 237 | run_task = [ 238 | { name = "check-rustfmt-strict", condition = { rust_version = { equal = "$MSRV" } } }, 239 | { name = "check-rustfmt-ignore-errors" }, 240 | ] 241 | 242 | [tasks.check-rustfmt-strict] 243 | dependencies = ["print-rustfmt-version"] 244 | command = "cargo" 245 | args = ["fmt", "--all", "--", "--check"] 246 | 247 | [tasks.check-rustfmt-ignore-errors] 248 | dependencies = ["print-rustfmt-version"] 249 | command = "cargo" 250 | args = ["fmt", "--all", "--", "--check"] 251 | 252 | [tasks.print-rustfmt-version] 253 | install_crate = { rustup_component_name = "rustfmt" } 254 | command = "cargo" 255 | args = ["fmt", "--version"] 256 | 257 | [tasks.check-miri] 258 | dependencies = ["print-miri-version", "cargo-clean"] 259 | toolchain = "nightly" 260 | command = "cargo" 261 | args = [ 262 | "miri", 263 | "test", 264 | "@@decode(NO_DEFAULT_FEATURES,true,--no-default-features,)", 265 | "@@decode(ALL_FEATURES,true,--all-features,)", 266 | "@@decode(FEATURES,,,--features=${FEATURES})", 267 | ] 268 | 269 | [tasks.print-miri-version] 270 | install_crate = { rustup_component_name = "miri" } 271 | toolchain = "nightly" 272 | command = "cargo" 273 | args = ["miri", "--version"] 274 | 275 | [tasks.check-clippy] 276 | run_task = [ 277 | { name = "check-clippy-strict", condition = { rust_version = { equal = "$MSRV" } } }, 278 | { name = "check-clippy-ignore-errors" }, 279 | ] 
280 | 281 | [tasks.check-clippy-strict] 282 | dependencies = ["print-clippy-version"] 283 | command = "cargo" 284 | args = [ 285 | "clippy", 286 | "@@decode(NO_DEFAULT_FEATURES,true,--no-default-features,)", 287 | "@@decode(ALL_FEATURES,true,--all-features,)", 288 | "@@decode(FEATURES,,,--features=${FEATURES})", 289 | "--", 290 | "--deny", 291 | "warnings", 292 | ] 293 | 294 | [tasks.check-clippy-ignore-errors] 295 | dependencies = ["print-clippy-version"] 296 | command = "cargo" 297 | args = [ 298 | "clippy", 299 | "@@decode(NO_DEFAULT_FEATURES,true,--no-default-features,)", 300 | "@@decode(ALL_FEATURES,true,--all-features,)", 301 | "@@decode(FEATURES,,,--features=${FEATURES})", 302 | ] 303 | ignore_errors = true 304 | 305 | [tasks.print-clippy-version] 306 | install_crate = { rustup_component_name = "clippy" } 307 | command = "cargo" 308 | args = ["clippy", "--version"] 309 | 310 | [tasks.print-rust-version] 311 | command = "rustc" 312 | args = ["--version"] 313 | 314 | [tasks.build] 315 | command = "cargo" 316 | args = [ 317 | "build", 318 | "@@decode(NO_DEFAULT_FEATURES,true,--no-default-features,)", 319 | "@@decode(ALL_FEATURES,true,--all-features,)", 320 | "@@decode(FEATURES,,,--features=${FEATURES})", 321 | "@@decode(VERBOSE,false,,0,,--verbose)", 322 | ] 323 | 324 | [tasks.test] 325 | command = "cargo" 326 | args = [ 327 | "test", 328 | "@@decode(NO_DEFAULT_FEATURES,true,--no-default-features,)", 329 | "@@decode(ALL_FEATURES,true,--all-features,)", 330 | "@@decode(FEATURES,,,--features=${FEATURES})", 331 | "@@decode(VERBOSE,false,,0,,--verbose)", 332 | ] 333 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iri-string 2 | 3 | [![Latest version](https://img.shields.io/crates/v/iri-string.svg)](https://crates.io/crates/iri-string) 4 | [![Documentation](https://docs.rs/iri-string/badge.svg)](https://docs.rs/iri-string) 5 | 
![Minimum rustc version: 1.60](https://img.shields.io/badge/rustc-1.60+-lightgray.svg) 6 | 7 | String types for [IRI](https://tools.ietf.org/html/rfc3987)s (Internationalized Resource 8 | Identifiers) and [URI](https://tools.ietf.org/html/rfc3986)s (Uniform Resource Identifiers). 9 | 10 | See the [documentation](https://docs.rs/iri-string) for details. 11 | 12 | ## Features 13 | 14 | * `no_std` support. 15 | * String types (both owned and borrowed) for RFC 3986 URIs and RFC 3987 IRIs. 16 | + Native slice types, so highly operable with `Cow`, `ToOwned`, etc. 17 | + URIs/IRIs validation. 18 | + Conversions between URIs and IRIs. 19 | + Decomposition into components. 20 | * IRI reference resolution algorithm. 21 | * IRI normalization algorithm. 22 | * Masking password part of an IRI (optional and not automatic). 23 | * Percent encoding of user-provided strings. 24 | * IRI builder. 25 | * RFC 6570 URI Template. 26 | 27 | ### Feature flags 28 | 29 | #### Direct 30 | * `alloc` (enabled by default) 31 | + Enables types and functions which require memory allocation. 32 | + Requires `std` or `alloc` crate available. 33 | * `std` (enabled by default) 34 | + Enables all `std` features (such as memory allocations and `std::error::Error` trait). 35 | + Requires `std` crate available. 36 | + This automatically enables `alloc` feature. 37 | 38 | #### memchr 39 | * `memchr` 40 | + Enables optimization for internal parsers, using [`memchr`] crate. 41 | 42 | [`memchr`]: https://crates.io/crates/memchr 43 | 44 | #### serde 45 | * `serde` 46 | + Implements `Serialize` and `Deserialize` traits for string types. 47 | 48 | ## CI 49 | 50 | CI must pass on `develop` and `master` branches. 51 | No automated online CI is set up (since it consumes credits too fast), so run 52 | `cargo make manual-ci-all` locally before committing to these branches. 
53 | On other branches, tests and some lints (such as `dead_code`) are allowed to 54 | fail, but all commits must be successfully compilable and must be formatted. 55 | 56 | ## License 57 | 58 | Licensed under either of 59 | 60 | * Apache License, Version 2.0, ([LICENSE-APACHE.txt](LICENSE-APACHE.txt) or 61 | ) 62 | * MIT license ([LICENSE-MIT.txt](LICENSE-MIT.txt) or 63 | ) 64 | 65 | at your option. 66 | 67 | ### Contribution 68 | 69 | Unless you explicitly state otherwise, any contribution intentionally submitted 70 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall be 71 | dual licensed as above, without any additional terms or conditions. 72 | -------------------------------------------------------------------------------- /examples/flamegraph-parse.rs: -------------------------------------------------------------------------------- 1 | use iri_string::types::IriReferenceStr; 2 | 3 | fn main() { 4 | for _ in 0..1000000 { 5 | let s = concat!( 6 | "scheme://user:pw@sub.example.com:8080/a/b/c/%30/%31/%32%33%34", 7 | "/foo/foo/../../../foo.foo/foo/foo/././././//////foo", 8 | "/\u{03B1}\u{03B2}\u{03B3}/\u{03B1}\u{03B2}\u{03B3}/\u{03B1}\u{03B2}\u{03B3}", 9 | "?k1=v1&k2=v2&k3=v3#fragment" 10 | ); 11 | 12 | let domain = "scheme://sub.sub.sub.example.com:8080/a/b/c"; 13 | let v4 = "scheme://198.51.100.23:8080/a/b/c"; 14 | let v6 = "scheme://[2001:db8:0123::cafe]:8080/a/b/c"; 15 | let v6v4 = "scheme://[2001:db8::198.51.100.23]:8080/a/b/c"; 16 | let vfuture = "scheme://[v2.ipv2-does-not-exist]:8080/a/b/c"; 17 | let _ = ( 18 | IriReferenceStr::new(s), 19 | IriReferenceStr::new(domain), 20 | IriReferenceStr::new(v4), 21 | IriReferenceStr::new(v6), 22 | IriReferenceStr::new(v6v4), 23 | IriReferenceStr::new(vfuture), 24 | ); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /examples/flamegraph-resolve.rs: -------------------------------------------------------------------------------- 1 | 
#![cfg(feature = "alloc")] 2 | use iri_string::format::ToDedicatedString; 3 | use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; 4 | 5 | fn main() { 6 | let base = IriAbsoluteStr::new("https://sub.example.com/foo1/foo2/foo3/foo4/foo5") 7 | .expect("should be valid IRI"); 8 | let rel = IriReferenceStr::new(concat!( 9 | "bar1/bar2/bar3/../bar4/../../bar5/bar6/bar7/../../../../..", 10 | "/bar8/../../../bar9/././././././bar10/bar11", 11 | )) 12 | .expect("should be valid IRI"); 13 | for _ in 0..1000000 { 14 | let resolved = rel.resolve_against(base).to_dedicated_string(); 15 | drop(resolved); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /examples/normalize.rs: -------------------------------------------------------------------------------- 1 | //! An example to normalize an IRI from the CLI argument. 2 | 3 | use iri_string::format::ToDedicatedString; 4 | use iri_string::types::{RiStr, RiString}; 5 | 6 | const USAGE: &str = "\ 7 | USAGE: 8 | normalize [FLAGS] [--] IRI 9 | 10 | FLAGS: 11 | -h, --help Prints this help 12 | -i, --iri Handle the input as an IRI (RFC 3987) 13 | -u, --uri Handle the input as an URI (RFC 3986) 14 | -a, --ascii Converts the output to an URI (RFC 3986) 15 | -w, --whatwg Serialize normalization result according to WHATWG URL Standard. 16 | 17 | ARGS: 18 | IRI 19 | "; 20 | 21 | fn print_help() { 22 | eprintln!("{USAGE}"); 23 | } 24 | 25 | fn help_and_exit() -> ! { 26 | print_help(); 27 | std::process::exit(1); 28 | } 29 | 30 | fn die(msg: impl std::fmt::Display) -> ! { 31 | eprintln!("ERROR: {msg}"); 32 | eprintln!(); 33 | print_help(); 34 | std::process::exit(1); 35 | } 36 | 37 | /// Syntax specification. 38 | #[derive(Debug, Clone, Copy)] 39 | enum Spec { 40 | /// RFC 3986 URI. 41 | Uri, 42 | /// RFC 3987 IRI. 43 | Iri, 44 | } 45 | 46 | impl Default for Spec { 47 | #[inline] 48 | fn default() -> Self { 49 | Self::Iri 50 | } 51 | } 52 | 53 | /// CLI options. 
54 | #[derive(Default, Debug, Clone)] 55 | struct CliOpt { 56 | /// IRI. 57 | iri: String, 58 | /// Syntax spec. 59 | spec: Spec, 60 | /// Whether to convert output to ASCII URI or not. 61 | output_ascii: bool, 62 | /// Whether to serialize in WHATWG URL Standard way. 63 | whatwg_serialization: bool, 64 | } 65 | 66 | impl CliOpt { 67 | fn parse() -> Self { 68 | let mut args = std::env::args(); 69 | // Skip `argv[0]`. 70 | args.next(); 71 | 72 | let mut iri = None; 73 | let mut spec = None; 74 | let mut output_ascii = false; 75 | let mut whatwg_serialization = false; 76 | 77 | for arg in args.by_ref() { 78 | match arg.as_str() { 79 | "--ascii" | "-a" => output_ascii = true, 80 | "--iri" | "-i" => spec = Some(Spec::Iri), 81 | "--uri" | "-u" => spec = Some(Spec::Uri), 82 | "--whatwg" | "-w" => whatwg_serialization = true, 83 | "--help" | "-h" => help_and_exit(), 84 | opt if opt.starts_with('-') => die(format_args!("Unknown option: {opt}")), 85 | _ => { 86 | if iri.replace(arg).is_some() { 87 | die("IRI can be specified at most once"); 88 | } 89 | } 90 | } 91 | } 92 | 93 | for arg in args { 94 | if iri.replace(arg).is_some() { 95 | eprintln!("ERROR: IRI can be specified at most once"); 96 | } 97 | } 98 | 99 | let iri = iri.unwrap_or_else(|| die("IRI should be specified")); 100 | let spec = spec.unwrap_or_default(); 101 | Self { 102 | iri, 103 | spec, 104 | output_ascii, 105 | whatwg_serialization, 106 | } 107 | } 108 | } 109 | 110 | fn main() { 111 | let opt = CliOpt::parse(); 112 | 113 | match opt.spec { 114 | Spec::Iri => process_iri(&opt), 115 | Spec::Uri => process_uri(&opt), 116 | } 117 | } 118 | 119 | fn process_iri(opt: &CliOpt) { 120 | let mut normalized = normalize::(opt); 121 | if opt.output_ascii { 122 | normalized.encode_to_uri_inline(); 123 | } 124 | println!("{normalized}"); 125 | } 126 | 127 | fn process_uri(opt: &CliOpt) { 128 | let normalized = normalize::(opt); 129 | println!("{normalized}"); 130 | } 131 | 132 | fn normalize(opt: &CliOpt) -> RiString 
{ 133 | let raw = &opt.iri.as_str(); 134 | let iri = match RiStr::::new(raw) { 135 | Ok(v) => v, 136 | Err(e) => die(format_args!("Failed to parse {raw:?}: {e:?}")), 137 | }; 138 | let normalized = iri.normalize(); 139 | if !opt.whatwg_serialization { 140 | if let Err(e) = normalized.ensure_rfc3986_normalizable() { 141 | die(format_args!("Failed to normalize: {e:?}")); 142 | } 143 | } 144 | normalized.to_dedicated_string() 145 | } 146 | -------------------------------------------------------------------------------- /examples/parse.rs: -------------------------------------------------------------------------------- 1 | //! An example to parse IRI from the CLI argument. 2 | 3 | use iri_string::types::{IriStr, RiReferenceStr, RiStr}; 4 | 5 | const USAGE: &str = "\ 6 | USAGE: 7 | parse [FLAGS] [--] IRI 8 | 9 | FLAGS: 10 | -h, --help Prints this help 11 | -i, --iri Handle the input as an IRI (RFC 3987) 12 | -u, --uri Handle the input as an URI (RFC 3986) 13 | 14 | ARGS: 15 | IRI or URI 16 | "; 17 | 18 | fn print_help() { 19 | eprintln!("{}", USAGE); 20 | } 21 | 22 | fn help_and_exit() -> ! { 23 | print_help(); 24 | std::process::exit(1); 25 | } 26 | 27 | fn die(msg: impl std::fmt::Display) -> ! { 28 | eprintln!("ERROR: {}", msg); 29 | eprintln!(); 30 | print_help(); 31 | std::process::exit(1); 32 | } 33 | 34 | /// Syntax specification. 35 | #[derive(Debug, Clone, Copy)] 36 | enum Spec { 37 | /// RFC 3986 URI. 38 | Uri, 39 | /// RFC 3987 IRI. 40 | Iri, 41 | } 42 | 43 | impl Default for Spec { 44 | #[inline] 45 | fn default() -> Self { 46 | Self::Iri 47 | } 48 | } 49 | 50 | /// CLI options. 51 | #[derive(Default, Debug, Clone)] 52 | struct CliOpt { 53 | /// IRI. 54 | iri: String, 55 | /// Syntax spec. 56 | spec: Spec, 57 | } 58 | 59 | impl CliOpt { 60 | fn parse() -> Self { 61 | let mut args = std::env::args(); 62 | // Skip `argv[0]`. 
63 | args.next(); 64 | 65 | let mut iri = None; 66 | let mut spec = None; 67 | 68 | for arg in args.by_ref() { 69 | match arg.as_str() { 70 | "--iri" | "-i" => spec = Some(Spec::Iri), 71 | "--uri" | "-u" => spec = Some(Spec::Uri), 72 | "--help" | "-h" => help_and_exit(), 73 | opt if opt.starts_with('-') => die(format_args!("Unknown option: {}", opt)), 74 | _ => { 75 | if iri.replace(arg).is_some() { 76 | die("IRI can be specified at most once"); 77 | } 78 | } 79 | } 80 | } 81 | 82 | for arg in args { 83 | if iri.replace(arg).is_some() { 84 | eprintln!("ERROR: IRI can be specified at most once"); 85 | } 86 | } 87 | 88 | let iri = iri.unwrap_or_else(|| die("IRI should be specified")); 89 | let spec = spec.unwrap_or_default(); 90 | Self { iri, spec } 91 | } 92 | } 93 | 94 | fn main() { 95 | let opt = CliOpt::parse(); 96 | 97 | match opt.spec { 98 | Spec::Iri => parse_iri(&opt), 99 | Spec::Uri => parse_uri(&opt), 100 | } 101 | } 102 | 103 | fn parse_iri(opt: &CliOpt) { 104 | let iri = parse::(opt); 105 | let uri = iri.encode_to_uri(); 106 | println!("ASCII: {:?}", uri); 107 | } 108 | 109 | fn parse_uri(opt: &CliOpt) { 110 | let iri = parse::(opt); 111 | println!("ASCII: {:?}", iri); 112 | } 113 | 114 | fn parse(opt: &CliOpt) -> &RiReferenceStr 115 | where 116 | RiStr: AsRef>, 117 | { 118 | let raw = &opt.iri.as_str(); 119 | let iri = match RiReferenceStr::::new(raw) { 120 | Ok(v) => v, 121 | Err(e) => die(format_args!("Failed to parse {:?}: {}", raw, e)), 122 | }; 123 | println!("Successfully parsed: {:?}", iri); 124 | 125 | let absolute = iri.to_iri().ok(); 126 | match absolute { 127 | Some(_) => println!("IRI is absolute."), 128 | None => println!("IRI is relative."), 129 | } 130 | 131 | print_components(iri); 132 | if let Some(absolute) = absolute { 133 | print_normalized(absolute.as_ref()); 134 | } 135 | 136 | iri 137 | } 138 | 139 | fn print_components(iri: &RiReferenceStr) { 140 | println!("scheme: {:?}", iri.scheme_str()); 141 | println!("authority: {:?}", 
iri.authority_str()); 142 | if let Some(components) = iri.authority_components() { 143 | println!(" userinfo: {:?}", components.userinfo()); 144 | println!(" host: {:?}", components.host()); 145 | println!(" port: {:?}", components.port()); 146 | } 147 | println!("path: {:?}", iri.path_str()); 148 | println!("query: {:?}", iri.query_str()); 149 | println!("fragment: {:?}", iri.fragment()); 150 | } 151 | 152 | pub fn print_normalized(iri: &IriStr) { 153 | println!("is_normalized_rfc3986: {}", iri.is_normalized_rfc3986()); 154 | println!( 155 | "is_normalized_but_authorityless_relative_path_preserved: {}", 156 | iri.is_normalized_but_authorityless_relative_path_preserved() 157 | ); 158 | println!("normalized: {}", iri.normalize()); 159 | } 160 | -------------------------------------------------------------------------------- /examples/resolve.rs: -------------------------------------------------------------------------------- 1 | //! An example to resolve an IRI reference against a base IRI, both given as CLI arguments. 2 | 3 | use iri_string::types::{RiAbsoluteStr, RiReferenceStr}; 4 | 5 | const USAGE: &str = "\ 6 | USAGE: 7 | resolve [FLAGS] [--] BASE REFERENCE 8 | 9 | FLAGS: 10 | -h, --help Prints this help 11 | -i, --iri Handle the input as an IRI (RFC 3987) 12 | -u, --uri Handle the input as an URI (RFC 3986) 13 | -w, --whatwg Serialize normalization result according to WHATWG URL Standard. 14 | 15 | ARGS: 16 | Base IRI or URI to resolve REFERENCE against 17 | IRI or URI to resolve 18 | "; 19 | 20 | fn print_help() { 21 | eprintln!("{}", USAGE); 22 | } 23 | 24 | fn help_and_exit() -> ! { 25 | print_help(); 26 | std::process::exit(1); 27 | } 28 | 29 | fn die(msg: impl std::fmt::Display) -> ! { 30 | eprintln!("ERROR: {}", msg); 31 | eprintln!(); 32 | print_help(); 33 | std::process::exit(1); 34 | } 35 | 36 | /// Syntax specification. 37 | #[derive(Debug, Clone, Copy)] 38 | enum Spec { 39 | /// RFC 3986 URI. 40 | Uri, 41 | /// RFC 3987 IRI. 
42 | Iri, 43 | } 44 | 45 | impl Default for Spec { 46 | #[inline] 47 | fn default() -> Self { 48 | Self::Iri 49 | } 50 | } 51 | 52 | /// CLI options. 53 | #[derive(Default, Debug, Clone)] 54 | struct CliOpt { 55 | /// Base IRI. 56 | base: String, 57 | /// Reference IRI. 58 | reference: String, 59 | /// Syntax spec. 60 | spec: Spec, 61 | /// Whether to serialize in WHATWG URL Standard way. 62 | whatwg_serialization: bool, 63 | } 64 | 65 | impl CliOpt { 66 | fn parse() -> Self { 67 | let mut args = std::env::args(); 68 | // Skip `argv[0]`. 69 | args.next(); 70 | 71 | let mut base = None; 72 | let mut reference = None; 73 | let mut spec = None; 74 | let mut whatwg_serialization = false; 75 | 76 | for arg in args.by_ref() { 77 | match arg.as_str() { 78 | "--iri" | "-i" => spec = Some(Spec::Iri), 79 | "--uri" | "-u" => spec = Some(Spec::Uri), 80 | "--whatwg" | "-w" => whatwg_serialization = true, 81 | "--help" | "-h" => help_and_exit(), 82 | opt if opt.starts_with('-') => die(format_args!("Unknown option: {}", opt)), 83 | _ => { 84 | if base.is_none() { 85 | base = Some(arg); 86 | } else if reference.is_none() { 87 | reference = Some(arg); 88 | } else { 89 | die("IRI can be specified at most twice"); 90 | } 91 | } 92 | } 93 | } 94 | 95 | for arg in args { 96 | if base.is_none() { 97 | base = Some(arg); 98 | } else if reference.is_none() { 99 | reference = Some(arg); 100 | } else { 101 | die("IRI can be specified at most twice"); 102 | } 103 | } 104 | 105 | let base = base.unwrap_or_else(|| die("Base IRI should be specified")); 106 | let reference = reference.unwrap_or_else(|| die("Reference IRI should be specified")); 107 | let spec = spec.unwrap_or_default(); 108 | Self { 109 | base, 110 | reference, 111 | spec, 112 | whatwg_serialization, 113 | } 114 | } 115 | } 116 | 117 | fn main() { 118 | let opt = CliOpt::parse(); 119 | 120 | match opt.spec { 121 | Spec::Iri => parse::(&opt), 122 | Spec::Uri => parse::(&opt), 123 | } 124 | } 125 | 126 | fn parse(opt: &CliOpt) { 
127 | let base_raw = &opt.base.as_str(); 128 | let reference_raw = &opt.reference.as_str(); 129 | let base = match RiAbsoluteStr::::new(base_raw) { 130 | Ok(v) => v, 131 | Err(e) => die(format_args!( 132 | "Failed to parse {:?} as an IRI (without fragment): {}", 133 | base_raw, e 134 | )), 135 | }; 136 | let reference = match RiReferenceStr::::new(reference_raw) { 137 | Ok(v) => v, 138 | Err(e) => die(format_args!( 139 | "Failed to parse {:?} as an IRI reference: {}", 140 | reference_raw, e 141 | )), 142 | }; 143 | 144 | let resolved = reference.resolve_against(base); 145 | if !opt.whatwg_serialization { 146 | if let Err(e) = resolved.ensure_rfc3986_normalizable() { 147 | die(format_args!( 148 | "Failed to resolve {:?} against {:?}: {}", 149 | reference_raw, base_raw, e 150 | )); 151 | } 152 | } 153 | println!("{}", resolved); 154 | } 155 | -------------------------------------------------------------------------------- /iri-string-benches/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "iri-string-benches" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [features] 9 | # Default features. 10 | default = ["std"] 11 | 12 | alloc = ["iri-string/alloc"] 13 | std = ["alloc", "iri-string/std"] 14 | memchr = ["iri-string/memchr"] 15 | serde = ["iri-string/serde"] 16 | 17 | [dependencies] 18 | iri-string = { path = ".." 
} 19 | 20 | [dev-dependencies] 21 | criterion = "0.4.0" 22 | 23 | [[bench]] 24 | name = "resolve" 25 | harness = false 26 | required-features = ["alloc"] 27 | 28 | [[bench]] 29 | name = "validate" 30 | harness = false 31 | -------------------------------------------------------------------------------- /iri-string-benches/benches/resolve.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | 3 | use core::fmt::Write; 4 | 5 | use iri_string::format::{write_to_slice, ToDedicatedString}; 6 | use iri_string::resolve::FixedBaseResolver; 7 | use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; 8 | 9 | pub fn criterion_benchmark(c: &mut Criterion) { 10 | let base = IriAbsoluteStr::new("https://sub.example.com/foo1/foo2/foo3/foo4/foo5") 11 | .expect("should be valid IRI"); 12 | let rel = IriReferenceStr::new(concat!( 13 | "bar1/bar2/bar3/../bar4/../../bar5/bar6/bar7/../../../../..", 14 | "/bar8/../../../bar9/././././././bar10/bar11", 15 | )) 16 | .expect("should be valid IRI"); 17 | 18 | c.bench_function("resolve (new task, new buf)", |b| { 19 | b.iter(|| rel.resolve_against(base).to_dedicated_string()) 20 | }); 21 | 22 | c.bench_function("resolve (task reuse, new buf)", |b| { 23 | let task = FixedBaseResolver::new(base).resolve(rel); 24 | b.iter(|| task.to_dedicated_string()); 25 | }); 26 | 27 | c.bench_function("resolve (task reuse, buf reuse)", |b| { 28 | let mut buf = String::new(); 29 | let task = FixedBaseResolver::new(base).resolve(rel); 30 | b.iter(|| { 31 | buf.clear(); 32 | write!(&mut buf, "{}", task).expect("write to `String` should never fail"); 33 | }); 34 | }); 35 | 36 | c.bench_function("resolve (task reuse, fixed buf reuse)", |b| { 37 | let mut buf = [0_u8; 512]; 38 | let task = FixedBaseResolver::new(base).resolve(rel); 39 | b.iter(move || { 40 | write_to_slice(&mut buf, &task).expect("`buf` should have enough capacity"); 41 | }); 42 | }); 43 | } 44 | 
45 | criterion_group!(benches, criterion_benchmark); 46 | criterion_main!(benches); 47 | -------------------------------------------------------------------------------- /iri-string-benches/benches/validate.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | 3 | use iri_string::types::IriReferenceStr; 4 | 5 | pub fn criterion_benchmark(c: &mut Criterion) { 6 | let domain = "scheme://sub.sub.sub.example.com:8080/a/b/c"; 7 | let v4 = "scheme://198.51.100.23:8080/a/b/c"; 8 | let v6 = "scheme://[2001:db8:0123::cafe]:8080/a/b/c"; 9 | let v6v4 = "scheme://[2001:db8::198.51.100.23]:8080/a/b/c"; 10 | let vfuture = "scheme://[v2.ipv2-does-not-exist]:8080/a/b/c"; 11 | 12 | c.bench_function("parse various hosts", |b| { 13 | b.iter(|| { 14 | ( 15 | IriReferenceStr::new(domain), 16 | IriReferenceStr::new(v4), 17 | IriReferenceStr::new(v6), 18 | IriReferenceStr::new(v6v4), 19 | IriReferenceStr::new(vfuture), 20 | ) 21 | }) 22 | }); 23 | 24 | c.bench_function("parse complex path", |b| { 25 | b.iter(|| { 26 | let s = concat!( 27 | "scheme://user:pw@sub.example.com:8080/a/b/c/%30/%31/%32%33%34", 28 | "/foo/foo/../../../foo.foo/foo/foo/././././//////foo", 29 | "/\u{03B1}\u{03B2}\u{03B3}/\u{03B1}\u{03B2}\u{03B3}/\u{03B1}\u{03B2}\u{03B3}", 30 | "?k1=v1&k2=v2&k3=v3#fragment" 31 | ); 32 | IriReferenceStr::new(s) 33 | }); 34 | }); 35 | } 36 | 37 | criterion_group!(benches, criterion_benchmark); 38 | criterion_main!(benches); 39 | -------------------------------------------------------------------------------- /iri-string-benches/src/lib.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /iri-string-tests/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "iri-string-tests" 3 | version = "0.0.0" 4 | 
edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [features] 9 | # Default features. 10 | default = ["std"] 11 | 12 | alloc = ["iri-string/alloc"] 13 | std = ["alloc", "iri-string/std"] 14 | memchr = ["iri-string/memchr"] 15 | serde = ["iri-string/serde"] 16 | 17 | [dependencies] 18 | iri-string = { path = ".." } 19 | 20 | serde = { version = "1.0.140", features = ["derive"] } 21 | serde_json = { version = "1.0.82", features = ["arbitrary_precision"] } 22 | -------------------------------------------------------------------------------- /iri-string-tests/src/lib.rs: -------------------------------------------------------------------------------- 1 | // No content. 2 | -------------------------------------------------------------------------------- /iri-string-tests/tests/uritemplate-test.rs: -------------------------------------------------------------------------------- 1 | //! Tests using . 2 | 3 | use std::collections::HashMap; 4 | use std::fs::File; 5 | use std::path::Path; 6 | 7 | use iri_string::spec::UriSpec; 8 | use iri_string::template::simple_context::{SimpleContext, Value}; 9 | use iri_string::template::UriTemplateStr; 10 | 11 | use serde::Deserialize; 12 | use serde_json::Value as JsonValue; 13 | 14 | #[derive(Debug, Clone, Deserialize)] 15 | struct TestFile { 16 | #[serde(flatten)] 17 | tests: HashMap, 18 | } 19 | 20 | #[derive(Debug, Clone, Deserialize)] 21 | struct TestSet { 22 | variables: HashMap, 23 | testcases: Vec<(String, JsonValue)>, 24 | } 25 | 26 | fn test_with_file(filename: &str) { 27 | let base = Path::new("assets/uritemplate-test"); 28 | let path = base.join(Path::new(filename)); 29 | let mut file = File::open(path).expect("test file not found"); 30 | let tests: TestFile = serde_json::from_reader(&mut file).expect("failed to load test asset"); 31 | 32 | for (test_set_name, test_set) in &tests.tests { 33 | let mut context = SimpleContext::new(); 34 | for 
(name, value) in &test_set.variables { 35 | let value = match value { 36 | JsonValue::Null => Value::Undefined, 37 | JsonValue::String(s) => Value::String(s.clone()), 38 | JsonValue::Array(vec) => { 39 | let vec = vec 40 | .iter() 41 | .map(|v| match v { 42 | JsonValue::String(s) => s.clone(), 43 | v => panic!("list item of unexpected type: {v:?}"), 44 | }) 45 | .collect(); 46 | Value::List(vec) 47 | } 48 | JsonValue::Object(map) => { 49 | let vec = map 50 | .iter() 51 | .map(|(k, v)| { 52 | let v = match v { 53 | JsonValue::String(s) => s.clone(), 54 | v => panic!("assoc-list item of unexpected type: {v:?}"), 55 | }; 56 | (k.clone(), v) 57 | }) 58 | .collect(); 59 | Value::Assoc(vec) 60 | } 61 | // Note that `arbitrary_precision` flag of `serde_json` crate is expected. 62 | JsonValue::Number(num) => Value::String(num.to_string()), 63 | v => panic!("value of unexpected type: {v:?}"), 64 | }; 65 | context.insert(name, value); 66 | } 67 | 68 | for (template, expected) in &test_set.testcases { 69 | let expected = match expected { 70 | JsonValue::Bool(false) => None, 71 | JsonValue::String(s) => Some(vec![s.as_str()]), 72 | JsonValue::Array(arr) => Some( 73 | arr.iter() 74 | .map(|candidate| { 75 | candidate 76 | .as_str() 77 | .expect("expected strings as result candidates") 78 | }) 79 | .collect::>(), 80 | ), 81 | v => panic!("unexpected `expected` value: {v:?}"), 82 | }; 83 | let result = UriTemplateStr::new(template) 84 | .and_then(|template| template.expand::(&context)); 85 | match (result, expected) { 86 | (Ok(expanded), Some(candidates)) => { 87 | let expanded = expanded.to_string(); 88 | assert!( 89 | candidates.contains(&expanded.as_str()), 90 | "unexpected expansion result: test={test_set_name:?}, \ 91 | template={template:?}, expanded={expanded}, candidates={candidates:?}" 92 | ); 93 | } 94 | (Ok(expanded), None) => { 95 | panic!( 96 | "expected to fail but successfully parsed: test={test_set_name:?}, 97 | template={template:?}, expanded={expanded}", 98 | ) 
99 | } 100 | (Err(e), Some(expected)) => { 101 | panic!( 102 | "expected to success but failed to parse: {e} \ 103 | (test={test_set_name:?}, template={template:?}, expected={expected:?})" 104 | ) 105 | } 106 | (Err(_), None) => {} 107 | } 108 | } 109 | } 110 | } 111 | 112 | #[test] 113 | fn negative_tests() { 114 | test_with_file("negative-tests.json"); 115 | } 116 | 117 | #[test] 118 | fn extended_tests() { 119 | test_with_file("extended-tests.json"); 120 | } 121 | 122 | #[test] 123 | fn spec_examples_by_section() { 124 | test_with_file("spec-examples-by-section.json"); 125 | } 126 | 127 | #[test] 128 | fn spec_examples() { 129 | test_with_file("spec-examples.json"); 130 | } 131 | -------------------------------------------------------------------------------- /src/components.rs: -------------------------------------------------------------------------------- 1 | //! Components of IRIs. 2 | 3 | mod authority; 4 | 5 | use core::num::NonZeroUsize; 6 | use core::ops::{Range, RangeFrom, RangeTo}; 7 | 8 | use crate::parser::trusted as trusted_parser; 9 | use crate::spec::Spec; 10 | use crate::types::RiReferenceStr; 11 | 12 | pub use self::authority::AuthorityComponents; 13 | 14 | /// Positions to split an IRI into components. 15 | #[derive(Debug, Clone, Copy)] 16 | pub(crate) struct Splitter { 17 | /// Scheme end. 18 | scheme_end: Option, 19 | /// Authority end. 20 | /// 21 | /// Note that absence of the authority and the empty authority is 22 | /// distinguished. 23 | authority_end: Option, 24 | /// Query start (after the leading `?`). 25 | query_start: Option, 26 | /// Fragment start (after the leading `#`). 27 | fragment_start: Option, 28 | } 29 | 30 | impl Splitter { 31 | /// Creates a new splitter. 
32 | #[inline] 33 | #[must_use] 34 | pub(crate) fn new( 35 | scheme_end: Option, 36 | authority_end: Option, 37 | query_start: Option, 38 | fragment_start: Option, 39 | ) -> Self { 40 | Self { 41 | scheme_end, 42 | authority_end, 43 | query_start, 44 | fragment_start, 45 | } 46 | } 47 | 48 | /// Decomposes an IRI into five major components: scheme, authority, path, query, and fragment. 49 | #[must_use] 50 | fn split_into_major( 51 | self, 52 | s: &str, 53 | ) -> (Option<&str>, Option<&str>, &str, Option<&str>, Option<&str>) { 54 | let (scheme, next_of_scheme) = match self.scheme_end { 55 | // +1: ":".len() 56 | Some(end) => (Some(&s[..end.get()]), end.get() + 1), 57 | None => (None, 0), 58 | }; 59 | let (authority, next_of_authority) = match self.authority_end { 60 | // +2: "//".len() 61 | Some(end) => (Some(&s[(next_of_scheme + 2)..end.get()]), end.get()), 62 | None => (None, next_of_scheme), 63 | }; 64 | let (fragment, end_of_prev_of_fragment) = match self.fragment_start { 65 | // -1: "#".len() 66 | Some(start) => (Some(&s[start.get()..]), start.get() - 1), 67 | None => (None, s.len()), 68 | }; 69 | let (query, end_of_path) = match self.query_start { 70 | Some(start) => ( 71 | Some(&s[start.get()..end_of_prev_of_fragment]), 72 | // -1: "?".len() 73 | start.get() - 1, 74 | ), 75 | None => (None, end_of_prev_of_fragment), 76 | }; 77 | let path = &s[next_of_authority..end_of_path]; 78 | (scheme, authority, path, query, fragment) 79 | } 80 | 81 | /// Returns the range for the scheme part. 82 | #[inline] 83 | #[must_use] 84 | fn scheme_range(self) -> Option> { 85 | self.scheme_end.map(|end| ..end.get()) 86 | } 87 | 88 | /// Returns the scheme as a string. 89 | #[inline] 90 | #[must_use] 91 | pub(crate) fn scheme_str<'a>(&self, s: &'a str) -> Option<&'a str> { 92 | self.scheme_range().map(|range| &s[range]) 93 | } 94 | 95 | /// Returns true if the IRI has a scheme part, false otherwise. 
96 | #[inline] 97 | #[must_use] 98 | pub(crate) fn has_scheme(&self) -> bool { 99 | self.scheme_end.is_some() 100 | } 101 | 102 | /// Returns the range for the authority part. 103 | #[inline] 104 | #[must_use] 105 | fn authority_range(self) -> Option> { 106 | let end = self.authority_end?.get(); 107 | // 2: "//".len() 108 | // +3: "://".len() 109 | let start = self.scheme_end.map_or(2, |v| v.get() + 3); 110 | Some(start..end) 111 | } 112 | 113 | /// Returns the authority as a string. 114 | #[inline] 115 | #[must_use] 116 | pub(crate) fn authority_str<'a>(&self, s: &'a str) -> Option<&'a str> { 117 | self.authority_range().map(|range| &s[range]) 118 | } 119 | 120 | /// Returns true if the IRI has an authority part, false otherwise. 121 | #[inline] 122 | #[must_use] 123 | pub(crate) fn has_authority(&self) -> bool { 124 | self.authority_end.is_some() 125 | } 126 | 127 | /// Returns the range for the path part. 128 | #[inline] 129 | #[must_use] 130 | fn path_range(self, full_len: usize) -> Range { 131 | // -1: "?".len() and "#".len() 132 | let end = self 133 | .query_start 134 | .or(self.fragment_start) 135 | .map_or(full_len, |v| v.get() - 1); 136 | let start = self.authority_end.map_or_else( 137 | // +1: ":".len() 138 | || self.scheme_end.map_or(0, |v| v.get() + 1), 139 | NonZeroUsize::get, 140 | ); 141 | 142 | start..end 143 | } 144 | 145 | /// Returns the path as a string. 146 | #[inline] 147 | #[must_use] 148 | pub(crate) fn path_str<'a>(&self, s: &'a str) -> &'a str { 149 | &s[self.path_range(s.len())] 150 | } 151 | 152 | /// Returns true if the path part of the IRI is empty. 153 | #[inline] 154 | #[must_use] 155 | pub(crate) fn is_path_empty(&self, full_len: usize) -> bool { 156 | self.path_range(full_len).is_empty() 157 | } 158 | 159 | /// Returns the range for the query part excluding a prefix `?`. 
160 | #[inline] 161 | #[must_use] 162 | fn query_range(self, full_len: usize) -> Option> { 163 | let start = self.query_start?.get(); 164 | // -1: "#".len() 165 | let end = self.fragment_start.map_or(full_len, |v| v.get() - 1); 166 | 167 | Some(start..end) 168 | } 169 | 170 | /// Returns the query as a string. 171 | #[inline] 172 | #[must_use] 173 | pub(crate) fn query_str<'a>(&self, s: &'a str) -> Option<&'a str> { 174 | self.query_range(s.len()).map(|range| &s[range]) 175 | } 176 | 177 | /// Returns true if the IRI has a query part, false otherwise. 178 | #[inline] 179 | #[must_use] 180 | pub(crate) fn has_query(&self) -> bool { 181 | self.query_start.is_some() 182 | } 183 | 184 | /// Returns the range for the fragment part excluding a prefix `#`. 185 | #[inline] 186 | #[must_use] 187 | pub(crate) fn fragment_range(self) -> Option> { 188 | self.fragment_start.map(|v| v.get()..) 189 | } 190 | 191 | /// Returns the fragment as a string. 192 | #[inline] 193 | #[must_use] 194 | pub(crate) fn fragment_str<'a>(&self, s: &'a str) -> Option<&'a str> { 195 | self.fragment_range().map(|range| &s[range]) 196 | } 197 | } 198 | 199 | /// Components of an IRI reference. 200 | /// 201 | /// See . 202 | #[derive(Debug, Clone, Copy)] 203 | pub(crate) struct RiReferenceComponents<'a, S: Spec> { 204 | /// Original complete string. 205 | pub(crate) iri: &'a RiReferenceStr, 206 | /// Positions to split the IRI into components. 207 | pub(crate) splitter: Splitter, 208 | } 209 | 210 | impl<'a, S: Spec> RiReferenceComponents<'a, S> { 211 | /// Returns five major components: scheme, authority, path, query, and fragment. 212 | #[inline] 213 | #[must_use] 214 | pub(crate) fn to_major( 215 | self, 216 | ) -> ( 217 | Option<&'a str>, 218 | Option<&'a str>, 219 | &'a str, 220 | Option<&'a str>, 221 | Option<&'a str>, 222 | ) { 223 | self.splitter.split_into_major(self.iri.as_str()) 224 | } 225 | 226 | /// Returns the IRI reference. 
227 | #[inline] 228 | #[must_use] 229 | pub(crate) fn iri(&self) -> &'a RiReferenceStr { 230 | self.iri 231 | } 232 | 233 | /// Returns the scheme as a string. 234 | #[inline] 235 | #[must_use] 236 | pub(crate) fn scheme_str(&self) -> Option<&str> { 237 | self.splitter.scheme_str(self.iri.as_str()) 238 | } 239 | 240 | /// Returns the authority as a string. 241 | #[inline] 242 | #[must_use] 243 | pub(crate) fn authority_str(&self) -> Option<&str> { 244 | self.splitter.authority_str(self.iri.as_str()) 245 | } 246 | 247 | /// Returns the path as a string. 248 | #[inline] 249 | #[must_use] 250 | pub(crate) fn path_str(&self) -> &str { 251 | self.splitter.path_str(self.iri.as_str()) 252 | } 253 | 254 | /// Returns the query as a string. 255 | #[inline] 256 | #[must_use] 257 | pub(crate) fn query_str(&self) -> Option<&str> { 258 | self.splitter.query_str(self.iri.as_str()) 259 | } 260 | } 261 | 262 | impl<'a, S: Spec> From<&'a RiReferenceStr> for RiReferenceComponents<'a, S> { 263 | #[inline] 264 | fn from(s: &'a RiReferenceStr) -> Self { 265 | trusted_parser::decompose_iri_reference(s) 266 | } 267 | } 268 | -------------------------------------------------------------------------------- /src/components/authority.rs: -------------------------------------------------------------------------------- 1 | //! Subcomponents of authority. 2 | 3 | use crate::parser::trusted as trusted_parser; 4 | use crate::spec::Spec; 5 | use crate::types::RiReferenceStr; 6 | 7 | /// Subcomponents of authority. 8 | /// 9 | /// This is a return type of the `authority_components` method of the string 10 | /// types (for example [`RiStr::authority_components`]). 11 | /// 12 | /// [`RiStr::authority_components`]: `crate::types::RiStr::authority_components` 13 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 14 | pub struct AuthorityComponents<'a> { 15 | /// Authority string, excluding the leading `//`. 16 | pub(crate) authority: &'a str, 17 | /// Start position of the `host`.
18 | pub(crate) host_start: usize, 19 | /// End position of the `host`. 20 | pub(crate) host_end: usize, 21 | } 22 | 23 | impl<'a> AuthorityComponents<'a> { 24 | /// Creates a new `AuthorityComponents` from the IRI. 25 | pub fn from_iri(iri: &'a RiReferenceStr) -> Option { 26 | iri.authority_str() 27 | .map(trusted_parser::authority::decompose_authority) 28 | } 29 | 30 | /// Returns the `userinfo` part, excluding the following `@`. 31 | #[must_use] 32 | pub fn userinfo(&self) -> Option<&'a str> { 33 | let userinfo_at = self.host_start.checked_sub(1)?; 34 | debug_assert_eq!(self.authority.as_bytes()[userinfo_at], b'@'); 35 | Some(&self.authority[..userinfo_at]) 36 | } 37 | 38 | /// Returns the `host` part. 39 | #[inline] 40 | #[must_use] 41 | pub fn host(&self) -> &'a str { 42 | // NOTE: RFC 6874 support may need the internal logic to change. 43 | &self.authority[self.host_start..self.host_end] 44 | } 45 | 46 | /// Returns the `port` part, excluding the preceding `:`. 47 | #[must_use] 48 | pub fn port(&self) -> Option<&'a str> { 49 | if self.host_end == self.authority.len() { 50 | return None; 51 | } 52 | let port_colon = self.host_end; 53 | debug_assert_eq!(self.authority.as_bytes()[port_colon], b':'); 54 | Some(&self.authority[(port_colon + 1)..]) 55 | } 56 | } 57 | 58 | #[cfg(test)] 59 | #[cfg(feature = "alloc")] 60 | mod tests { 61 | use super::*; 62 | 63 | #[cfg(all(feature = "alloc", not(feature = "std")))] 64 | use alloc::string::String; 65 | 66 | use crate::types::IriReferenceStr; 67 | 68 | const USERINFO: &[&str] = &["", "user:password", "user"]; 69 | 70 | const PORT: &[&str] = &[ 71 | "", 72 | "0", 73 | "0000", 74 | "80", 75 | "1234567890123456789012345678901234567890", 76 | ]; 77 | 78 | const HOST: &[&str] = &[ 79 | "", 80 | "localhost", 81 | "example.com", 82 | "192.0.2.0", 83 | "[2001:db8::1]", 84 | "[2001:0db8:0:0:0:0:0:1]", 85 | "[2001:0db8::192.0.2.255]", 86 | "[v9999.this-is-futuristic-ip-address]", 87 | ]; 88 | 89 | fn
compose_to_relative_iri(userinfo: Option<&str>, host: &str, port: Option<&str>) -> String { 90 | let mut buf = String::from("//"); 91 | if let Some(userinfo) = userinfo { 92 | buf.push_str(userinfo); 93 | buf.push('@'); 94 | } 95 | buf.push_str(host); 96 | if let Some(port) = port { 97 | buf.push(':'); 98 | buf.push_str(port); 99 | } 100 | buf 101 | } 102 | 103 | #[test] 104 | fn test_decompose_authority() { 105 | for host in HOST.iter().copied() { 106 | for userinfo in USERINFO.iter().map(|s| Some(*s)).chain(None) { 107 | for port in PORT.iter().map(|s| Some(*s)).chain(None) { 108 | let authority = compose_to_relative_iri(userinfo, host, port); 109 | let authority = 110 | IriReferenceStr::new(&authority).expect("test case should be valid"); 111 | let components = AuthorityComponents::from_iri(authority) 112 | .expect("relative path composed for this test should contain authority"); 113 | 114 | assert_eq!(components.host(), host); 115 | assert_eq!(components.userinfo(), userinfo); 116 | assert_eq!(components.port(), port); 117 | } 118 | } 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/format.rs: -------------------------------------------------------------------------------- 1 | //! Utilities for formatting (especially `Display` trait). 2 | //! 3 | //! This module contains utilities for [`Display`][`core::fmt::Display`]-able 4 | //! types. 5 | 6 | use core::fmt::{self, Write as _}; 7 | 8 | #[cfg(feature = "alloc")] 9 | use alloc::collections::TryReserveError; 10 | #[cfg(all(feature = "alloc", not(feature = "std")))] 11 | use alloc::string::String; 12 | 13 | /// Output buffer capacity overflow error. 
14 | #[derive(Debug, Clone, Copy)] 15 | pub struct CapacityOverflowError; 16 | 17 | impl fmt::Display for CapacityOverflowError { 18 | #[inline] 19 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 20 | f.write_str("buffer capacity overflow") 21 | } 22 | } 23 | 24 | #[cfg(feature = "std")] 25 | impl std::error::Error for CapacityOverflowError {} 26 | 27 | /// Writer to the bytes buffer. 28 | struct ByteBufWriter<'b> { 29 | /// Destination buffer. 30 | buffer: &'b mut [u8], 31 | /// Position to write the next string fragment. 32 | cursor: usize, 33 | } 34 | 35 | impl fmt::Write for ByteBufWriter<'_> { 36 | fn write_str(&mut self, s: &str) -> fmt::Result { 37 | let dest = &mut self.buffer[self.cursor..]; 38 | if dest.len() < s.len() { 39 | return Err(fmt::Error); 40 | } 41 | dest[..s.len()].copy_from_slice(s.as_bytes()); 42 | self.cursor += s.len(); 43 | Ok(()) 44 | } 45 | } 46 | 47 | /// Writes to the bytes buffer. 48 | pub fn write_to_slice<'a, T: fmt::Display>( 49 | buf: &'a mut [u8], 50 | value: &T, 51 | ) -> Result<&'a str, CapacityOverflowError> { 52 | let mut writer = ByteBufWriter { 53 | buffer: buf, 54 | cursor: 0, 55 | }; 56 | if write!(writer, "{}", value).is_err() { 57 | return Err(CapacityOverflowError); 58 | } 59 | let len = writer.cursor; 60 | let result = core::str::from_utf8(&buf[..len]) 61 | .expect("[validity] fmt::Display writes valid UTF-8 byte sequence"); 62 | Ok(result) 63 | } 64 | 65 | /// Writer that fails (not panics) on OOM. 66 | #[cfg(feature = "alloc")] 67 | struct StringWriter<'a> { 68 | /// Destination buffer. 69 | buffer: &'a mut String, 70 | /// Memory allocation error. 
71 | error: Option, 72 | } 73 | 74 | #[cfg(feature = "alloc")] 75 | impl fmt::Write for StringWriter<'_> { 76 | fn write_str(&mut self, s: &str) -> fmt::Result { 77 | if self.error.is_some() { 78 | return Err(fmt::Error); 79 | } 80 | if let Err(e) = self.buffer.try_reserve(s.len()) { 81 | self.error = Some(e); 82 | return Err(fmt::Error); 83 | } 84 | // This should never fail since `.try_reserve(s.len())` succeeded. 85 | self.buffer.push_str(s); 86 | Ok(()) 87 | } 88 | } 89 | 90 | /// Appends the data to the string. 91 | /// 92 | /// When allocation failure happens, incompletely appended strings won't be 93 | /// stripped. Callers are responsible to clean up the destination if necessary. 94 | #[cfg(feature = "alloc")] 95 | pub fn try_append_to_string( 96 | dest: &mut String, 97 | value: &T, 98 | ) -> Result<(), TryReserveError> { 99 | let mut writer = StringWriter { 100 | buffer: dest, 101 | error: None, 102 | }; 103 | if write!(writer, "{}", value).is_err() { 104 | let e = writer 105 | .error 106 | .expect("[consistency] allocation error should be set on formatting failure"); 107 | return Err(e); 108 | } 109 | Ok(()) 110 | } 111 | 112 | /// Returns true if the two equals after they are converted to strings. 113 | pub(crate) fn eq_str_display(s: &str, d: &T) -> bool 114 | where 115 | T: ?Sized + fmt::Display, 116 | { 117 | /// Dummy writer to compare the formatted object to the given string. 
118 | struct CmpWriter<'a>(&'a str); 119 | impl fmt::Write for CmpWriter<'_> { 120 | fn write_str(&mut self, s: &str) -> fmt::Result { 121 | if self.0.len() < s.len() { 122 | return Err(fmt::Error); 123 | } 124 | let (prefix, rest) = self.0.split_at(s.len()); 125 | self.0 = rest; 126 | if prefix == s { 127 | Ok(()) 128 | } else { 129 | Err(fmt::Error) 130 | } 131 | } 132 | } 133 | 134 | let mut writer = CmpWriter(s); 135 | let succeeded = write!(writer, "{}", d).is_ok(); 136 | succeeded && writer.0.is_empty() 137 | } 138 | 139 | /// A debug-printable type to hide the sensitive information. 140 | #[derive(Clone, Copy)] 141 | pub(crate) struct Censored; 142 | 143 | impl core::fmt::Debug for Censored { 144 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> core::fmt::Result { 145 | f.write_str("{censored}") 146 | } 147 | } 148 | 149 | /// [`ToString`][`alloc::string::ToString`], but without panic. 150 | #[cfg(feature = "alloc")] 151 | pub trait ToStringFallible: alloc::string::ToString { 152 | /// [`ToString::to_string`][`alloc::string::ToString::to_string`], but without panic on OOM. 153 | fn try_to_string(&self) -> Result; 154 | } 155 | 156 | #[cfg(feature = "alloc")] 157 | impl ToStringFallible for T { 158 | /// [`ToString::to_string`][`alloc::string::ToString::to_string`], but without panic on OOM. 159 | #[inline] 160 | fn try_to_string(&self) -> Result { 161 | let mut buf = String::new(); 162 | try_append_to_string(&mut buf, self)?; 163 | Ok(buf) 164 | } 165 | } 166 | 167 | /// A trait for types that can be converted to dedicated allocated string types. 168 | #[cfg(feature = "alloc")] 169 | pub trait ToDedicatedString { 170 | /// Conversion target type. 171 | type Target; 172 | 173 | /// Converts the value to the allocated string. 174 | fn try_to_dedicated_string(&self) -> Result; 175 | 176 | /// Converts the value to the allocated string. 177 | /// 178 | /// # Panics 179 | /// 180 | /// Panics if memory allocation error occurred.
181 | #[inline] 182 | #[must_use] 183 | fn to_dedicated_string(&self) -> Self::Target { 184 | self.try_to_dedicated_string() 185 | .expect("failed to allocate enough memory") 186 | } 187 | } 188 | 189 | #[cfg(test)] 190 | mod tests { 191 | use super::*; 192 | 193 | #[test] 194 | fn eq_str_display_1() { 195 | assert!(eq_str_display("hello", "hello")); 196 | assert!(eq_str_display("42", &42)); 197 | 198 | assert!(eq_str_display( 199 | r#"\x00\t\r\n\xff\\"#, 200 | &b"\x00\t\r\n\xff\\".escape_ascii() 201 | )); 202 | 203 | assert!(!eq_str_display("hello", "world")); 204 | assert!(!eq_str_display("hello world", "hello")); 205 | assert!(!eq_str_display("hello", "hello world")); 206 | assert!(!eq_str_display("42", &4)); 207 | assert!(!eq_str_display("4", &42)); 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! String types for [RFC 3987 Internationalized Resource Identifiers (IRIs)][RFC 3987] and 2 | //! [RFC 3986 Uniform Resource Identifiers (URIs)][RFC 3986]. 3 | //! 4 | //! Note that this crate does not have any extra knowledge about protocols. 5 | //! Comparisons between IRI strings by `PartialEq` and `Eq` are implemented as [simple string 6 | //! comparison](https://tools.ietf.org/html/rfc3986#section-6.2.1). 7 | //! You should implement by yourself or use another crate to use such extra knowledge to compare 8 | //! IRIs / URIs. 9 | //! 10 | //! # Capability 11 | //! 12 | //! This crate provides many features for IRIs / URIs. 13 | //! 14 | //! ## String types 15 | //! 16 | //! [`types` module][`types`] provides various string types for IRIs and URIs. 17 | //! The borrowed string types are unsized slice types (such as `[u8]` and `str`) 18 | //! and not a sized struct, so they are highly interoperable with for example 19 | //! `Cow` and `Rc`. Conversions between `&str` and borrowed IRI string types are easy.
20 | //! 21 | //! ## Resolvers 22 | //! 23 | //! [`resolve` module][`resolve`] provides IRI / URI references resolver. 24 | //! However, you are recommended to use methods of string types such as 25 | //! [`RiReferenceStr::resolve_against()`] or [`RiRelativeStr::resolve_against()`] 26 | //! if you don't intend to resolve multiple IRIs against the same base. 27 | //! 28 | //! ## Validators 29 | //! 30 | //! Validator functions are provided from [`validate` module][`validate`]. 31 | //! 32 | //! ## Percent encoding 33 | //! 34 | //! [`percent_encode` module][`percent_encode`] provides a converter to encode 35 | //! user-provided string into percent-encoded one (if syntax requires so). 36 | //! 37 | //! ## IRI builder 38 | //! 39 | //! [`build` module][`build`] provides IRI builder. 40 | //! 41 | //! ## URI template (RFC 6570) 42 | //! 43 | //! [`template` module][`template`] provides an RFC 6570 URI Template processor. 44 | //! 45 | //! # Feature flags 46 | //! 47 | //! ## `std` and `alloc` support 48 | //! 49 | //! This crate supports `no_std` usage. 50 | //! 51 | //! * `alloc` feature: 52 | //! + Std library or `alloc` crate is required. 53 | //! + This feature enables types and functions which require memory allocation, 54 | //! e.g. `types::IriString` and `types::IriRelativeStr::resolve_against()`. 55 | //! * `std` feature (**enabled by default**): 56 | //! + Std library is required. 57 | //! + This automatically enables `alloc` feature. 58 | //! + The feature lets the crate utilize std-specific stuff, such as `std::error::Error` trait. 59 | //! * With neither of them: 60 | //! + The crate can be used in `no_std` environment. 61 | //! 62 | //! ## Other features 63 | //! 64 | //! * `serde` 65 | //! + Enables serde support. 66 | //! + Implement `Serialize` and `Deserialize` traits for IRI / URI types. 67 | //! * `memchr` 68 | //! + Enables faster internal character search. 69 | //! 70 | //! # Rationale 71 | //! 72 | //!
## `foo:`, `foo:/`, `foo://`, `foo:///`, `foo:////`, ... are valid IRIs 73 | //! 74 | //! All of these are valid IRIs. 75 | //! (On the other hand, all of them are invalid as relative IRI reference, because they don't 76 | //! match `relative-part` rule, especially `path-noscheme`, as the first path component of the 77 | //! relative path contains a colon.) 78 | //! 79 | //! * `foo:` 80 | //! + Decomposed to `:`. 81 | //! * `foo:/` 82 | //! + Decomposed to `:`. 83 | //! * `foo://` 84 | //! + Decomposed to `://`. 85 | //! * `foo:///` 86 | //! + Decomposed to `://`. 87 | //! * `foo:////` 88 | //! + Decomposed to `://`. 89 | //! * `foo://///` 90 | //! + Decomposed to `://`. 91 | //! 92 | //! RFC 3986 says that "if authority is absent, path cannot start with `//`". 93 | //! 94 | //! > When authority is present, the path must either be empty or begin with a slash ("/") 95 | //! > character. When authority is not present, the path cannot begin with two slash characters 96 | //! > ("//"). 97 | //! > 98 | //! > --- [RFC 3986, section 3. Syntax Components](https://tools.ietf.org/html/rfc3986#section-3). 99 | //! 100 | //! > If a URI contains an authority component, then the path component must either be empty or 101 | //! > begin with a slash ("/") character. If a URI does not contain an authority component, then the 102 | //! > path cannot begin with two slash characters ("//"). 103 | //! > 104 | //! > --- [RFC 3986, section 3.3. Path](https://tools.ietf.org/html/rfc3986#section-3.3) 105 | //! 106 | //! We should interpret them as "if `authority` rule is completely unused (i.e. does not match any 107 | //! strings **including empty string**), path cannot start with `//`". 108 | //! In other words, we should consider this as **explaining the ABNF of `hier-part` rule** 109 | //! (especially why it does not use `path` rule), but **not adding extra restriction to the rule 110 | //! written in ABNF**. 111 | //! 112 | //! 
This restriction is necessary to remove ambiguity in decomposition of some strings. 113 | //! For example, it is natural to decompose `foo://` to `:` or 114 | //! `://`. 115 | //! The restriction, **which is already encoded to the ABNF rule**, tells us to always decompose to 116 | //! the latter form, rather than the former one. 117 | //! 118 | //! Readers of the spec might be confused by "when authority is **present**" and "if a URI 119 | //! **contains** an authority component", which is unclear. 120 | //! However, based on the interpretation above, we should consider authority part with empty string 121 | //! as satisfying the condition "authority is **present**". 122 | //! 123 | //! ## IRI resolution can fail 124 | //! 125 | //! For some inputs, resulting string of IRI normalization and resolution can be syntactically 126 | //! correct but semantically wrong. In such cases, the normalizer and resolver provided by this 127 | //! crate do not silently "fix" the IRI by non-standard processing, but just 128 | //! fail by returning `Err(_)`. 129 | //! 130 | //! For details, see the documentation of [`normalize`] module. 131 | //! 132 | //! [RFC 3986]: https://tools.ietf.org/html/rfc3986 133 | //! [RFC 3987]: https://tools.ietf.org/html/rfc3987 134 | //! [`RiReferenceStr::resolve_against()`]: `types::RiReferenceStr::resolve_against` 135 | //!
[`RiRelativeStr::resolve_against()`]: `types::RiRelativeStr::resolve_against` 136 | #![warn(missing_docs)] 137 | #![warn(unsafe_op_in_unsafe_fn)] 138 | #![warn(clippy::missing_docs_in_private_items)] 139 | #![warn(clippy::undocumented_unsafe_blocks)] 140 | #![cfg_attr(not(feature = "std"), no_std)] 141 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 142 | 143 | #[cfg(feature = "alloc")] 144 | extern crate alloc; 145 | 146 | pub mod build; 147 | pub mod components; 148 | pub mod convert; 149 | pub mod format; 150 | pub mod mask_password; 151 | pub mod normalize; 152 | pub(crate) mod parser; 153 | pub mod percent_encode; 154 | pub(crate) mod raw; 155 | pub mod resolve; 156 | pub mod spec; 157 | pub mod template; 158 | pub mod types; 159 | pub mod validate; 160 | -------------------------------------------------------------------------------- /src/normalize/error.rs: -------------------------------------------------------------------------------- 1 | //! Normalization and resolution error. 2 | 3 | use core::fmt; 4 | 5 | /// IRI normalization and resolution error. 6 | /// 7 | /// For detail about resolution failure, see [the module documentation][`crate::resolve`]. 8 | #[derive(Debug, Clone)] 9 | pub struct Error(()); 10 | 11 | impl Error { 12 | /// Creates a new error. 13 | pub(crate) fn new() -> Self { 14 | Self(()) 15 | } 16 | } 17 | 18 | impl fmt::Display for Error { 19 | #[inline] 20 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 21 | f.write_str("unresolvable IRI") 22 | } 23 | } 24 | 25 | #[cfg(feature = "std")] 26 | impl std::error::Error for Error {} 27 | -------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | //! Common stuff for parsing. 
2 | 3 | pub(crate) mod char; 4 | pub(crate) mod str; 5 | pub(crate) mod trusted; 6 | pub(crate) mod validate; 7 | -------------------------------------------------------------------------------- /src/parser/char.rs: -------------------------------------------------------------------------------- 1 | //! Characters. 2 | 3 | use crate::spec::Spec; 4 | 5 | /// A mask to test whether the character is continue character of `scheme`. 6 | // `ALPHA / DIGIT / "+" / "-" / "."` 7 | const MASK_SCHEME_CONTINUE: u8 = 1 << 0; 8 | 9 | /// A mask to test whether the character matches `unreserved`. 10 | // `unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"` 11 | const MASK_UNRESERVED: u8 = 1 << 1; 12 | 13 | /// A mask to test whether the character matches `gen-delims`. 14 | // `gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"` 15 | const MASK_GEN_DELIMS: u8 = 1 << 2; 16 | 17 | /// A mask to test whether the character matches `sub-delims`. 18 | // `sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="` 19 | const MASK_SUB_DELIMS: u8 = 1 << 3; 20 | 21 | /// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes). 22 | // `pchar = unreserved / pct-encoded / sub-delims / ":" / "@"` 23 | const MASK_PCHAR: u8 = 1 << 4; 24 | 25 | /// A mask to test whether the character can appear in `query` and `fragment`. 26 | // `query = *( pchar / "/" / "?" )` 27 | // `fragment = *( pchar / "/" / "?" )` 28 | const MASK_FRAG_QUERY: u8 = 1 << 5; 29 | 30 | /// A mask to test whether the character can appear in `userinfo` and address of `IPvFuture`. 31 | // `userinfo = *( unreserved / pct-encoded / sub-delims / ":" )` 32 | const MASK_USERINFO_IPVFUTUREADDR: u8 = 1 << 6; 33 | 34 | /// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes) or slash. 35 | const MASK_PCHAR_SLASH: u8 = 1 << 7; 36 | 37 | /// ASCII characters' properties. 
38 | const TABLE: [u8; 128] = [ 39 | 0b_0000_0000, // NUL 40 | 0b_0000_0000, // SOH 41 | 0b_0000_0000, // STX 42 | 0b_0000_0000, // ETX 43 | 0b_0000_0000, // EOT 44 | 0b_0000_0000, // ENQ 45 | 0b_0000_0000, // ACK 46 | 0b_0000_0000, // BEL 47 | 0b_0000_0000, // BS 48 | 0b_0000_0000, // HT 49 | 0b_0000_0000, // LF 50 | 0b_0000_0000, // VT 51 | 0b_0000_0000, // FF 52 | 0b_0000_0000, // CR 53 | 0b_0000_0000, // SO 54 | 0b_0000_0000, // SI 55 | 0b_0000_0000, // DLE 56 | 0b_0000_0000, // DC1 57 | 0b_0000_0000, // DC2 58 | 0b_0000_0000, // DC3 59 | 0b_0000_0000, // DC4 60 | 0b_0000_0000, // NAK 61 | 0b_0000_0000, // SYN 62 | 0b_0000_0000, // ETB 63 | 0b_0000_0000, // CAN 64 | 0b_0000_0000, // EM 65 | 0b_0000_0000, // SUB 66 | 0b_0000_0000, // ESC 67 | 0b_0000_0000, // FS 68 | 0b_0000_0000, // GS 69 | 0b_0000_0000, // RS 70 | 0b_0000_0000, // US 71 | 0b_0000_0000, // SPACE 72 | 0b_1111_1000, // ! 73 | 0b_0000_0000, // " 74 | 0b_0000_0100, // # 75 | 0b_1111_1000, // $ 76 | 0b_0000_0000, // % 77 | 0b_1111_1000, // & 78 | 0b_1111_1000, // ' 79 | 0b_1111_1000, // ( 80 | 0b_1111_1000, // ) 81 | 0b_1111_1000, // * 82 | 0b_1111_1001, // + 83 | 0b_1111_1000, // , 84 | 0b_1111_0011, // - 85 | 0b_1111_0011, // . 86 | 0b_1010_0100, // / 87 | 0b_1111_0011, // 0 88 | 0b_1111_0011, // 1 89 | 0b_1111_0011, // 2 90 | 0b_1111_0011, // 3 91 | 0b_1111_0011, // 4 92 | 0b_1111_0011, // 5 93 | 0b_1111_0011, // 6 94 | 0b_1111_0011, // 7 95 | 0b_1111_0011, // 8 96 | 0b_1111_0011, // 9 97 | 0b_1111_0100, // : 98 | 0b_1111_1000, // ; 99 | 0b_0000_0000, // < 100 | 0b_1111_1000, // = 101 | 0b_0000_0000, // > 102 | 0b_0010_0100, // ? 
103 | 0b_1011_0100, // @ 104 | 0b_1111_0011, // A 105 | 0b_1111_0011, // B 106 | 0b_1111_0011, // C 107 | 0b_1111_0011, // D 108 | 0b_1111_0011, // E 109 | 0b_1111_0011, // F 110 | 0b_1111_0011, // G 111 | 0b_1111_0011, // H 112 | 0b_1111_0011, // I 113 | 0b_1111_0011, // J 114 | 0b_1111_0011, // K 115 | 0b_1111_0011, // L 116 | 0b_1111_0011, // M 117 | 0b_1111_0011, // N 118 | 0b_1111_0011, // O 119 | 0b_1111_0011, // P 120 | 0b_1111_0011, // Q 121 | 0b_1111_0011, // R 122 | 0b_1111_0011, // S 123 | 0b_1111_0011, // T 124 | 0b_1111_0011, // U 125 | 0b_1111_0011, // V 126 | 0b_1111_0011, // W 127 | 0b_1111_0011, // X 128 | 0b_1111_0011, // Y 129 | 0b_1111_0011, // Z 130 | 0b_0000_0100, // [ 131 | 0b_0000_0000, // \ 132 | 0b_0000_0100, // ] 133 | 0b_0000_0000, // ^ 134 | 0b_1111_0010, // _ 135 | 0b_0000_0000, // ` 136 | 0b_1111_0011, // a 137 | 0b_1111_0011, // b 138 | 0b_1111_0011, // c 139 | 0b_1111_0011, // d 140 | 0b_1111_0011, // e 141 | 0b_1111_0011, // f 142 | 0b_1111_0011, // g 143 | 0b_1111_0011, // h 144 | 0b_1111_0011, // i 145 | 0b_1111_0011, // j 146 | 0b_1111_0011, // k 147 | 0b_1111_0011, // l 148 | 0b_1111_0011, // m 149 | 0b_1111_0011, // n 150 | 0b_1111_0011, // o 151 | 0b_1111_0011, // p 152 | 0b_1111_0011, // q 153 | 0b_1111_0011, // r 154 | 0b_1111_0011, // s 155 | 0b_1111_0011, // t 156 | 0b_1111_0011, // u 157 | 0b_1111_0011, // v 158 | 0b_1111_0011, // w 159 | 0b_1111_0011, // x 160 | 0b_1111_0011, // y 161 | 0b_1111_0011, // z 162 | 0b_0000_0000, // { 163 | 0b_0000_0000, // | 164 | 0b_0000_0000, // } 165 | 0b_1111_0010, // ~ 166 | 0b_0000_0000, // DEL 167 | ]; 168 | 169 | /// Returns `true` if the given ASCII character is allowed as continue character of `scheme` part. 170 | #[inline] 171 | #[must_use] 172 | pub(crate) const fn is_ascii_scheme_continue(c: u8) -> bool { 173 | (TABLE[c as usize] & MASK_SCHEME_CONTINUE) != 0 174 | } 175 | 176 | /// Returns `true` if the given ASCII character matches `unreserved`. 
177 | #[inline] 178 | #[must_use] 179 | pub(crate) const fn is_ascii_unreserved(c: u8) -> bool { 180 | (TABLE[c as usize] & MASK_UNRESERVED) != 0 181 | } 182 | 183 | /// Returns true if the character is unreserved. 184 | #[inline] 185 | #[must_use] 186 | pub(crate) fn is_unreserved(c: char) -> bool { 187 | if c.is_ascii() { 188 | is_ascii_unreserved(c as u8) 189 | } else { 190 | S::is_nonascii_char_unreserved(c) 191 | } 192 | } 193 | 194 | ///// Returns `true` if the given ASCII character matches `gen-delims`. 195 | //#[inline] 196 | //#[must_use] 197 | //pub(crate) const fn is_ascii_gen_delims(c: u8) -> bool { 198 | // (TABLE[c as usize] & MASK_GEN_DELIMS) != 0 199 | //} 200 | 201 | ///// Returns `true` if the given ASCII character matches `sub-delims`. 202 | //#[inline] 203 | //#[must_use] 204 | //pub(crate) const fn is_ascii_sub_delims(c: u8) -> bool { 205 | // (TABLE[c as usize] & MASK_SUB_DELIMS) != 0 206 | //} 207 | 208 | ///// Returns `true` if the given ASCII character matches `reserved`. 209 | //#[inline] 210 | //#[must_use] 211 | //pub(crate) const fn is_ascii_reserved(c: u8) -> bool { 212 | // (TABLE[c as usize] & (MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0 213 | //} 214 | 215 | /// Returns `true` if the given ASCII character matches `pchar` modulo `pct-encoded`. 216 | #[inline] 217 | #[must_use] 218 | pub(crate) const fn is_ascii_pchar(c: u8) -> bool { 219 | (TABLE[c as usize] & MASK_PCHAR) != 0 220 | } 221 | 222 | /// Returns `true` if the given ASCII character is allowed to appear in `query` and `fragment`. 223 | #[inline] 224 | #[must_use] 225 | pub(crate) const fn is_ascii_frag_query(c: u8) -> bool { 226 | (TABLE[c as usize] & MASK_FRAG_QUERY) != 0 227 | } 228 | 229 | /// Returns `true` if the given non-ASCII character is allowed to appear in `iquery`. 
230 | #[inline] 231 | #[must_use] 232 | pub(crate) fn is_nonascii_query(c: char) -> bool { 233 | S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c) 234 | } 235 | 236 | /// Returns `true` if the given non-ASCII character is allowed to appear in `ifragment`. 237 | #[inline] 238 | #[must_use] 239 | pub(crate) fn is_nonascii_fragment(c: char) -> bool { 240 | S::is_nonascii_char_unreserved(c) 241 | } 242 | 243 | /// Returns `true` if the given ASCII character is allowed to appear in `userinfo` and `IPvFuture`. 244 | #[inline] 245 | #[must_use] 246 | pub(crate) const fn is_ascii_userinfo_ipvfutureaddr(c: u8) -> bool { 247 | (TABLE[c as usize] & MASK_USERINFO_IPVFUTUREADDR) != 0 248 | } 249 | 250 | /// Returns `true` if the given non-ASCII character is allowed to appear in `iuserinfo`. 251 | #[inline] 252 | #[must_use] 253 | pub(crate) fn is_nonascii_userinfo(c: char) -> bool { 254 | S::is_nonascii_char_unreserved(c) 255 | } 256 | 257 | /// Returns `true` if the given ASCII character is allowed to appear in `reg-name` 258 | #[inline] 259 | #[must_use] 260 | pub(crate) const fn is_ascii_regname(c: u8) -> bool { 261 | (TABLE[c as usize] & (MASK_UNRESERVED | MASK_SUB_DELIMS)) != 0 262 | } 263 | 264 | /// Returns `true` if the given non-ASCII character is allowed to appear in `ireg-name`. 265 | #[inline] 266 | #[must_use] 267 | pub(crate) fn is_nonascii_regname(c: char) -> bool { 268 | S::is_nonascii_char_unreserved(c) 269 | } 270 | 271 | /// Returns `true` if the given ASCII character matches `pchar` modulo `pct-encoded` or a slash. 272 | #[inline] 273 | #[must_use] 274 | pub(crate) const fn is_ascii_pchar_slash(c: u8) -> bool { 275 | (TABLE[c as usize] & MASK_PCHAR_SLASH) != 0 276 | } 277 | 278 | /// Checks if the given character matches `ucschar` rule. 
279 | #[must_use] 280 | pub(crate) fn is_ucschar(c: char) -> bool { 281 | matches!( 282 | u32::from(c), 283 | 0xA0..=0xD7FF | 284 | 0xF900..=0xFDCF | 285 | 0xFDF0..=0xFFEF | 286 | 0x1_0000..=0x1_FFFD | 287 | 0x2_0000..=0x2_FFFD | 288 | 0x3_0000..=0x3_FFFD | 289 | 0x4_0000..=0x4_FFFD | 290 | 0x5_0000..=0x5_FFFD | 291 | 0x6_0000..=0x6_FFFD | 292 | 0x7_0000..=0x7_FFFD | 293 | 0x8_0000..=0x8_FFFD | 294 | 0x9_0000..=0x9_FFFD | 295 | 0xA_0000..=0xA_FFFD | 296 | 0xB_0000..=0xB_FFFD | 297 | 0xC_0000..=0xC_FFFD | 298 | 0xD_0000..=0xD_FFFD | 299 | 0xE_1000..=0xE_FFFD 300 | ) 301 | } 302 | 303 | /// Returns true if the given value is a continue byte of UTF-8. 304 | #[inline(always)] 305 | #[must_use] 306 | pub(crate) fn is_utf8_byte_continue(byte: u8) -> bool { 307 | // `0x80..=0xbf` (i.e. `0b_1000_0000..=0b_1011_1111`) is not the first byte, 308 | // and `0xc0..=0xc1` (i.e. `0b_1100_0000..=0b_1100_0001` shouldn't appear 309 | // anywhere in UTF-8 byte sequence. 310 | // `0x80 as i8` is -128, and `0xc0 as i8` is -96. 311 | // 312 | // The first byte of the UTF-8 character is not `0b10xx_xxxx`, and 313 | // the continue bytes is `0b10xx_xxxx`. 314 | // `0b1011_1111 as i8` is -65, and `0b1000_0000 as i8` is -128. 315 | (byte as i8) < -64 316 | } 317 | 318 | /// Returns true if the given ASCII character is `unreserved` or `reserved`. 319 | #[inline] 320 | #[must_use] 321 | pub(crate) const fn is_ascii_unreserved_or_reserved(c: u8) -> bool { 322 | (TABLE[c as usize] & (MASK_UNRESERVED | MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0 323 | } 324 | -------------------------------------------------------------------------------- /src/parser/trusted/authority.rs: -------------------------------------------------------------------------------- 1 | //! Parsers for trusted `authority` string. 2 | 3 | use crate::components::AuthorityComponents; 4 | use crate::parser::str::{find_split_hole, rfind_split2}; 5 | 6 | /// Decomposes the authority into `(userinfo, host, port)`. 
7 | /// 8 | /// The leading `:` is truncated. 9 | /// 10 | /// # Precondition 11 | /// 12 | /// The given string must be a valid IRI reference. 13 | #[inline] 14 | #[must_use] 15 | pub(crate) fn decompose_authority(authority: &str) -> AuthorityComponents<'_> { 16 | let i = authority; 17 | let (i, host_start) = match find_split_hole(i, b'@') { 18 | Some((userinfo, rest)) => (rest, userinfo.len() + 1), 19 | None => (authority, 0), 20 | }; 21 | let colon_port_len = match rfind_split2(i, b':', b']') { 22 | Some((_, suffix)) if suffix.starts_with(':') => suffix.len(), 23 | _ => 0, 24 | }; 25 | let host_end = authority.len() - colon_port_len; 26 | 27 | AuthorityComponents { 28 | authority, 29 | host_start, 30 | host_end, 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/parser/validate.rs: -------------------------------------------------------------------------------- 1 | //! Validating parsers for non-trusted (possibly invalid) input. 2 | 3 | mod authority; 4 | mod path; 5 | 6 | use crate::parser::char; 7 | use crate::parser::str::{ 8 | find_split, find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded, 9 | }; 10 | use crate::spec::Spec; 11 | use crate::validate::Error; 12 | 13 | use self::authority::validate_authority; 14 | pub(crate) use self::authority::{validate_host, validate_userinfo}; 15 | pub(crate) use self::path::validate_path; 16 | use self::path::{ 17 | validate_path_abempty, validate_path_absolute_authority_absent, 18 | validate_path_relative_authority_absent, 19 | }; 20 | 21 | /// Returns `Ok(_)` if the string matches `scheme`. 22 | pub(crate) fn validate_scheme(i: &str) -> Result<(), Error> { 23 | debug_assert!(!i.is_empty()); 24 | let bytes = i.as_bytes(); 25 | if bytes[0].is_ascii_alphabetic() 26 | && bytes[1..] 
27 | .iter() 28 | .all(|&b| b.is_ascii() && char::is_ascii_scheme_continue(b)) 29 | { 30 | Ok(()) 31 | } else { 32 | Err(Error::new()) 33 | } 34 | } 35 | 36 | /// Returns `Ok(_)` if the string matches `query` or `iquery`. 37 | pub(crate) fn validate_query(i: &str) -> Result<(), Error> { 38 | let is_valid = 39 | satisfy_chars_with_pct_encoded(i, char::is_ascii_frag_query, char::is_nonascii_query::); 40 | if is_valid { 41 | Ok(()) 42 | } else { 43 | Err(Error::new()) 44 | } 45 | } 46 | 47 | /// Returns `Ok(_)` if the string matches `authority path-abempty` rule sequence. 48 | fn validate_authority_path_abempty(i: &str) -> Result<(), Error> { 49 | let (maybe_authority, maybe_path) = match find_split(i, b'/') { 50 | Some(v) => v, 51 | None => (i, ""), 52 | }; 53 | validate_authority::(maybe_authority)?; 54 | validate_path_abempty::(maybe_path) 55 | } 56 | 57 | /// Returns `Ok(_)` if the string matches `URI`/`IRI` rules. 58 | #[inline] 59 | pub(crate) fn validate_uri(i: &str) -> Result<(), Error> { 60 | validate_uri_reference_common::(i, UriReferenceRule::Absolute) 61 | } 62 | 63 | /// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules. 64 | #[inline] 65 | pub(crate) fn validate_uri_reference(i: &str) -> Result<(), Error> { 66 | validate_uri_reference_common::(i, UriReferenceRule::Any) 67 | } 68 | 69 | /// Returns `Ok(_)` if the string matches `absolute-URI`/`absolute-IRI` rules. 70 | #[inline] 71 | pub(crate) fn validate_absolute_uri(i: &str) -> Result<(), Error> { 72 | validate_uri_reference_common::(i, UriReferenceRule::AbsoluteWithoutFragment) 73 | } 74 | 75 | /// Syntax rule for URI/IRI references. 76 | #[derive(Clone, Copy, PartialEq, Eq, Hash)] 77 | enum UriReferenceRule { 78 | /// `URI` and `IRI`. 79 | /// 80 | /// This can have a fragment. 81 | Absolute, 82 | /// `absolute-URI` and `absolute-IRI`. 83 | /// 84 | /// This cannot have a fragment. 85 | AbsoluteWithoutFragment, 86 | /// `URI-reference` and `IRI-reference`. 
87 | /// 88 | /// This can be relative. 89 | Any, 90 | } 91 | 92 | impl UriReferenceRule { 93 | /// Returns `true` is the relative reference is allowed. 94 | #[inline] 95 | #[must_use] 96 | fn is_relative_allowed(self) -> bool { 97 | self == Self::Any 98 | } 99 | 100 | /// Returns `true` is the fragment part is allowed. 101 | #[inline] 102 | #[must_use] 103 | fn is_fragment_allowed(self) -> bool { 104 | matches!(self, Self::Absolute | Self::Any) 105 | } 106 | } 107 | 108 | /// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules. 109 | fn validate_uri_reference_common( 110 | i: &str, 111 | ref_rule: UriReferenceRule, 112 | ) -> Result<(), Error> { 113 | // Validate `scheme ":"`. 114 | let (i, _scheme) = match find_split_hole(i, b':') { 115 | None => { 116 | if ref_rule.is_relative_allowed() { 117 | return validate_relative_ref::(i); 118 | } else { 119 | return Err(Error::new()); 120 | } 121 | } 122 | Some(("", _)) => return Err(Error::new()), 123 | Some((maybe_scheme, rest)) => { 124 | if validate_scheme(maybe_scheme).is_err() { 125 | // The string before the first colon is not a scheme. 126 | // Falling back to `relative-ref` parsing. 127 | if ref_rule.is_relative_allowed() { 128 | return validate_relative_ref::(i); 129 | } else { 130 | return Err(Error::new()); 131 | } 132 | } 133 | (rest, maybe_scheme) 134 | } 135 | }; 136 | 137 | // Validate `hier-part`. 138 | let after_path = match i.strip_prefix("//") { 139 | Some(i) => { 140 | let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') { 141 | Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))), 142 | None => (i, None), 143 | }; 144 | validate_authority_path_abempty::(maybe_authority_path)?; 145 | after_path 146 | } 147 | None => { 148 | let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') { 149 | Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))), 150 | None => (i, None), 151 | }; 152 | // Authority is absent. 
153 | validate_path_absolute_authority_absent::(maybe_path)?; 154 | after_path 155 | } 156 | }; 157 | 158 | // Validate `[ "?" query ] [ "#" fragment ]`. 159 | if let Some((first, rest)) = after_path { 160 | validate_after_path::(first, rest, ref_rule.is_fragment_allowed())?; 161 | } 162 | Ok(()) 163 | } 164 | 165 | /// Returns `Ok(_)` if the string matches `relative-ref`/`irelative-ref` rules. 166 | pub(crate) fn validate_relative_ref(i: &str) -> Result<(), Error> { 167 | // Validate `relative-part`. 168 | let after_path = match i.strip_prefix("//") { 169 | Some(i) => { 170 | let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') { 171 | Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))), 172 | None => (i, None), 173 | }; 174 | validate_authority_path_abempty::(maybe_authority_path)?; 175 | after_path 176 | } 177 | None => { 178 | let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') { 179 | Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))), 180 | None => (i, None), 181 | }; 182 | // Authority is absent. 183 | validate_path_relative_authority_absent::(maybe_path)?; 184 | after_path 185 | } 186 | }; 187 | 188 | // Validate `[ "?" query ] [ "#" fragment ]`. 189 | if let Some((first, rest)) = after_path { 190 | validate_after_path::(first, rest, true)?; 191 | } 192 | Ok(()) 193 | } 194 | 195 | /// Returns `Ok(_)` if the string matches `[ "?" query ] [ "#" fragment ]` (or IRI version). 196 | fn validate_after_path(first: u8, rest: &str, accept_fragment: bool) -> Result<(), Error> { 197 | let (maybe_query, maybe_fragment) = if first == b'?' 
{ 198 | match find_split_hole(rest, b'#') { 199 | Some(v) => v, 200 | None => (rest, ""), 201 | } 202 | } else { 203 | debug_assert_eq!(first, b'#'); 204 | ("", rest) 205 | }; 206 | validate_query::(maybe_query)?; 207 | if !accept_fragment && !maybe_fragment.is_empty() { 208 | return Err(Error::new()); 209 | } 210 | validate_fragment::(maybe_fragment) 211 | } 212 | 213 | /// Returns `Ok(_)` if the string matches `fragment`/`ifragment` rules. 214 | pub(crate) fn validate_fragment(i: &str) -> Result<(), Error> { 215 | let is_valid = satisfy_chars_with_pct_encoded( 216 | i, 217 | char::is_ascii_frag_query, 218 | char::is_nonascii_fragment::, 219 | ); 220 | if is_valid { 221 | Ok(()) 222 | } else { 223 | Err(Error::new()) 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /src/parser/validate/authority.rs: -------------------------------------------------------------------------------- 1 | //! Parsers for authority. 2 | 3 | use core::mem; 4 | 5 | use crate::parser::char; 6 | use crate::parser::str::{ 7 | find_split_hole, get_wrapped_inner, rfind_split_hole, satisfy_chars_with_pct_encoded, 8 | strip_ascii_char_prefix, 9 | }; 10 | use crate::spec::Spec; 11 | use crate::validate::Error; 12 | 13 | /// Returns `Ok(_)` if the string matches `userinfo` or `iuserinfo`. 14 | pub(crate) fn validate_userinfo(i: &str) -> Result<(), Error> { 15 | let is_valid = satisfy_chars_with_pct_encoded( 16 | i, 17 | char::is_ascii_userinfo_ipvfutureaddr, 18 | char::is_nonascii_userinfo::, 19 | ); 20 | if is_valid { 21 | Ok(()) 22 | } else { 23 | Err(Error::new()) 24 | } 25 | } 26 | 27 | /// Returns `true` if the string matches `dec-octet`. 28 | /// 29 | /// In other words, this tests whether the string is decimal "0" to "255". 
30 | #[must_use] 31 | fn is_dec_octet(i: &str) -> bool { 32 | matches!( 33 | i.as_bytes(), 34 | [b'0'..=b'9'] 35 | | [b'1'..=b'9', b'0'..=b'9'] 36 | | [b'1', b'0'..=b'9', b'0'..=b'9'] 37 | | [b'2', b'0'..=b'4', b'0'..=b'9'] 38 | | [b'2', b'5', b'0'..=b'5'] 39 | ) 40 | } 41 | 42 | /// Returns `Ok(_)` if the string matches `IPv4address`. 43 | fn validate_ipv4address(i: &str) -> Result<(), Error> { 44 | let (first, rest) = find_split_hole(i, b'.').ok_or_else(Error::new)?; 45 | if !is_dec_octet(first) { 46 | return Err(Error::new()); 47 | } 48 | let (second, rest) = find_split_hole(rest, b'.').ok_or_else(Error::new)?; 49 | if !is_dec_octet(second) { 50 | return Err(Error::new()); 51 | } 52 | let (third, fourth) = find_split_hole(rest, b'.').ok_or_else(Error::new)?; 53 | if is_dec_octet(third) && is_dec_octet(fourth) { 54 | Ok(()) 55 | } else { 56 | Err(Error::new()) 57 | } 58 | } 59 | 60 | /// A part of IPv6 addr. 61 | #[derive(Clone, Copy)] 62 | enum V6AddrPart { 63 | /// `[0-9a-fA-F]{1,4}::`. 64 | H16Omit, 65 | /// `[0-9a-fA-F]{1,4}:`. 66 | H16Cont, 67 | /// `[0-9a-fA-F]{1,4}`. 68 | H16End, 69 | /// IPv4 address. 70 | V4, 71 | /// `::`. 72 | Omit, 73 | } 74 | 75 | /// Splits the IPv6 address string into the next component and the rest substring. 76 | fn split_v6_addr_part(i: &str) -> Result<(&str, V6AddrPart), Error> { 77 | debug_assert!(!i.is_empty()); 78 | match find_split_hole(i, b':') { 79 | Some((prefix, rest)) => { 80 | if prefix.len() >= 5 { 81 | return Err(Error::new()); 82 | } 83 | 84 | if prefix.is_empty() { 85 | return match strip_ascii_char_prefix(rest, b':') { 86 | Some(rest) => Ok((rest, V6AddrPart::Omit)), 87 | None => Err(Error::new()), 88 | }; 89 | } 90 | 91 | // Should be `h16`. 
92 | debug_assert!((1..=4).contains(&prefix.len())); 93 | if !prefix.bytes().all(|b| b.is_ascii_hexdigit()) { 94 | return Err(Error::new()); 95 | } 96 | match strip_ascii_char_prefix(rest, b':') { 97 | Some(rest) => Ok((rest, V6AddrPart::H16Omit)), 98 | None => Ok((rest, V6AddrPart::H16Cont)), 99 | } 100 | } 101 | None => { 102 | if i.len() >= 5 { 103 | // Possibly `IPv4address`. 104 | validate_ipv4address(i)?; 105 | return Ok(("", V6AddrPart::V4)); 106 | } 107 | if i.bytes().all(|b| b.is_ascii_hexdigit()) { 108 | Ok(("", V6AddrPart::H16End)) 109 | } else { 110 | Err(Error::new()) 111 | } 112 | } 113 | } 114 | } 115 | 116 | /// Returns `Ok(_)` if the string matches `IPv6address`. 117 | fn validate_ipv6address(mut i: &str) -> Result<(), Error> { 118 | let mut h16_count = 0; 119 | let mut is_omitted = false; 120 | while !i.is_empty() { 121 | let (rest, part) = split_v6_addr_part(i)?; 122 | match part { 123 | V6AddrPart::H16Omit => { 124 | h16_count += 1; 125 | if mem::replace(&mut is_omitted, true) { 126 | // Omitted twice. 127 | return Err(Error::new()); 128 | } 129 | } 130 | V6AddrPart::H16Cont => { 131 | h16_count += 1; 132 | if rest.is_empty() { 133 | // `H16Cont` cannot be the last part of an IPv6 address. 134 | return Err(Error::new()); 135 | } 136 | } 137 | V6AddrPart::H16End => { 138 | h16_count += 1; 139 | break; 140 | } 141 | V6AddrPart::V4 => { 142 | debug_assert!(rest.is_empty()); 143 | h16_count += 2; 144 | break; 145 | } 146 | V6AddrPart::Omit => { 147 | if mem::replace(&mut is_omitted, true) { 148 | // Omitted twice. 149 | return Err(Error::new()); 150 | } 151 | } 152 | } 153 | if h16_count > 8 { 154 | return Err(Error::new()); 155 | } 156 | i = rest; 157 | } 158 | let is_valid = if is_omitted { 159 | h16_count < 8 160 | } else { 161 | h16_count == 8 162 | }; 163 | if is_valid { 164 | Ok(()) 165 | } else { 166 | Err(Error::new()) 167 | } 168 | } 169 | 170 | /// Returns `Ok(_)` if the string matches `authority` or `iauthority`. 
171 | pub(super) fn validate_authority(i: &str) -> Result<(), Error> { 172 | // Strip and validate `userinfo`. 173 | let (i, _userinfo) = match find_split_hole(i, b'@') { 174 | Some((maybe_userinfo, i)) => { 175 | validate_userinfo::(maybe_userinfo)?; 176 | (i, Some(maybe_userinfo)) 177 | } 178 | None => (i, None), 179 | }; 180 | // `host` can contain colons, but `port` cannot. 181 | // Strip and validate `port`. 182 | let (maybe_host, _port) = match rfind_split_hole(i, b':') { 183 | Some((maybe_host, maybe_port)) => { 184 | if maybe_port.bytes().all(|b| b.is_ascii_digit()) { 185 | (maybe_host, Some(maybe_port)) 186 | } else { 187 | (i, None) 188 | } 189 | } 190 | None => (i, None), 191 | }; 192 | // Validate `host`. 193 | validate_host::(maybe_host) 194 | } 195 | 196 | /// Validates `host`. 197 | pub(crate) fn validate_host(i: &str) -> Result<(), Error> { 198 | match get_wrapped_inner(i, b'[', b']') { 199 | Some(maybe_addr) => { 200 | // `IP-literal`. 201 | // Note that `v` here is case insensitive. See RFC 3987 section 3.2.2. 202 | if let Some(maybe_addr_rest) = strip_ascii_char_prefix(maybe_addr, b'v') 203 | .or_else(|| strip_ascii_char_prefix(maybe_addr, b'V')) 204 | { 205 | // `IPvFuture`. 206 | let (maybe_ver, maybe_addr) = 207 | find_split_hole(maybe_addr_rest, b'.').ok_or_else(Error::new)?; 208 | // Validate version. 209 | if maybe_ver.is_empty() || !maybe_ver.bytes().all(|b| b.is_ascii_hexdigit()) { 210 | return Err(Error::new()); 211 | } 212 | // Validate address. 213 | if !maybe_addr.is_empty() 214 | && maybe_addr.is_ascii() 215 | && maybe_addr 216 | .bytes() 217 | .all(char::is_ascii_userinfo_ipvfutureaddr) 218 | { 219 | Ok(()) 220 | } else { 221 | Err(Error::new()) 222 | } 223 | } else { 224 | // `IPv6address`. 225 | validate_ipv6address(maybe_addr) 226 | } 227 | } 228 | None => { 229 | // `IPv4address` or `reg-name`. No need to distinguish them here. 
230 | let is_valid = satisfy_chars_with_pct_encoded( 231 | i, 232 | char::is_ascii_regname, 233 | char::is_nonascii_regname::, 234 | ); 235 | if is_valid { 236 | Ok(()) 237 | } else { 238 | Err(Error::new()) 239 | } 240 | } 241 | } 242 | } 243 | 244 | #[cfg(test)] 245 | #[cfg(feature = "alloc")] 246 | mod tests { 247 | use super::*; 248 | 249 | use alloc::format; 250 | 251 | macro_rules! assert_validate { 252 | ($parser:expr, $($input:expr),* $(,)?) => {{ 253 | $({ 254 | let input = $input; 255 | let input: &str = input.as_ref(); 256 | assert!($parser(input).is_ok(), "input={:?}", input); 257 | })* 258 | }}; 259 | } 260 | 261 | #[test] 262 | fn test_ipv6address() { 263 | use core::cmp::Ordering; 264 | 265 | assert_validate!(validate_ipv6address, "a:bB:cCc:dDdD:e:F:a:B"); 266 | assert_validate!(validate_ipv6address, "1:1:1:1:1:1:1:1"); 267 | assert_validate!(validate_ipv6address, "1:1:1:1:1:1:1.1.1.1"); 268 | assert_validate!(validate_ipv6address, "2001:db8::7"); 269 | 270 | // Generate IPv6 addresses with `::`. 271 | let make_sub = |n: usize| { 272 | let mut s = "1:".repeat(n); 273 | s.pop(); 274 | s 275 | }; 276 | for len_pref in 0..=7 { 277 | let prefix = make_sub(len_pref); 278 | for len_suf in 1..=(7 - len_pref) { 279 | assert_validate!( 280 | validate_ipv6address, 281 | &format!("{}::{}", prefix, make_sub(len_suf)) 282 | ); 283 | match len_suf.cmp(&2) { 284 | Ordering::Greater => assert_validate!( 285 | validate_ipv6address, 286 | &format!("{}::{}:1.1.1.1", prefix, make_sub(len_suf - 2)) 287 | ), 288 | Ordering::Equal => { 289 | assert_validate!(validate_ipv6address, &format!("{}::1.1.1.1", prefix)) 290 | } 291 | Ordering::Less => {} 292 | } 293 | } 294 | } 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /src/parser/validate/path.rs: -------------------------------------------------------------------------------- 1 | //! Parsers for path. 
2 | 3 | use crate::parser::char; 4 | use crate::parser::str::{find_split2_hole, satisfy_chars_with_pct_encoded}; 5 | use crate::spec::Spec; 6 | use crate::validate::Error; 7 | 8 | /// Returns `Ok(_)` if the string matches `path-abempty` or `ipath-abempty`. 9 | pub(super) fn validate_path_abempty(i: &str) -> Result<(), Error> { 10 | if i.is_empty() { 11 | return Ok(()); 12 | } 13 | let i = match i.strip_prefix('/') { 14 | Some(rest) => rest, 15 | None => return Err(Error::new()), 16 | }; 17 | let is_valid = satisfy_chars_with_pct_encoded( 18 | i, 19 | char::is_ascii_pchar_slash, 20 | S::is_nonascii_char_unreserved, 21 | ); 22 | if is_valid { 23 | Ok(()) 24 | } else { 25 | Err(Error::new()) 26 | } 27 | } 28 | 29 | /// Returns `Ok(_)` if the string matches `hier-part` or `ihier-part` modulo 30 | /// `"//" authority path-abempty`. 31 | pub(super) fn validate_path_absolute_authority_absent(i: &str) -> Result<(), Error> { 32 | if i.is_empty() { 33 | // `path-empty`. 34 | return Ok(()); 35 | } 36 | if i.starts_with("//") { 37 | unreachable!("this case should be handled by the caller"); 38 | } 39 | let is_valid = satisfy_chars_with_pct_encoded( 40 | i, 41 | char::is_ascii_pchar_slash, 42 | S::is_nonascii_char_unreserved, 43 | ); 44 | if is_valid { 45 | Ok(()) 46 | } else { 47 | Err(Error::new()) 48 | } 49 | } 50 | 51 | /// Returns `Ok(_)` if the string matches `relative-part` or `irelative-part` modulo 52 | /// `"//" authority path-abempty`. 53 | pub(super) fn validate_path_relative_authority_absent(i: &str) -> Result<(), Error> { 54 | if i.starts_with("//") { 55 | unreachable!("this case should be handled by the caller"); 56 | } 57 | let is_valid = match find_split2_hole(i, b'/', b':') { 58 | Some((_, b'/', _)) | None => satisfy_chars_with_pct_encoded( 59 | i, 60 | char::is_ascii_pchar_slash, 61 | S::is_nonascii_char_unreserved, 62 | ), 63 | Some((_, c, _)) => { 64 | debug_assert_eq!(c, b':'); 65 | // `foo:bar`-style. This does not match `path-noscheme`. 
66 | return Err(Error::new()); 67 | } 68 | }; 69 | if is_valid { 70 | Ok(()) 71 | } else { 72 | Err(Error::new()) 73 | } 74 | } 75 | 76 | /// Returns `Ok(_)` if the string matches `path`/`ipath` rules. 77 | pub(crate) fn validate_path(i: &str) -> Result<(), Error> { 78 | if i.starts_with("//") { 79 | return Err(Error::new()); 80 | } 81 | let is_valid = satisfy_chars_with_pct_encoded( 82 | i, 83 | char::is_ascii_pchar_slash, 84 | S::is_nonascii_char_unreserved, 85 | ); 86 | if is_valid { 87 | Ok(()) 88 | } else { 89 | Err(Error::new()) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/raw.rs: -------------------------------------------------------------------------------- 1 | //! Raw IRI strings manipulation. 2 | //! 3 | //! Note that functions in this module may operates on raw `&str` types. 4 | //! It is caller's responsilibility to guarantee that the given string satisfies the precondition. 5 | 6 | #[cfg(all(feature = "alloc", not(feature = "std")))] 7 | use alloc::string::String; 8 | 9 | #[cfg(feature = "alloc")] 10 | use crate::parser::trusted as trusted_parser; 11 | 12 | /// Sets the fragment part to the given string. 13 | /// 14 | /// Removes fragment part (and following `#` character) if `None` is given. 15 | #[cfg(feature = "alloc")] 16 | pub(crate) fn set_fragment(s: &mut String, fragment: Option<&str>) { 17 | remove_fragment(s); 18 | if let Some(fragment) = fragment { 19 | s.reserve(fragment.len() + 1); 20 | s.push('#'); 21 | s.push_str(fragment); 22 | } 23 | } 24 | 25 | /// Removes the fragment part from the string. 26 | #[cfg(feature = "alloc")] 27 | #[inline] 28 | pub(crate) fn remove_fragment(s: &mut String) { 29 | if let Some(colon_pos) = s.find('#') { 30 | s.truncate(colon_pos); 31 | } 32 | } 33 | 34 | /// Splits the string into the prefix and the fragment part. 35 | /// 36 | /// A leading `#` character is truncated if the fragment part exists. 
37 | #[cfg(feature = "alloc")] 38 | pub(crate) fn split_fragment_owned(mut s: String) -> (String, Option) { 39 | let prefix_len = match trusted_parser::split_fragment(&s) { 40 | (_, None) => return (s, None), 41 | (prefix, Some(_fragment)) => prefix.len(), 42 | }; 43 | 44 | // `+ 1` is for leading `#` character. 45 | let fragment = s.split_off(prefix_len + 1); 46 | // Current `s` contains a trailing `#` character, which should be removed. 47 | { 48 | // Remove a trailing `#`. 49 | let hash = s.pop(); 50 | assert_eq!(hash, Some('#')); 51 | } 52 | assert_eq!(s.len(), prefix_len); 53 | 54 | (s, Some(fragment)) 55 | } 56 | -------------------------------------------------------------------------------- /src/spec.rs: -------------------------------------------------------------------------------- 1 | //! IRI specs. 2 | 3 | use core::fmt; 4 | 5 | // Note that this MUST be private module. 6 | // See about 7 | // sealed trait. 8 | mod internal; 9 | 10 | /// A trait for spec types. 11 | /// 12 | /// This trait is not intended to be implemented by crate users. 13 | // Note that all types which implement `Spec` also implement `SpecInternal`. 14 | pub trait Spec: internal::Sealed + Copy + fmt::Debug {} 15 | 16 | /// A type that represents specification of IRI. 17 | /// 18 | /// About IRI, see [RFC 3987]. 19 | /// 20 | /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 21 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 22 | pub enum IriSpec {} 23 | 24 | impl Spec for IriSpec {} 25 | 26 | /// A type that represents specification of URI. 27 | /// 28 | /// About URI, see [RFC 3986]. 29 | /// 30 | /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 31 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 32 | pub enum UriSpec {} 33 | 34 | impl Spec for UriSpec {} 35 | -------------------------------------------------------------------------------- /src/spec/internal.rs: -------------------------------------------------------------------------------- 1 | //! 
A private module for sealed trait and internal implementations. 2 | //! 3 | //! Note that this MUST be a private module. 4 | //! See [Rust API Guidelines][sealed-trait] about the necessity of being private. 5 | //! 6 | //! [sealed-trait]: 7 | //! https://rust-lang.github.io/api-guidelines/future-proofing.html#sealed-traits-protect-against-downstream-implementations-c-sealed 8 | 9 | use crate::parser::char::is_ucschar; 10 | use crate::spec::{IriSpec, UriSpec}; 11 | 12 | /// A trait to prohibit user-defined types from implementing `Spec`. 13 | /// 14 | /// About sealed trait, see [Rust API Guidelines][future-proofing]. 15 | /// 16 | /// [future-proofing]: https://rust-lang.github.io/api-guidelines/future-proofing.html 17 | pub trait Sealed: SpecInternal {} 18 | 19 | impl Sealed for IriSpec {} 20 | impl Sealed for UriSpec {} 21 | 22 | /// Internal implementations for spec types. 23 | pub trait SpecInternal: Sized { 24 | /// Checks if the given non-ASCII character matches `unreserved` or `iunreserved` rule. 25 | #[must_use] 26 | fn is_nonascii_char_unreserved(c: char) -> bool; 27 | /// Checks if the given character matches `iprivate` rule. 28 | #[must_use] 29 | fn is_nonascii_char_private(c: char) -> bool; 30 | } 31 | 32 | impl SpecInternal for IriSpec { 33 | #[inline] 34 | fn is_nonascii_char_unreserved(c: char) -> bool { 35 | is_ucschar(c) 36 | } 37 | 38 | fn is_nonascii_char_private(c: char) -> bool { 39 | matches!( 40 | u32::from(c), 41 | 0xE000..=0xF8FF | 42 | 0xF_0000..=0xF_FFFD | 43 | 0x10_0000..=0x10_FFFD 44 | ) 45 | } 46 | } 47 | 48 | impl SpecInternal for UriSpec { 49 | #[inline] 50 | fn is_nonascii_char_unreserved(_: char) -> bool { 51 | false 52 | } 53 | 54 | #[inline] 55 | fn is_nonascii_char_private(_: char) -> bool { 56 | false 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/template.rs: -------------------------------------------------------------------------------- 1 | //! 
//! Processor for [RFC 6570] URI Template.
//!
//! [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html
//!
//! # Usage
//!
//! 1. Prepare a template.
//!     * You can create a template as [`UriTemplateStr`]
#![cfg_attr(
    feature = "alloc",
    doc = "      type (borrowed) or [`UriTemplateString`] type (owned)."
)]
#![cfg_attr(not(feature = "alloc"), doc = "      type.")]
//! 2. Prepare a context.
//!     * Create a value of type that implements [`Context`] trait.
#![cfg_attr(
    feature = "alloc",
    doc = "    * Or, if you use [`SimpleContext`], insert key-value pairs into it."
)]
//! 3. Expand.
//!     * Pass the context to [`UriTemplateStr::expand`] method of the template.
//! 4. Use the result.
//!     * Returned [`Expanded`] object can be directly printed since it
//!       implements [`Display`][`core::fmt::Display`] trait. Or, you can call
//!       `.to_string()` method of the `alloc::string::ToString` trait to
//!       convert it to a `String`.
//!
//! # Examples
//!
//! ## Custom context type
//!
//! For details, see [the documentation of `context` module][`context`].
//!
//! ```
//! # use iri_string::template::Error;
//! use core::fmt;
//! use iri_string::spec::{IriSpec, Spec, UriSpec};
//! use iri_string::template::UriTemplateStr;
//! use iri_string::template::context::{Context, VarName, Visitor};
//!
//! struct UserInfo {
//!     username: &'static str,
//!     utf8_available: bool,
//! }
//!
//! impl Context for UserInfo {
//!     fn visit<V: Visitor>(
//!         &self,
//!         visitor: V,
//!     ) -> V::Result {
//!         match visitor.var_name().as_str() {
//!             "username" => visitor.visit_string(self.username),
//!             "utf8" => {
//!                 if self.utf8_available {
//!                     // U+2713 CHECK MARK
//!                     visitor.visit_string("\u{2713}")
//!                 } else {
//!                     visitor.visit_undefined()
//!                 }
//!             }
//!             _ => visitor.visit_undefined()
//!         }
//!     }
//! }
//!
//! let context = UserInfo {
//!     username: "foo",
//!     utf8_available: true,
//! };
//!
//! let template = UriTemplateStr::new("/users/{username}{?utf8}")?;
//!
//! # #[cfg(feature = "alloc")] {
//! assert_eq!(
//!     template.expand::<UriSpec, _>(&context)?.to_string(),
//!     "/users/foo?utf8=%E2%9C%93"
//! );
//! assert_eq!(
//!     template.expand::<IriSpec, _>(&context)?.to_string(),
//!     "/users/foo?utf8=\u{2713}"
//! );
//! # }
//! # Ok::<_, Error>(())
//! ```
//!
//! ## `SimpleContext` type (enabled by `alloc` feature flag)
//!
//! ```
//! # use iri_string::template::Error;
//! # #[cfg(feature = "alloc")] {
//! use iri_string::spec::{IriSpec, UriSpec};
//! use iri_string::template::UriTemplateStr;
//! use iri_string::template::simple_context::SimpleContext;
//!
//! let mut context = SimpleContext::new();
//! context.insert("username", "foo");
//! // U+2713 CHECK MARK
//! context.insert("utf8", "\u{2713}");
//!
//! let template = UriTemplateStr::new("/users/{username}{?utf8}")?;
//!
//! assert_eq!(
//!     template.expand::<UriSpec, _>(&context)?.to_string(),
//!     "/users/foo?utf8=%E2%9C%93"
//! );
//! assert_eq!(
//!     template.expand::<IriSpec, _>(&context)?.to_string(),
//!     "/users/foo?utf8=\u{2713}"
//! );
//! # }
//! # Ok::<_, Error>(())
//! ```
//!
114 | #![cfg_attr( 115 | feature = "alloc", 116 | doc = "[`SimpleContext`]: `simple_context::SimpleContext`" 117 | )] 118 | mod components; 119 | pub mod context; 120 | mod error; 121 | mod expand; 122 | mod parser; 123 | #[cfg(feature = "alloc")] 124 | pub mod simple_context; 125 | mod string; 126 | 127 | pub use self::context::{Context, DynamicContext}; 128 | #[cfg(feature = "alloc")] 129 | pub use self::error::CreationError; 130 | pub use self::error::Error; 131 | pub use self::expand::Expanded; 132 | #[cfg(feature = "alloc")] 133 | pub use self::string::UriTemplateString; 134 | pub use self::string::{UriTemplateStr, UriTemplateVariables}; 135 | 136 | /// Deprecated old name of [`template::context::VarName`]. 137 | /// 138 | /// [`template::context::VarName`]: `components::VarName` 139 | #[deprecated( 140 | since = "0.7.1", 141 | note = "renamed (moved) to `template::context::VarName`" 142 | )] 143 | pub type VarName<'a> = self::components::VarName<'a>; 144 | 145 | /// Variable value type. 146 | #[derive(Debug, Clone, Copy)] 147 | enum ValueType { 148 | /// Undefined (i.e. null). 149 | Undefined, 150 | /// String value. 151 | String, 152 | /// List. 153 | List, 154 | /// Associative array. 155 | Assoc, 156 | } 157 | 158 | impl ValueType { 159 | /// Returns the value type for an undefined variable. 160 | #[inline] 161 | #[must_use] 162 | pub const fn undefined() -> Self { 163 | ValueType::Undefined 164 | } 165 | 166 | /// Returns the value type for a string variable. 167 | #[inline] 168 | #[must_use] 169 | pub const fn string() -> Self { 170 | ValueType::String 171 | } 172 | 173 | /// Returns the value type for an empty list variable. 174 | #[inline] 175 | #[must_use] 176 | pub const fn empty_list() -> Self { 177 | ValueType::Undefined 178 | } 179 | 180 | /// Returns the value type for a nonempty list variable. 
181 | #[inline] 182 | #[must_use] 183 | pub const fn nonempty_list() -> Self { 184 | ValueType::List 185 | } 186 | 187 | /// Returns the value type for an empty associative array variable. 188 | #[inline] 189 | #[must_use] 190 | pub const fn empty_assoc() -> Self { 191 | ValueType::Undefined 192 | } 193 | 194 | /// Returns the value type for a nonempty associative array variable. 195 | #[inline] 196 | #[must_use] 197 | pub const fn nonempty_assoc() -> Self { 198 | ValueType::Assoc 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /src/template/components.rs: -------------------------------------------------------------------------------- 1 | //! Syntax components of URI templates. 2 | 3 | use core::mem; 4 | 5 | use crate::parser::str::find_split_hole; 6 | use crate::template::error::Error; 7 | use crate::template::parser::validate as validate_parser; 8 | 9 | /// Expression body. 10 | /// 11 | /// This does not contain the wrapping braces (`{` and `}`). 12 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 13 | pub(super) struct ExprBody<'a>(&'a str); 14 | 15 | impl<'a> ExprBody<'a> { 16 | /// Creates a new expression body. 17 | /// 18 | /// # Precondition 19 | /// 20 | /// The given string should be a valid expression body. 21 | #[inline] 22 | #[must_use] 23 | pub(super) fn new(s: &'a str) -> Self { 24 | debug_assert!( 25 | !s.is_empty(), 26 | "[precondition] valid expression body is not empty" 27 | ); 28 | 29 | Self(s) 30 | } 31 | 32 | /// Decomposes the expression into an `operator` and `variable-list`. 33 | /// 34 | /// # Panics 35 | /// 36 | /// May panic if the input is invalid. 
37 | #[must_use] 38 | pub(super) fn decompose(&self) -> (Operator, VarListStr<'a>) { 39 | debug_assert!( 40 | !self.0.is_empty(), 41 | "[precondition] valid expression body is not empty" 42 | ); 43 | let first = self.0.as_bytes()[0]; 44 | if first.is_ascii_alphanumeric() || (first == b'_') || (first == b'%') { 45 | // The first byte is a part of the variable list. 46 | (Operator::String, VarListStr::new(self.0)) 47 | } else { 48 | let op = Operator::from_byte(first).unwrap_or_else(|| { 49 | unreachable!( 50 | "[precondition] valid expression has (optional) \ 51 | valid operator, but got a byte {first:#02x?}" 52 | ) 53 | }); 54 | (op, VarListStr::new(&self.0[1..])) 55 | } 56 | } 57 | 58 | /// Returns the raw expression in a string slice. 59 | #[inline] 60 | #[must_use] 61 | pub(super) fn as_str(&self) -> &'a str { 62 | self.0 63 | } 64 | } 65 | 66 | /// Variable name. 67 | // QUESTION: Should hexdigits in percent-encoded triplets be compared case sensitively? 68 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 69 | pub struct VarName<'a>(&'a str); 70 | 71 | impl<'a> VarName<'a> { 72 | /// Creates a `VarName` from the trusted string. 73 | /// 74 | /// # Precondition 75 | /// 76 | /// The given string should be a valid variable name. 77 | #[inline] 78 | #[must_use] 79 | pub(super) fn from_trusted(s: &'a str) -> Self { 80 | Self(s) 81 | } 82 | 83 | /// Creates a `VarName` from the string. 
84 | /// 85 | /// # Examples 86 | /// 87 | /// ``` 88 | /// # use iri_string::template::Error; 89 | /// use iri_string::template::context::VarName; 90 | /// 91 | /// let name = VarName::new("hello")?; 92 | /// assert_eq!(name.as_str(), "hello"); 93 | /// 94 | /// assert!(VarName::new("0+non-variable-name").is_err()); 95 | /// 96 | /// # Ok::<_, Error>(()) 97 | /// ``` 98 | #[inline] 99 | pub fn new(s: &'a str) -> Result { 100 | match validate_parser::validate_varname(s, 0) { 101 | Ok(_) => Ok(Self::from_trusted(s)), 102 | Err(e) => Err(e), 103 | } 104 | } 105 | 106 | /// Returns the varibale name. 107 | #[inline] 108 | #[must_use] 109 | pub fn as_str(&self) -> &'a str { 110 | self.0 111 | } 112 | } 113 | 114 | /// Variable specifier. 115 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 116 | pub struct VarSpec<'a> { 117 | /// Variable name. 118 | name: VarName<'a>, 119 | /// Variable modifier. 120 | modifier: Modifier, 121 | } 122 | 123 | impl<'a> VarSpec<'a> { 124 | /// Returns the varibale name. 125 | #[inline] 126 | #[must_use] 127 | pub(super) fn name(&self) -> VarName<'a> { 128 | self.name 129 | } 130 | 131 | /// Returns the modifier. 132 | #[inline] 133 | #[must_use] 134 | pub(super) fn modifier(&self) -> Modifier { 135 | self.modifier 136 | } 137 | 138 | /// Parses the trusted varspec string. 139 | /// 140 | /// # Panics 141 | /// 142 | /// May panic if the input is invalid. 143 | #[must_use] 144 | pub(super) fn parse_trusted(s: &'a str) -> Self { 145 | if let Some(varname) = s.strip_suffix('*') { 146 | // `varname "*"`. 147 | return Self { 148 | name: VarName::from_trusted(varname), 149 | modifier: Modifier::Explode, 150 | }; 151 | } 152 | // `varname ":" max-length` or `varname`. 
153 | match find_split_hole(s, b':') { 154 | Some((varname, max_len)) => { 155 | let max_len: u16 = max_len 156 | .parse() 157 | .expect("[precondition] the input should be valid `varspec`"); 158 | Self { 159 | name: VarName::from_trusted(varname), 160 | modifier: Modifier::MaxLen(max_len), 161 | } 162 | } 163 | None => Self { 164 | name: VarName(s), 165 | modifier: Modifier::None, 166 | }, 167 | } 168 | } 169 | } 170 | 171 | /// Variable list. 172 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 173 | pub(super) struct VarListStr<'a>(&'a str); 174 | 175 | impl<'a> VarListStr<'a> { 176 | /// Creates a new variable list. 177 | /// 178 | /// # Precondition 179 | /// 180 | /// The given string should be a valid variable list. 181 | #[inline] 182 | #[must_use] 183 | pub(super) fn new(s: &'a str) -> Self { 184 | Self(s) 185 | } 186 | } 187 | 188 | impl<'a> IntoIterator for VarListStr<'a> { 189 | type IntoIter = VarListIter<'a>; 190 | type Item = (usize, VarSpec<'a>); 191 | 192 | #[inline] 193 | fn into_iter(self) -> Self::IntoIter { 194 | VarListIter { rest: self.0 } 195 | } 196 | } 197 | 198 | /// Iterator of variable specs. 199 | #[derive(Debug, Clone)] 200 | pub(super) struct VarListIter<'a> { 201 | /// Remaining input. 202 | rest: &'a str, 203 | } 204 | 205 | impl<'a> Iterator for VarListIter<'a> { 206 | /// A pair of the length of the varspec and the varspec itself. 207 | type Item = (usize, VarSpec<'a>); 208 | 209 | fn next(&mut self) -> Option { 210 | match find_split_hole(self.rest, b',') { 211 | Some((prefix, new_rest)) => { 212 | self.rest = new_rest; 213 | Some((prefix.len(), VarSpec::parse_trusted(prefix))) 214 | } 215 | None => { 216 | if self.rest.is_empty() { 217 | None 218 | } else { 219 | Some(( 220 | self.rest.len(), 221 | VarSpec::parse_trusted(mem::take(&mut self.rest)), 222 | )) 223 | } 224 | } 225 | } 226 | } 227 | } 228 | 229 | /// Variable modifier. 
230 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 231 | pub(super) enum Modifier { 232 | /// No modifiers. 233 | None, 234 | /// Max length, greater than 0 and less than 10000. 235 | MaxLen(u16), 236 | /// Explode the variable, e.g. the var spec has `*`. 237 | Explode, 238 | } 239 | 240 | /// Operator that is possibly reserved for future extension. 241 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 242 | pub(super) enum MaybeOperator { 243 | /// Working operator. 244 | Operator(Operator), 245 | /// Reserved for future extensions. 246 | Reserved(OperatorReservedForFuture), 247 | } 248 | 249 | impl MaybeOperator { 250 | /// Returns the operator for the given character. 251 | pub(super) fn from_byte(b: u8) -> Option { 252 | match b { 253 | b'+' => Some(Self::Operator(Operator::Reserved)), 254 | b'#' => Some(Self::Operator(Operator::Fragment)), 255 | b'.' => Some(Self::Operator(Operator::Label)), 256 | b'/' => Some(Self::Operator(Operator::PathSegments)), 257 | b';' => Some(Self::Operator(Operator::PathParams)), 258 | b'?' => Some(Self::Operator(Operator::FormQuery)), 259 | b'&' => Some(Self::Operator(Operator::FormQueryCont)), 260 | b'=' => Some(Self::Reserved(OperatorReservedForFuture::Equals)), 261 | b',' => Some(Self::Reserved(OperatorReservedForFuture::Comma)), 262 | b'!' => Some(Self::Reserved(OperatorReservedForFuture::Exclamation)), 263 | b'@' => Some(Self::Reserved(OperatorReservedForFuture::AtSign)), 264 | b'|' => Some(Self::Reserved(OperatorReservedForFuture::Pipe)), 265 | _ => None, 266 | } 267 | } 268 | } 269 | 270 | /// Working operator. 271 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 272 | pub(super) enum Operator { 273 | /// No operator. String expansion. 274 | String, 275 | /// Reserved expansion by `+`. 276 | Reserved, 277 | /// Fragment expansion by `#`. 278 | Fragment, 279 | /// Label expansion by `.`. 280 | Label, 281 | /// Path segments by `/`. 282 | PathSegments, 283 | /// Path-style parameters by `;`. 
284 | PathParams, 285 | /// Form-style query by `?`. 286 | FormQuery, 287 | /// Form-style query continuation by `&`. 288 | FormQueryCont, 289 | } 290 | 291 | impl Operator { 292 | /// Returns the operator for the given character. 293 | #[must_use] 294 | pub(super) fn from_byte(b: u8) -> Option { 295 | match b { 296 | b'+' => Some(Self::Reserved), 297 | b'#' => Some(Self::Fragment), 298 | b'.' => Some(Self::Label), 299 | b'/' => Some(Self::PathSegments), 300 | b';' => Some(Self::PathParams), 301 | b'?' => Some(Self::FormQuery), 302 | b'&' => Some(Self::FormQueryCont), 303 | _ => None, 304 | } 305 | } 306 | 307 | /// Returns the string length of the operator. 308 | #[inline] 309 | #[must_use] 310 | pub(super) const fn len(self) -> usize { 311 | if matches!(self, Self::String) { 312 | 0 313 | } else { 314 | 1 315 | } 316 | } 317 | } 318 | 319 | /// Operator reserved for future extension. 320 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 321 | pub(super) enum OperatorReservedForFuture { 322 | /// Reserved `=` operator. 323 | Equals, 324 | /// Reserved `,` operator. 325 | Comma, 326 | /// Reserved `!` operator. 327 | Exclamation, 328 | /// Reserved `@` operator. 329 | AtSign, 330 | /// Reserved `|` operator. 331 | Pipe, 332 | } 333 | -------------------------------------------------------------------------------- /src/template/error.rs: -------------------------------------------------------------------------------- 1 | //! Errors related to URI templates. 2 | 3 | use core::fmt; 4 | 5 | #[cfg(feature = "std")] 6 | use std::error; 7 | 8 | /// Template construction and expansion error kind. 9 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 10 | pub(super) enum ErrorKind { 11 | /// Cannot write to the backend. 12 | WriteFailed, 13 | /// Expression is not closed. 14 | ExpressionNotClosed, 15 | /// Invalid character. 16 | InvalidCharacter, 17 | /// Invalid expression. 18 | InvalidExpression, 19 | /// Invalid percent-encoded triplets. 
20 | InvalidPercentEncoding, 21 | /// Invalid UTF-8 bytes. 22 | InvalidUtf8, 23 | /// Unexpected value type for the variable. 24 | UnexpectedValueType, 25 | /// Unsupported operator, including operators reserved for future. 26 | UnsupportedOperator, 27 | } 28 | 29 | impl ErrorKind { 30 | /// Returns the error message. 31 | #[must_use] 32 | fn as_str(self) -> &'static str { 33 | match self { 34 | Self::WriteFailed => "failed to write to the backend writer", 35 | Self::ExpressionNotClosed => "expression not closed", 36 | Self::InvalidCharacter => "invalid character", 37 | Self::InvalidExpression => "invalid expression", 38 | Self::InvalidPercentEncoding => "invalid percent-encoded triplets", 39 | Self::InvalidUtf8 => "invalid utf-8 byte sequence", 40 | Self::UnexpectedValueType => "unexpected value type for the variable", 41 | Self::UnsupportedOperator => "unsupported operator", 42 | } 43 | } 44 | } 45 | 46 | /// Template construction and expansion error. 47 | /// 48 | // Note that this type should implement `Copy` trait. 49 | // To return additional non-`Copy` data as an error, use wrapper type 50 | // (as `std::string::FromUtf8Error` contains `std::str::Utf8Error`). 51 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 52 | pub struct Error { 53 | /// Error kind. 54 | kind: ErrorKind, 55 | /// Location (byte position of the error). 56 | location: usize, 57 | } 58 | 59 | impl Error { 60 | /// Creates a new `Error`. 61 | /// 62 | /// For internal use. 63 | #[inline] 64 | #[must_use] 65 | pub(super) fn new(kind: ErrorKind, location: usize) -> Self { 66 | Self { kind, location } 67 | } 68 | 69 | /// Returns the byte position the error is detected. 70 | /// 71 | /// NOTE: This is not a part of the public API since the value to be 72 | /// returned (i.e., the definition of the "position" of an error) is not 73 | /// guaranteed to be stable. 
74 | #[cfg(test)] 75 | pub(super) fn location(&self) -> usize { 76 | self.location 77 | } 78 | } 79 | 80 | impl fmt::Display for Error { 81 | #[inline] 82 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 83 | write!( 84 | f, 85 | "invalid URI template: {} (at {}-th byte)", 86 | self.kind.as_str(), 87 | self.location 88 | ) 89 | } 90 | } 91 | 92 | #[cfg(feature = "std")] 93 | impl error::Error for Error {} 94 | 95 | /// Error on conversion into a URI template type. 96 | // TODO: Unifiable to `types::CreationError`? 97 | #[cfg(feature = "alloc")] 98 | pub struct CreationError { 99 | /// Soruce data. 100 | source: T, 101 | /// Validation error. 102 | error: Error, 103 | } 104 | 105 | #[cfg(feature = "alloc")] 106 | impl CreationError { 107 | /// Returns the source data. 108 | #[must_use] 109 | pub fn into_source(self) -> T { 110 | self.source 111 | } 112 | 113 | /// Returns the validation error. 114 | #[must_use] 115 | pub fn validation_error(&self) -> Error { 116 | self.error 117 | } 118 | 119 | /// Creates a new `CreationError`. 
120 | #[must_use] 121 | pub(crate) fn new(error: Error, source: T) -> Self { 122 | Self { source, error } 123 | } 124 | } 125 | 126 | #[cfg(feature = "alloc")] 127 | impl fmt::Debug for CreationError { 128 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 129 | f.debug_struct("CreationError") 130 | .field("source", &self.source) 131 | .field("error", &self.error) 132 | .finish() 133 | } 134 | } 135 | 136 | #[cfg(feature = "alloc")] 137 | impl Clone for CreationError { 138 | fn clone(&self) -> Self { 139 | Self { 140 | source: self.source.clone(), 141 | error: self.error, 142 | } 143 | } 144 | } 145 | 146 | #[cfg(feature = "alloc")] 147 | impl fmt::Display for CreationError { 148 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 149 | self.error.fmt(f) 150 | } 151 | } 152 | 153 | #[cfg(feature = "std")] 154 | impl error::Error for CreationError {} 155 | -------------------------------------------------------------------------------- /src/template/parser.rs: -------------------------------------------------------------------------------- 1 | //! URI Template parser. 2 | 3 | pub(super) mod char; 4 | pub(super) mod validate; 5 | 6 | pub(super) use self::validate::validate_template_str; 7 | -------------------------------------------------------------------------------- /src/template/parser/char.rs: -------------------------------------------------------------------------------- 1 | //! Characters. 2 | 3 | /// Properties of ASCII characters. 4 | /// 5 | /// About `'` (single quote) being considered as a literal: see 6 | /// [Errata ID 6937](https://www.rfc-editor.org/errata/eid6937). 
7 | const CHARS_TABLE: [u8; 128] = [ 8 | 0b_0000_0000, // NUL 9 | 0b_0000_0000, // SOH 10 | 0b_0000_0000, // STX 11 | 0b_0000_0000, // ETX 12 | 0b_0000_0000, // EOT 13 | 0b_0000_0000, // ENQ 14 | 0b_0000_0000, // ACK 15 | 0b_0000_0000, // BEL 16 | 0b_0000_0000, // BS 17 | 0b_0000_0000, // HT 18 | 0b_0000_0000, // LF 19 | 0b_0000_0000, // VT 20 | 0b_0000_0000, // FF 21 | 0b_0000_0000, // CR 22 | 0b_0000_0000, // SO 23 | 0b_0000_0000, // SI 24 | 0b_0000_0000, // DLE 25 | 0b_0000_0000, // DC1 26 | 0b_0000_0000, // DC2 27 | 0b_0000_0000, // DC3 28 | 0b_0000_0000, // DC4 29 | 0b_0000_0000, // NAK 30 | 0b_0000_0000, // SYN 31 | 0b_0000_0000, // ETB 32 | 0b_0000_0000, // CAN 33 | 0b_0000_0000, // EM 34 | 0b_0000_0000, // SUB 35 | 0b_0000_0000, // ESC 36 | 0b_0000_0000, // FS 37 | 0b_0000_0000, // GS 38 | 0b_0000_0000, // RS 39 | 0b_0000_0000, // US 40 | 0b_0000_0000, // SPACE 41 | 0b_0000_0001, // ! 42 | 0b_0000_0000, // " 43 | 0b_0000_0001, // # 44 | 0b_0000_0001, // $ 45 | 0b_0000_0000, // % 46 | 0b_0000_0001, // & 47 | 0b_0000_0001, // ' 48 | 0b_0000_0001, // ( 49 | 0b_0000_0001, // ) 50 | 0b_0000_0001, // * 51 | 0b_0000_0001, // + 52 | 0b_0000_0001, // , 53 | 0b_0000_0001, // - 54 | 0b_0000_0101, // . 55 | 0b_0000_0001, // / 56 | 0b_0000_0111, // 0 57 | 0b_0000_0111, // 1 58 | 0b_0000_0111, // 2 59 | 0b_0000_0111, // 3 60 | 0b_0000_0111, // 4 61 | 0b_0000_0111, // 5 62 | 0b_0000_0111, // 6 63 | 0b_0000_0111, // 7 64 | 0b_0000_0111, // 8 65 | 0b_0000_0111, // 9 66 | 0b_0000_0001, // : 67 | 0b_0000_0001, // ; 68 | 0b_0000_0000, // < 69 | 0b_0000_0001, // = 70 | 0b_0000_0000, // > 71 | 0b_0000_0001, // ? 
72 | 0b_0000_0001, // @ 73 | 0b_0000_0111, // A 74 | 0b_0000_0111, // B 75 | 0b_0000_0111, // C 76 | 0b_0000_0111, // D 77 | 0b_0000_0111, // E 78 | 0b_0000_0111, // F 79 | 0b_0000_0111, // G 80 | 0b_0000_0111, // H 81 | 0b_0000_0111, // I 82 | 0b_0000_0111, // J 83 | 0b_0000_0111, // K 84 | 0b_0000_0111, // L 85 | 0b_0000_0111, // M 86 | 0b_0000_0111, // N 87 | 0b_0000_0111, // O 88 | 0b_0000_0111, // P 89 | 0b_0000_0111, // Q 90 | 0b_0000_0111, // R 91 | 0b_0000_0111, // S 92 | 0b_0000_0111, // T 93 | 0b_0000_0111, // U 94 | 0b_0000_0111, // V 95 | 0b_0000_0111, // W 96 | 0b_0000_0111, // X 97 | 0b_0000_0111, // Y 98 | 0b_0000_0111, // Z 99 | 0b_0000_0001, // [ 100 | 0b_0000_0000, // \ 101 | 0b_0000_0001, // ] 102 | 0b_0000_0000, // ^ 103 | 0b_0000_0111, // _ 104 | 0b_0000_0000, // ` 105 | 0b_0000_0111, // a 106 | 0b_0000_0111, // b 107 | 0b_0000_0111, // c 108 | 0b_0000_0111, // d 109 | 0b_0000_0111, // e 110 | 0b_0000_0111, // f 111 | 0b_0000_0111, // g 112 | 0b_0000_0111, // h 113 | 0b_0000_0111, // i 114 | 0b_0000_0111, // j 115 | 0b_0000_0111, // k 116 | 0b_0000_0111, // l 117 | 0b_0000_0111, // m 118 | 0b_0000_0111, // n 119 | 0b_0000_0111, // o 120 | 0b_0000_0111, // p 121 | 0b_0000_0111, // q 122 | 0b_0000_0111, // r 123 | 0b_0000_0111, // s 124 | 0b_0000_0111, // t 125 | 0b_0000_0111, // u 126 | 0b_0000_0111, // v 127 | 0b_0000_0111, // w 128 | 0b_0000_0111, // x 129 | 0b_0000_0111, // y 130 | 0b_0000_0111, // z 131 | 0b_0000_0000, // { 132 | 0b_0000_0000, // | 133 | 0b_0000_0000, // } 134 | 0b_0000_0001, // ~ 135 | 0b_0000_0000, // DEL 136 | ]; 137 | 138 | /// A mask to test whether the character matches `literals` rule defined in [RFC 6570]. 139 | /// 140 | /// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.1 141 | const CHARS_TABLE_MASK_LITERAL: u8 = 1 << 0; 142 | 143 | /// A mask to test whether the character matches `varchar` rule defined in [RFC 6570]. 
144 | /// 145 | /// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.3 146 | const CHARS_TABLE_MASK_VARCHAR_START: u8 = 1 << 1; 147 | 148 | /// A mask to test whether the character matches `varchar` rule defined in [RFC 6570] or a period. 149 | /// 150 | /// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.3 151 | const CHARS_TABLE_MASK_VARCHAR_CONTINUE: u8 = 1 << 2; 152 | 153 | /// Returns true if the given ASCII character is allowed in a literal string. 154 | /// 155 | /// # Precondition 156 | /// 157 | /// The given byte should be an ASCII character, i.e. should be less than 128. 158 | #[inline] 159 | #[must_use] 160 | pub(super) const fn is_ascii_literal_char(c: u8) -> bool { 161 | (CHARS_TABLE[c as usize] & CHARS_TABLE_MASK_LITERAL) != 0 162 | } 163 | 164 | /// Returns true if the given ASCII character is allowed as the beginning of the `varname`. 165 | /// 166 | /// Note that this does not return true for `%` character. It is caller's 167 | /// responsibility to test validity of percent-encoded triplets. 168 | /// 169 | /// # Precondition 170 | /// 171 | /// The given byte should be an ASCII character, i.e. should be less than 128. 172 | #[inline] 173 | #[must_use] 174 | pub(super) const fn is_ascii_varchar_start(c: u8) -> bool { 175 | (CHARS_TABLE[c as usize] & CHARS_TABLE_MASK_VARCHAR_START) != 0 176 | } 177 | 178 | /// Returns true if the given ASCII character is allowed as the non-beginning of the `varname`. 179 | /// 180 | /// Note that this does not return true for `%` character. It is caller's 181 | /// responsibility to test validity of percent-encoded triplets. 182 | /// 183 | /// # Precondition 184 | /// 185 | /// The given byte should be an ASCII character, i.e. should be less than 128. 
186 | #[inline] 187 | #[must_use] 188 | pub(super) const fn is_ascii_varchar_continue(c: u8) -> bool { 189 | (CHARS_TABLE[c as usize] & CHARS_TABLE_MASK_VARCHAR_CONTINUE) != 0 190 | } 191 | -------------------------------------------------------------------------------- /src/template/parser/validate.rs: -------------------------------------------------------------------------------- 1 | //! Validating parsers. 2 | 3 | use crate::parser::str::{ 4 | find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded, starts_with_double_hexdigits, 5 | }; 6 | use crate::template::components::MaybeOperator; 7 | use crate::template::error::{Error, ErrorKind}; 8 | 9 | use crate::template::parser::char::{ 10 | is_ascii_literal_char, is_ascii_varchar_continue, is_ascii_varchar_start, 11 | }; 12 | 13 | /// Returns `Ok(())` if the given string is a valid literal. 14 | fn validate_literal(s: &str, offset: usize) -> Result<(), Error> { 15 | match s 16 | .chars() 17 | .position(|c| !c.is_ascii() || !is_ascii_literal_char(c as u8)) 18 | { 19 | Some(pos) => Err(Error::new(ErrorKind::InvalidCharacter, offset + pos)), 20 | None => Ok(()), 21 | } 22 | } 23 | 24 | /// Returns `Ok(())` if the given string is a valid varspec. 
25 | fn validate_varspec(s: &str, offset: usize) -> Result<(), Error> { 26 | match find_split2_hole(s, b':', b'*') { 27 | Some((maybe_varname, b':', maybe_len)) => { 28 | validate_varname(maybe_varname, offset)?; 29 | if !(1..=5).contains(&maybe_len.len()) { 30 | return Err(Error::new( 31 | ErrorKind::InvalidExpression, 32 | offset + maybe_varname.len() + 2, 33 | )); 34 | } 35 | if let Some(pos) = maybe_len.bytes().position(|b| !b.is_ascii_digit()) { 36 | return Err(Error::new( 37 | ErrorKind::InvalidExpression, 38 | offset + maybe_varname.len() + 2 + pos, 39 | )); 40 | } 41 | } 42 | Some((maybe_varname, b'*', extra)) => { 43 | validate_varname(maybe_varname, offset)?; 44 | if !extra.is_empty() { 45 | return Err(Error::new( 46 | ErrorKind::InvalidExpression, 47 | offset + maybe_varname.len() + 1, 48 | )); 49 | } 50 | } 51 | Some((_, sep, _)) => unreachable!("[consistency] the byte {sep:#02x} is not searched"), 52 | None => validate_varname(s, offset)?, 53 | } 54 | Ok(()) 55 | } 56 | 57 | /// Returns `Ok(())` if the given string is a valid varname. 58 | pub(crate) fn validate_varname(s: &str, offset: usize) -> Result<(), Error> { 59 | let rest = match s.as_bytes().first() { 60 | Some(b'%') if starts_with_double_hexdigits(&s.as_bytes()[1..]) => &s[3..], 61 | Some(b) if b.is_ascii() && is_ascii_varchar_start(*b) => &s[1..], 62 | _ => return Err(Error::new(ErrorKind::InvalidExpression, offset)), 63 | }; 64 | let is_valid = satisfy_chars_with_pct_encoded(rest, is_ascii_varchar_continue, |_| false); 65 | if !is_valid { 66 | return Err(Error::new(ErrorKind::InvalidExpression, offset)); 67 | } 68 | Ok(()) 69 | } 70 | 71 | /// Returns `Ok(())` if the given string is a valid expression. 72 | /// 73 | /// "Expression" here is the expression body inside `{` and `}`, but not including braces. 
74 | fn validate_expr_body(s: &str, mut offset: usize) -> Result<(), Error> { 75 | if s.is_empty() { 76 | return Err(Error::new(ErrorKind::InvalidExpression, offset)); 77 | } 78 | 79 | // Skip the operator. 80 | let maybe_variable_list = match MaybeOperator::from_byte(s.as_bytes()[0]) { 81 | Some(MaybeOperator::Operator(_)) => { 82 | offset += 1; 83 | &s[1..] 84 | } 85 | Some(MaybeOperator::Reserved(_)) => { 86 | return Err(Error::new(ErrorKind::UnsupportedOperator, offset)); 87 | } 88 | None => s, 89 | }; 90 | 91 | // Validate varspecs. 92 | for (spec_i, maybe_varspec) in maybe_variable_list.split(',').enumerate() { 93 | if spec_i != 0 { 94 | // Add the length of the leading separator `,`. 95 | offset += 1; 96 | } 97 | validate_varspec(maybe_varspec, offset)?; 98 | offset += maybe_varspec.len(); 99 | } 100 | 101 | Ok(()) 102 | } 103 | 104 | /// Validates whether the given string is valid as a URI template. 105 | /// 106 | /// Returns `Ok(())` if the given string is a valid URI template. 
107 | pub(in crate::template) fn validate_template_str(s: &str) -> Result<(), Error> { 108 | let mut rest = s; 109 | let mut offset = 0; 110 | while !rest.is_empty() { 111 | rest = match find_split2_hole(rest, b'%', b'{') { 112 | Some((literal, b'%', xdigits2_and_rest)) => { 113 | validate_literal(literal, offset)?; 114 | 115 | if xdigits2_and_rest.len() < 2 { 116 | return Err(Error::new( 117 | ErrorKind::InvalidPercentEncoding, 118 | offset + literal.len(), 119 | )); 120 | } 121 | let (xdigits2, new_rest) = xdigits2_and_rest.split_at(2); 122 | if !xdigits2.as_bytes()[0].is_ascii_hexdigit() { 123 | return Err(Error::new( 124 | ErrorKind::InvalidPercentEncoding, 125 | offset + literal.len() + 1, 126 | )); 127 | } 128 | if !xdigits2.as_bytes()[1].is_ascii_hexdigit() { 129 | return Err(Error::new( 130 | ErrorKind::InvalidPercentEncoding, 131 | offset + literal.len() + 2, 132 | )); 133 | } 134 | new_rest 135 | } 136 | Some((literal, b'{', expr_and_rest)) => { 137 | validate_literal(literal, offset)?; 138 | 139 | let (expr, new_rest) = match find_split_hole(expr_and_rest, b'}') { 140 | Some(v) => v, 141 | None => { 142 | return Err(Error::new( 143 | ErrorKind::ExpressionNotClosed, 144 | offset + literal.len(), 145 | )) 146 | } 147 | }; 148 | 149 | // +1 is `+ "{".len()`. 150 | validate_expr_body(expr, offset + literal.len() + 1)?; 151 | 152 | new_rest 153 | } 154 | Some(_) => unreachable!("[consistency] searching only `%` and `{{`"), 155 | None => return validate_literal(rest, offset), 156 | }; 157 | offset = s.len() - rest.len(); 158 | } 159 | 160 | Ok(()) 161 | } 162 | -------------------------------------------------------------------------------- /src/template/simple_context.rs: -------------------------------------------------------------------------------- 1 | //! Simple general-purpose context type. 
2 | 3 | use core::ops::ControlFlow; 4 | 5 | use alloc::collections::BTreeMap; 6 | #[cfg(all(feature = "alloc", not(feature = "std")))] 7 | use alloc::string::String; 8 | #[cfg(all(feature = "alloc", not(feature = "std")))] 9 | use alloc::vec::Vec; 10 | 11 | use crate::template::context::{Context, VarName, Visitor}; 12 | 13 | /// Value. 14 | #[derive(Debug, Clone)] 15 | pub enum Value { 16 | /// Undefined (i.e. null). 17 | Undefined, 18 | /// String value. 19 | String(String), 20 | /// List. 21 | List(Vec), 22 | /// Associative array. 23 | Assoc(Vec<(String, String)>), 24 | } 25 | 26 | impl From<&str> for Value { 27 | #[inline] 28 | fn from(v: &str) -> Self { 29 | Self::String(v.into()) 30 | } 31 | } 32 | 33 | impl From for Value { 34 | #[inline] 35 | fn from(v: String) -> Self { 36 | Self::String(v) 37 | } 38 | } 39 | 40 | /// Simple template expansion context. 41 | #[derive(Default, Debug, Clone)] 42 | pub struct SimpleContext { 43 | /// Variable values. 44 | // Any map types (including `HashMap`) is ok, but the hash map is not provided by `alloc`. 45 | // 46 | // QUESTION: Should hexdigits in percent-encoded triplets in varnames be 47 | // compared case sensitively? 48 | variables: BTreeMap, 49 | } 50 | 51 | impl SimpleContext { 52 | /// Creates a new empty context. 
53 | /// 54 | /// # Examples 55 | /// 56 | /// ``` 57 | /// # use iri_string::template::Error; 58 | /// # #[cfg(feature = "alloc")] { 59 | /// use iri_string::spec::UriSpec; 60 | /// use iri_string::template::UriTemplateStr; 61 | /// use iri_string::template::simple_context::SimpleContext; 62 | /// 63 | /// let empty_ctx = SimpleContext::new(); 64 | /// let template = UriTemplateStr::new("{no_such_variable}")?; 65 | /// let expanded = template.expand::(&empty_ctx)?; 66 | /// 67 | /// assert_eq!( 68 | /// expanded.to_string(), 69 | /// "" 70 | /// ); 71 | /// # } 72 | /// # Ok::<_, Error>(()) 73 | /// ``` 74 | #[inline] 75 | #[must_use] 76 | pub fn new() -> Self { 77 | Self::default() 78 | } 79 | 80 | /// Inserts a variable. 81 | /// 82 | /// Passing [`Value::Undefined`] removes the value from the context. 83 | /// 84 | /// The entry will be inserted or removed even if the key is invalid as a 85 | /// variable name. Such entries will be simply ignored on expansion. 86 | /// 87 | /// # Examples 88 | /// 89 | /// ``` 90 | /// # use iri_string::template::Error; 91 | /// # #[cfg(feature = "alloc")] { 92 | /// use iri_string::spec::UriSpec; 93 | /// use iri_string::template::UriTemplateStr; 94 | /// use iri_string::template::simple_context::SimpleContext; 95 | /// 96 | /// let mut context = SimpleContext::new(); 97 | /// context.insert("username", "foo"); 98 | /// 99 | /// let template = UriTemplateStr::new("/users/{username}")?; 100 | /// let expanded = template.expand::(&context)?; 101 | /// 102 | /// assert_eq!( 103 | /// expanded.to_string(), 104 | /// "/users/foo" 105 | /// ); 106 | /// # } 107 | /// # Ok::<_, Error>(()) 108 | /// ``` 109 | /// 110 | /// Passing [`Value::Undefined`] removes the value from the context. 
111 | /// 112 | /// ``` 113 | /// # use iri_string::template::Error; 114 | /// ## [cfg(feature = "alloc")] { 115 | /// use iri_string::spec::UriSpec; 116 | /// use iri_string::template::UriTemplateStr; 117 | /// use iri_string::template::simple_context::{SimpleContext, Value}; 118 | /// 119 | /// let mut context = SimpleContext::new(); 120 | /// context.insert("username", "foo"); 121 | /// context.insert("username", Value::Undefined); 122 | /// 123 | /// let template = UriTemplateStr::new("/users/{username}")?; 124 | /// let expanded = template.expand::(&context)?; 125 | /// 126 | /// assert_eq!( 127 | /// expanded.to_string(), 128 | /// "/users/" 129 | /// ); 130 | /// # } 131 | /// # Ok::<_, Error>(()) 132 | /// ``` 133 | pub fn insert(&mut self, key: K, value: V) -> Option 134 | where 135 | K: Into, 136 | V: Into, 137 | { 138 | let key = key.into(); 139 | match value.into() { 140 | Value::Undefined => self.variables.remove(&key), 141 | value => self.variables.insert(key, value), 142 | } 143 | } 144 | 145 | /// Removes all entries in the context. 
146 | /// 147 | /// # Examples 148 | /// 149 | /// ``` 150 | /// # use iri_string::template::Error; 151 | /// # #[cfg(feature = "alloc")] { 152 | /// use iri_string::spec::UriSpec; 153 | /// use iri_string::template::UriTemplateStr; 154 | /// use iri_string::template::simple_context::SimpleContext; 155 | /// 156 | /// let template = UriTemplateStr::new("{foo,bar}")?; 157 | /// let mut context = SimpleContext::new(); 158 | /// 159 | /// context.insert("foo", "FOO"); 160 | /// context.insert("bar", "BAR"); 161 | /// assert_eq!( 162 | /// template.expand::(&context)?.to_string(), 163 | /// "FOO,BAR" 164 | /// ); 165 | /// 166 | /// context.clear(); 167 | /// assert_eq!( 168 | /// template.expand::(&context)?.to_string(), 169 | /// "" 170 | /// ); 171 | /// # } 172 | /// # Ok::<_, Error>(()) 173 | /// ``` 174 | #[inline] 175 | pub fn clear(&mut self) { 176 | self.variables.clear(); 177 | } 178 | 179 | /// Returns a reference to the value for the key. 180 | // 181 | // QUESTION: Should hexdigits in percent-encoded triplets in varnames be 182 | // compared case sensitively? 
183 | #[inline] 184 | #[must_use] 185 | pub fn get(&self, key: VarName<'_>) -> Option<&Value> { 186 | self.variables.get(key.as_str()) 187 | } 188 | } 189 | 190 | impl Context for SimpleContext { 191 | fn visit(&self, visitor: V) -> V::Result { 192 | use crate::template::context::{AssocVisitor, ListVisitor}; 193 | 194 | let name = visitor.var_name().as_str(); 195 | match self.variables.get(name) { 196 | None | Some(Value::Undefined) => visitor.visit_undefined(), 197 | Some(Value::String(s)) => visitor.visit_string(s), 198 | Some(Value::List(list)) => { 199 | let mut visitor = visitor.visit_list(); 200 | if let ControlFlow::Break(res) = 201 | list.iter().try_for_each(|item| visitor.visit_item(item)) 202 | { 203 | return res; 204 | } 205 | visitor.finish() 206 | } 207 | Some(Value::Assoc(list)) => { 208 | let mut visitor = visitor.visit_assoc(); 209 | if let ControlFlow::Break(res) = 210 | list.iter().try_for_each(|(k, v)| visitor.visit_entry(k, v)) 211 | { 212 | return res; 213 | } 214 | visitor.finish() 215 | } 216 | } 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /src/template/string/owned.rs: -------------------------------------------------------------------------------- 1 | //! Owned `UriTemplateString`. 2 | 3 | use core::fmt; 4 | 5 | use alloc::borrow::Cow; 6 | #[cfg(all(feature = "alloc", not(feature = "std")))] 7 | use alloc::borrow::ToOwned; 8 | #[cfg(all(feature = "alloc", not(feature = "std")))] 9 | use alloc::boxed::Box; 10 | #[cfg(all(feature = "alloc", not(feature = "std")))] 11 | use alloc::string::String; 12 | 13 | use crate::template::error::{CreationError, Error, ErrorKind}; 14 | use crate::template::parser::validate_template_str; 15 | use crate::template::string::UriTemplateStr; 16 | 17 | /// An owned slice of a URI template. 18 | /// 19 | /// URI Template is defined by [RFC 6570]. 20 | /// 21 | /// Note that "URI Template" can also be used for IRI. 
22 | /// 23 | /// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html 24 | /// 25 | /// # Valid values 26 | /// 27 | /// This type can have a URI template string. 28 | // Note that `From<$ty> for {Arc,Rc}<$slice>` is currently not implemented since 29 | // this won't reuse allocated memory and hides internal memory reallocation. See 30 | // . 31 | // However, this is not decided with firm belief or opinion, so there would be 32 | // a chance that they are implemented in future. 33 | #[cfg_attr(feature = "serde", derive(serde::Serialize))] 34 | #[cfg_attr(feature = "serde", serde(transparent))] 35 | #[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 36 | pub struct UriTemplateString { 37 | /// Inner data. 38 | inner: String, 39 | } 40 | 41 | impl UriTemplateString { 42 | /// Creates a new string without validation. 43 | /// 44 | /// This does not validate the given string, so it is caller's 45 | /// responsibility to ensure the given string is valid. 46 | /// 47 | /// # Safety 48 | /// 49 | /// The given string must be syntactically valid as `Self` type. 50 | /// If not, any use of the returned value or the call of this 51 | /// function itself may result in undefined behavior. 52 | #[inline] 53 | #[must_use] 54 | pub unsafe fn new_unchecked(s: alloc::string::String) -> Self { 55 | // The construction itself can be written in safe Rust, but 56 | // every other place including unsafe functions expects 57 | // `self.inner` to be syntactically valid as `Self`. In order to 58 | // make them safe, the construction should validate the value 59 | // or at least should require users to validate the value by 60 | // making the function `unsafe`. 61 | Self { inner: s } 62 | } 63 | 64 | /// Shrinks the capacity of the inner buffer to match its length. 65 | #[inline] 66 | pub fn shrink_to_fit(&mut self) { 67 | self.inner.shrink_to_fit() 68 | } 69 | 70 | /// Returns the internal buffer capacity in bytes. 
71 | #[inline] 72 | #[must_use] 73 | pub fn capacity(&self) -> usize { 74 | self.inner.capacity() 75 | } 76 | 77 | /// Returns the borrowed IRI string slice. 78 | /// 79 | /// This is equivalent to `&*self`. 80 | #[inline] 81 | #[must_use] 82 | pub fn as_slice(&self) -> &UriTemplateStr { 83 | self.as_ref() 84 | } 85 | 86 | /// Appends the template string. 87 | #[inline] 88 | pub fn append(&mut self, other: &UriTemplateStr) { 89 | self.inner.push_str(other.as_str()); 90 | debug_assert!(validate_template_str(self.as_str()).is_ok()); 91 | } 92 | } 93 | 94 | impl AsRef for UriTemplateString { 95 | #[inline] 96 | fn as_ref(&self) -> &str { 97 | &self.inner 98 | } 99 | } 100 | 101 | impl AsRef for UriTemplateString { 102 | #[inline] 103 | fn as_ref(&self) -> &UriTemplateStr { 104 | // SAFETY: `UriTemplateString and `UriTemplateStr` requires same validation, 105 | // so the content of `self: &UriTemplateString` must be valid as `UriTemplateStr`. 106 | unsafe { UriTemplateStr::new_always_unchecked(AsRef::::as_ref(self)) } 107 | } 108 | } 109 | 110 | impl core::borrow::Borrow for UriTemplateString { 111 | #[inline] 112 | fn borrow(&self) -> &str { 113 | self.as_ref() 114 | } 115 | } 116 | 117 | impl core::borrow::Borrow for UriTemplateString { 118 | #[inline] 119 | fn borrow(&self) -> &UriTemplateStr { 120 | self.as_ref() 121 | } 122 | } 123 | 124 | impl ToOwned for UriTemplateStr { 125 | type Owned = UriTemplateString; 126 | 127 | #[inline] 128 | fn to_owned(&self) -> Self::Owned { 129 | self.into() 130 | } 131 | } 132 | 133 | impl From<&'_ UriTemplateStr> for UriTemplateString { 134 | #[inline] 135 | fn from(s: &UriTemplateStr) -> Self { 136 | // This is safe because `s` must be valid. 
137 | Self { 138 | inner: alloc::string::String::from(s.as_str()), 139 | } 140 | } 141 | } 142 | 143 | impl From for alloc::string::String { 144 | #[inline] 145 | fn from(s: UriTemplateString) -> Self { 146 | s.inner 147 | } 148 | } 149 | 150 | impl<'a> From for Cow<'a, UriTemplateStr> { 151 | #[inline] 152 | fn from(s: UriTemplateString) -> Cow<'a, UriTemplateStr> { 153 | Cow::Owned(s) 154 | } 155 | } 156 | 157 | impl From for Box { 158 | #[inline] 159 | fn from(s: UriTemplateString) -> Box { 160 | let inner: String = s.into(); 161 | let buf = Box::::from(inner); 162 | // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so 163 | // the memory layouts of `Box` and `Box` are 164 | // compatible. Additionally, `UriTemplateString` and `UriTemplateStr` 165 | // require the same syntax. 166 | unsafe { 167 | let raw: *mut str = Box::into_raw(buf); 168 | Box::::from_raw(raw as *mut UriTemplateStr) 169 | } 170 | } 171 | } 172 | 173 | impl TryFrom<&'_ str> for UriTemplateString { 174 | type Error = Error; 175 | 176 | #[inline] 177 | fn try_from(s: &str) -> Result { 178 | <&UriTemplateStr>::try_from(s).map(Into::into) 179 | } 180 | } 181 | 182 | impl TryFrom<&'_ [u8]> for UriTemplateString { 183 | type Error = Error; 184 | 185 | #[inline] 186 | fn try_from(bytes: &[u8]) -> Result { 187 | let s = core::str::from_utf8(bytes) 188 | .map_err(|e| Error::new(ErrorKind::InvalidUtf8, e.valid_up_to()))?; 189 | <&UriTemplateStr>::try_from(s).map(Into::into) 190 | } 191 | } 192 | 193 | impl core::convert::TryFrom for UriTemplateString { 194 | type Error = CreationError; 195 | 196 | #[inline] 197 | fn try_from(s: alloc::string::String) -> Result { 198 | match <&UriTemplateStr>::try_from(s.as_str()) { 199 | Ok(_) => { 200 | // This is safe because `<&UriTemplateStr>::try_from(s)?` ensures 201 | // that the string `s` is valid. 
202 | Ok(Self { inner: s }) 203 | } 204 | Err(e) => Err(CreationError::new(e, s)), 205 | } 206 | } 207 | } 208 | 209 | impl alloc::str::FromStr for UriTemplateString { 210 | type Err = Error; 211 | 212 | #[inline] 213 | fn from_str(s: &str) -> Result { 214 | TryFrom::try_from(s) 215 | } 216 | } 217 | 218 | impl core::ops::Deref for UriTemplateString { 219 | type Target = UriTemplateStr; 220 | 221 | #[inline] 222 | fn deref(&self) -> &UriTemplateStr { 223 | self.as_ref() 224 | } 225 | } 226 | 227 | impl_cmp!(str, UriTemplateStr, Cow<'_, str>); 228 | impl_cmp!(str, &UriTemplateStr, Cow<'_, str>); 229 | 230 | impl_cmp!(str, str, UriTemplateString); 231 | impl_cmp!(str, &str, UriTemplateString); 232 | impl_cmp!(str, Cow<'_, str>, UriTemplateString); 233 | impl_cmp!(str, String, UriTemplateString); 234 | 235 | impl fmt::Display for UriTemplateString { 236 | #[inline] 237 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 238 | f.write_str(self.as_str()) 239 | } 240 | } 241 | 242 | /// Serde deserializer implementation. 243 | #[cfg(feature = "serde")] 244 | mod __serde_owned { 245 | use super::UriTemplateString; 246 | 247 | use core::fmt; 248 | 249 | #[cfg(all(feature = "alloc", feature = "serde", not(feature = "std")))] 250 | use alloc::string::String; 251 | 252 | use serde::{ 253 | de::{self, Visitor}, 254 | Deserialize, Deserializer, 255 | }; 256 | 257 | /// Custom owned string visitor. 
258 | #[derive(Debug, Clone, Copy)] 259 | struct CustomStringVisitor; 260 | 261 | impl Visitor<'_> for CustomStringVisitor { 262 | type Value = UriTemplateString; 263 | 264 | #[inline] 265 | fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 266 | f.write_str("URI template string") 267 | } 268 | 269 | #[inline] 270 | fn visit_str(self, v: &str) -> Result 271 | where 272 | E: de::Error, 273 | { 274 | >::try_from(v).map_err(E::custom) 275 | } 276 | 277 | #[cfg(feature = "serde")] 278 | #[inline] 279 | fn visit_string(self, v: String) -> Result 280 | where 281 | E: de::Error, 282 | { 283 | >::try_from(v).map_err(E::custom) 284 | } 285 | } 286 | 287 | impl<'de> Deserialize<'de> for UriTemplateString { 288 | #[inline] 289 | fn deserialize(deserializer: D) -> Result 290 | where 291 | D: Deserializer<'de>, 292 | { 293 | deserializer.deserialize_str(CustomStringVisitor) 294 | } 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /src/types.rs: -------------------------------------------------------------------------------- 1 | //! URI and IRI types. 2 | //! 3 | //! # URI and IRI 4 | //! 5 | //! IRIs (Internationalized Resource Identifiers) are defined in [RFC 3987], 6 | //! and URIs (Uniform Resource Identifiers) are defined in [RFC 3986]. 7 | //! 8 | //! URI consists of only ASCII characters, and is a subset of IRI. 9 | //! 10 | //! IRIs are defined as below: 11 | //! 12 | //! ```text 13 | //! IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ] 14 | //! IRI-reference = IRI / irelative-ref 15 | //! absolute-IRI = scheme ":" ihier-part [ "?" iquery ] 16 | //! irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] 17 | //! (`irelative-part` is roughly same as `ihier-part`.) 18 | //! ``` 19 | //! 20 | //! Definitions for URIs are almost same, but they cannot have non-ASCII characters. 21 | //! 22 | //! # Types 23 | //! 24 | //! Types can be categorized by: 25 | //! 26 | //! * syntax, 27 | //! 
* spec, and 28 | //! * ownership. 29 | //! 30 | //! ## Syntax 31 | //! 32 | //! Since URIs and IRIs have almost same syntax and share algorithms, they are implemented by 33 | //! generic types. 34 | //! 35 | //! * [`RiStr`] and [`RiString`] 36 | //! + String types for `IRI` and `URI` rules. 37 | //! * [`RiAbsoluteStr`] and [`RiAbsoluteString`] 38 | //! + String types for `absolute-IRI` and `absolute-URI` rules. 39 | //! * [`RiReferenceStr`] and [`RiReferenceString`] 40 | //! + String types for `IRI-reference` and `URI-reference` rules. 41 | //! * [`RiRelativeStr`] and [`RiRelativeString`] 42 | //! + String types for `irelative-ref` and `relative-ref` rules. 43 | //! * [`RiFragmentStr`] and [`RiFragmentString`] 44 | //! + String types for `ifragment` and `fragment` rules. 45 | //! + Note that these types represents a substring of an IRI / URI references. 46 | //! They are not intended to used directly as an IRI / URI references. 47 | //! 48 | //! "Ri" stands for "Resource Identifier". 49 | //! 50 | //! ## Spec 51 | //! 52 | //! These types have a type parameter, which represents RFC specification. 53 | //! [`IriSpec`] represents [RFC 3987] spec, and [`UriSpec`] represents [RFC 3986] spec. 54 | //! For example, `RiAbsoluteStr` can have `absolute-IRI` string value, 55 | //! and `RiReferenceStr` can have `URI-reference` string value. 56 | //! 57 | //! ## Ownership 58 | //! 59 | //! String-like types have usually two variations, borrowed and owned. 60 | //! 61 | //! Borrowed types (such as `str`, `Path`, `OsStr`) are unsized, and used by reference style. 62 | //! Owned types (such as `String`, `PathBuf`, `OsString`) are sized, and requires heap allocation. 63 | //! Owned types can be coerced to a borrowed type (for example, `&String` is automatically coerced 64 | //! to `&str` in many context). 65 | //! 66 | //! IRI / URI types have same variations, `RiFooStr` and `RiFooString` 67 | //! (`Foo` part represents syntax). 68 | //! 
They are very similar to `&str` and `String`. 69 | //! `Deref` is implemented, `RiFooStr::len()` is available, `&RiFooString` can be coerced to 70 | //! `&RiFooStr`, `Cow<'_, RiFooStr>` and `Box<RiFooStr>` are available, and so on. 71 | //! 72 | //! # Hierarchy and safe conversion 73 | //! 74 | //! IRI syntaxes have the hierarchy below. 75 | //! 76 | //! ```text 77 | //! RiReferenceStr 78 | //! |-- RiStr 79 | //! | `-- RiAbsoluteStr 80 | //! `-- RiRelativeStr 81 | //! ``` 82 | //! 83 | //! Therefore, the conversions below are safe and cheap: 84 | //! 85 | //! * `RiStr -> RiReferenceStr` 86 | //! * `RiAbsoluteStr -> RiStr` 87 | //! * `RiAbsoluteStr -> RiReferenceStr` 88 | //! * `RiRelativeStr -> RiReferenceStr` 89 | //! 90 | //! For safely convertible types (consider `FooStr -> BarStr` is safe), traits 91 | //! below are implemented: 92 | //! 93 | //! * `AsRef<BarStr> for FooStr` 94 | //! * `AsRef<BarStr> for FooString` 95 | //! * `From<FooString> for BarString` 96 | //! * `PartialEq<FooStr> for BarStr`, and lots of impls like that 97 | //! + `PartialEq` and `PartialOrd`. 98 | //! + Slice, owned, `Cow`, reference, etc... 99 | //! 100 | //! ## Fallible conversions 101 | //! 102 | //! Fallible conversions are implemented from plain string into IRI strings. 103 | //! 104 | //! * `TryFrom<&str> for &FooStr` 105 | //! * `TryFrom<&str> for FooString` 106 | //! * `TryFrom<String> for FooString` 107 | //! * `FromStr for FooString` 108 | //! 109 | //! Some IRI string types provide more convenient methods to convert between IRI types. 110 | //! For example, [`RiReferenceString::into_iri()`] tries to convert an IRI reference into an IRI, 111 | //! and returns `Result<IriString, IriRelativeString>`. 112 | //! This is because an IRI reference is valid as an IRI or a relative IRI reference. 113 | //! Such methods are usually more efficient than using `TryFrom` for plain strings, because they 114 | //! prevent you from losing ownership of a string, and do a conversion without extra memory 115 | //! allocation. 116 | //! 117 | //! # Aliases 118 | //! 119 | //! 
This module contains type aliases for RFC 3986 URI types and RFC 3987 IRI types. 120 | //! 121 | //! `IriFooStr{,ing}` are aliases of `RiFooStr{,ing}`, and `UriFooStr{,ing}` are aliases 122 | //! of `RiFooStr{,ing}`. 123 | //! 124 | //! # Wrapped string types 125 | //! 126 | //! Similar to string types in std (such as `str`, `std::path::Path`, and `std::ffi::OsStr`), 127 | //! IRI string types in this crate provides convenient conversions to: 128 | //! 129 | //! * `std::box::Box`, 130 | //! * `std::borrow::Cow`, 131 | //! * `std::rc::Rc`, and 132 | //! * `std::sync::Arc`. 133 | //! 134 | //! ``` 135 | //! # use iri_string::validate::Error; 136 | //! # #[cfg(feature = "std")] { 137 | //! use std::borrow::Cow; 138 | //! use std::rc::Rc; 139 | //! use std::sync::Arc; 140 | //! 141 | //! use iri_string::types::IriStr; 142 | //! 143 | //! let iri = IriStr::new("http://example.com/")?; 144 | //! let iri_owned = iri.to_owned(); 145 | //! 146 | //! // From slice. 147 | //! let cow_1_1: Cow<'_, IriStr> = iri.into(); 148 | //! let cow_1_2 = Cow::<'_, IriStr>::from(iri); 149 | //! assert!(matches!(cow_1_1, Cow::Borrowed(_))); 150 | //! assert!(matches!(cow_1_2, Cow::Borrowed(_))); 151 | //! // From owned. 152 | //! let cow_2_1: Cow<'_, IriStr> = iri_owned.clone().into(); 153 | //! let cow_2_2 = Cow::<'_, IriStr>::from(iri_owned.clone()); 154 | //! assert!(matches!(cow_2_1, Cow::Owned(_))); 155 | //! assert!(matches!(cow_2_2, Cow::Owned(_))); 156 | //! 157 | //! // From slice. 158 | //! let box_1_1: Box = iri.into(); 159 | //! let box_1_2 = Box::::from(iri); 160 | //! // From owned. 161 | //! let box_2_1: Box = iri_owned.clone().into(); 162 | //! let box_2_2 = Box::::from(iri_owned.clone()); 163 | //! 164 | //! // From slice. 165 | //! let rc_1_1: Rc = iri.into(); 166 | //! let rc_1_2 = Rc::::from(iri); 167 | //! // From owned. 168 | //! // Note that `From for Rc` is not implemented for now. 169 | //! // Get borrowed string by `.as_slice()` and convert it. 170 | //! 
let rc_2_1: Rc = iri_owned.clone().as_slice().into(); 171 | //! let rc_2_2 = Rc::::from(iri_owned.clone().as_slice()); 172 | //! 173 | //! // From slice. 174 | //! let arc_1_1: Arc = iri.into(); 175 | //! let arc_1_2 = Arc::::from(iri); 176 | //! // From owned. 177 | //! // Note that `From for Arc` is not implemented for now. 178 | //! // Get borrowed string by `.as_slice()` and convert it. 179 | //! let arc_2_1: Arc = iri_owned.clone().as_slice().into(); 180 | //! let arc_2_2 = Arc::::from(iri_owned.clone().as_slice()); 181 | //! # } 182 | //! # Ok::<_, Error>(()) 183 | //! ``` 184 | //! 185 | //! [RFC 3986]: https://tools.ietf.org/html/rfc3986 186 | //! [RFC 3987]: https://tools.ietf.org/html/rfc3987 187 | //! [`RiStr`]: struct.RiStr.html 188 | //! [`RiString`]: struct.RiString.html 189 | //! [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html 190 | //! [`RiAbsoluteString`]: struct.RiAbsoluteString.html 191 | //! [`RiFragmentStr`]: struct.RiFragmentStr.html 192 | //! [`RiFragmentString`]: struct.RiFragmentString.html 193 | //! [`RiReferenceStr`]: struct.RiReferenceStr.html 194 | //! [`RiReferenceString`]: struct.RiReferenceString.html 195 | //! [`RiReferenceString::into_iri()`]: struct.RiReferenceString.html#method.into_iri 196 | //! [`RiRelativeStr`]: struct.RiRelativeStr.html 197 | //! [`RiRelativeString`]: struct.RiRelativeString.html 198 | //! [`IriSpec`]: ../spec/enum.IriSpec.html 199 | //! 
[`UriSpec`]: ../spec/enum.UriSpec.html 200 | 201 | #[cfg(feature = "alloc")] 202 | pub use self::{ 203 | generic::{ 204 | CreationError, RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, 205 | RiRelativeString, RiString, 206 | }, 207 | iri::{ 208 | IriAbsoluteString, IriFragmentString, IriQueryString, IriReferenceString, 209 | IriRelativeString, IriString, 210 | }, 211 | uri::{ 212 | UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString, 213 | UriRelativeString, UriString, 214 | }, 215 | }; 216 | pub use self::{ 217 | generic::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr}, 218 | iri::{IriAbsoluteStr, IriFragmentStr, IriQueryStr, IriReferenceStr, IriRelativeStr, IriStr}, 219 | uri::{UriAbsoluteStr, UriFragmentStr, UriQueryStr, UriReferenceStr, UriRelativeStr, UriStr}, 220 | }; 221 | 222 | pub(crate) mod generic; 223 | mod iri; 224 | mod uri; 225 | -------------------------------------------------------------------------------- /src/types/generic.rs: -------------------------------------------------------------------------------- 1 | //! Generic resource identifier types. 2 | //! 3 | //! ```text 4 | //! IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ] 5 | //! IRI-reference = IRI / irelative-ref 6 | //! absolute-IRI = scheme ":" ihier-part [ "?" iquery ] 7 | //! irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] 8 | //! (`irelative-part` is roughly same as `ihier-part`.) 9 | //! ``` 10 | //! 11 | //! Hierarchy: 12 | //! 13 | //! ```text 14 | //! RiReferenceStr 15 | //! |-- RiStr 16 | //! | `-- RiAbsoluteStr 17 | //! `-- RiRelativeStr 18 | //! ``` 19 | //! 20 | //! Therefore, the conversions below are safe and cheap: 21 | //! 22 | //! * `RiStr -> RiReferenceStr` 23 | //! * `RiAbsoluteStr -> RiStr` 24 | //! * `RiAbsoluteStr -> RiReferenceStr` 25 | //! * `RiRelativeStr -> RiReferenceStr` 26 | //! 27 | //! 
For safely convertible types (consider `FooStr -> BarStr` is safe), traits 28 | //! below are implemented: 29 | //! 30 | //! * `AsRef for FooStr` 31 | //! * `AsRef for FooString` 32 | //! * `From for BarString` 33 | //! * `PartialEq for BarStr` and lots of impls like that 34 | //! + `PartialEq` and `ParitalOrd`. 35 | //! + Slice, owned, `Cow`, reference, etc... 36 | 37 | pub use self::{ 38 | absolute::RiAbsoluteStr, fragment::RiFragmentStr, normal::RiStr, query::RiQueryStr, 39 | reference::RiReferenceStr, relative::RiRelativeStr, 40 | }; 41 | #[cfg(feature = "alloc")] 42 | pub use self::{ 43 | absolute::RiAbsoluteString, error::CreationError, fragment::RiFragmentString, normal::RiString, 44 | query::RiQueryString, reference::RiReferenceString, relative::RiRelativeString, 45 | }; 46 | 47 | #[macro_use] 48 | mod macros; 49 | 50 | mod absolute; 51 | #[cfg(feature = "alloc")] 52 | mod error; 53 | mod fragment; 54 | mod normal; 55 | mod query; 56 | mod reference; 57 | mod relative; 58 | -------------------------------------------------------------------------------- /src/types/generic/error.rs: -------------------------------------------------------------------------------- 1 | //! Resource identifier creation error. 2 | 3 | use core::fmt; 4 | 5 | #[cfg(feature = "std")] 6 | use std::error; 7 | 8 | use crate::validate::Error; 9 | 10 | /// Error on conversion into an IRI type. 11 | /// 12 | /// Enabled by `alloc` or `std` feature. 13 | // This type itself does not require `alloc` or `std, but the type is used only when `alloc` 14 | // feature is enabled. To avoid exporting unused stuff, the type (and the `types::generic::error` 15 | // module) is available only when necessary. 16 | // 17 | // Note that all types which implement `Spec` also implement `SpecInternal`. 18 | pub struct CreationError { 19 | /// Soruce data. 20 | source: T, 21 | /// Validation error. 22 | error: Error, 23 | } 24 | 25 | impl CreationError { 26 | /// Returns the source data. 
27 | #[must_use] 28 | pub fn into_source(self) -> T { 29 | self.source 30 | } 31 | 32 | /// Returns the validation error. 33 | #[must_use] 34 | pub fn validation_error(&self) -> Error { 35 | self.error 36 | } 37 | 38 | /// Creates a new `CreationError`. 39 | #[must_use] 40 | pub(crate) fn new(error: Error, source: T) -> Self { 41 | Self { source, error } 42 | } 43 | } 44 | 45 | impl fmt::Debug for CreationError { 46 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 47 | f.debug_struct("CreationError") 48 | .field("source", &self.source) 49 | .field("error", &self.error) 50 | .finish() 51 | } 52 | } 53 | 54 | impl Clone for CreationError { 55 | fn clone(&self) -> Self { 56 | Self { 57 | source: self.source.clone(), 58 | error: self.error, 59 | } 60 | } 61 | } 62 | 63 | impl fmt::Display for CreationError { 64 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 65 | self.error.fmt(f) 66 | } 67 | } 68 | 69 | #[cfg(feature = "std")] 70 | impl error::Error for CreationError {} 71 | -------------------------------------------------------------------------------- /src/types/generic/fragment.rs: -------------------------------------------------------------------------------- 1 | //! Fragment string. 2 | 3 | use crate::{ 4 | spec::Spec, 5 | validate::{fragment, Error}, 6 | }; 7 | 8 | define_custom_string_slice! { 9 | /// A borrowed slice of an IRI fragment (i.e. after the first `#` character). 10 | /// 11 | /// This corresponds to [`ifragment` rule] in [RFC 3987] (and [`fragment` rule] in [RFC 3986]). 12 | /// The rule for `ifragment` is `*( ipchar / "/" / "?" )`. 13 | /// 14 | /// # Valid values 15 | /// 16 | /// This type can have an IRI fragment. 17 | /// Note that the IRI `foo://bar/baz#qux` has the fragment `qux`, **not** `#qux`. 
18 | /// 19 | /// ``` 20 | /// # use iri_string::types::IriFragmentStr; 21 | /// assert!(IriFragmentStr::new("").is_ok()); 22 | /// assert!(IriFragmentStr::new("foo").is_ok()); 23 | /// assert!(IriFragmentStr::new("foo/bar").is_ok()); 24 | /// assert!(IriFragmentStr::new("/foo/bar").is_ok()); 25 | /// assert!(IriFragmentStr::new("//foo/bar").is_ok()); 26 | /// assert!(IriFragmentStr::new("https://user:pass@example.com:8080").is_ok()); 27 | /// assert!(IriFragmentStr::new("https://example.com/").is_ok()); 28 | /// ``` 29 | /// 30 | /// Some characters and sequences cannot be used in a fragment. 31 | /// 32 | /// ``` 33 | /// # use iri_string::types::IriFragmentStr; 34 | /// // `<` and `>` cannot directly appear in an IRI reference. 35 | /// assert!(IriFragmentStr::new("<not allowed>").is_err()); 36 | /// // Broken percent encoding cannot appear in an IRI reference. 37 | /// assert!(IriFragmentStr::new("%").is_err()); 38 | /// assert!(IriFragmentStr::new("%GG").is_err()); 39 | /// // Hash sign `#` cannot appear in an IRI fragment. 40 | /// assert!(IriFragmentStr::new("#hash").is_err()); 41 | /// ``` 42 | /// 43 | /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 44 | /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 45 | /// [`fragment` rule]: https://tools.ietf.org/html/rfc3986#section-3.5 46 | /// [`ifragment` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 47 | struct RiFragmentStr { 48 | validator = fragment, 49 | expecting_msg = "IRI fragment string", 50 | } 51 | } 52 | 53 | #[cfg(feature = "alloc")] 54 | define_custom_string_owned! { 55 | /// An owned string of an IRI fragment (i.e. after the first `#` character). 56 | /// 57 | /// This corresponds to [`ifragment` rule] in [RFC 3987] (and [`fragment` rule] in [RFC 3986]). 58 | /// The rule for `ifragment` is `*( ipchar / "/" / "?" )`. 59 | /// 60 | /// For details, see the documentation for [`RiFragmentStr`]. 61 | /// 62 | /// Enabled by `alloc` or `std` feature. 
63 | /// 64 | /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 65 | /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 66 | /// [`fragment` rule]: https://tools.ietf.org/html/rfc3986#section-3.5 67 | /// [`ifragment` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 68 | /// [`RiFragmentStr`]: struct.RiFragmentStr.html 69 | struct RiFragmentString { 70 | validator = fragment, 71 | slice = RiFragmentStr, 72 | expecting_msg = "IRI fragment string", 73 | } 74 | } 75 | 76 | impl RiFragmentStr { 77 | /// Creates a new `&RiFragmentStr` from the fragment part prefixed by `#`. 78 | /// 79 | /// # Examples 80 | /// 81 | /// ``` 82 | /// # use iri_string::types::IriFragmentStr; 83 | /// assert!(IriFragmentStr::from_prefixed("#").is_ok()); 84 | /// assert!(IriFragmentStr::from_prefixed("#foo").is_ok()); 85 | /// assert!(IriFragmentStr::from_prefixed("#foo/bar").is_ok()); 86 | /// assert!(IriFragmentStr::from_prefixed("#/foo/bar").is_ok()); 87 | /// assert!(IriFragmentStr::from_prefixed("#//foo/bar").is_ok()); 88 | /// assert!(IriFragmentStr::from_prefixed("#https://user:pass@example.com:8080").is_ok()); 89 | /// assert!(IriFragmentStr::from_prefixed("#https://example.com/").is_ok()); 90 | /// 91 | /// // `<` and `>` cannot directly appear in an IRI. 92 | /// assert!(IriFragmentStr::from_prefixed("#<not allowed>").is_err()); 93 | /// // Broken percent encoding cannot appear in an IRI. 94 | /// assert!(IriFragmentStr::from_prefixed("#%").is_err()); 95 | /// assert!(IriFragmentStr::from_prefixed("#%GG").is_err()); 96 | /// // `#` prefix is expected. 97 | /// assert!(IriFragmentStr::from_prefixed("").is_err()); 98 | /// assert!(IriFragmentStr::from_prefixed("foo").is_err()); 99 | /// // Hash sign `#` cannot appear in an IRI fragment. 
100 | /// assert!(IriFragmentStr::from_prefixed("##hash").is_err()); 101 | /// ``` 102 | pub fn from_prefixed(s: &str) -> Result<&Self, Error> { 103 | if !s.starts_with('#') { 104 | return Err(Error::new()); 105 | } 106 | TryFrom::try_from(&s[1..]) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/types/generic/query.rs: -------------------------------------------------------------------------------- 1 | //! Query string. 2 | 3 | use crate::{ 4 | spec::Spec, 5 | validate::{query, Error}, 6 | }; 7 | 8 | define_custom_string_slice! { 9 | /// A borrowed slice of an IRI query (i.e. after the first `?` and before the first `#`). 10 | /// 11 | /// This corresponds to [`iquery` rule] in [RFC 3987] (and [`query` rule] in [RFC 3986]). 12 | /// The rule for `iquery` is `*( ipchar / iprivate / "/" / "?" )`. 13 | /// 14 | /// # Valid values 15 | /// 16 | /// This type can have an IRI query. 17 | /// Note that the IRI `foo://bar/baz?qux#quux` has the query `qux`, **not** `?qux`. 18 | /// 19 | /// ``` 20 | /// # use iri_string::types::IriFragmentStr; 21 | /// assert!(IriFragmentStr::new("").is_ok()); 22 | /// assert!(IriFragmentStr::new("foo").is_ok()); 23 | /// assert!(IriFragmentStr::new("foo/bar").is_ok()); 24 | /// assert!(IriFragmentStr::new("/foo/bar").is_ok()); 25 | /// assert!(IriFragmentStr::new("//foo/bar").is_ok()); 26 | /// assert!(IriFragmentStr::new("https://user:pass@example.com:8080").is_ok()); 27 | /// assert!(IriFragmentStr::new("https://example.com/").is_ok()); 28 | /// ``` 29 | /// 30 | /// Some characters and sequences cannot be used in a fragment. 31 | /// 32 | /// ``` 33 | /// # use iri_string::types::IriFragmentStr; 34 | /// // `<` and `>` cannot directly appear in an IRI reference. 35 | /// assert!(IriFragmentStr::new("<not allowed>").is_err()); 36 | /// // Broken percent encoding cannot appear in an IRI reference. 
37 | /// assert!(IriFragmentStr::new("%").is_err()); 38 | /// assert!(IriFragmentStr::new("%GG").is_err()); 39 | /// // Hash sign `#` cannot appear in an IRI fragment. 40 | /// assert!(IriFragmentStr::new("#hash").is_err()); 41 | /// ``` 42 | /// ``` 43 | /// use iri_string::types::IriQueryStr; 44 | /// assert!(IriQueryStr::new("").is_ok()); 45 | /// assert!(IriQueryStr::new("foo").is_ok()); 46 | /// assert!(IriQueryStr::new("foo/bar").is_ok()); 47 | /// assert!(IriQueryStr::new("/foo/bar").is_ok()); 48 | /// assert!(IriQueryStr::new("//foo/bar").is_ok()); 49 | /// assert!(IriQueryStr::new("https://user:pass@example.com:8080").is_ok()); 50 | /// assert!(IriQueryStr::new("https://example.com/").is_ok()); 51 | /// // Question sign `?` can appear in an IRI query. 52 | /// assert!(IriQueryStr::new("query?again").is_ok()); 53 | /// ``` 54 | /// 55 | /// Some characters and sequences cannot be used in a query. 56 | /// 57 | /// ``` 58 | /// use iri_string::types::IriQueryStr; 59 | /// // `<` and `>` cannot directly appear in an IRI reference. 60 | /// assert!(IriQueryStr::new("<not allowed>").is_err()); 61 | /// // Broken percent encoding cannot appear in an IRI reference. 62 | /// assert!(IriQueryStr::new("%").is_err()); 63 | /// assert!(IriQueryStr::new("%GG").is_err()); 64 | /// // Hash sign `#` cannot appear in an IRI query. 65 | /// assert!(IriQueryStr::new("#hash").is_err()); 66 | /// ``` 67 | /// 68 | /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 69 | /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 70 | /// [`query` rule]: https://tools.ietf.org/html/rfc3986#section-3.4 71 | /// [`iquery` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 72 | struct RiQueryStr { 73 | validator = query, 74 | expecting_msg = "IRI query string", 75 | } 76 | } 77 | 78 | #[cfg(feature = "alloc")] 79 | define_custom_string_owned! { 80 | /// An owned string of an IRI query (i.e. after the first `?` and before the first `#`). 
81 | /// 82 | /// This corresponds to [`iquery` rule] in [RFC 3987] (and [`query` rule] in [RFC 3986]). 83 | /// The rule for `iquery` is `*( ipchar / iprivate / "/" / "?" )`. 84 | /// 85 | /// For details, see the documentation for [`RiQueryStr`]. 86 | /// 87 | /// Enabled by `alloc` or `std` feature. 88 | /// 89 | /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 90 | /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 91 | /// [`query` rule]: https://tools.ietf.org/html/rfc3986#section-3.4 92 | /// [`iquery` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 93 | /// [`RiQueryStr`]: struct.RiQueryStr.html 94 | struct RiQueryString { 95 | validator = query, 96 | slice = RiQueryStr, 97 | expecting_msg = "IRI query string", 98 | } 99 | } 100 | 101 | impl RiQueryStr { 102 | /// Creates a new `&RiQueryStr` from the query part prefixed by `?`. 103 | /// 104 | /// # Examples 105 | /// 106 | /// ``` 107 | /// # use iri_string::types::IriQueryStr; 108 | /// assert!(IriQueryStr::from_prefixed("?").is_ok()); 109 | /// assert!(IriQueryStr::from_prefixed("?foo").is_ok()); 110 | /// assert!(IriQueryStr::from_prefixed("?foo/bar").is_ok()); 111 | /// assert!(IriQueryStr::from_prefixed("?/foo/bar").is_ok()); 112 | /// assert!(IriQueryStr::from_prefixed("?//foo/bar").is_ok()); 113 | /// assert!(IriQueryStr::from_prefixed("?https://user:pass@example.com:8080").is_ok()); 114 | /// assert!(IriQueryStr::from_prefixed("?https://example.com/").is_ok()); 115 | /// // Question sign `?` can appear in an IRI query. 116 | /// assert!(IriQueryStr::from_prefixed("?query?again").is_ok()); 117 | /// 118 | /// // `<` and `>` cannot directly appear in an IRI. 119 | /// assert!(IriQueryStr::from_prefixed("?<not allowed>").is_err()); 120 | /// // Broken percent encoding cannot appear in an IRI. 121 | /// assert!(IriQueryStr::from_prefixed("?%").is_err()); 122 | /// assert!(IriQueryStr::from_prefixed("?%GG").is_err()); 123 | /// // `?` prefix is expected. 
124 | /// assert!(IriQueryStr::from_prefixed("").is_err()); 125 | /// assert!(IriQueryStr::from_prefixed("foo").is_err()); 126 | /// // Hash sign `#` cannot appear in an IRI query. 127 | /// assert!(IriQueryStr::from_prefixed("?#hash").is_err()); 128 | /// ``` 129 | pub fn from_prefixed(s: &str) -> Result<&Self, Error> { 130 | if !s.starts_with('?') { 131 | return Err(Error::new()); 132 | } 133 | TryFrom::try_from(&s[1..]) 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/types/uri.rs: -------------------------------------------------------------------------------- 1 | //! URI-specific implementations. 2 | 3 | use crate::spec::UriSpec; 4 | use crate::types::{ 5 | IriAbsoluteStr, IriFragmentStr, IriQueryStr, IriReferenceStr, IriRelativeStr, IriStr, 6 | RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr, 7 | }; 8 | #[cfg(feature = "alloc")] 9 | use crate::types::{ 10 | IriAbsoluteString, IriFragmentString, IriQueryString, IriReferenceString, IriRelativeString, 11 | IriString, RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, 12 | RiRelativeString, RiString, 13 | }; 14 | 15 | /// A type alias for [`RiAbsoluteStr`]`<`[`UriSpec`]`>`. 16 | pub type UriAbsoluteStr = RiAbsoluteStr; 17 | 18 | /// A type alias for [`RiAbsoluteString`]`<`[`UriSpec`]`>`. 19 | #[cfg(feature = "alloc")] 20 | pub type UriAbsoluteString = RiAbsoluteString; 21 | 22 | /// A type alias for [`RiFragmentStr`]`<`[`UriSpec`]`>`. 23 | pub type UriFragmentStr = RiFragmentStr; 24 | 25 | /// A type alias for [`RiFragmentString`]`<`[`UriSpec`]`>`. 26 | #[cfg(feature = "alloc")] 27 | pub type UriFragmentString = RiFragmentString; 28 | 29 | /// A type alias for [`RiStr`]`<`[`UriSpec`]`>`. 30 | pub type UriStr = RiStr; 31 | 32 | /// A type alias for [`RiString`]`<`[`UriSpec`]`>`. 
33 | #[cfg(feature = "alloc")] 34 | pub type UriString = RiString; 35 | 36 | /// A type alias for [`RiReferenceStr`]`<`[`UriSpec`]`>`. 37 | pub type UriReferenceStr = RiReferenceStr; 38 | 39 | /// A type alias for [`RiReferenceString`]`<`[`UriSpec`]`>`. 40 | #[cfg(feature = "alloc")] 41 | pub type UriReferenceString = RiReferenceString; 42 | 43 | /// A type alias for [`RiRelativeStr`]`<`[`UriSpec`]`>`. 44 | pub type UriRelativeStr = RiRelativeStr; 45 | 46 | /// A type alias for [`RiRelativeString`]`<`[`UriSpec`]`>`. 47 | #[cfg(feature = "alloc")] 48 | pub type UriRelativeString = RiRelativeString; 49 | 50 | /// A type alias for [`RiQueryStr`]`<`[`UriSpec`]`>`. 51 | pub type UriQueryStr = RiQueryStr; 52 | 53 | /// A type alias for [`RiQueryString`]`<`[`UriSpec`]`>`. 54 | #[cfg(feature = "alloc")] 55 | pub type UriQueryString = RiQueryString; 56 | 57 | /// Implements the trivial conversions between a URI and an IRI. 58 | macro_rules! impl_conversions_between_iri { 59 | ( 60 | $borrowed_uri:ident, 61 | $owned_uri:ident, 62 | $borrowed_iri:ident, 63 | $owned_iri:ident, 64 | ) => { 65 | impl AsRef<$borrowed_iri> for $borrowed_uri { 66 | fn as_ref(&self) -> &$borrowed_iri { 67 | // SAFETY: A valid URI is also a valid IRI. 68 | unsafe { <$borrowed_iri>::new_maybe_unchecked(self.as_str()) } 69 | } 70 | } 71 | 72 | #[cfg(feature = "alloc")] 73 | impl From<$owned_uri> for $owned_iri { 74 | #[inline] 75 | fn from(uri: $owned_uri) -> Self { 76 | // SAFETY: A valid URI is also a valid IRI. 
77 | unsafe { Self::new_maybe_unchecked(uri.into()) } 78 | } 79 | } 80 | 81 | #[cfg(feature = "alloc")] 82 | impl AsRef<$borrowed_iri> for $owned_uri { 83 | fn as_ref(&self) -> &$borrowed_iri { 84 | AsRef::<$borrowed_uri>::as_ref(self).as_ref() 85 | } 86 | } 87 | }; 88 | } 89 | 90 | impl_conversions_between_iri!( 91 | UriAbsoluteStr, 92 | UriAbsoluteString, 93 | IriAbsoluteStr, 94 | IriAbsoluteString, 95 | ); 96 | impl_conversions_between_iri!( 97 | UriReferenceStr, 98 | UriReferenceString, 99 | IriReferenceStr, 100 | IriReferenceString, 101 | ); 102 | impl_conversions_between_iri!( 103 | UriRelativeStr, 104 | UriRelativeString, 105 | IriRelativeStr, 106 | IriRelativeString, 107 | ); 108 | impl_conversions_between_iri!(UriStr, UriString, IriStr, IriString,); 109 | impl_conversions_between_iri!(UriQueryStr, UriQueryString, IriQueryStr, IriQueryString,); 110 | impl_conversions_between_iri!( 111 | UriFragmentStr, 112 | UriFragmentString, 113 | IriFragmentStr, 114 | IriFragmentString, 115 | ); 116 | -------------------------------------------------------------------------------- /tests/gh-issues.rs: -------------------------------------------------------------------------------- 1 | //! Test cases for issues reported on GitHub. 
2 | 3 | #[macro_use] 4 | mod utils; 5 | 6 | use iri_string::types::UriReferenceStr; 7 | 8 | mod issue_17 { 9 | use super::*; 10 | 11 | #[test] 12 | fn ipv6_literal_authority_host() { 13 | let uri = UriReferenceStr::new("//[::1]").expect("valid relative URI"); 14 | let authority = uri 15 | .authority_components() 16 | .expect("the URI has authority `[::1]`"); 17 | assert_eq!(authority.host(), "[::1]"); 18 | } 19 | 20 | #[test] 21 | fn extra_trailing_colon_in_ipv6_literal() { 22 | assert!(UriReferenceStr::new("//[::1:]").is_err()); 23 | } 24 | 25 | #[test] 26 | fn ipvfuture_literal_capital_v() { 27 | assert!(UriReferenceStr::new("//[v0.0]").is_ok()); 28 | assert!(UriReferenceStr::new("//[V0.0]").is_ok()); 29 | } 30 | 31 | #[test] 32 | fn ipvfuture_empty_part() { 33 | assert!( 34 | UriReferenceStr::new("//[v0.]").is_err(), 35 | "address should not be empty" 36 | ); 37 | assert!( 38 | UriReferenceStr::new("//[v.0]").is_err(), 39 | "version should not be empty" 40 | ); 41 | assert!( 42 | UriReferenceStr::new("//[v.]").is_err(), 43 | "neither address nor version should be empty" 44 | ); 45 | } 46 | } 47 | 48 | mod issue_36 { 49 | use super::*; 50 | 51 | #[cfg(feature = "alloc")] 52 | use iri_string::format::ToDedicatedString; 53 | use iri_string::types::UriAbsoluteStr; 54 | 55 | // "/.//.".resolve_against("a:/") 56 | // => "a:" + remove_dot_segments("/.//.") 57 | // 58 | // STEP OUTPUT BUFFER INPUT BUFFER 59 | // 1 : /.//. 60 | // 2B: //. 61 | // 2E: / /. 62 | // 2B: / / 63 | // 2E: // 64 | // (see RFC 3986 section 5.2.4 for this notation.) 65 | // 66 | // => "a://" 67 | // 68 | // However, this is invalid since it should be semantically 69 | // `:` but this string will be parsed as 70 | // `://`. So, `./` should be inserted to break 71 | // `//` at the beginning of the path part. 
72 | #[test] 73 | fn abnormal_resolution() { 74 | let base = UriAbsoluteStr::new("a:/").expect("valid absolute URI"); 75 | { 76 | let relative = UriReferenceStr::new("/.//.").expect("valid relative URI"); 77 | let result = relative.resolve_against(base); 78 | 79 | assert!( 80 | result.ensure_rfc3986_normalizable().is_err(), 81 | "strict RFC 3986 resolution should fail for base={:?}, ref={:?}", 82 | base, 83 | relative 84 | ); 85 | assert_eq_display!( 86 | result, 87 | "a:/.//", 88 | "resolution result will be modified using serialization by WHATWG URL Standard" 89 | ); 90 | } 91 | { 92 | let relative = UriReferenceStr::new(".//.").expect("valid relative URI"); 93 | let result = relative.resolve_against(base); 94 | 95 | assert!( 96 | result.ensure_rfc3986_normalizable().is_err(), 97 | "strict RFC 3986 resolution should fail for base={:?}, ref={:?}", 98 | base, 99 | relative 100 | ); 101 | assert_eq_display!( 102 | result, 103 | "a:/.//", 104 | "resolution result will be modified using serialization by WHATWG URL Standard" 105 | ); 106 | } 107 | } 108 | 109 | #[test] 110 | fn abnormal_normalization() { 111 | let uri = UriAbsoluteStr::new("a:/.//.").expect("valid absolute URI"); 112 | 113 | let normalized = uri.normalize(); 114 | assert!( 115 | normalized.ensure_rfc3986_normalizable().is_err(), 116 | "strict RFC 3986 normalization should fail for uri={:?}", 117 | uri 118 | ); 119 | assert_eq_display!( 120 | normalized, 121 | "a:/.//", 122 | "normalization result will be modified using serialization by WHATWG URL Standard" 123 | ); 124 | 125 | #[cfg(feature = "alloc")] 126 | { 127 | assert!( 128 | !normalized.to_dedicated_string().is_normalized_rfc3986(), 129 | "not normalizable by strict RFC 3986 algorithm" 130 | ); 131 | } 132 | } 133 | 134 | #[test] 135 | fn abnormal_normalization2() { 136 | { 137 | let uri = UriAbsoluteStr::new("a:/bar//.").expect("valid absolute URI"); 138 | assert_eq_display!(uri.normalize(), "a:/bar//"); 139 | } 140 | { 141 | let uri = 
UriAbsoluteStr::new("a:/bar/..//.").expect("valid absolute URI"); 142 | assert_eq_display!( 143 | uri.normalize(), 144 | "a:/.//", 145 | "normalization result will be modified using serialization by WHATWG URL Standard" 146 | ); 147 | } 148 | { 149 | let uri = UriAbsoluteStr::new("a:/.//bar/.").expect("valid absolute URI"); 150 | assert_eq_display!( 151 | uri.normalize(), 152 | "a:/.//bar/", 153 | "normalization result will be modified using serialization by WHATWG URL Standard" 154 | ); 155 | } 156 | { 157 | let uri = UriAbsoluteStr::new("a:/././././././foo/./.././././././././././/.") 158 | .expect("valid absolute URI"); 159 | assert_eq_display!( 160 | uri.normalize(), 161 | "a:/.//", 162 | "normalization result will be modified using serialization by WHATWG URL Standard" 163 | ); 164 | } 165 | } 166 | 167 | #[test] 168 | fn normalization_pct_triplet_loss() { 169 | let uri = UriAbsoluteStr::new("a://%92%99").expect("valid absolute URI"); 170 | assert_eq_display!(uri.normalize(), "a://%92%99"); 171 | // Other problems are found during fixing this bug. The test cases for 172 | // them have been added to generic test case data source. 
173 | } 174 | } 175 | 176 | /// 177 | #[cfg(feature = "alloc")] 178 | mod issue_46 { 179 | use iri_string::types::{UriFragmentStr, UriRelativeString}; 180 | 181 | #[test] 182 | fn set_fragment_to_relative() { 183 | let mut uri = 184 | UriRelativeString::try_from("//user:password@example.com/path?query#frag.old") 185 | .expect("valid relative URI"); 186 | assert_eq!(uri, "//user:password@example.com/path?query#frag.old"); 187 | assert_eq!(uri.fragment_str(), Some("frag.old")); 188 | 189 | uri.set_fragment(None); 190 | assert_eq!(uri, "//user:password@example.com/path?query"); 191 | assert_eq!(uri.fragment(), None); 192 | 193 | let frag_new = UriFragmentStr::new("frag-new").expect("valid URI fragment"); 194 | uri.set_fragment(Some(frag_new)); 195 | assert_eq!(uri.fragment_str(), Some("frag-new")); 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /tests/iri.rs: -------------------------------------------------------------------------------- 1 | //! Tests specific to IRIs (not URIs). 2 | 3 | #[macro_use] 4 | mod utils; 5 | 6 | use iri_string::format::write_to_slice; 7 | #[cfg(feature = "alloc")] 8 | use iri_string::format::ToDedicatedString; 9 | #[cfg(feature = "alloc")] 10 | use iri_string::types::IriReferenceString; 11 | use iri_string::types::{IriReferenceStr, UriReferenceStr}; 12 | 13 | #[derive(Debug, Clone, Copy)] 14 | struct TestCase { 15 | iri: &'static str, 16 | uri: &'static str, 17 | } 18 | 19 | // `[(iri, uri)]`. 
20 | const CASES: &[TestCase] = &[ 21 | TestCase { 22 | iri: "?alpha=\u{03B1}", 23 | uri: "?alpha=%CE%B1", 24 | }, 25 | TestCase { 26 | iri: "?katakana-letter-i=\u{30A4}", 27 | uri: "?katakana-letter-i=%E3%82%A4", 28 | }, 29 | TestCase { 30 | iri: "?sushi=\u{1f363}", 31 | uri: "?sushi=%F0%9F%8D%A3", 32 | }, 33 | ]; 34 | 35 | #[test] 36 | fn iri_to_uri() { 37 | let mut buf = [0_u8; 256]; 38 | let mut buf2 = [0_u8; 256]; 39 | 40 | for case in CASES.iter().copied() { 41 | let expected = UriReferenceStr::new(case.uri).expect("should be valid URI reference"); 42 | 43 | let iri = IriReferenceStr::new(case.iri).expect("should be valid URI reference"); 44 | let encoded = iri.encode_to_uri(); 45 | assert_eq_display!(encoded, expected); 46 | let encoded_uri = write_to_slice(&mut buf, &encoded).expect("not enough buffer"); 47 | let encoded_uri = UriReferenceStr::new(encoded_uri).expect("should be valid URI reference"); 48 | assert_eq!(encoded_uri, expected); 49 | 50 | let encoded_again = AsRef::::as_ref(encoded_uri).encode_to_uri(); 51 | assert_eq_display!(encoded_again, expected); 52 | let encoded_again_uri = 53 | write_to_slice(&mut buf2, &encoded_again).expect("not enough buffer"); 54 | let encoded_again_uri = 55 | UriReferenceStr::new(encoded_again_uri).expect("should be valid URI reference"); 56 | assert_eq!(encoded_again_uri, expected); 57 | } 58 | } 59 | 60 | #[cfg(feature = "alloc")] 61 | #[test] 62 | fn iri_to_uri_allocated() { 63 | for case in CASES.iter().copied() { 64 | let expected = UriReferenceStr::new(case.uri).expect("should be valid URI reference"); 65 | 66 | let iri = IriReferenceStr::new(case.iri).expect("should be valid URI reference"); 67 | let encoded = iri.encode_to_uri().to_dedicated_string(); 68 | assert_eq!(encoded, expected); 69 | 70 | let encoded_again = AsRef::::as_ref(&encoded) 71 | .encode_to_uri() 72 | .to_dedicated_string(); 73 | assert_eq!(encoded_again, expected); 74 | } 75 | } 76 | 77 | #[cfg(feature = "alloc")] 78 | #[test] 79 | fn 
iri_to_uri_inline() { 80 | for case in CASES.iter().copied() { 81 | let expected = UriReferenceStr::new(case.uri).expect("should be valid URI reference"); 82 | 83 | let mut iri = 84 | IriReferenceString::try_from(case.iri).expect("should be valid URI reference"); 85 | 86 | iri.encode_to_uri_inline(); 87 | assert_eq!(iri, expected); 88 | 89 | iri.encode_to_uri_inline(); 90 | assert_eq!( 91 | iri, expected, 92 | "``encode_to_uri_inline()` method should be idempotent" 93 | ); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /tests/normalize.rs: -------------------------------------------------------------------------------- 1 | //! Tests for normalization. 2 | 3 | mod components; 4 | #[macro_use] 5 | mod utils; 6 | 7 | #[cfg(feature = "alloc")] 8 | use iri_string::format::ToDedicatedString; 9 | use iri_string::types::*; 10 | 11 | use self::components::TEST_CASES; 12 | 13 | /// Semantically different IRIs should not be normalized into the same IRI. 14 | #[test] 15 | fn different_iris() { 16 | for case in TEST_CASES 17 | .iter() 18 | .filter(|case| !case.different_iris.is_empty()) 19 | { 20 | let normalized = IriStr::new(case.normalized_iri).expect("should be valid IRI reference"); 21 | for other in case.different_iris.iter().copied() { 22 | let other = IriStr::new(other).expect("should be valid IRI reference"); 23 | assert_ne!( 24 | normalized, other, 25 | "<{}> should not be normalized to <{other}>, case={case:#?}", 26 | case.composed 27 | ); 28 | } 29 | } 30 | } 31 | 32 | /// Normalization should work for IRI. 
33 | #[test] 34 | fn normalize_uri() { 35 | for case in TEST_CASES 36 | .iter() 37 | .filter(|case| case.is_uri_class() && case.is_absolute()) 38 | { 39 | let source = UriStr::new(case.composed).expect("should be valid URI"); 40 | let normalized = source.normalize(); 41 | let expected = UriStr::new(case.normalized_uri).expect("should be valid URI"); 42 | 43 | assert_eq_display!(normalized, expected, "case={case:#?}"); 44 | #[cfg(feature = "alloc")] 45 | assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}"); 46 | #[cfg(feature = "alloc")] 47 | assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}"); 48 | 49 | assert_eq!( 50 | case.is_rfc3986_normalizable(), 51 | normalized.ensure_rfc3986_normalizable().is_ok(), 52 | "case={case:#?}" 53 | ); 54 | } 55 | } 56 | 57 | /// Normalization should work for IRI. 58 | #[test] 59 | fn normalize_iri() { 60 | for case in TEST_CASES 61 | .iter() 62 | .filter(|case| case.is_iri_class() && case.is_absolute()) 63 | { 64 | let source = IriStr::new(case.composed).expect("should be valid IRI"); 65 | let normalized = source.normalize(); 66 | let expected = IriStr::new(case.normalized_iri).expect("should be valid IRI"); 67 | 68 | assert_eq_display!(normalized, expected, "case={case:#?}"); 69 | #[cfg(feature = "alloc")] 70 | assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}"); 71 | #[cfg(feature = "alloc")] 72 | assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}"); 73 | 74 | assert_eq!( 75 | case.is_rfc3986_normalizable(), 76 | normalized.ensure_rfc3986_normalizable().is_ok(), 77 | "case={case:#?}" 78 | ); 79 | } 80 | } 81 | 82 | /// WHATWG-like normalization should work for IRI. 
83 | #[test] 84 | fn normalize_uri_whatwg_like() { 85 | for case in TEST_CASES 86 | .iter() 87 | .filter(|case| case.is_uri_class() && case.is_absolute()) 88 | { 89 | let source = UriStr::new(case.composed).expect("should be valid URI"); 90 | let normalized = source.normalize_but_preserve_authorityless_relative_path(); 91 | let expected = UriStr::new( 92 | case.normalized_uri_whatwg_like 93 | .unwrap_or(case.normalized_uri), 94 | ) 95 | .expect("should be valid URI"); 96 | 97 | assert_eq_display!(normalized, expected, "case={case:#?}"); 98 | #[cfg(feature = "alloc")] 99 | assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}"); 100 | #[cfg(feature = "alloc")] 101 | assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}"); 102 | 103 | assert_eq!( 104 | case.is_rfc3986_normalizable(), 105 | normalized.ensure_rfc3986_normalizable().is_ok(), 106 | "case={case:#?}" 107 | ); 108 | } 109 | } 110 | 111 | /// WHATWG-like normalization should work for IRI. 112 | #[test] 113 | fn normalize_iri_whatwg_like() { 114 | for case in TEST_CASES 115 | .iter() 116 | .filter(|case| case.is_iri_class() && case.is_absolute()) 117 | { 118 | let source = IriStr::new(case.composed).expect("should be valid IRI"); 119 | let normalized = source.normalize_but_preserve_authorityless_relative_path(); 120 | let expected = IriStr::new( 121 | case.normalized_iri_whatwg_like 122 | .unwrap_or(case.normalized_iri), 123 | ) 124 | .expect("should be valid IRI"); 125 | 126 | assert_eq_display!(normalized, expected, "case={case:#?}"); 127 | #[cfg(feature = "alloc")] 128 | assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}"); 129 | #[cfg(feature = "alloc")] 130 | assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}"); 131 | 132 | assert_eq!( 133 | case.is_rfc3986_normalizable(), 134 | normalized.ensure_rfc3986_normalizable().is_ok(), 135 | "case={case:#?}" 136 | ); 137 | } 138 | } 139 | 140 | /// Normalization should be idempotent. 
141 | #[test] 142 | fn normalize_idempotent() { 143 | let mut buf = [0_u8; 512]; 144 | 145 | for case in TEST_CASES 146 | .iter() 147 | .filter(|case| case.is_iri_class() && case.is_absolute()) 148 | { 149 | let source = IriStr::new(case.composed).expect("should be valid IRI"); 150 | let normalized = source.normalize(); 151 | let expected = IriStr::new(case.normalized_iri).expect("should be valid IRI"); 152 | 153 | let normalized_s = 154 | iri_string::format::write_to_slice(&mut buf, &normalized).expect("not enough buffer"); 155 | let normalized_s = IriStr::new(normalized_s).expect("should be valid IRI reference"); 156 | 157 | // Normalize again. 158 | let normalized_again = normalized_s.normalize(); 159 | assert_eq_display!(normalized_again, expected, "case={case:#?}"); 160 | } 161 | } 162 | 163 | /// Normalizedness checks. 164 | #[test] 165 | fn normalizedness() { 166 | #[derive(Debug, Clone, Copy)] 167 | struct Case { 168 | iri: &'static str, 169 | is_normalized_default: bool, 170 | is_normalized_rfc3986: bool, 171 | is_normalized_whatwg_like: bool, 172 | } 173 | const CASES: &[Case] = &[ 174 | Case { 175 | iri: "scheme:/.//foo", 176 | is_normalized_default: true, 177 | is_normalized_rfc3986: false, 178 | is_normalized_whatwg_like: true, 179 | }, 180 | Case { 181 | iri: "scheme:.///foo", 182 | is_normalized_default: false, 183 | is_normalized_rfc3986: false, 184 | is_normalized_whatwg_like: true, 185 | }, 186 | Case { 187 | iri: "scheme://authority/.//foo", 188 | is_normalized_default: false, 189 | is_normalized_rfc3986: false, 190 | is_normalized_whatwg_like: false, 191 | }, 192 | Case { 193 | iri: "scheme:relative/..//foo", 194 | is_normalized_default: false, 195 | is_normalized_rfc3986: false, 196 | is_normalized_whatwg_like: true, 197 | }, 198 | ]; 199 | 200 | for case in CASES { 201 | let iri = IriStr::new(case.iri).expect("should be valid IRI"); 202 | assert_eq!( 203 | iri.is_normalized(), 204 | case.is_normalized_default, 205 | "case={case:?}" 206 | ); 
207 | assert_eq!( 208 | iri.is_normalized_rfc3986(), 209 | case.is_normalized_rfc3986, 210 | "case={case:?}" 211 | ); 212 | assert_eq!( 213 | iri.is_normalized_but_authorityless_relative_path_preserved(), 214 | case.is_normalized_whatwg_like, 215 | "case={case:?}" 216 | ); 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /tests/percent_encode.rs: -------------------------------------------------------------------------------- 1 | //! Tests for percent encoding. 2 | 3 | #[cfg(feature = "alloc")] 4 | extern crate alloc; 5 | 6 | #[macro_use] 7 | mod utils; 8 | 9 | #[cfg(all(feature = "alloc", not(feature = "std")))] 10 | use alloc::string::ToString; 11 | 12 | use iri_string::percent_encode::{PercentEncodedForIri, PercentEncodedForUri}; 13 | 14 | #[test] 15 | fn regname_uri() { 16 | let encoded = PercentEncodedForUri::from_reg_name("alpha.\u{03B1}.reg.name"); 17 | let expected = "alpha.%CE%B1.reg.name"; 18 | assert_eq_display!(encoded, expected); 19 | #[cfg(feature = "alloc")] 20 | assert_eq!(encoded.to_string(), expected); 21 | } 22 | 23 | #[test] 24 | fn regname_iri() { 25 | let encoded = PercentEncodedForIri::from_reg_name("alpha.\u{03B1}.reg.name"); 26 | let expected = "alpha.\u{03B1}.reg.name"; 27 | assert_eq_display!(encoded, expected); 28 | #[cfg(feature = "alloc")] 29 | assert_eq!(encoded.to_string(), expected); 30 | } 31 | 32 | #[test] 33 | fn path_segment_uri() { 34 | let encoded = PercentEncodedForUri::from_path_segment("\u{03B1}/<alpha>?#"); 35 | let expected = "%CE%B1%2F%3Calpha%3E%3F%23"; 36 | assert_eq_display!(encoded, expected); 37 | #[cfg(feature = "alloc")] 38 | assert_eq!(encoded.to_string(), expected); 39 | } 40 | 41 | #[test] 42 | fn path_segment_iri() { 43 | let encoded = PercentEncodedForIri::from_path_segment("\u{03B1}/<alpha>?#"); 44 | let expected = "\u{03B1}%2F%3Calpha%3E%3F%23"; 45 | assert_eq_display!(encoded, expected); 46 | #[cfg(feature = "alloc")] 47 | assert_eq!(encoded.to_string(), expected); 
48 | } 49 | 50 | #[test] 51 | fn path_uri() { 52 | let encoded = PercentEncodedForUri::from_path("\u{03B1}/<alpha>?#"); 53 | let expected = "%CE%B1/%3Calpha%3E%3F%23"; 54 | assert_eq_display!(encoded, expected); 55 | #[cfg(feature = "alloc")] 56 | assert_eq!(encoded.to_string(), expected); 57 | } 58 | 59 | #[test] 60 | fn path_iri() { 61 | let encoded = PercentEncodedForIri::from_path("\u{03B1}/<alpha>?#"); 62 | let expected = "\u{03B1}/%3Calpha%3E%3F%23"; 63 | assert_eq_display!(encoded, expected); 64 | #[cfg(feature = "alloc")] 65 | assert_eq!(encoded.to_string(), expected); 66 | } 67 | 68 | #[test] 69 | fn query_uri() { 70 | let encoded = PercentEncodedForUri::from_query("\u{03B1}/<alpha>?#"); 71 | let expected = "%CE%B1/%3Calpha%3E?%23"; 72 | assert_eq_display!(encoded, expected); 73 | #[cfg(feature = "alloc")] 74 | assert_eq!(encoded.to_string(), expected); 75 | } 76 | 77 | #[test] 78 | fn query_iri() { 79 | let encoded = PercentEncodedForIri::from_query("\u{03B1}/<alpha>?#"); 80 | let expected = "\u{03B1}/%3Calpha%3E?%23"; 81 | assert_eq_display!(encoded, expected); 82 | #[cfg(feature = "alloc")] 83 | assert_eq!(encoded.to_string(), expected); 84 | } 85 | 86 | #[test] 87 | fn fragment_uri() { 88 | let encoded = PercentEncodedForUri::from_fragment("\u{03B1}/<alpha>?#"); 89 | let expected = "%CE%B1/%3Calpha%3E?%23"; 90 | assert_eq_display!(encoded, expected); 91 | #[cfg(feature = "alloc")] 92 | assert_eq!(encoded.to_string(), expected); 93 | } 94 | 95 | #[test] 96 | fn fragment_iri() { 97 | let encoded = PercentEncodedForIri::from_fragment("\u{03B1}/<alpha>?#"); 98 | let expected = "\u{03B1}/%3Calpha%3E?%23"; 99 | assert_eq_display!(encoded, expected); 100 | #[cfg(feature = "alloc")] 101 | assert_eq!(encoded.to_string(), expected); 102 | } 103 | 104 | #[test] 105 | fn unreserve_uri_unreserved() { 106 | let encoded = PercentEncodedForUri::unreserve("%a0-._~\u{03B1}"); 107 | let expected = "%25a0-._~%CE%B1"; 108 | assert_eq_display!(encoded, expected); 109 | #[cfg(feature = "alloc")] 110 | 
assert_eq!(encoded.to_string(), expected); 111 | } 112 | 113 | #[test] 114 | fn unreserve_iri_unreserved() { 115 | let encoded = PercentEncodedForIri::unreserve("%a0-._~\u{03B1}"); 116 | let expected = "%25a0-._~\u{03B1}"; 117 | assert_eq_display!(encoded, expected); 118 | #[cfg(feature = "alloc")] 119 | assert_eq!(encoded.to_string(), expected); 120 | } 121 | 122 | #[test] 123 | fn unreserve_uri_reserved() { 124 | let encoded = PercentEncodedForUri::unreserve(":/?#[]@ !$&'()*+,;="); 125 | let expected = "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D"; 126 | assert_eq_display!(encoded, expected); 127 | #[cfg(feature = "alloc")] 128 | assert_eq!(encoded.to_string(), expected); 129 | } 130 | 131 | #[test] 132 | fn unreserve_iri_reserved() { 133 | let encoded = PercentEncodedForIri::unreserve(":/?#[]@ !$&'()*+,;="); 134 | let expected = "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D"; 135 | assert_eq_display!(encoded, expected); 136 | #[cfg(feature = "alloc")] 137 | assert_eq!(encoded.to_string(), expected); 138 | } 139 | 140 | #[test] 141 | fn characters_uri_unreserved() { 142 | let encoded = PercentEncodedForUri::characters("%a0-._~\u{03B1}"); 143 | let expected = "%25a0-._~%CE%B1"; 144 | assert_eq_display!(encoded, expected); 145 | #[cfg(feature = "alloc")] 146 | assert_eq!(encoded.to_string(), expected); 147 | } 148 | 149 | #[test] 150 | fn characters_iri_unreserved() { 151 | let encoded = PercentEncodedForIri::characters("%a0-._~\u{03B1}"); 152 | let expected = "%25a0-._~\u{03B1}"; 153 | assert_eq_display!(encoded, expected); 154 | #[cfg(feature = "alloc")] 155 | assert_eq!(encoded.to_string(), expected); 156 | } 157 | 158 | #[test] 159 | fn characters_uri_reserved() { 160 | let encoded = PercentEncodedForUri::characters(":/?#[]@ !$&'()*+,;="); 161 | let expected = ":/?#[]@%20!$&'()*+,;="; 162 | assert_eq_display!(encoded, expected); 163 | #[cfg(feature = "alloc")] 164 | assert_eq!(encoded.to_string(), expected); 165 | } 166 | 167 | 
#[test] 168 | fn characters_iri_reserved() { 169 | let encoded = PercentEncodedForIri::characters(":/?#[]@ !$&'()*+,;="); 170 | let expected = ":/?#[]@%20!$&'()*+,;="; 171 | assert_eq_display!(encoded, expected); 172 | #[cfg(feature = "alloc")] 173 | assert_eq!(encoded.to_string(), expected); 174 | } 175 | -------------------------------------------------------------------------------- /tests/resolve_refimpl/mod.rs: -------------------------------------------------------------------------------- 1 | //! Reference implementation based on RFC 3986 section 5. 2 | #![cfg(feature = "alloc")] 3 | 4 | extern crate alloc; 5 | 6 | use alloc::format; 7 | #[cfg(not(feature = "std"))] 8 | use alloc::string::String; 9 | 10 | use iri_string::spec::Spec; 11 | use iri_string::types::{RiAbsoluteStr, RiReferenceStr, RiString}; 12 | 13 | fn to_major_components( 14 | s: &RiReferenceStr, 15 | ) -> (Option<&str>, Option<&str>, &str, Option<&str>, Option<&str>) { 16 | ( 17 | s.scheme_str(), 18 | s.authority_str(), 19 | s.path_str(), 20 | s.query().map(|s| s.as_str()), 21 | s.fragment().map(|s| s.as_str()), 22 | ) 23 | } 24 | 25 | /// Resolves the relative IRI. 26 | /// 27 | /// See . 
28 | pub(super) fn resolve( 29 | reference: &RiReferenceStr, 30 | base: &RiAbsoluteStr, 31 | ) -> RiString { 32 | let (r_scheme, r_authority, r_path, r_query, r_fragment) = to_major_components(reference); 33 | let (b_scheme, b_authority, b_path, b_query, _) = to_major_components(base.as_ref()); 34 | 35 | let t_scheme: &str; 36 | let t_authority: Option<&str>; 37 | let t_path: String; 38 | let t_query: Option<&str>; 39 | 40 | if let Some(r_scheme) = r_scheme { 41 | t_scheme = r_scheme; 42 | t_authority = r_authority; 43 | t_path = remove_dot_segments(r_path.into()); 44 | t_query = r_query; 45 | } else { 46 | if r_authority.is_some() { 47 | t_authority = r_authority; 48 | t_path = remove_dot_segments(r_path.into()); 49 | t_query = r_query; 50 | } else { 51 | if r_path.is_empty() { 52 | t_path = b_path.into(); 53 | if r_query.is_some() { 54 | t_query = r_query; 55 | } else { 56 | t_query = b_query; 57 | } 58 | } else { 59 | if r_path.starts_with('/') { 60 | t_path = remove_dot_segments(r_path.into()); 61 | } else { 62 | t_path = remove_dot_segments(merge(b_path, r_path, b_authority.is_some())); 63 | } 64 | t_query = r_query; 65 | } 66 | t_authority = b_authority; 67 | } 68 | t_scheme = b_scheme.expect("non-relative IRI must have a scheme"); 69 | } 70 | let t_fragment: Option<&str> = r_fragment; 71 | 72 | let s = recompose(t_scheme, t_authority, &t_path, t_query, t_fragment); 73 | RiString::::try_from(s).expect("resolution result must be a valid IRI") 74 | } 75 | 76 | /// Merges the two paths. 77 | /// 78 | /// See . 79 | fn merge(base_path: &str, ref_path: &str, base_authority_defined: bool) -> String { 80 | if base_authority_defined && base_path.is_empty() { 81 | format!("/{}", ref_path) 82 | } else { 83 | let base_path_end = base_path.rfind('/').map_or(0, |s| s + 1); 84 | format!("{}{}", &base_path[..base_path_end], ref_path) 85 | } 86 | } 87 | 88 | /// Removes dot segments from the path. 89 | /// 90 | /// See . 
91 | fn remove_dot_segments(mut input: String) -> String { 92 | let mut output = String::new(); 93 | while !input.is_empty() { 94 | if input.starts_with("../") { 95 | // 2A. 96 | input.drain(..3); 97 | } else if input.starts_with("./") { 98 | // 2A. 99 | input.drain(..2); 100 | } else if input.starts_with("/./") { 101 | // 2B. 102 | input.replace_range(..3, "/"); 103 | } else if input == "/." { 104 | // 2B. 105 | input.replace_range(..2, "/"); 106 | } else if input.starts_with("/../") { 107 | // 2C. 108 | input.replace_range(..4, "/"); 109 | remove_last_segment_and_preceding_slash(&mut output); 110 | } else if input == "/.." { 111 | // 2C. 112 | input.replace_range(..3, "/"); 113 | remove_last_segment_and_preceding_slash(&mut output); 114 | } else if input == "." { 115 | // 2D. 116 | input.drain(..1); 117 | } else if input == ".." { 118 | // 2D. 119 | input.drain(..2); 120 | } else { 121 | // 2E. 122 | let first_seg_end = if let Some(after_slash) = input.strip_prefix('/') { 123 | // `+1` is the length of the initial slash. 124 | after_slash 125 | .find('/') 126 | .map_or_else(|| input.len(), |pos| pos + 1) 127 | } else { 128 | input.find('/').unwrap_or(input.len()) 129 | }; 130 | output.extend(input.drain(..first_seg_end)); 131 | } 132 | } 133 | 134 | output 135 | } 136 | 137 | /// Removes the last path segment and the preceding slash if any. 138 | /// 139 | /// See , 140 | /// step 2C. 141 | fn remove_last_segment_and_preceding_slash(output: &mut String) { 142 | match output.rfind('/') { 143 | Some(slash_pos) => { 144 | output.drain(slash_pos..); 145 | } 146 | None => output.clear(), 147 | } 148 | } 149 | 150 | /// Recomposes the components. 151 | /// 152 | /// See . 
fn recompose(
    scheme: &str,
    authority: Option<&str>,
    path: &str,
    query: Option<&str>,
    fragment: Option<&str>,
) -> String {
    // Components are appended in order, each optional one preceded by its
    // delimiter (`//`, `?`, `#`); the scheme is always followed by `:`.
    let mut result = String::new();

    result.push_str(scheme);
    result.push(':');
    if let Some(authority) = authority {
        result.push_str("//");
        result.push_str(authority);
    }
    result.push_str(path);
    if let Some(query) = query {
        result.push('?');
        result.push_str(query);
    }
    if let Some(fragment) = fragment {
        result.push('#');
        result.push_str(fragment);
    }

    result
}

--------------------------------------------------------------------------------
/tests/serde.rs:
--------------------------------------------------------------------------------
//! Serde test.
#![cfg(feature = "serde")]

use serde_test::{assert_tokens, Token};

use iri_string::types::*;

mod utils;

// Generates one serde round-trip test per string type.
//
// The public arm takes both a positive and a negative test name, but only the
// positive one is forwarded; `$negative` is currently unused.
macro_rules! define_tests {
    ($positive:ident, $negative:ident, ($spec:ident, $kind:ident), $slice:ty, $owned:ty,) => {
        define_tests! {
            @positive,
            $positive,
            ($spec, $kind),
            $slice,
            $owned,
        }
    };
    // Internal arm: every valid string for `($spec, $kind)` must serialize to
    // and deserialize from a single borrowed-string token, both as the
    // borrowed slice type and (with `alloc`) as its owned counterpart.
    (@positive, $name:ident, ($spec:ident, $kind:ident), $slice:ty, $owned:ty,) => {
        #[test]
        fn $name() {
            for raw in utils::positive(utils::Spec::$spec, utils::Kind::$kind) {
                let s = <$slice>::new(raw).expect("Should not fail: valid string");
                assert_tokens(&s, &[Token::BorrowedStr(raw)]);

                #[cfg(all(feature = "serde", feature = "alloc"))]
                {
                    let s = s.to_owned();
                    assert_tokens(&s, &[Token::BorrowedStr(raw)]);
                }
            }
        }
    };
}

define_tests! {
    uri,
    not_uri,
    (Uri, Normal),
    UriStr,
    UriString,
}

define_tests! {
    uri_absolute,
    not_uri_absolute,
    (Uri, Absolute),
    UriAbsoluteStr,
    UriAbsoluteString,
}

define_tests! {
    uri_reference,
    not_uri_reference,
    (Uri, Reference),
    UriReferenceStr,
    UriReferenceString,
}

define_tests! {
    uri_relative,
    not_uri_relative,
    (Uri, Relative),
    UriRelativeStr,
    UriRelativeString,
}

define_tests! {
    iri,
    not_iri,
    (Iri, Normal),
    IriStr,
    IriString,
}

define_tests! {
    iri_absolute,
    not_iri_absolute,
    (Iri, Absolute),
    IriAbsoluteStr,
    IriAbsoluteString,
}

define_tests! {
    iri_reference,
    not_iri_reference,
    (Iri, Reference),
    IriReferenceStr,
    IriReferenceString,
}

define_tests! {
    iri_relative,
    not_iri_relative,
    (Iri, Relative),
    IriRelativeStr,
    IriRelativeString,
}

--------------------------------------------------------------------------------
/tests/utils/mod.rs:
--------------------------------------------------------------------------------
//! Utilities.
#![allow(dead_code)]

use core::fmt;

use RawKind::*;

/// Raw kind (exclusive).
#[derive(Clone, Copy, PartialEq, Eq)]
enum RawKind {
    /// Invalid string.
    Invalid,
    /// IRI.
    Iri,
    /// Absolute IRI.
    IriAbsolute,
    /// Relative IRI.
    IriRelative,
    /// URI.
    Uri,
    /// Absolute URI.
    UriAbsolute,
    /// Relative URI.
24 | UriRelative, 25 | } 26 | 27 | impl RawKind { 28 | fn spec_is(self, spec: Spec) -> bool { 29 | match spec { 30 | Spec::Uri => matches!(self, Self::Uri | Self::UriAbsolute | Self::UriRelative), 31 | Spec::Iri => self != Self::Invalid, 32 | } 33 | } 34 | 35 | fn kind_is(self, kind: Kind) -> bool { 36 | match kind { 37 | Kind::Absolute => matches!(self, Self::UriAbsolute | Self::IriAbsolute), 38 | Kind::Normal => matches!( 39 | self, 40 | Self::UriAbsolute | Self::Uri | Self::IriAbsolute | Self::Iri 41 | ), 42 | Kind::Reference => self != Self::Invalid, 43 | Kind::Relative => matches!(self, Self::UriRelative | Self::IriRelative), 44 | } 45 | } 46 | 47 | fn is(self, spec: Spec, kind: Kind) -> bool { 48 | self.spec_is(spec) && self.kind_is(kind) 49 | } 50 | } 51 | 52 | /// Strings. 53 | /// ``` 54 | /// # use iri_string::types::IriReferenceStr; 55 | /// // `<` and `>` cannot directly appear in an IRI reference. 56 | /// assert!(IriReferenceStr::new("").is_err()); 57 | /// // Broken percent encoding cannot appear in an IRI reference. 
58 | /// assert!(IriReferenceStr::new("%").is_err()); 59 | /// assert!(IriReferenceStr::new("%GG").is_err()); 60 | /// ``` 61 | const STRINGS: &[(RawKind, &str)] = &[ 62 | (UriAbsolute, "https://user:pass@example.com:8080"), 63 | (UriAbsolute, "https://example.com/"), 64 | (UriAbsolute, "https://example.com/foo?bar=baz"), 65 | (Uri, "https://example.com/foo?bar=baz#qux"), 66 | (UriAbsolute, "foo:bar"), 67 | (UriAbsolute, "foo:"), 68 | (UriAbsolute, "foo:/"), 69 | (UriAbsolute, "foo://"), 70 | (UriAbsolute, "foo:///"), 71 | (UriAbsolute, "foo:////"), 72 | (UriAbsolute, "foo://///"), 73 | (UriRelative, "foo"), 74 | (UriRelative, "foo/bar"), 75 | (UriRelative, "foo//bar"), 76 | (UriRelative, "/"), 77 | (UriRelative, "/foo"), 78 | (UriRelative, "/foo/bar"), 79 | (UriRelative, "//foo/bar"), 80 | (UriRelative, "/foo//bar"), 81 | (UriRelative, "?"), 82 | (UriRelative, "???"), 83 | (UriRelative, "?foo"), 84 | (UriRelative, "#"), 85 | (UriRelative, "#foo"), 86 | (Invalid, "##"), 87 | (Invalid, "fragment#cannot#have#hash#char"), 88 | // `<` cannot appear in an IRI reference. 89 | (Invalid, "<"), 90 | // `>` cannot appear in an IRI reference. 91 | (Invalid, ">"), 92 | // `<` and `>` cannot appear in an IRI reference. 93 | (Invalid, "ltnot-allowed"), 94 | // Incomplete percent encoding. 95 | (Invalid, "%"), 96 | (Invalid, "%0"), 97 | (Invalid, "%f"), 98 | (Invalid, "%F"), 99 | // Invalid percent encoding. 100 | (Invalid, "%0g"), 101 | (Invalid, "%0G"), 102 | (Invalid, "%GG"), 103 | (Invalid, "%G0"), 104 | ]; 105 | 106 | /// Spec. 107 | #[derive(Clone, Copy, PartialEq, Eq)] 108 | pub enum Spec { 109 | /// URI. 110 | Uri, 111 | /// IRI and URI. 112 | Iri, 113 | } 114 | 115 | /// Kind. 116 | #[derive(Clone, Copy, PartialEq, Eq)] 117 | pub enum Kind { 118 | /// Absolute IRI / URI. 119 | Absolute, 120 | /// IRI / URI. 121 | Normal, 122 | /// IRI / URI reference. 123 | Reference, 124 | /// Relative IRI / URI reference. 
125 | Relative, 126 | } 127 | 128 | pub fn positive(spec: Spec, kind: Kind) -> impl Iterator { 129 | STRINGS 130 | .iter() 131 | .filter(move |(raw_kind, _)| raw_kind.is(spec, kind)) 132 | .map(|(_, s)| *s) 133 | } 134 | 135 | pub fn negative(spec: Spec, kind: Kind) -> impl Iterator { 136 | STRINGS 137 | .iter() 138 | .filter(move |(raw_kind, _)| !raw_kind.is(spec, kind)) 139 | .map(|(_, s)| *s) 140 | } 141 | 142 | /// Returns true if the two equals after they are converted to strings. 143 | pub(crate) fn eq_display_str(d: &T, s: &str) -> bool 144 | where 145 | T: ?Sized + fmt::Display, 146 | { 147 | use core::fmt::Write as _; 148 | 149 | /// Dummy writer to compare the formatted object to the given string. 150 | struct CmpWriter<'a>(&'a str); 151 | impl fmt::Write for CmpWriter<'_> { 152 | fn write_str(&mut self, s: &str) -> fmt::Result { 153 | if self.0.len() < s.len() { 154 | return Err(fmt::Error); 155 | } 156 | let (prefix, rest) = self.0.split_at(s.len()); 157 | self.0 = rest; 158 | if prefix == s { 159 | Ok(()) 160 | } else { 161 | Err(fmt::Error) 162 | } 163 | } 164 | } 165 | 166 | let mut writer = CmpWriter(s); 167 | let succeeded = write!(writer, "{}", d).is_ok(); 168 | succeeded && writer.0.is_empty() 169 | } 170 | 171 | #[allow(unused_macros)] 172 | macro_rules! assert_eq_display { 173 | ($left:expr, $right:expr $(,)?) 
=> {{ 174 | match (&$left, &$right) { 175 | (left, right) => { 176 | assert!( 177 | utils::eq_display_str(left, right.as_ref()), 178 | "`eq_str_display(left, right)`\n left: `{left}`,\n right: `{right}`", 179 | ); 180 | #[cfg(feature = "alloc")] 181 | { 182 | let left = left.to_string(); 183 | let right = right.to_string(); 184 | assert_eq!(left, right); 185 | } 186 | } 187 | } 188 | }}; 189 | ($left:expr, $right:expr, $($args:tt)*) => {{ 190 | match (&$left, &$right) { 191 | (left, right) => { 192 | assert!( 193 | utils::eq_display_str(left, right.as_ref()), 194 | "{}", 195 | format_args!( 196 | "{}: {}", 197 | format_args!( 198 | "`eq_str_display(left, right)`\n left: `{left}`,\n right: `{right}`", 199 | ), 200 | format_args!($($args)*) 201 | ) 202 | ); 203 | #[cfg(feature = "alloc")] 204 | { 205 | let left = left.to_string(); 206 | let right = right.to_string(); 207 | assert_eq!(left, right, $($args)*); 208 | } 209 | } 210 | } 211 | }}; 212 | } 213 | --------------------------------------------------------------------------------