├── idna ├── LICENSE-MIT ├── LICENSE-APACHE ├── Cargo.toml ├── tests │ ├── tests.rs │ ├── punycode.rs │ ├── punycode_tests.json │ ├── unit.rs │ └── uts46.rs ├── benches │ └── all.rs └── src │ ├── lib.rs │ ├── make_uts46_mapping_table.py │ └── punycode.rs ├── url ├── LICENSE-MIT ├── LICENSE-APACHE ├── fuzz │ ├── .gitignore │ ├── Cargo.toml │ └── fuzz_targets │ │ └── parse.rs ├── benches │ └── parse_url.rs ├── Cargo.toml ├── tests │ ├── debugger_visualizer.rs │ └── data.rs └── src │ ├── origin.rs │ ├── slicing.rs │ ├── path_segments.rs │ └── quirks.rs ├── data-url ├── LICENSE-MIT ├── LICENSE-APACHE ├── Cargo.toml ├── src │ ├── make_base64_decode_table.py │ ├── forgiving_base64.rs │ ├── mime.rs │ └── lib.rs ├── README.md └── tests │ ├── base64.json │ ├── data-urls.json │ ├── wpt.rs │ └── mime-types.json ├── percent_encoding ├── LICENSE-MIT ├── LICENSE-APACHE └── Cargo.toml ├── .gitignore ├── Cargo.toml ├── deny.toml ├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── workflows │ ├── codecov.yml │ └── main.yml ├── form_urlencoded ├── Cargo.toml ├── LICENSE-MIT ├── LICENSE-APACHE └── src │ └── lib.rs ├── SECURITY.md ├── README.md ├── LICENSE-MIT ├── debug_metadata ├── url.natvis └── README.md ├── LICENSE-APACHE └── UPGRADING.md /idna/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /url/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /data-url/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /idna/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /url/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /data-url/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /percent_encoding/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /percent_encoding/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | /.cargo/config 4 | -------------------------------------------------------------------------------- /url/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target 3 | corpus 4 | artifacts 5 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["url", "form_urlencoded", "idna", "percent_encoding", "data-url"] 3 | 
-------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | [licenses] 2 | allow-osi-fsf-free = "either" 3 | copyleft = "warn" 4 | private = { ignore = true } 5 | -------------------------------------------------------------------------------- /percent_encoding/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "percent-encoding" 3 | version = "2.1.0" 4 | authors = ["The rust-url developers"] 5 | description = "Percent encoding and decoding" 6 | repository = "https://github.com/servo/rust-url/" 7 | license = "MIT OR Apache-2.0" 8 | edition = "2018" 9 | rust-version = "1.51" 10 | 11 | [features] 12 | default = ["alloc"] 13 | alloc = [] 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | - [ ] Note that this crate implements the [URL Standard](https://url.spec.whatwg.org/) not RFC 1738 or RFC 3986 11 | 12 | **Describe the bug** 13 | A clear and concise description of what the bug is. Include code snippets if possible. 14 | -------------------------------------------------------------------------------- /url/fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "url-fuzz" 4 | version = "0.0.1" 5 | authors = ["Automatically generated"] 6 | publish = false 7 | 8 | [package.metadata] 9 | cargo-fuzz = true 10 | 11 | [dependencies] 12 | libfuzzer-sys = "0.4.0" 13 | 14 | [dependencies.url] 15 | path = ".." 16 | 17 | [[bin]] 18 | name = "parse" 19 | path = "fuzz_targets/parse.rs" 20 | 21 | [workspace] 22 | members = ["."] 23 | -------------------------------------------------------------------------------- /url/fuzz/fuzz_targets/parse.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] extern crate libfuzzer_sys; 3 | extern crate url; 4 | use std::str; 5 | 6 | fuzz_target!(|data: &[u8]| { 7 | if let Ok(utf8) = str::from_utf8(data) { 8 | if let Ok(parsed) = url::Url::parse(utf8) { 9 | let as_str = parsed.as_str(); 10 | assert_eq!(parsed, url::Url::parse(as_str).unwrap()); 11 | } 12 | } 13 | }); 14 | -------------------------------------------------------------------------------- /form_urlencoded/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "form_urlencoded" 3 | version = "1.0.1" 4 | authors = ["The rust-url developers"] 5 | description = "Parser and serializer for the application/x-www-form-urlencoded syntax, as used by HTML forms." 
6 | repository = "https://github.com/servo/rust-url" 7 | license = "MIT OR Apache-2.0" 8 | edition = "2018" 9 | rust-version = "1.51" 10 | 11 | [lib] 12 | test = false 13 | 14 | [dependencies] 15 | percent-encoding = { version = "2.1.0", path = "../percent_encoding" } 16 | -------------------------------------------------------------------------------- /data-url/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "data-url" 3 | version = "0.1.1" 4 | authors = ["Simon Sapin "] 5 | description = "Processing of data: URL according to WHATWG’s Fetch Standard" 6 | repository = "https://github.com/servo/rust-url" 7 | license = "MIT OR Apache-2.0" 8 | edition = "2018" 9 | autotests = false 10 | rust-version = "1.51" 11 | 12 | [dev-dependencies] 13 | tester = "0.9" 14 | serde = {version = "1.0", features = ["derive"]} 15 | serde_json = "1.0" 16 | 17 | [lib] 18 | test = false 19 | 20 | [[test]] 21 | name = "wpt" 22 | harness = false 23 | -------------------------------------------------------------------------------- /data-url/src/make_base64_decode_table.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate the BASE64_DECODE_TABLE constant. See its doc-comment. 3 | """ 4 | 5 | import string 6 | 7 | # https://tools.ietf.org/html/rfc4648#section-4 8 | alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+/" 9 | assert len(alphabet) == 64 10 | 11 | reverse_table = [-1] * 256 12 | for i, symbol in enumerate(alphabet): 13 | reverse_table[ord(symbol)] = i 14 | 15 | print("[") 16 | per_line = 16 17 | for line in range(0, 256, per_line): 18 | print(" " + "".join(" %2s," % value for value in reverse_table[line:][:per_line])) 19 | print("]") 20 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | | Version | Supported | 6 | | ------- | ------------------ | 7 | | 2.2.x | :white_check_mark: | 8 | 9 | ## Reporting a Vulnerability 10 | 11 | The most secure way to report a vulnerability while this crate is maintained by a Mozilla contributor is by reporting a bug on the Bugzilla issue tracker using this [link](https://bugzilla.mozilla.org/enter_bug.cgi?product=Core&component=Networking&groups=network-core-security&short_desc=[rust-url]%20DESCRIPTION&bug_type=defect). 12 | 13 | Alternatively, you may send an email to valentin AT mozilla DOT com. 
14 | -------------------------------------------------------------------------------- /url/benches/parse_url.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate bencher; 3 | 4 | use bencher::{black_box, Bencher}; 5 | 6 | use url::Url; 7 | 8 | fn short(bench: &mut Bencher) { 9 | let url = "https://example.com/bench"; 10 | 11 | bench.bytes = url.len() as u64; 12 | bench.iter(|| black_box(url).parse::<Url>().unwrap()); 13 | } 14 | 15 | fn long(bench: &mut Bencher) { 16 | let url = "https://example.com/parkbench?tre=es&st=uff"; 17 | 18 | bench.bytes = url.len() as u64; 19 | bench.iter(|| black_box(url).parse::<Url>().unwrap()); 20 | } 21 | 22 | benchmark_group!(benches, short, long); 23 | benchmark_main!(benches); 24 | -------------------------------------------------------------------------------- /idna/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "idna" 3 | version = "0.2.3" 4 | authors = ["The rust-url developers"] 5 | description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." 6 | repository = "https://github.com/servo/rust-url/" 7 | license = "MIT OR Apache-2.0" 8 | autotests = false 9 | edition = "2018" 10 | rust-version = "1.51" 11 | 12 | [lib] 13 | doctest = false 14 | 15 | [[test]] 16 | name = "tests" 17 | harness = false 18 | 19 | [[test]] 20 | name = "unit" 21 | 22 | [dev-dependencies] 23 | assert_matches = "1.3" 24 | bencher = "0.1" 25 | tester = "0.9" 26 | serde_json = "1.0" 27 | 28 | [dependencies] 29 | unicode-bidi = "0.3" 30 | unicode-normalization = "0.1.17" 31 | 32 | [[bench]] 33 | name = "all" 34 | harness = false 35 | -------------------------------------------------------------------------------- /idna/tests/tests.rs: -------------------------------------------------------------------------------- 1 | use tester as test; 2 | 3 | mod punycode; 4 | mod uts46; 5 | 6 | fn main() { 7 | let mut tests = Vec::new(); 8 | { 9 | let mut add_test = |name, run| { 10 | tests.push(test::TestDescAndFn { 11 | desc: test::TestDesc { 12 | name: test::DynTestName(name), 13 | ignore: false, 14 | should_panic: test::ShouldPanic::No, 15 | allow_fail: false, 16 | test_type: test::TestType::Unknown, 17 | }, 18 | testfn: run, 19 | }) 20 | }; 21 | punycode::collect_tests(&mut add_test); 22 | uts46::collect_tests(&mut add_test); 23 | } 24 | test::test_main(&std::env::args().collect::<Vec<_>>(), tests, None) 25 | } 26 | -------------------------------------------------------------------------------- /.github/workflows/codecov.yml: -------------------------------------------------------------------------------- 1 | name: Coverage 2 | 3 | on: 4 | push: 5 | branches: ["master"] 6 | pull_request: 7 | 8 | jobs: 9 | coverage: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v1 13 | - uses: actions-rs/toolchain@v1 14 | with: 15 | profile: minimal 16 | toolchain: stable 17 | override: true 18 | - uses: actions-rs/tarpaulin@v0.1 19 | - uses: codecov/codecov-action@v2.1.0 20 | # A codecov token is not needed for public repos if the repo is linked 21 | # on codecov.io.
See https://docs.codecov.com/docs/frequently-asked-questions#where-is-the-repository-upload-token-found 22 | - uses: actions/upload-artifact@v1 23 | with: 24 | name: code-coverage-report 25 | path: cobertura.xml 26 | -------------------------------------------------------------------------------- /data-url/README.md: -------------------------------------------------------------------------------- 1 | # data-url 2 | 3 | [![crates.io](https://img.shields.io/crates/v/data-url.svg)](https://crates.io/crates/data-url) 4 | [![docs.rs](https://img.shields.io/badge/docs.rs-%F0%9F%93%84-blue.svg)](https://docs.rs/data-url/) 5 | 6 | Processing of `data:` URLs in Rust according to the Fetch Standard: 7 | <https://fetch.spec.whatwg.org/#data-urls> 8 | but starting from a string rather than a parsed URL to avoid extra copies. 9 | 10 | ```rust 11 | use data_url::{DataUrl, mime}; 12 | 13 | let url = DataUrl::process("data:,Hello%20World!").unwrap(); 14 | let (body, fragment) = url.decode_to_vec().unwrap(); 15 | 16 | assert_eq!(url.mime_type().type_, "text"); 17 | assert_eq!(url.mime_type().subtype, "plain"); 18 | assert_eq!(url.mime_type().get_parameter("charset"), Some("US-ASCII")); 19 | assert_eq!(body, b"Hello World!"); 20 | assert!(fragment.is_none()); 21 | ``` 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | rust-url 2 | ======== 3 | 4 | [![Build status](https://github.com/servo/rust-url/workflows/CI/badge.svg)](https://github.com/servo/rust-url/actions?query=workflow%3ACI) 5 | [![Coverage](https://codecov.io/gh/servo/rust-url/branch/master/graph/badge.svg)](https://codecov.io/gh/servo/rust-url) 6 | [![Chat](https://img.shields.io/badge/chat-%23rust--url:mozilla.org-%2346BC99?logo=Matrix)](https://matrix.to/#/#rust-url:mozilla.org) 7 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE-MIT) 8 | [![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE-APACHE) 9 | 10 | URL library for Rust, based on the [URL Standard](https://url.spec.whatwg.org/). 11 | 12 | [Documentation](https://docs.rs/url/) 13 | 14 | Please see [UPGRADING.md](https://github.com/servo/rust-url/blob/master/UPGRADING.md) if you are upgrading from a previous version. 15 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2022 The rust-url developers 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /form_urlencoded/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2016 The rust-url developers 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /idna/benches/all.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate bencher; 3 | extern crate idna; 4 | 5 | use bencher::{black_box, Bencher}; 6 | use idna::Config; 7 | 8 | fn to_unicode_puny_label(bench: &mut Bencher) { 9 | let encoded = "abc.xn--mgbcm"; 10 | let config = Config::default(); 11 | bench.iter(|| config.to_unicode(black_box(encoded))); 12 | } 13 | 14 | fn to_unicode_ascii(bench: &mut Bencher) { 15 | let encoded = "example.com"; 16 | let config = Config::default(); 17 | bench.iter(|| config.to_unicode(black_box(encoded))); 18 | } 19 | 20 | fn to_unicode_merged_label(bench: &mut Bencher) { 21 | let encoded = "Beispiel.xn--vermgensberater-ctb"; 22 | let config = Config::default(); 23 | bench.iter(|| config.to_unicode(black_box(encoded))); 24 | } 25 | 26 | fn to_ascii_puny_label(bench: &mut Bencher) { 27 | let encoded = "abc.ابج"; 28 | let config = Config::default(); 29 | bench.iter(|| config.to_ascii(black_box(encoded))); 30 | } 31 | 32 | fn to_ascii_simple(bench: &mut Bencher) { 33 | let encoded = "example.com"; 34 | let config = Config::default(); 35 | bench.iter(|| config.to_ascii(black_box(encoded))); 36 | } 37 | 38 | fn to_ascii_merged(bench: &mut Bencher) { 39 | let encoded = "beispiel.vermögensberater"; 40 | let config = Config::default(); 41 | bench.iter(|| config.to_ascii(black_box(encoded))); 42 | } 43 | 44 | benchmark_group!( 45 | benches, 46 | to_unicode_puny_label, 47 | to_unicode_ascii, 48 | to_unicode_merged_label, 49 | to_ascii_puny_label, 50 | to_ascii_simple, 51 | to_ascii_merged, 52 | ); 53 | benchmark_main!(benches); 54 | -------------------------------------------------------------------------------- /url/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | 3 | name = "url" 4 | # When updating version, also modify html_root_url in the lib.rs 5 | version = "2.2.2" 6 | authors = ["The rust-url developers"] 7 | 8 | description = "URL library for Rust, based on the WHATWG URL Standard" 9 | documentation = "https://docs.rs/url" 10 | repository = "https://github.com/servo/rust-url" 11 | readme = "../README.md" 12 | keywords = ["url", "parser"] 13 | categories = ["parser-implementations", "web-programming", "encoding"] 14 | license = "MIT OR Apache-2.0" 15 | include = ["src/**/*", "LICENSE-*", "README.md", "tests/**"] 16 | edition = "2018" 17 | rust-version = "1.51" 18 | 19 | [badges] 20 | travis-ci = { repository = "servo/rust-url" } 21 | appveyor = { repository = "Manishearth/rust-url" } 22 | 23 | [dev-dependencies] 24 | serde_json = "1.0" 25 | bencher = "0.1" 26 | # To test debugger visualizers defined for the url crate such as url.natvis 27 | debugger_test = "0.1" 28 | debugger_test_parser = "0.1" 29 | 30 | [dependencies] 31 | form_urlencoded = { version = "1.0.0", path = "../form_urlencoded" } 32 | idna = { version = "0.2.0", path = "../idna", optional = true } 33 | percent-encoding = { version = "2.1.0", path = "../percent_encoding" } 34 | serde = {version = "1.0", optional = true, features = ["derive"]} 35 | 36 | [features] 37 | default = ["idna"] 38 | # UNSTABLE FEATURES (requires Rust nightly) 39 | # Enable to use the #[debugger_visualizer] attribute. 40 | debugger_visualizer = [] 41 | 42 | [[bench]] 43 | name = "parse_url" 44 | path = "benches/parse_url.rs" 45 | harness = false 46 | 47 | [[test]] 48 | name = "debugger_visualizer" 49 | path = "tests/debugger_visualizer.rs" 50 | required-features = ["debugger_visualizer"] 51 | test = false 52 | -------------------------------------------------------------------------------- /data-url/tests/base64.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["", []], 3 | ["abcd", [105, 183, 29]], 4 | [" abcd", [105, 183, 29]], 5 | ["abcd ", [105, 183, 29]], 6 | [" abcd===", null], 7 | ["abcd=== ", null], 8 | ["abcd ===", null], 9 | ["a", null], 10 | ["ab", [105]], 11 | ["abc", [105, 183]], 12 | ["abcde", null], 13 | ["𐀀", null], 14 | ["=", null], 15 | ["==", null], 16 | ["===", null], 17 | ["====", null], 18 | ["=====", null], 19 | ["a=", null], 20 | ["a==", null], 21 | ["a===", null], 22 | ["a====", null], 23 | ["a=====", null], 24 | ["ab=", null], 25 | ["ab==", [105]], 26 | ["ab===", null], 27 | ["ab====", null], 28 | ["ab=====", null], 29 | ["abc=", [105, 183]], 30 | ["abc==", null], 31 | ["abc===", null], 32 | ["abc====", null], 33 | ["abc=====", null], 34 | ["abcd=", null], 35 | ["abcd==", null], 36 | ["abcd===", null], 37 | ["abcd====", null], 38 | ["abcd=====", null], 39 | ["abcde=", null], 40 | ["abcde==", null], 41 | ["abcde===", null], 42 | ["abcde====", null], 43 | ["abcde=====", null], 44 | ["=a", null], 45 | ["=a=", null], 46 | ["a=b", null], 47 | ["a=b=", null], 48 | ["ab=c", null], 49 | ["ab=c=", null], 50 | ["abc=d", null], 51 | ["abc=d=", null], 52 | ["ab\u000Bcd", null], 53 | ["ab\u3000cd", null], 54 | ["ab\u3001cd", null], 55 | ["ab\tcd", [105, 183, 29]], 56 | ["ab\ncd", [105, 183, 29]], 57 | ["ab\fcd", [105, 183, 29]], 58 | ["ab\rcd", [105, 183, 29]], 59 | ["ab cd", [105, 183, 29]], 60 | ["ab\u00a0cd", null], 61 | ["ab\t\n\f\r cd", [105, 183, 29]], 62 | [" \t\n\f\r ab\t\n\f\r cd\t\n\f\r ", [105, 183, 29]], 63 | ["ab\t\n\f\r =\t\n\f\r 
=\t\n\f\r ", [105]], 64 | ["A", null], 65 | ["/A", [252]], 66 | ["//A", [255, 240]], 67 | ["///A", [255, 255, 192]], 68 | ["////A", null], 69 | ["/", null], 70 | ["A/", [3]], 71 | ["AA/", [0, 15]], 72 | ["AAAA/", null], 73 | ["AAA/", [0, 0, 63]], 74 | ["\u0000nonsense", null], 75 | ["abcd\u0000nonsense", null], 76 | ["YQ", [97]], 77 | ["YR", [97]], 78 | ["~~", null], 79 | ["..", null], 80 | ["--", null], 81 | ["__", null] 82 | ] 83 | -------------------------------------------------------------------------------- /debug_metadata/url.natvis: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {serialization} 6 | 7 | 8 | {(char*)(ptr()),[scheme_end]s8} 9 | 10 | 11 | 12 | {(char*)(ptr()+(scheme_end + 3)),[((username_end)-(scheme_end + 3))]s8} 13 | 14 | 15 | {(char*)(ptr()+host_start),[host_end-host_start]s8} 16 | 17 | 18 | {port.variant1.value.__0,d} 19 | 20 | 21 | {(char*)(ptr()+path_start),[(serialization.vec.len-path_start)]s8} 22 | {(char*)(ptr()+path_start),[(query_start.variant1.value.__0-path_start)]s8} 23 | {(char*)(ptr()+path_start),[(fragment_start.variant1.value.__0-path_start)]s8} 24 | 25 | 26 | {(char*)(ptr()+query_start.variant1.value.__0+1),[((serialization.vec.len)-(query_start.variant1.value.__0+1))]s8} 27 | {(char*)(ptr()+query_start.variant1.value.__0+1),[((fragment_start.variant1.value.__0)-(query_start.variant1.value.__0+1))]s8} 28 | 29 | 30 | {(char*)(ptr()+fragment_start.variant1.value.__0+1),[(serialization.vec.len-fragment_start.variant1.value.__0-1)]s8} 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /idna/tests/punycode.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 
8 | 9 | use crate::test::TestFn; 10 | use idna::punycode::{decode, encode_str}; 11 | use serde_json::map::Map; 12 | use serde_json::Value; 13 | use std::str::FromStr; 14 | 15 | fn one_test(decoded: &str, encoded: &str) { 16 | match decode(encoded) { 17 | None => panic!("Decoding {} failed.", encoded), 18 | Some(result) => { 19 | let result = result.into_iter().collect::<String>(); 20 | assert!( 21 | result == decoded, 22 | "Incorrect decoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", 23 | encoded, 24 | result, 25 | decoded 26 | ) 27 | } 28 | } 29 | 30 | match encode_str(decoded) { 31 | None => panic!("Encoding {} failed.", decoded), 32 | Some(result) => assert!( 33 | result == encoded, 34 | "Incorrect encoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", 35 | decoded, 36 | result, 37 | encoded 38 | ), 39 | } 40 | } 41 | 42 | fn get_string<'a>(map: &'a Map<String, Value>, key: &str) -> &'a str { 43 | match map.get(&key.to_string()) { 44 | Some(&Value::String(ref s)) => s, 45 | None => "", 46 | _ => panic!(), 47 | } 48 | } 49 | 50 | pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) { 51 | match Value::from_str(include_str!("punycode_tests.json")) { 52 | Ok(Value::Array(tests)) => { 53 | for (i, test) in tests.into_iter().enumerate() { 54 | match test { 55 | Value::Object(o) => { 56 | let test_name = { 57 | let desc = get_string(&o, "description"); 58 | if desc.is_empty() { 59 | format!("Punycode {}", i + 1) 60 | } else { 61 | format!("Punycode {}: {}", i + 1, desc) 62 | } 63 | }; 64 | add_test( 65 | test_name, 66 | TestFn::DynTestFn(Box::new(move || { 67 | one_test(get_string(&o, "decoded"), get_string(&o, "encoded")) 68 | })), 69 | ) 70 | } 71 | _ => panic!(), 72 | } 73 | } 74 | } 75 | other => panic!("{:?}", other), 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: ["master"] 6 | pull_request: 7 | 8 | env: 9 | CARGO_NET_GIT_FETCH_WITH_CLI: true 10 | 11 | jobs: 12 | Test: 13 | strategy: 14 | matrix: 15 | os: [ubuntu-latest, macos-latest, windows-latest] 16 | rust: [1.51.0, stable, beta, nightly] 17 | exclude: 18 | - os: macos-latest 19 | rust: 1.51.0 20 | - os: windows-latest 21 | rust: 1.51.0 22 | - os: macos-latest 23 | rust: beta 24 | - os: windows-latest 25 | rust: beta 26 | - os: macos-latest 27 | rust: nightly 28 | 29 | runs-on: ${{ matrix.os }} 30 | 31 | steps: 32 | - uses: actions/checkout@v2 33 | - uses: actions-rs/toolchain@v1 34 | with: 35 | profile: minimal 36 | toolchain: ${{ matrix.rust }} 37 | override: true 38 | - uses: actions-rs/cargo@v1 39 | with: 40 | command: build 41 | args: --all-targets 42 | # Run tests 43 | - name: Run tests 44 | uses: actions-rs/cargo@v1 45 | with: 46 | command: test 47 | # Run tests enabling the serde feature 48 | - name: Run tests with the serde feature 49 | uses: actions-rs/cargo@v1 50 | with: 51 | command: test 52 | args: --features "url/serde" 53 | # The #[debugger_visualizer] attribute is currently gated behind an unstable feature flag. 54 | # In order to test the visualizers for the url crate, they have to be tested on a nightly build.
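# These tests are also limited to windows-latest below, since only Natvis (Windows-only) visualizers exist so far; see debug_metadata/README.md.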
55 | - name: Run debugger_visualizer tests 56 | if: | 57 | matrix.os == 'windows-latest' && 58 | matrix.rust == 'nightly' 59 | uses: actions-rs/cargo@v1 60 | with: 61 | command: test 62 | args: --test debugger_visualizer --features "url/serde,url/debugger_visualizer" -- --test-threads=1 63 | 64 | WASM: 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v2 68 | - uses: actions-rs/toolchain@v1 69 | with: 70 | profile: minimal 71 | toolchain: stable 72 | target: wasm32-unknown-unknown 73 | override: true 74 | - uses: actions-rs/cargo@v1 75 | with: 76 | command: build 77 | args: --target wasm32-unknown-unknown 78 | 79 | Lint: 80 | runs-on: ubuntu-latest 81 | steps: 82 | - uses: actions/checkout@v2 83 | - uses: actions-rs/toolchain@v1 84 | with: 85 | profile: minimal 86 | toolchain: stable 87 | override: true 88 | components: rustfmt, clippy 89 | - uses: actions-rs/cargo@v1 90 | with: 91 | command: fmt 92 | args: --all -- --check 93 | - uses: actions-rs/cargo@v1 94 | if: always() 95 | with: 96 | command: clippy 97 | args: --workspace --all-targets -- -D warnings 98 | 99 | Audit: 100 | runs-on: ubuntu-latest 101 | steps: 102 | - uses: actions/checkout@v1 103 | - uses: EmbarkStudios/cargo-deny-action@v1 104 | -------------------------------------------------------------------------------- /idna/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | //! This Rust crate implements IDNA 10 | //! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna). 11 | //! 12 | //! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing* 13 | //! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/) 14 | //! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492). 15 | //! 16 | //! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction): 17 | //! 18 | //! > Initially, domain names were restricted to ASCII characters. 19 | //! > A system was introduced in 2003 for internationalized domain names (IDN). 20 | //! > This system is called Internationalizing Domain Names for Applications, 21 | //! > or IDNA2003 for short. 22 | //! > This mechanism supports IDNs by means of a client software transformation 23 | //! > into a format known as Punycode. 24 | //! > A revision of IDNA was approved in 2010 (IDNA2008). 25 | //! > This revision has a number of incompatibilities with IDNA2003. 26 | //! > 27 | //! > The incompatibilities force implementers of client software, 28 | //! > such as browsers and emailers, 29 | //! > to face difficult choices during the transition period 30 | //! > as registries shift from IDNA2003 to IDNA2008. 31 | //! > This document specifies a mechanism 32 | //! > that minimizes the impact of this transition for client software, 33 | //! > allowing client software to access domains that are valid under either system. 34 | 35 | #[cfg(test)] 36 | #[macro_use] 37 | extern crate assert_matches; 38 | 39 | pub mod punycode; 40 | mod uts46; 41 | 42 | pub use crate::uts46::{Config, Errors, Idna}; 43 | 44 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm. 
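/// (For example, "bücher.de" maps to "xn--bcher-kva.de"; compare the "bücher" case in idna/tests/punycode_tests.json.)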
45 | /// 46 | /// Return the ASCII representation of a domain name, 47 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) 48 | /// and using Punycode as necessary. 49 | /// 50 | /// This process may fail. 51 | pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> { 52 | Config::default().to_ascii(domain) 53 | } 54 | 55 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm, 56 | /// with the `beStrict` flag set. 57 | pub fn domain_to_ascii_strict(domain: &str) -> Result<String, uts46::Errors> { 58 | Config::default() 59 | .use_std3_ascii_rules(true) 60 | .verify_dns_length(true) 61 | .to_ascii(domain) 62 | } 63 | 64 | /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. 65 | /// 66 | /// Return the Unicode representation of a domain name, 67 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) 68 | /// and decoding Punycode as necessary. 69 | /// 70 | /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) 71 | /// but always returns a string for the mapped domain. 72 | pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { 73 | Config::default().to_unicode(domain) 74 | } 75 | -------------------------------------------------------------------------------- /url/tests/debugger_visualizer.rs: -------------------------------------------------------------------------------- 1 | use debugger_test::debugger_test; 2 | use url::Url; 3 | 4 | #[inline(never)] 5 | fn __break() {} 6 | 7 | #[debugger_test( 8 | debugger = "cdb", 9 | commands = " 10 | .nvlist 11 | 12 | dx base_url 13 | 14 | dx url_with_non_special_scheme 15 | 16 | dx url_with_user_pass_port_query_fragments 17 | 18 | dx url_blob 19 | 20 | dx url_with_base 21 | 22 | dx url_with_base_replaced 23 | 24 | dx url_with_comma", 25 | expected_statements = r#" 26 | pattern:debugger_visualizer-.*\.exe \(embedded NatVis ".*-[0-9]+\.natvis"\) 27 | 28 | base_url : "http://example.org/foo/bar" [Type: url::Url] 29 | [] [Type: url::Url] 30 | [scheme] : "http" 31 | [host] : "example.org" 32 | [path] : "/foo/bar" 33 | 34 | url_with_non_special_scheme : "non-special://test/x" [Type: url::Url] 35 | [] [Type: url::Url] 36 | [scheme] : "non-special" 37 | [host] : "test" 38 | [path] : "/x" 39 | 40 | url_with_user_pass_port_query_fragments : "http://user:pass@foo:21/bar;par?b#c" [Type: url::Url] 41 | [] [Type: url::Url] 42 | [scheme] : "http" 43 | [username] : "user" 44 | [host] : "foo" 45 | [port] : 21 46 | [path] : "/bar;par" 47 | [query] : "b" 48 | [fragment] : "c" 49 | 50 | url_blob : "blob:https://example.com:443/" [Type: url::Url] 51 | [] [Type: url::Url] 52 | [scheme] : "blob" 53 | [path] : "https://example.com:443/" 54 | 55 | url_with_base : "http://example.org/a%2fc" [Type: url::Url] 56 | [] [Type: url::Url] 57 | [scheme] : "http" 58 | [host] : "example.org" 59 | [path] : "/a%2fc" 60 | 61 | url_with_base_replaced : "http://[::7f00:1]/" [Type: url::Url] 62 | [] [Type: url::Url] 63 | [scheme] : "http" 64 | [host] : "[::7f00:1]" 65 | [path] : "/" 66 | 67 | url_with_comma : "data:text/html,test#test" [Type: url::Url] 68 | [] [Type: url::Url] 69 | [scheme] : "data" 70 | [path] : "text/html,test" 71 | [fragment] : "test" 72 | "# 73 | )] 74 | fn test_url_visualizer() { 75 | // Copied from https://github.com/web-platform-tests/wpt/blob/master/url/ 76 | let base_url = Url::parse("http://example.org/foo/bar").unwrap(); 77 | assert_eq!(base_url.as_str(), "http://example.org/foo/bar"); 78 | 79 |
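// The empty username and password of "non-special://:@test/x" are dropped on serialization, as the assertion below shows.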
let url_with_non_special_scheme = Url::parse("non-special://:@test/x").unwrap(); 80 | assert_eq!(url_with_non_special_scheme.as_str(), "non-special://test/x"); 81 | 82 | let url_with_user_pass_port_query_fragments = 83 | Url::parse("http://user:pass@foo:21/bar;par?b#c").unwrap(); 84 | assert_eq!( 85 | url_with_user_pass_port_query_fragments.as_str(), 86 | "http://user:pass@foo:21/bar;par?b#c" 87 | ); 88 | 89 | let url_blob = Url::parse("blob:https://example.com:443/").unwrap(); 90 | assert_eq!(url_blob.as_str(), "blob:https://example.com:443/"); 91 | 92 | let url_with_base = base_url.join("/a%2fc").unwrap(); 93 | assert_eq!(url_with_base.as_str(), "http://example.org/a%2fc"); 94 | 95 | let url_with_base_replaced = base_url.join("http://[::127.0.0.1]").unwrap(); 96 | assert_eq!(url_with_base_replaced.as_str(), "http://[::7f00:1]/"); 97 | 98 | let url_with_comma = base_url.join("data:text/html,test#test").unwrap(); 99 | assert_eq!(url_with_comma.as_str(), "data:text/html,test#test"); 100 | 101 | __break(); 102 | } 103 | -------------------------------------------------------------------------------- /url/src/origin.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | use crate::host::Host; 10 | use crate::parser::default_port; 11 | use crate::Url; 12 | use std::sync::atomic::{AtomicUsize, Ordering}; 13 | 14 | pub fn url_origin(url: &Url) -> Origin { 15 | let scheme = url.scheme(); 16 | match scheme { 17 | "blob" => { 18 | let result = Url::parse(url.path()); 19 | match result { 20 | Ok(ref url) => url_origin(url), 21 | Err(_) => Origin::new_opaque(), 22 | } 23 | } 24 | "ftp" | "http" | "https" | "ws" | "wss" => Origin::Tuple( 25 | scheme.to_owned(), 26 | url.host().unwrap().to_owned(), 27 | url.port_or_known_default().unwrap(), 28 | ), 29 | // TODO: Figure out what to do if the scheme is a file 30 | "file" => Origin::new_opaque(), 31 | _ => Origin::new_opaque(), 32 | } 33 | } 34 | 35 | /// The origin of a URL 36 | /// 37 | /// Two URLs with the same origin are considered 38 | /// to originate from the same entity and can therefore trust 39 | /// each other. 40 | /// 41 | /// The origin is determined based on the scheme as follows: 42 | /// 43 | /// - If the scheme is "blob" the origin is the origin of the 44 | /// URL contained in the path component. If parsing fails, 45 | /// it is an opaque origin. 46 | /// - If the scheme is "ftp", "http", "https", "ws", or "wss", 47 | /// then the origin is a tuple of the scheme, host, and port. 48 | /// - If the scheme is anything else, the origin is opaque, meaning 49 | /// the URL does not have the same origin as any other URL. 50 | /// 51 | /// For more information see 52 | #[derive(PartialEq, Eq, Hash, Clone, Debug)] 53 | pub enum Origin { 54 | /// A globally unique identifier 55 | Opaque(OpaqueOrigin), 56 | 57 | /// Consists of the URL's scheme, host and port 58 | Tuple(String, Host<String>, u16), 59 | } 60 | 61 | impl Origin { 62 | /// Creates a new opaque origin that is only equal to itself.
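/// (Uniqueness comes from the process-wide atomic counter below: each call takes the next value, so no two opaque origins compare equal.)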
63 | pub fn new_opaque() -> Origin { 64 | static COUNTER: AtomicUsize = AtomicUsize::new(0); 65 | Origin::Opaque(OpaqueOrigin(COUNTER.fetch_add(1, Ordering::SeqCst))) 66 | } 67 | 68 | /// Return whether this origin is a (scheme, host, port) tuple 69 | /// (as opposed to an opaque origin). 70 | pub fn is_tuple(&self) -> bool { 71 | matches!(*self, Origin::Tuple(..)) 72 | } 73 | 74 | /// 75 | pub fn ascii_serialization(&self) -> String { 76 | match *self { 77 | Origin::Opaque(_) => "null".to_owned(), 78 | Origin::Tuple(ref scheme, ref host, port) => { 79 | if default_port(scheme) == Some(port) { 80 | format!("{}://{}", scheme, host) 81 | } else { 82 | format!("{}://{}:{}", scheme, host, port) 83 | } 84 | } 85 | } 86 | } 87 | 88 | /// 89 | #[cfg(feature = "idna")] 90 | pub fn unicode_serialization(&self) -> String { 91 | match *self { 92 | Origin::Opaque(_) => "null".to_owned(), 93 | Origin::Tuple(ref scheme, ref host, port) => { 94 | let host = match *host { 95 | Host::Domain(ref domain) => { 96 | let (domain, _errors) = idna::domain_to_unicode(domain); 97 | Host::Domain(domain) 98 | } 99 | _ => host.clone(), 100 | }; 101 | if default_port(scheme) == Some(port) { 102 | format!("{}://{}", scheme, host) 103 | } else { 104 | format!("{}://{}:{}", scheme, host, port) 105 | } 106 | } 107 | } 108 | } 109 | } 110 | 111 | /// Opaque identifier for URLs that have file or other schemes 112 | #[derive(Eq, PartialEq, Hash, Clone, Debug)] 113 | pub struct OpaqueOrigin(usize); 114 | -------------------------------------------------------------------------------- /idna/tests/punycode_tests.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "description": "These tests are copied from https://github.com/bestiejs/punycode.js/blob/master/tests/tests.js , used under the MIT license.", 4 | "decoded": "", 5 | "encoded": "" 6 | }, 7 | { 8 | "description": "a single basic code point", 9 | "decoded": "Bach", 10 | "encoded": "Bach-" 11 | }, 12 | { 13 | "description": "a single non-ASCII character", 14 | "decoded": "\u00FC", 15 | "encoded": "tda" 16 | }, 17 | { 18 | "description": "multiple non-ASCII characters", 19 | "decoded": "\u00FC\u00EB\u00E4\u00F6\u2665", 20 | "encoded": "4can8av2009b" 21 | }, 22 | { 23 | "description": "mix of ASCII and non-ASCII characters", 24 | "decoded": "b\u00FCcher", 25 | "encoded": "bcher-kva" 26 | }, 27 | { 28 | "description": "long string with both ASCII and non-ASCII characters", 29 | "decoded": "Willst du die Bl\u00FCthe des fr\u00FChen, die Fr\u00FCchte des sp\u00E4teren Jahres", 30 | "encoded": "Willst du die Blthe des frhen, die Frchte des spteren Jahres-x9e96lkal" 31 | }, 32 | { 33 | "description": "Arabic (Egyptian)", 34 | "decoded": "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F", 35 | "encoded": "egbpdaj6bu4bxfgehfvwxn" 36 | }, 37 | { 38 | "description": "Chinese (simplified)", 39 | "decoded": "\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2d\u6587", 40 | "encoded": "ihqwcrb4cv8a8dqg056pqjye" 41 | }, 42 | { 43 | "description": "Chinese (traditional)", 44 | "decoded": "\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587", 45 | "encoded": "ihqwctvzc91f659drss3x8bo0yb" 46 | }, 47 | { 48 | "description": "Czech", 49 | "decoded": "Pro\u010Dprost\u011Bnemluv\u00ED\u010Desky", 50 | "encoded": "Proprostnemluvesky-uyb24dma41a" 51 | }, 52 | { 53 | "description": "Hebrew", 54 | "decoded": 
"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2\u05D1\u05E8\u05D9\u05EA", 55 | "encoded": "4dbcagdahymbxekheh6e0a7fei0b" 56 | }, 57 | { 58 | "description": "Hindi (Devanagari)", 59 | "decoded": "\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947\u0939\u0948\u0902", 60 | "encoded": "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd" 61 | }, 62 | { 63 | "description": "Japanese (kanji and hiragana)", 64 | "decoded": "\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B", 65 | "encoded": "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa" 66 | }, 67 | { 68 | "description": "Korean (Hangul syllables)", 69 | "decoded": "\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C", 70 | "encoded": "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c" 71 | }, 72 | { 73 | "description": "Russian (Cyrillic)", 74 | "decoded": "\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A\u0438", 75 | "encoded": "b1abfaaepdrnnbgefbadotcwatmq2g4l" 76 | }, 77 | { 78 | "description": "Spanish", 79 | "decoded": "Porqu\u00E9nopuedensimplementehablarenEspa\u00F1ol", 80 | "encoded": "PorqunopuedensimplementehablarenEspaol-fmd56a" 81 | }, 82 | { 83 | "description": "Vietnamese", 84 | "decoded": "T\u1EA1isaoh\u1ECDkh\u00F4ngth\u1EC3ch\u1EC9n\u00F3iti\u1EBFngVi\u1EC7t", 85 | "encoded": "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g" 86 | }, 87 | { 88 | "decoded": "3\u5E74B\u7D44\u91D1\u516B\u5148\u751F", 89 | "encoded": "3B-ww4c5e180e575a65lsy2b" 90 | }, 91 | { 92 | "decoded": "\u5B89\u5BA4\u5948\u7F8E\u6075-with-SUPER-MONKEYS", 93 | "encoded": "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n" 94 | }, 95 | { 96 | "decoded": "Hello-Another-Way-\u305D\u308C\u305E\u308C\u306E\u5834\u6240", 97 | "encoded": "Hello-Another-Way--fc4qua05auwb3674vfr0b" 98 | }, 99 | { 100 | "decoded": "\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B2", 101 | "encoded": "2-u9tlzr9756bt3uc0v" 102 | }, 103 | { 104 | "decoded": "Maji\u3067Koi\u3059\u308B5\u79D2\u524D", 105 | "encoded": "MajiKoi5-783gue6qz075azm5e" 106 | }, 107 | { 108 | "decoded": "\u30D1\u30D5\u30A3\u30FCde\u30EB\u30F3\u30D0", 109 | "encoded": "de-jg4avhby1noc0d" 110 | }, 111 | { 112 | "decoded": "\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067", 113 | "encoded": "d9juau41awczczp" 114 | }, 115 | { 116 | "description": "ASCII string that breaks the existing rules for host-name labels (It's not a realistic example for IDNA, because IDNA never encodes pure ASCII labels.)", 117 | "decoded": "-> $1.00 <-", 118 | "encoded": "-> $1.00 <--" 119 | } 120 | ] 121 | -------------------------------------------------------------------------------- /idna/tests/unit.rs: -------------------------------------------------------------------------------- 1 | use assert_matches::assert_matches; 2 | use unicode_normalization::char::is_combining_mark; 3 | 4 | /// https://github.com/servo/rust-url/issues/373 5 | #[test] 6 | fn test_punycode_prefix_with_length_check() { 7 | let config = idna::Config::default() 8 | .verify_dns_length(true) 9 | .check_hyphens(true) 10 | .use_std3_ascii_rules(true); 11 | 12 | assert!(config.to_ascii("xn--").is_err()); 13 | assert!(config.to_ascii("xn---").is_err()); 14 | 
assert!(config.to_ascii("xn-----").is_err()); 15 | assert!(config.to_ascii("xn--.").is_err()); 16 | assert!(config.to_ascii("xn--...").is_err()); 17 | assert!(config.to_ascii(".xn--").is_err()); 18 | assert!(config.to_ascii("...xn--").is_err()); 19 | assert!(config.to_ascii("xn--.xn--").is_err()); 20 | assert!(config.to_ascii("xn--.example.org").is_err()); 21 | } 22 | 23 | /// https://github.com/servo/rust-url/issues/373 24 | #[test] 25 | fn test_punycode_prefix_without_length_check() { 26 | let config = idna::Config::default() 27 | .verify_dns_length(false) 28 | .check_hyphens(true) 29 | .use_std3_ascii_rules(true); 30 | 31 | assert_eq!(config.to_ascii("xn--").unwrap(), ""); 32 | assert!(config.to_ascii("xn---").is_err()); 33 | assert!(config.to_ascii("xn-----").is_err()); 34 | assert_eq!(config.to_ascii("xn--.").unwrap(), "."); 35 | assert_eq!(config.to_ascii("xn--...").unwrap(), "..."); 36 | assert_eq!(config.to_ascii(".xn--").unwrap(), "."); 37 | assert_eq!(config.to_ascii("...xn--").unwrap(), "..."); 38 | assert_eq!(config.to_ascii("xn--.xn--").unwrap(), "."); 39 | assert_eq!(config.to_ascii("xn--.example.org").unwrap(), ".example.org"); 40 | } 41 | 42 | // http://www.unicode.org/reports/tr46/#Table_Example_Processing 43 | #[test] 44 | fn test_examples() { 45 | let mut codec = idna::Idna::default(); 46 | let mut out = String::new(); 47 | 48 | assert_matches!(codec.to_unicode("Bloß.de", &mut out), Ok(())); 49 | assert_eq!(out, "bloß.de"); 50 | 51 | out.clear(); 52 | assert_matches!(codec.to_unicode("xn--blo-7ka.de", &mut out), Ok(())); 53 | assert_eq!(out, "bloß.de"); 54 | 55 | out.clear(); 56 | assert_matches!(codec.to_unicode("u\u{308}.com", &mut out), Ok(())); 57 | assert_eq!(out, "ü.com"); 58 | 59 | out.clear(); 60 | assert_matches!(codec.to_unicode("xn--tda.com", &mut out), Ok(())); 61 | assert_eq!(out, "ü.com"); 62 | 63 | out.clear(); 64 | assert_matches!(codec.to_unicode("xn--u-ccb.com", &mut out), Err(_)); 65 | 66 | out.clear(); 67 | assert_matches!(codec.to_unicode("a⒈com", &mut out), Err(_)); 68 | 69 | out.clear(); 70 | assert_matches!(codec.to_unicode("xn--a-ecp.ru", &mut out), Err(_)); 71 | 72 | out.clear(); 73 | assert_matches!(codec.to_unicode("xn--0.pt", &mut out), Err(_)); 74 | 75 | out.clear(); 76 | assert_matches!(codec.to_unicode("日本語。JP", &mut out), Ok(())); 77 | assert_eq!(out, "日本語.jp"); 78 | 79 | out.clear(); 80 | assert_matches!(codec.to_unicode("☕.us", &mut out), Ok(())); 81 | assert_eq!(out, "☕.us"); 82 | } 83 | 84 | #[test] 85 | fn test_v5() { 86 | let config = idna::Config::default() 87 | .verify_dns_length(true) 88 | .use_std3_ascii_rules(true); 89 | 90 | // IdnaTest:784 蔏。𑰺 91 | assert!(is_combining_mark('\u{11C3A}')); 92 | assert!(config.to_ascii("\u{11C3A}").is_err()); 93 | assert!(config.to_ascii("\u{850f}.\u{11C3A}").is_err()); 94 | assert!(config.to_ascii("\u{850f}\u{ff61}\u{11C3A}").is_err()); 95 | } 96 | 97 | #[test] 98 | fn test_v8_bidi_rules() { 99 | let config = idna::Config::default() 100 | .verify_dns_length(true) 101 | .use_std3_ascii_rules(true); 102 | 103 | assert_eq!(config.to_ascii("abc").unwrap(), "abc"); 104 | assert_eq!(config.to_ascii("123").unwrap(), "123"); 105 | assert_eq!(config.to_ascii("אבּג").unwrap(), "xn--kdb3bdf"); 106 | assert_eq!(config.to_ascii("ابج").unwrap(), "xn--mgbcm"); 107 | assert_eq!(config.to_ascii("abc.ابج").unwrap(), "abc.xn--mgbcm"); 108 | assert_eq!(config.to_ascii("אבּג.ابج").unwrap(), "xn--kdb3bdf.xn--mgbcm"); 109 | 110 | // Bidi domain names cannot start with digits 111 | 
assert!(config.to_ascii("0a.\u{05D0}").is_err()); 112 | assert!(config.to_ascii("0à.\u{05D0}").is_err()); 113 | 114 | // Bidi chars may be punycode-encoded 115 | assert!(config.to_ascii("xn--0ca24w").is_err()); 116 | } 117 | 118 | #[test] 119 | fn emoji_domains() { 120 | // HOT BEVERAGE is allowed here... 121 | let config = idna::Config::default() 122 | .verify_dns_length(true) 123 | .use_std3_ascii_rules(true); 124 | assert_eq!(config.to_ascii("☕.com").unwrap(), "xn--53h.com"); 125 | 126 | // ... but not here 127 | let config = idna::Config::default() 128 | .verify_dns_length(true) 129 | .use_std3_ascii_rules(true) 130 | .use_idna_2008_rules(true); 131 | let error = format!("{:?}", config.to_ascii("☕.com").unwrap_err()); 132 | assert!(error.contains("disallowed_in_idna_2008")); 133 | } 134 | 135 | #[test] 136 | fn unicode_before_delimiter() { 137 | let config = idna::Config::default(); 138 | assert!(config.to_ascii("xn--f\u{34a}-PTP").is_err()); 139 | } 140 | -------------------------------------------------------------------------------- /data-url/tests/data-urls.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["data://test/,X", 3 | "text/plain;charset=US-ASCII", 4 | [88]], 5 | ["data://test:test/,X", 6 | null], 7 | ["data:,X", 8 | "text/plain;charset=US-ASCII", 9 | [88]], 10 | ["data:", 11 | null], 12 | ["data:text/html", 13 | null], 14 | ["data:text/html ;charset=x ", 15 | null], 16 | ["data:,", 17 | "text/plain;charset=US-ASCII", 18 | []], 19 | ["data:,X#X", 20 | "text/plain;charset=US-ASCII", 21 | [88]], 22 | ["data:,%FF", 23 | "text/plain;charset=US-ASCII", 24 | [255]], 25 | ["data:text/plain,X", 26 | "text/plain", 27 | [88]], 28 | ["data:text/plain ,X", 29 | "text/plain", 30 | [88]], 31 | ["data:text/plain%20,X", 32 | "text/plain%20", 33 | [88]], 34 | ["data:text/plain\f,X", 35 | "text/plain%0c", 36 | [88]], 37 | ["data:text/plain%0C,X", 38 | "text/plain%0c", 39 | [88]], 40 | ["data:text/plain;,X", 41 | "text/plain", 42 | [88]], 43 | ["data:;x=x;charset=x,X", 44 | "text/plain;x=x;charset=x", 45 | [88]], 46 | ["data:;x=x,X", 47 | "text/plain;x=x", 48 | [88]], 49 | ["data:text/plain;charset=windows-1252,%C2%B1", 50 | "text/plain;charset=windows-1252", 51 | [194, 177]], 52 | ["data:text/plain;Charset=UTF-8,%C2%B1", 53 | "text/plain;charset=UTF-8", 54 | [194, 177]], 55 | ["data:text/plain;charset=windows-1252,áñçə💩", 56 | "text/plain;charset=windows-1252", 57 | [195, 161, 195, 177, 195, 167, 201, 153, 240, 159, 146, 169]], 58 | ["data:text/plain;charset=UTF-8,áñçə💩", 59 | "text/plain;charset=UTF-8", 60 | [195, 161, 195, 177, 195, 167, 201, 153, 240, 159, 146, 169]], 61 | ["data:image/gif,%C2%B1", 62 | "image/gif", 63 | [194, 177]], 64 | ["data:IMAGE/gif,%C2%B1", 65 | "image/gif", 66 | [194, 177]], 67 | ["data:IMAGE/gif;hi=x,%C2%B1", 68 | "image/gif;hi=x", 69 | [194, 177]], 70 | ["data:IMAGE/gif;CHARSET=x,%C2%B1", 71 | "image/gif;charset=x", 72 | [194, 177]], 73 | ["data: ,%FF", 74 | "text/plain;charset=US-ASCII", 75 | [255]], 76 | ["data:%20,%FF", 77 | "text/plain;charset=US-ASCII", 78 | [255]], 79 | ["data:\f,%FF", 80 | "text/plain;charset=US-ASCII", 81 | [255]], 82 | ["data:%1F,%FF", 83 | "text/plain;charset=US-ASCII", 84 | [255]], 85 | ["data:\u0000,%FF", 86 | "text/plain;charset=US-ASCII", 87 | [255]], 88 | ["data:%00,%FF", 89 | "text/plain;charset=US-ASCII", 90 | [255]], 91 | ["data:text/html ,X", 92 | "text/html", 93 | [88]], 94 | ["data:text / html,X", 95 | "text/plain;charset=US-ASCII", 96 | [88]], 97 | ["data:†,X", 98 | 
"text/plain;charset=US-ASCII", 99 | [88]], 100 | ["data:†/†,X", 101 | "%e2%80%a0/%e2%80%a0", 102 | [88]], 103 | ["data:X,X", 104 | "text/plain;charset=US-ASCII", 105 | [88]], 106 | ["data:image/png,X X", 107 | "image/png", 108 | [88, 32, 88]], 109 | ["data:application/javascript,X X", 110 | "application/javascript", 111 | [88, 32, 88]], 112 | ["data:application/xml,X X", 113 | "application/xml", 114 | [88, 32, 88]], 115 | ["data:text/javascript,X X", 116 | "text/javascript", 117 | [88, 32, 88]], 118 | ["data:text/plain,X X", 119 | "text/plain", 120 | [88, 32, 88]], 121 | ["data:unknown/unknown,X X", 122 | "unknown/unknown", 123 | [88, 32, 88]], 124 | ["data:text/plain;a=\",\",X", 125 | "text/plain;a=\"\"", 126 | [34, 44, 88]], 127 | ["data:text/plain;a=%2C,X", 128 | "text/plain;a=%2C", 129 | [88]], 130 | ["data:;base64;base64,WA", 131 | "text/plain", 132 | [88]], 133 | ["data:x/x;base64;base64,WA", 134 | "x/x", 135 | [88]], 136 | ["data:x/x;base64;charset=x,WA", 137 | "x/x;charset=x", 138 | [87, 65]], 139 | ["data:x/x;base64;charset=x;base64,WA", 140 | "x/x;charset=x", 141 | [88]], 142 | ["data:x/x;base64;base64x,WA", 143 | "x/x", 144 | [87, 65]], 145 | ["data:;base64,W%20A", 146 | "text/plain;charset=US-ASCII", 147 | [88]], 148 | ["data:;base64,W%0CA", 149 | "text/plain;charset=US-ASCII", 150 | [88]], 151 | ["data:x;base64x,WA", 152 | "text/plain;charset=US-ASCII", 153 | [87, 65]], 154 | ["data:x;base64;x,WA", 155 | "text/plain;charset=US-ASCII", 156 | [87, 65]], 157 | ["data:x;base64=x,WA", 158 | "text/plain;charset=US-ASCII", 159 | [87, 65]], 160 | ["data:; base64,WA", 161 | "text/plain;charset=US-ASCII", 162 | [88]], 163 | ["data:; base64,WA", 164 | "text/plain;charset=US-ASCII", 165 | [88]], 166 | ["data: ;charset=x ; base64,WA", 167 | "text/plain;charset=x", 168 | [88]], 169 | ["data:;base64;,WA", 170 | "text/plain", 171 | [87, 65]], 172 | ["data:;base64 ,WA", 173 | "text/plain;charset=US-ASCII", 174 | [88]], 175 | ["data:;base64 ,WA", 176 | "text/plain;charset=US-ASCII", 177 | [88]], 178 | ["data:;base 64,WA", 179 | "text/plain", 180 | [87, 65]], 181 | ["data:;BASe64,WA", 182 | "text/plain;charset=US-ASCII", 183 | [88]], 184 | ["data:;%62ase64,WA", 185 | "text/plain", 186 | [87, 65]], 187 | ["data:%3Bbase64,WA", 188 | "text/plain;charset=US-ASCII", 189 | [87, 65]], 190 | ["data:;charset=x,X", 191 | "text/plain;charset=x", 192 | [88]], 193 | ["data:; charset=x,X", 194 | "text/plain;charset=x", 195 | [88]], 196 | ["data:;charset =x,X", 197 | "text/plain", 198 | [88]], 199 | ["data:;charset= x,X", 200 | "text/plain;charset=\" x\"", 201 | [88]], 202 | ["data:;charset=,X", 203 | "text/plain", 204 | [88]], 205 | ["data:;charset,X", 206 | "text/plain", 207 | [88]], 208 | ["data:;charset=\"x\",X", 209 | "text/plain;charset=x", 210 | [88]], 211 | ["data:;CHARSET=\"X\",X", 212 | "text/plain;charset=X", 213 | [88]] 214 | ] 215 | -------------------------------------------------------------------------------- /debug_metadata/README.md: -------------------------------------------------------------------------------- 1 | ## Debugger Visualizers 2 | 3 | Many languages and debuggers enable developers to control how a type is 4 | displayed in a debugger. These are called "debugger visualizations" or "debugger 5 | views". 6 | 7 | The Windows debuggers (WinDbg\CDB) support defining custom debugger visualizations using 8 | the `Natvis` framework. To use Natvis, developers write XML documents using the natvis 9 | schema that describe how debugger types should be displayed with the `.natvis` extension. 
(See: https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects?view=vs-2019) 11 | The Natvis files provide patterns, which match type names, and a description of how to display 12 | those types. 13 | 14 | The Natvis schema can be found either online (See: https://code.visualstudio.com/docs/cpp/natvis#_schema) 15 | or locally at `<VS Installation Folder>\Xml\Schemas\1033\natvis.xsd`. 16 | 17 | The GNU debugger (GDB) supports defining custom debugger views using Pretty Printers. 18 | Pretty printers are written as Python scripts that describe how a type should be displayed 19 | when loaded up in GDB/LLDB. (See: https://sourceware.org/gdb/onlinedocs/gdb/Pretty-Printing.html#Pretty-Printing) 20 | The pretty printers provide patterns, which match type names, and for matching 21 | types, describe how to display those types. (For writing a pretty printer, see: https://sourceware.org/gdb/onlinedocs/gdb/Writing-a-Pretty_002dPrinter.html#Writing-a-Pretty_002dPrinter). 22 | 23 | ### Embedding Visualizers 24 | 25 | Through the use of the currently unstable `#[debugger_visualizer]` attribute, the `url` 26 | crate can embed debugger visualizers into the crate metadata. 27 | 28 | Currently the two types of visualizers supported are Natvis and Pretty printers. 29 | 30 | For Natvis files, when linking an executable with a crate that includes Natvis files, 31 | the MSVC linker will embed the contents of all Natvis files into the generated `PDB`. 32 | 33 | For pretty printers, the compiler will encode the contents of the pretty printer 34 | in the `.debug_gdb_scripts` section of the generated `ELF`. 35 | 36 | ### Testing Visualizers 37 | 38 | The `url` crate supports testing debugger visualizers defined for this crate. The entry point for 39 | these tests is `tests/debugger_visualizer.rs`. These tests are defined using the `debugger_test` and 40 | `debugger_test_parser` crates. The `debugger_test` crate is a proc macro crate which defines a 41 | single proc macro attribute, `#[debugger_test]`. For more detailed information about this crate, 42 | see https://crates.io/crates/debugger_test. The CI pipeline for the `url` crate has been updated 43 | to run the debugger visualizer tests to ensure debugger visualizers do not become broken/stale. 44 | 45 | The `#[debugger_test]` proc macro attribute may only be used on test functions and will run the 46 | function under the debugger specified by the `debugger` meta item. 47 | 48 | This proc macro attribute has 3 required values: 49 | 50 | 1. The first required meta item, `debugger`, takes a string value which specifies the debugger to launch. 51 | 2. The second required meta item, `commands`, takes a string containing a new line (`\n`) separated list of debugger 52 | commands to run. 53 | 3. The third required meta item, `expected_statements`, takes a string containing a new line (`\n`) separated list of 54 | statements that must exist in the debugger output. Pattern matching through regular expressions is also 55 | supported by using the `pattern:` prefix for each expected statement.
56 | 57 | #### Example: 58 | 59 | ```rust 60 | #[debugger_test( 61 | debugger = "cdb", 62 | commands = "command1\ncommand2\ncommand3", 63 | expected_statements = "statement1\nstatement2\nstatement3")] 64 | fn test() { 65 | 66 | } 67 | ``` 68 | 69 | Using a multiline string is also supported, with a single debugger command/expected statement per line: 70 | 71 | ```rust 72 | #[debugger_test( 73 | debugger = "cdb", 74 | commands = " 75 | command1 76 | command2 77 | command3", 78 | expected_statements = " 79 | statement1 80 | pattern:statement[0-9]+ 81 | statement3")] 82 | fn test() { 83 | 84 | } 85 | ``` 86 | 87 | In the example above, the second expected statement uses pattern matching through a regular expression 88 | by using the `pattern:` prefix. 89 | 90 | #### Testing Locally 91 | 92 | Currently, only Natvis visualizations have been defined for the `url` crate via `debug_metadata/url.natvis`, 93 | which means the `tests/debugger_visualizer.rs` tests need to be run on Windows using the `*-pc-windows-msvc` targets. 94 | To run these tests locally, first ensure the debugging tools for Windows are installed, or install them by following 95 | the steps listed at [Debugging Tools for Windows](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/). 96 | Once the debugging tools have been installed, the tests can be run in the same manner as they are in the CI 97 | pipeline. 98 | 99 | #### Note 100 | 101 | The debugger visualizer tests in `tests/debugger_visualizer.rs` need to be run consecutively, 102 | not in parallel. This can be achieved by passing the flag `--test-threads=1` to the test binary. This is due to 103 | how the debugger tests are run. Each test marked with the `#[debugger_test]` attribute launches a debugger 104 | and attaches it to the current test process. If tests run in parallel, a test may try to attach 105 | a debugger to a process which already has a debugger attached, causing the test to fail.
106 | 107 | For example: 108 | 109 | ``` 110 | cargo test --test debugger_visualizer --features debugger_visualizer -- --test-threads=1 111 | ``` 112 | -------------------------------------------------------------------------------- /data-url/tests/wpt.rs: -------------------------------------------------------------------------------- 1 | use tester as test; 2 | 3 | #[macro_use] 4 | extern crate serde; 5 | 6 | fn run_data_url( 7 | input: String, 8 | expected_mime: Option<String>, 9 | expected_body: Option<Vec<u8>>, 10 | expected_panic: bool, 11 | ) { 12 | let priorhook = std::panic::take_hook(); 13 | std::panic::set_hook(Box::new(move |p| { 14 | if !expected_panic { 15 | priorhook(p); 16 | } 17 | })); 18 | let url = data_url::DataUrl::process(&input); 19 | if let Some(expected_mime) = expected_mime { 20 | let url = url.unwrap(); 21 | let (body, _) = url.decode_to_vec().unwrap(); 22 | if expected_mime.is_empty() { 23 | assert_eq!(url.mime_type().to_string(), "text/plain;charset=US-ASCII") 24 | } else { 25 | assert_eq!(url.mime_type().to_string(), expected_mime) 26 | } 27 | if let Some(expected_body) = expected_body { 28 | assert_eq!(body, expected_body) 29 | } 30 | } else if let Ok(url) = url { 31 | assert!(url.decode_to_vec().is_err(), "{:?}", url.mime_type()) 32 | } 33 | } 34 | 35 | fn collect_data_url<F>(add_test: &mut F) 36 | where 37 | F: FnMut(String, bool, test::TestFn), 38 | { 39 | let known_failures = ["data://test:test/,X"]; 40 | 41 | #[derive(Deserialize)] 42 | #[serde(untagged)] 43 | enum TestCase { 44 | Two(String, Option<String>), 45 | Three(String, Option<String>, Vec<u8>), 46 | } 47 | 48 | let v: Vec<TestCase> = serde_json::from_str(include_str!("data-urls.json")).unwrap(); 49 | for test in v { 50 | let (input, expected_mime, expected_body) = match test { 51 | TestCase::Two(i, m) => (i, m, None), 52 | TestCase::Three(i, m, b) => (i, m, Some(b)), 53 | }; 54 | let should_panic = known_failures.contains(&&*input); 55 | add_test( 56 | format!("data: URL {:?}", input), 57 | should_panic, 58 | test::TestFn::DynTestFn(Box::new(move || { 59 | run_data_url(input, expected_mime, expected_body, should_panic) 60 | })), 61 | ); 62 | } 63 | } 64 | 65 | fn run_base64(input: String, expected: Option<Vec<u8>>) { 66 | let result = data_url::forgiving_base64::decode_to_vec(input.as_bytes()); 67 | match (result, expected) { 68 | (Ok(bytes), Some(expected)) => assert_eq!(bytes, expected), 69 | (Ok(bytes), None) => panic!("Expected error, got {:?}", bytes), 70 | (Err(e), Some(expected)) => panic!("Expected {:?}, got error {:?}", expected, e), 71 | (Err(_), None) => {} 72 | } 73 | } 74 | 75 | fn collect_base64<F>(add_test: &mut F) 76 | where 77 | F: FnMut(String, bool, test::TestFn), 78 | { 79 | let known_failures = []; 80 | 81 | let v: Vec<(String, Option<Vec<u8>>)> = 82 | serde_json::from_str(include_str!("base64.json")).unwrap(); 83 | for (input, expected) in v { 84 | let should_panic = known_failures.contains(&&*input); 85 | add_test( 86 | format!("base64 {:?}", input), 87 | should_panic, 88 | test::TestFn::DynTestFn(Box::new(move || run_base64(input, expected))), 89 | ); 90 | } 91 | } 92 | 93 | fn run_mime(input: String, expected: Option<String>) { 94 | let result = input.parse::<data_url::mime::Mime>(); 95 | match (result, expected) { 96 | (Ok(mime), Some(expected)) => assert_eq!(mime.to_string(), expected), 97 | (Ok(mime), None) => panic!("Expected error, got {:?}", mime), 98 | (Err(e), Some(expected)) => panic!("Expected {:?}, got error {:?}", expected, e), 99 | (Err(_), None) => {} 100 | } 101 | } 102 | 103 | fn collect_mime<F>(add_test: &mut F) 104 | where 105 | F: FnMut(String, bool,
test::TestFn), 106 | { 107 | let known_failures = []; 108 | 109 | #[derive(Deserialize)] 110 | #[serde(untagged)] 111 | enum Entry { 112 | Comment(String), 113 | TestCase { 114 | input: String, 115 | output: Option<String>, 116 | }, 117 | } 118 | 119 | let v: Vec<Entry> = serde_json::from_str(include_str!("mime-types.json")).unwrap(); 120 | let v2: Vec<Entry> = serde_json::from_str(include_str!("generated-mime-types.json")).unwrap(); 121 | let entries = v.into_iter().chain(v2); 122 | 123 | let mut last_comment = None; 124 | for entry in entries { 125 | let (input, expected) = match entry { 126 | Entry::TestCase { input, output } => (input, output), 127 | Entry::Comment(s) => { 128 | last_comment = Some(s); 129 | continue; 130 | } 131 | }; 132 | 133 | let should_panic = known_failures.contains(&&*input); 134 | add_test( 135 | if let Some(ref s) = last_comment { 136 | format!("MIME type {:?} {:?}", s, input) 137 | } else { 138 | format!("MIME type {:?}", input) 139 | }, 140 | should_panic, 141 | test::TestFn::DynTestFn(Box::new(move || run_mime(input, expected))), 142 | ); 143 | } 144 | } 145 | 146 | fn main() { 147 | let mut tests = Vec::new(); 148 | { 149 | let mut add_one = |name: String, should_panic: bool, run: test::TestFn| { 150 | let desc = test::TestDesc { 151 | name: test::DynTestName(name), 152 | ignore: false, 153 | should_panic: match should_panic { 154 | true => test::ShouldPanic::Yes, 155 | false => test::ShouldPanic::No, 156 | }, 157 | allow_fail: false, 158 | test_type: test::TestType::Unknown, 159 | }; 160 | tests.push(test::TestDescAndFn { desc, testfn: run }) 161 | }; 162 | collect_data_url(&mut add_one); 163 | collect_base64(&mut add_one); 164 | collect_mime(&mut add_one); 165 | } 166 | test::test_main(&std::env::args().collect::<Vec<_>>(), tests, None) 167 | } 168 | -------------------------------------------------------------------------------- /idna/src/make_uts46_mapping_table.py: -------------------------------------------------------------------------------- 1 | # Copyright 2013-2014 The rust-url developers. 2 | # 3 | # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 | # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 | # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 | # option. This file may not be copied, modified, or distributed 7 | # except according to those terms. 8 | 9 | # Run as: python make_uts46_mapping_table.py IdnaMappingTable.txt > uts46_mapping_table.rs 10 | # You can get the latest idna table from 11 | # http://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt 12 | 13 | import collections 14 | import itertools 15 | 16 | print('''\ 17 | // Copyright 2013-2020 The rust-url developers. 18 | // 19 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 20 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 21 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 22 | // option. This file may not be copied, modified, or distributed 23 | // except according to those terms.
24 | 25 | // Generated by make_idna_table.py 26 | ''') 27 | 28 | txt = open("IdnaMappingTable.txt") 29 | 30 | def escape_char(c): 31 | return "\\u{%x}" % ord(c[0]) 32 | 33 | def char(s): 34 | return chr(int(s, 16)) 35 | 36 | strtab = collections.OrderedDict() 37 | strtab_offset = 0 38 | 39 | def strtab_slice(s): 40 | global strtab, strtab_offset 41 | 42 | if s in strtab: 43 | return strtab[s] 44 | else: 45 | utf8_len = len(s.encode('utf8')) 46 | c = (strtab_offset, utf8_len) 47 | strtab[s] = c 48 | strtab_offset += utf8_len 49 | return c 50 | 51 | def rust_slice(s): 52 | start = s[0] 53 | length = s[1] 54 | start_lo = start & 0xff 55 | start_hi = start >> 8 56 | assert length <= 255 57 | assert start_hi <= 255 58 | return "(StringTableSlice { byte_start_lo: %d, byte_start_hi: %d, byte_len: %d })" % (start_lo, start_hi, length) 59 | 60 | ranges = [] 61 | 62 | for line in txt: 63 | # remove comments 64 | line, _, _ = line.partition('#') 65 | # skip empty lines 66 | if len(line.strip()) == 0: 67 | continue 68 | fields = line.split(';') 69 | if fields[0].strip() == 'D800..DFFF': 70 | continue # Surrogates don't occur in Rust strings. 71 | first, _, last = fields[0].strip().partition('..') 72 | if not last: 73 | last = first 74 | mapping = fields[1].strip().replace('_', ' ').title().replace(' ', '') 75 | unicode_str = None 76 | if len(fields) > 2: 77 | if fields[2].strip(): 78 | unicode_str = u''.join(char(c) for c in fields[2].strip().split(' ')) 79 | elif mapping == "Deviation": 80 | unicode_str = u'' 81 | 82 | if len(fields) > 3: 83 | assert fields[3].strip() in ('NV8', 'XV8'), fields[3] 84 | assert mapping == 'Valid', mapping 85 | mapping = 'DisallowedIdna2008' 86 | 87 | ranges.append((first, last, mapping, unicode_str)) 88 | 89 | def mergeable_key(r): 90 | mapping = r[2] 91 | 92 | # These types have associated data, so we should not merge them. 93 | if mapping in ('Mapped', 'Deviation', 'DisallowedStd3Mapped'): 94 | return r 95 | assert mapping in ('Valid', 'Ignored', 'Disallowed', 'DisallowedStd3Valid', 'DisallowedIdna2008') 96 | return mapping 97 | 98 | grouped_ranges = itertools.groupby(ranges, key=mergeable_key) 99 | 100 | optimized_ranges = [] 101 | 102 | for (k, g) in grouped_ranges: 103 | group = list(g) 104 | if len(group) == 1: 105 | optimized_ranges.append(group[0]) 106 | continue 107 | # Assert that nothing in the group has an associated unicode string. 108 | for g in group: 109 | if g[3] is not None and len(g[3]) > 2: 110 | assert not g[3][2].strip() 111 | # Assert that consecutive members of the group don't leave gaps in 112 | # the codepoint space. 113 | a, b = itertools.tee(group) 114 | next(b, None) 115 | for (g1, g2) in zip(a, b): 116 | last_char = int(g1[1], 16) 117 | next_char = int(g2[0], 16) 118 | if last_char + 1 == next_char: 119 | continue 120 | # There's a gap where surrogates would appear, but we don't have to 121 | # worry about that gap, as surrogates never appear in Rust strings. 122 | # Assert we're seeing the surrogate case here. 
123 | assert last_char == 0xd7ff 124 | assert next_char == 0xe000 125 | optimized_ranges.append((group[0][0], group[-1][1]) + group[0][2:]) 126 | 127 | def is_single_char_range(r): 128 | (first, last, _, _) = r 129 | return first == last 130 | 131 | # We can reduce the size of the character range table and the index table to about 1/4 132 | # by merging runs of single character ranges and using character offsets from the start 133 | # of that range to retrieve the correct `Mapping` value 134 | def merge_single_char_ranges(ranges): 135 | current = [] 136 | for r in ranges: 137 | if not current or is_single_char_range(current[-1]) and is_single_char_range(r): 138 | current.append(r) 139 | continue 140 | if len(current) != 0: 141 | ret = current 142 | current = [r] 143 | yield ret 144 | continue 145 | current.append(r) 146 | ret = current 147 | current = [] 148 | yield ret 149 | yield current 150 | 151 | optimized_ranges = list(merge_single_char_ranges(optimized_ranges)) 152 | 153 | SINGLE_MARKER = 1 << 15 154 | 155 | print("static TABLE: &[(char, u16)] = &[") 156 | 157 | offset = 0 158 | for ranges in optimized_ranges: 159 | assert offset < SINGLE_MARKER 160 | 161 | block_len = len(ranges) 162 | single = SINGLE_MARKER if block_len == 1 else 0 163 | index = offset | single 164 | offset += block_len 165 | 166 | start = escape_char(char(ranges[0][0])) 167 | print(" ('%s', %s)," % (start, index)) 168 | 169 | print("];\n") 170 | 171 | print("static MAPPING_TABLE: &[Mapping] = &[") 172 | 173 | for ranges in optimized_ranges: 174 | for (first, last, mapping, unicode_str) in ranges: 175 | if unicode_str is not None: 176 | mapping += rust_slice(strtab_slice(unicode_str)) 177 | print(" %s," % mapping) 178 | 179 | print("];\n") 180 | 181 | def escape_str(s): 182 | return [escape_char(c) for c in s] 183 | 184 | print("static STRING_TABLE: &str = \"%s\";" 185 | % '\\\n '.join(itertools.chain(*[escape_str(s) for s in strtab.keys()]))) 186 | -------------------------------------------------------------------------------- /data-url/src/forgiving_base64.rs: -------------------------------------------------------------------------------- 1 | //! 
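//!
//! A minimal usage sketch (an illustrative example; the module's public path,
//! `data_url::forgiving_base64`, is the one used by this crate's own WPT test
//! harness). ASCII whitespace in the input is skipped and trailing `=`
//! padding is accepted, per the forgiving decode algorithm implemented below:
//!
//! ```rust
//! use data_url::forgiving_base64::decode_to_vec;
//!
//! // Whitespace is ignored, so "aG VsbG8=" decodes the same as "aGVsbG8=".
//! let bytes = decode_to_vec(b"aG VsbG8=").unwrap();
//! assert_eq!(bytes, b"hello".to_vec());
//! ```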
2 | 3 | #[derive(Debug)] 4 | pub struct InvalidBase64(InvalidBase64Details); 5 | 6 | #[derive(Debug)] 7 | enum InvalidBase64Details { 8 | UnexpectedSymbol(u8), 9 | AlphabetSymbolAfterPadding, 10 | LoneAlphabetSymbol, 11 | Padding, 12 | } 13 | 14 | #[derive(Debug)] 15 | pub enum DecodeError<E> { 16 | InvalidBase64(InvalidBase64), 17 | WriteError(E), 18 | } 19 | 20 | impl<E> From<InvalidBase64Details> for DecodeError<E> { 21 | fn from(e: InvalidBase64Details) -> Self { 22 | DecodeError::InvalidBase64(InvalidBase64(e)) 23 | } 24 | } 25 | 26 | pub(crate) enum Impossible {} 27 | 28 | impl From<DecodeError<Impossible>> for InvalidBase64 { 29 | fn from(e: DecodeError<Impossible>) -> Self { 30 | match e { 31 | DecodeError::InvalidBase64(e) => e, 32 | DecodeError::WriteError(e) => match e {}, 33 | } 34 | } 35 | } 36 | 37 | /// `input` is assumed to be in an ASCII-compatible encoding 38 | pub fn decode_to_vec(input: &[u8]) -> Result<Vec<u8>, InvalidBase64> { 39 | let mut v = Vec::new(); 40 | { 41 | let mut decoder = Decoder::new(|bytes| { 42 | v.extend_from_slice(bytes); 43 | Ok(()) 44 | }); 45 | decoder.feed(input)?; 46 | decoder.finish()?; 47 | } 48 | Ok(v) 49 | } 50 | 51 | /// 52 | pub struct Decoder<F, E> 53 | where 54 | F: FnMut(&[u8]) -> Result<(), E>, 55 | { 56 | write_bytes: F, 57 | bit_buffer: u32, 58 | buffer_bit_length: u8, 59 | padding_symbols: u8, 60 | } 61 | 62 | impl<F, E> Decoder<F, E> 63 | where 64 | F: FnMut(&[u8]) -> Result<(), E>, 65 | { 66 | pub fn new(write_bytes: F) -> Self { 67 | Self { 68 | write_bytes, 69 | bit_buffer: 0, 70 | buffer_bit_length: 0, 71 | padding_symbols: 0, 72 | } 73 | } 74 | 75 | /// Feed to the decoder partial input in an ASCII-compatible encoding 76 | pub fn feed(&mut self, input: &[u8]) -> Result<(), DecodeError<E>> { 77 | for &byte in input.iter() { 78 | let value = BASE64_DECODE_TABLE[byte as usize]; 79 | if value < 0 { 80 | // A character that’s not part of the alphabet 81 | 82 | // Remove ASCII whitespace 83 | if matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'\x0C') { 84 | continue; 85 | } 86 | 87 | if byte == b'=' { 88 | self.padding_symbols = self.padding_symbols.saturating_add(1); 89 | continue; 90 | } 91 | 92 | return Err(InvalidBase64Details::UnexpectedSymbol(byte).into()); 93 | } 94 | if self.padding_symbols > 0 { 95 | return Err(InvalidBase64Details::AlphabetSymbolAfterPadding.into()); 96 | } 97 | self.bit_buffer <<= 6; 98 | self.bit_buffer |= value as u32; 99 | // 18 before incrementing means we’ve just reached 24 100 | if self.buffer_bit_length < 18 { 101 | self.buffer_bit_length += 6; 102 | } else { 103 | // We’ve accumulated four times 6 bits, which equals three times 8 bits. 104 | let byte_buffer = [ 105 | (self.bit_buffer >> 16) as u8, 106 | (self.bit_buffer >> 8) as u8, 107 | self.bit_buffer as u8, 108 | ]; 109 | (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?; 110 | self.buffer_bit_length = 0; 111 | // No need to reset bit_buffer, 112 | // since next time we’re only gonna read relevant bits. 113 | } 114 | } 115 | Ok(()) 116 | } 117 | 118 | /// Call this to signal the end of the input 119 | pub fn finish(mut self) -> Result<(), DecodeError<E>> { 120 | match (self.buffer_bit_length, self.padding_symbols) { 121 | (0, 0) => { 122 | // A multiple of four of alphabet symbols, and nothing else. 123 | } 124 | (12, 2) | (12, 0) => { 125 | // A multiple of four of alphabet symbols, followed by two more symbols, 126 | // optionally followed by two padding characters (which make a total multiple of four).
127 | let byte_buffer = [(self.bit_buffer >> 4) as u8]; 128 | (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?; 129 | } 130 | (18, 1) | (18, 0) => { 131 | // A multiple of four of alphabet symbols, followed by three more symbols, 132 | // optionally followed by one padding character (which make a total multiple of four). 133 | let byte_buffer = [(self.bit_buffer >> 10) as u8, (self.bit_buffer >> 2) as u8]; 134 | (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?; 135 | } 136 | (6, _) => return Err(InvalidBase64Details::LoneAlphabetSymbol.into()), 137 | _ => return Err(InvalidBase64Details::Padding.into()), 138 | } 139 | Ok(()) 140 | } 141 | } 142 | 143 | /// Generated by `make_base64_decode_table.py` based on "Table 1: The Base 64 Alphabet" 144 | /// at 145 | /// 146 | /// Array indices are the byte value of symbols. 147 | /// Array values are their positions in the base64 alphabet, 148 | /// or -1 for symbols not in the alphabet. 149 | /// The position contributes 6 bits to the decoded bytes. 150 | #[rustfmt::skip] 151 | const BASE64_DECODE_TABLE: [i8; 256] = [ 152 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 153 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 154 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 155 | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, 156 | -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 157 | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, 158 | -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 159 | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, 160 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 161 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 162 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 163 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 164 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 165 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 166 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 167 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 168 | ]; 169 | -------------------------------------------------------------------------------- /url/src/slicing.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | use crate::Url; 10 | use std::ops::{Index, Range, RangeFrom, RangeFull, RangeTo}; 11 | 12 | impl Index<RangeFull> for Url { 13 | type Output = str; 14 | fn index(&self, _: RangeFull) -> &str { 15 | &self.serialization 16 | } 17 | } 18 | 19 | impl Index<RangeFrom<Position>> for Url { 20 | type Output = str; 21 | fn index(&self, range: RangeFrom<Position>) -> &str { 22 | &self.serialization[self.index(range.start)..] 23 | } 24 | } 25 | 26 | impl Index<RangeTo<Position>> for Url { 27 | type Output = str; 28 | fn index(&self, range: RangeTo<Position>) -> &str { 29 | &self.serialization[..self.index(range.end)] 30 | } 31 | } 32 | 33 | impl Index<Range<Position>> for Url { 34 | type Output = str; 35 | fn index(&self, range: Range<Position>) -> &str { 36 | &self.serialization[self.index(range.start)..self.index(range.end)] 37 | } 38 | } 39 | 40 | /// Indicates a position within a URL based on its components.
41 | /// 42 | /// A range of positions can be used for slicing `Url`: 43 | /// 44 | /// ```rust 45 | /// # use url::{Url, Position}; 46 | /// # fn something(some_url: Url) { 47 | /// let serialization: &str = &some_url[..]; 48 | /// let serialization_without_fragment: &str = &some_url[..Position::AfterQuery]; 49 | /// let authority: &str = &some_url[Position::BeforeUsername..Position::AfterPort]; 50 | /// let data_url_payload: &str = &some_url[Position::BeforePath..Position::AfterQuery]; 51 | /// let scheme_relative: &str = &some_url[Position::BeforeUsername..]; 52 | /// # } 53 | /// ``` 54 | /// 55 | /// In a pseudo-grammar (where `[`…`]?` makes a sub-sequence optional), 56 | /// URL components and delimiters that separate them are: 57 | /// 58 | /// ```notrust 59 | /// url = 60 | /// scheme ":" 61 | /// [ "//" [ username [ ":" password ]? "@" ]? host [ ":" port ]? ]? 62 | /// path [ "?" query ]? [ "#" fragment ]? 63 | /// ``` 64 | /// 65 | /// When a given component is not present, 66 | /// its "before" and "after" positions are the same 67 | /// (so that `&some_url[BeforeFoo..AfterFoo]` is the empty string) 68 | /// and component ordering is preserved 69 | /// (so that a missing query "is between" a path and a fragment). 70 | /// 71 | /// The end of a component and the start of the next are either the same or separated 72 | /// by a delimiter. 73 | /// (Note that the initial `/` of a path is considered part of the path here, not a delimiter.) 74 | /// For example, `&url[..BeforeFragment]` would include a `#` delimiter (if present in `url`), 75 | /// so `&url[..AfterQuery]` might be desired instead. 76 | /// 77 | /// `BeforeScheme` and `AfterFragment` are always the start and end of the entire URL, 78 | /// so `&url[BeforeScheme..X]` is the same as `&url[..X]` 79 | /// and `&url[X..AfterFragment]` is the same as `&url[X..]`.
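///
/// A few more concrete examples (the URL below is illustrative; each
/// assertion follows from the component positions described above):
///
/// ```rust
/// use url::{Url, Position};
///
/// let url = Url::parse("https://user:pw@example.com:8080/a/b?q=1#frag").unwrap();
/// assert_eq!(&url[Position::BeforeHost..Position::AfterHost], "example.com");
/// assert_eq!(&url[Position::BeforePath..Position::AfterQuery], "/a/b?q=1");
/// assert_eq!(&url[..Position::AfterPort], "https://user:pw@example.com:8080");
/// ```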
80 | #[derive(Copy, Clone, Debug)] 81 | pub enum Position { 82 | BeforeScheme, 83 | AfterScheme, 84 | BeforeUsername, 85 | AfterUsername, 86 | BeforePassword, 87 | AfterPassword, 88 | BeforeHost, 89 | AfterHost, 90 | BeforePort, 91 | AfterPort, 92 | BeforePath, 93 | AfterPath, 94 | BeforeQuery, 95 | AfterQuery, 96 | BeforeFragment, 97 | AfterFragment, 98 | } 99 | 100 | impl Url { 101 | #[inline] 102 | fn index(&self, position: Position) -> usize { 103 | match position { 104 | Position::BeforeScheme => 0, 105 | 106 | Position::AfterScheme => self.scheme_end as usize, 107 | 108 | Position::BeforeUsername => { 109 | if self.has_authority() { 110 | self.scheme_end as usize + "://".len() 111 | } else { 112 | debug_assert!(self.byte_at(self.scheme_end) == b':'); 113 | debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end); 114 | self.scheme_end as usize + ":".len() 115 | } 116 | } 117 | 118 | Position::AfterUsername => self.username_end as usize, 119 | 120 | Position::BeforePassword => { 121 | if self.has_authority() && self.byte_at(self.username_end) == b':' { 122 | self.username_end as usize + ":".len() 123 | } else { 124 | debug_assert!(self.username_end == self.host_start); 125 | self.username_end as usize 126 | } 127 | } 128 | 129 | Position::AfterPassword => { 130 | if self.has_authority() && self.byte_at(self.username_end) == b':' { 131 | debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@'); 132 | self.host_start as usize - "@".len() 133 | } else { 134 | debug_assert!(self.username_end == self.host_start); 135 | self.host_start as usize 136 | } 137 | } 138 | 139 | Position::BeforeHost => self.host_start as usize, 140 | 141 | Position::AfterHost => self.host_end as usize, 142 | 143 | Position::BeforePort => { 144 | if self.port.is_some() { 145 | debug_assert!(self.byte_at(self.host_end) == b':'); 146 | self.host_end as usize + ":".len() 147 | } else { 148 | self.host_end as usize 149 | } 150 | } 151 | 152 | Position::AfterPort => self.path_start as usize, 153 | 154 | Position::BeforePath => self.path_start as usize, 155 | 156 | Position::AfterPath => match (self.query_start, self.fragment_start) { 157 | (Some(q), _) => q as usize, 158 | (None, Some(f)) => f as usize, 159 | (None, None) => self.serialization.len(), 160 | }, 161 | 162 | Position::BeforeQuery => match (self.query_start, self.fragment_start) { 163 | (Some(q), _) => { 164 | debug_assert!(self.byte_at(q) == b'?'); 165 | q as usize + "?".len() 166 | } 167 | (None, Some(f)) => f as usize, 168 | (None, None) => self.serialization.len(), 169 | }, 170 | 171 | Position::AfterQuery => match self.fragment_start { 172 | None => self.serialization.len(), 173 | Some(f) => f as usize, 174 | }, 175 | 176 | Position::BeforeFragment => match self.fragment_start { 177 | Some(f) => { 178 | debug_assert!(self.byte_at(f) == b'#'); 179 | f as usize + "#".len() 180 | } 181 | None => self.serialization.len(), 182 | }, 183 | 184 | Position::AfterFragment => self.serialization.len(), 185 | } 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /idna/tests/uts46.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2013-2014 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 
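// The IdnaTestV2.txt lines parsed below consist of semicolon-separated
// fields in this order:
//
//   source; toUnicode; toUnicodeStatus; toAsciiN; toAsciiNStatus; toAsciiT; toAsciiTStatus
//
// An empty field inherits its value from an earlier column, which is what the
// fall-back logic in `collect_tests` implements. An illustrative (not
// verbatim) data line:
//
//   faß.de; ; ; xn--fa-hia.de; ; fass.de;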
8 | 9 | use crate::test::TestFn; 10 | use std::char; 11 | use std::fmt::Write; 12 | 13 | use idna::Errors; 14 | 15 | pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) { 16 | // https://www.unicode.org/Public/idna/13.0.0/IdnaTestV2.txt 17 | for (i, line) in include_str!("IdnaTestV2.txt").lines().enumerate() { 18 | if line.is_empty() || line.starts_with('#') { 19 | continue; 20 | } 21 | 22 | // Remove comments 23 | let line = match line.find('#') { 24 | Some(index) => &line[0..index], 25 | None => line, 26 | }; 27 | 28 | let mut pieces = line.split(';').map(|x| x.trim()).collect::<Vec<&str>>(); 29 | let source = unescape(pieces.remove(0)); 30 | 31 | // ToUnicode 32 | let mut to_unicode = unescape(pieces.remove(0)); 33 | if to_unicode.is_empty() { 34 | to_unicode = source.clone(); 35 | } 36 | let to_unicode_status = status(pieces.remove(0)); 37 | 38 | // ToAsciiN 39 | let to_ascii_n = pieces.remove(0); 40 | let to_ascii_n = if to_ascii_n.is_empty() { 41 | to_unicode.clone() 42 | } else { 43 | to_ascii_n.to_owned() 44 | }; 45 | let to_ascii_n_status = pieces.remove(0); 46 | let to_ascii_n_status = if to_ascii_n_status.is_empty() { 47 | to_unicode_status.clone() 48 | } else { 49 | status(to_ascii_n_status) 50 | }; 51 | 52 | // ToAsciiT 53 | let to_ascii_t = pieces.remove(0); 54 | let to_ascii_t = if to_ascii_t.is_empty() { 55 | to_ascii_n.clone() 56 | } else { 57 | to_ascii_t.to_owned() 58 | }; 59 | let to_ascii_t_status = pieces.remove(0); 60 | let to_ascii_t_status = if to_ascii_t_status.is_empty() { 61 | to_ascii_n_status.clone() 62 | } else { 63 | status(to_ascii_t_status) 64 | }; 65 | 66 | let test_name = format!("UTS #46 line {}", i + 1); 67 | add_test( 68 | test_name, 69 | TestFn::DynTestFn(Box::new(move || { 70 | let config = idna::Config::default() 71 | .use_std3_ascii_rules(true) 72 | .verify_dns_length(true) 73 | .check_hyphens(true); 74 | 75 | // http://unicode.org/reports/tr46/#Deviations 76 | // applications that perform IDNA2008 lookup are not required to check 77 | // for these contexts, so we skip all tests annotated with C* 78 | 79 | // Everybody ignores V2 80 | // https://github.com/servo/rust-url/pull/240 81 | // https://github.com/whatwg/url/issues/53#issuecomment-181528158 82 | // http://www.unicode.org/review/pri317/ 83 | 84 | // "The special error codes X3 and X4_2 are now returned where a toASCII error code 85 | // was formerly being generated in toUnicode due to an empty label." 86 | // This is not implemented yet, so we skip toUnicode X4_2 tests for now, too.
87 | 88 | let (to_unicode_value, to_unicode_result) = 89 | config.transitional_processing(false).to_unicode(&source); 90 | let to_unicode_result = to_unicode_result.map(|()| to_unicode_value); 91 | check( 92 | &source, 93 | (&to_unicode, &to_unicode_status), 94 | to_unicode_result, 95 | |e| e.starts_with('C') || e == "V2" || e == "X4_2", 96 | ); 97 | 98 | let to_ascii_n_result = config.transitional_processing(false).to_ascii(&source); 99 | check( 100 | &source, 101 | (&to_ascii_n, &to_ascii_n_status), 102 | to_ascii_n_result, 103 | |e| e.starts_with('C') || e == "V2", 104 | ); 105 | 106 | let to_ascii_t_result = config.transitional_processing(true).to_ascii(&source); 107 | check( 108 | &source, 109 | (&to_ascii_t, &to_ascii_t_status), 110 | to_ascii_t_result, 111 | |e| e.starts_with('C') || e == "V2", 112 | ); 113 | })), 114 | ) 115 | } 116 | } 117 | 118 | #[allow(clippy::redundant_clone)] 119 | fn check<F>(source: &str, expected: (&str, &[&str]), actual: Result<String, Errors>, ignore: F) 120 | where 121 | F: Fn(&str) -> bool, 122 | { 123 | if !expected.1.is_empty() { 124 | if !expected.1.iter().copied().any(ignore) { 125 | let res = actual.ok(); 126 | assert_eq!( 127 | res.clone(), 128 | None, 129 | "Expected error {:?}. result: {} | source: {}", 130 | expected.1, 131 | res.unwrap(), 132 | source, 133 | ); 134 | } 135 | } else { 136 | assert!( 137 | actual.is_ok(), 138 | "Couldn't parse {} | error: {:?}", 139 | source, 140 | actual.err().unwrap(), 141 | ); 142 | assert_eq!(actual.unwrap(), expected.0, "source: {}", source); 143 | } 144 | } 145 | 146 | fn unescape(input: &str) -> String { 147 | let mut output = String::new(); 148 | let mut chars = input.chars(); 149 | loop { 150 | match chars.next() { 151 | None => return output, 152 | Some(c) => { 153 | if c == '\\' { 154 | match chars.next().unwrap() { 155 | '\\' => output.push('\\'), 156 | 'u' => { 157 | let c1 = chars.next().unwrap().to_digit(16).unwrap(); 158 | let c2 = chars.next().unwrap().to_digit(16).unwrap(); 159 | let c3 = chars.next().unwrap().to_digit(16).unwrap(); 160 | let c4 = chars.next().unwrap().to_digit(16).unwrap(); 161 | match char::from_u32(((c1 * 16 + c2) * 16 + c3) * 16 + c4) { 162 | Some(c) => output.push(c), 163 | None => { 164 | write!(&mut output, "\\u{:X}{:X}{:X}{:X}", c1, c2, c3, c4) 165 | .expect("Could not write to output"); 166 | } 167 | }; 168 | } 169 | _ => panic!("Invalid test data input"), 170 | } 171 | } else { 172 | output.push(c); 173 | } 174 | } 175 | } 176 | } 177 | } 178 | 179 | fn status(status: &str) -> Vec<&str> { 180 | if status.is_empty() || status == "[]" { 181 | return Vec::new(); 182 | } 183 | 184 | let mut result = status.split(", ").collect::<Vec<_>>(); 185 | assert!(result[0].starts_with('[')); 186 | result[0] = &result[0][1..]; 187 | 188 | let idx = result.len() - 1; 189 | let last = &mut result[idx]; 190 | assert!(last.ends_with(']')); 191 | *last = &last[..last.len() - 1]; 192 | 193 | result 194 | } 195 | -------------------------------------------------------------------------------- /data-url/src/mime.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{self, Write}; 2 | use std::str::FromStr; 3 | 4 | /// 5 | #[derive(Debug, PartialEq, Eq)] 6 | pub struct Mime { 7 | pub type_: String, 8 | pub subtype: String, 9 | /// (name, value) 10 | pub parameters: Vec<(String, String)>, 11 | } 12 | 13 | impl Mime { 14 | pub fn get_parameter<P>(&self, name: &P) -> Option<&str> 15 | where 16 | P: ?Sized + PartialEq<str>, 17 | { 18 | self.parameters 19 | .iter() 20 | .find(|&&(ref n, _)| name == &**n) 21 | .map(|&(_, ref v)| &**v) 22 | } 23 | } 24 | 25 | #[derive(Debug)] 26 | pub struct MimeParsingError(()); 27 | 28 | /// 29 | impl FromStr for Mime { 30 | type Err = MimeParsingError; 31 | 32 | fn from_str(s: &str) -> Result<Self, Self::Err> { 33 | parse(s).ok_or(MimeParsingError(())) 34 | } 35 | } 36 | 37 | fn parse(s: &str) -> Option<Mime> { 38 | let trimmed = s.trim_matches(http_whitespace); 39 | 40 | let (type_, rest) = split2(trimmed, '/'); 41 | require!(only_http_token_code_points(type_) && !type_.is_empty()); 42 | 43 | let (subtype, rest) = split2(rest?, ';'); 44 | let subtype = subtype.trim_end_matches(http_whitespace); 45 | require!(only_http_token_code_points(subtype) && !subtype.is_empty()); 46 | 47 | let mut parameters = Vec::new(); 48 | if let Some(rest) = rest { 49 | parse_parameters(rest, &mut parameters) 50 | } 51 | 52 | Some(Mime { 53 | type_: type_.to_ascii_lowercase(), 54 | subtype: subtype.to_ascii_lowercase(), 55 | parameters, 56 | }) 57 | } 58 | 59 | fn split2(s: &str, separator: char) -> (&str, Option<&str>) { 60 | let mut iter = s.splitn(2, separator); 61 | let first = iter.next().unwrap(); 62 | (first, iter.next()) 63 | } 64 | 65 | fn parse_parameters(s: &str, parameters: &mut Vec<(String, String)>) { 66 | let mut semicolon_separated = s.split(';'); 67 | 68 | while let Some(piece) = semicolon_separated.next() { 69 | let piece = piece.trim_start_matches(http_whitespace); 70 | let (name, value) = split2(piece, '='); 71 | // We cannot early-return on an invalid name here, because the value 72 | // parsing later may consume more semicolon-separated pieces. 73 | let name_valid = 74 | !name.is_empty() && only_http_token_code_points(name) && !contains(parameters, name); 75 | if let Some(value) = value { 76 | let value = if let Some(stripped) = value.strip_prefix('"') { 77 | let max_len = stripped.len().saturating_sub(1); // without end quote 78 | let mut unescaped_value = String::with_capacity(max_len); 79 | let mut chars = stripped.chars(); 80 | 'until_closing_quote: loop { 81 | while let Some(c) = chars.next() { 82 | match c { 83 | '"' => break 'until_closing_quote, 84 | '\\' => unescaped_value.push(chars.next().unwrap_or_else(|| { 85 | semicolon_separated 86 | .next() 87 | .map(|piece| { 88 | // A semicolon inside a quoted value is not a separator 89 | // for the next parameter, but part of the value. 90 | chars = piece.chars(); 91 | ';' 92 | }) 93 | .unwrap_or('\\') 94 | })), 95 | _ => unescaped_value.push(c), 96 | } 97 | } 98 | if let Some(piece) = semicolon_separated.next() { 99 | // A semicolon inside a quoted value is not a separator 100 | // for the next parameter, but part of the value.
101 | unescaped_value.push(';'); 102 | chars = piece.chars() 103 | } else { 104 | break; 105 | } 106 | } 107 | if !name_valid || !valid_value(value) { 108 | continue; 109 | } 110 | unescaped_value 111 | } else { 112 | let value = value.trim_end_matches(http_whitespace); 113 | if value.is_empty() { 114 | continue; 115 | } 116 | if !name_valid || !valid_value(value) { 117 | continue; 118 | } 119 | value.to_owned() 120 | }; 121 | parameters.push((name.to_ascii_lowercase(), value)) 122 | } 123 | } 124 | } 125 | 126 | fn contains(parameters: &[(String, String)], name: &str) -> bool { 127 | parameters.iter().any(|&(ref n, _)| n == name) 128 | } 129 | 130 | fn valid_value(s: &str) -> bool { 131 | s.chars().all(|c| { 132 | // 133 | matches!(c, '\t' | ' '..='~' | '\u{80}'..='\u{FF}') 134 | }) 135 | } 136 | 137 | /// 138 | impl fmt::Display for Mime { 139 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 140 | f.write_str(&self.type_)?; 141 | f.write_str("/")?; 142 | f.write_str(&self.subtype)?; 143 | for &(ref name, ref value) in &self.parameters { 144 | f.write_str(";")?; 145 | f.write_str(name)?; 146 | f.write_str("=")?; 147 | if only_http_token_code_points(value) && !value.is_empty() { 148 | f.write_str(value)? 149 | } else { 150 | f.write_str("\"")?; 151 | for c in value.chars() { 152 | if c == '"' || c == '\\' { 153 | f.write_str("\\")? 154 | } 155 | f.write_char(c)? 156 | } 157 | f.write_str("\"")? 158 | } 159 | } 160 | Ok(()) 161 | } 162 | } 163 | 164 | fn http_whitespace(c: char) -> bool { 165 | matches!(c, ' ' | '\t' | '\n' | '\r') 166 | } 167 | 168 | fn only_http_token_code_points(s: &str) -> bool { 169 | s.bytes().all(|byte| IS_HTTP_TOKEN[byte as usize]) 170 | } 171 | 172 | macro_rules! byte_map { 173 | ($($flag:expr,)*) => ([ 174 | $($flag != 0,)* 175 | ]) 176 | } 177 | 178 | // Copied from https://github.com/hyperium/mime/blob/v0.3.5/src/parse.rs#L293 179 | #[rustfmt::skip] 180 | static IS_HTTP_TOKEN: [bool; 256] = byte_map![ 181 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 182 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 183 | 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 184 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 185 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 186 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 187 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 188 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 189 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 190 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 191 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 193 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 195 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 196 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 197 | ]; 198 | -------------------------------------------------------------------------------- /url/tests/data.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2013-2014 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | //! 
Data-driven tests 10 | 11 | use std::str::FromStr; 12 | 13 | use serde_json::Value; 14 | use url::{quirks, Url}; 15 | 16 | #[test] 17 | fn urltestdata() { 18 | #[cfg(not(feature = "idna"))] 19 | let idna_skip_inputs = [ 20 | "http://www.foo。bar.com", 21 | "http://Go.com", 22 | "http://你好你好", 23 | "https://faß.ExAmPlE/", 24 | "http://0Xc0.0250.01", 25 | "ftp://%e2%98%83", 26 | "https://%e2%98%83", 27 | "file://a\u{ad}b/p", 28 | "file://a%C2%ADb/p", 29 | "http://GOO\u{200b}\u{2060}\u{feff}goo.com", 30 | ]; 31 | 32 | // Copied from https://github.com/web-platform-tests/wpt/blob/master/url/ 33 | let mut json = Value::from_str(include_str!("urltestdata.json")) 34 | .expect("JSON parse error in urltestdata.json"); 35 | 36 | let mut passed = true; 37 | for entry in json.as_array_mut().unwrap() { 38 | if entry.is_string() { 39 | continue; // ignore comments 40 | } 41 | 42 | let maybe_base = entry 43 | .take_key("base") 44 | .expect("missing base key") 45 | .maybe_string(); 46 | let input = entry.take_string("input"); 47 | let failure = entry.take_key("failure").is_some(); 48 | 49 | #[cfg(not(feature = "idna"))] 50 | { 51 | if idna_skip_inputs.contains(&input.as_str()) { 52 | continue; 53 | } 54 | } 55 | 56 | let res = if let Some(base) = maybe_base { 57 | let base = match Url::parse(&base) { 58 | Ok(base) => base, 59 | Err(_) if failure => continue, 60 | Err(message) => { 61 | eprint_failure( 62 | format!(" failed: error parsing base {:?}: {}", base, message), 63 | &format!("parse base for {:?}", input), 64 | None, 65 | ); 66 | passed = false; 67 | continue; 68 | } 69 | }; 70 | base.join(&input) 71 | } else { 72 | Url::parse(&input) 73 | }; 74 | 75 | let url = match (res, failure) { 76 | (Ok(url), false) => url, 77 | (Err(_), true) => continue, 78 | (Err(message), false) => { 79 | eprint_failure( 80 | format!(" failed: {}", message), 81 | &format!("parse URL for {:?}", input), 82 | None, 83 | ); 84 | passed = false; 85 | continue; 86 | } 87 | (Ok(_), true) => { 88 | eprint_failure( 89 | format!(" failed: expected parse error for URL {:?}", input), 90 | &format!("parse URL for {:?}", input), 91 | None, 92 | ); 93 | passed = false; 94 | continue; 95 | } 96 | }; 97 | 98 | passed &= check_invariants(&url, &format!("invariants for {:?}", input), None); 99 | 100 | for &attr in ATTRIBS { 101 | passed &= test_eq_eprint( 102 | entry.take_string(attr), 103 | get(&url, attr), 104 | &format!("{:?} - {}", input, attr), 105 | None, 106 | ); 107 | } 108 | 109 | if let Some(expected_origin) = entry.take_key("origin").map(|s| s.string()) { 110 | passed &= test_eq_eprint( 111 | expected_origin, 112 | &quirks::origin(&url), 113 | &format!("origin for {:?}", input), 114 | None, 115 | ); 116 | } 117 | } 118 | 119 | assert!(passed) 120 | } 121 | 122 | #[test] 123 | fn setters_tests() { 124 | let mut json = Value::from_str(include_str!("setters_tests.json")) 125 | .expect("JSON parse error in setters_tests.json"); 126 | 127 | let mut passed = true; 128 | for &attr in ATTRIBS { 129 | if attr == "href" { 130 | continue; 131 | } 132 | 133 | let mut tests = json.take_key(attr).unwrap(); 134 | for mut test in tests.as_array_mut().unwrap().drain(..) 
{ 135 | let comment = test.take_key("comment").map(|s| s.string()); 136 | #[cfg(not(feature = "idna"))] 137 | { 138 | if let Some(comment) = comment.as_ref() { 139 | if comment.starts_with("IDNA Nontransitional_Processing") { 140 | continue; 141 | } 142 | } 143 | } 144 | let href = test.take_string("href"); 145 | let new_value = test.take_string("new_value"); 146 | let name = format!("{:?}.{} = {:?}", href, attr, new_value); 147 | let mut expected = test.take_key("expected").unwrap(); 148 | 149 | let mut url = Url::parse(&href).unwrap(); 150 | let comment_ref = comment.as_deref(); 151 | passed &= check_invariants(&url, &name, comment_ref); 152 | set(&mut url, attr, &new_value); 153 | 154 | for attr in ATTRIBS { 155 | if let Some(value) = expected.take_key(attr) { 156 | passed &= test_eq_eprint(value.string(), get(&url, attr), &name, comment_ref); 157 | }; 158 | } 159 | 160 | passed &= check_invariants(&url, &name, comment_ref); 161 | } 162 | } 163 | 164 | assert!(passed); 165 | } 166 | 167 | fn check_invariants(url: &Url, name: &str, comment: Option<&str>) -> bool { 168 | let mut passed = true; 169 | if let Err(e) = url.check_invariants() { 170 | passed = false; 171 | eprint_failure( 172 | format!(" failed: invariants checked -> {:?}", e), 173 | name, 174 | comment, 175 | ); 176 | } 177 | 178 | #[cfg(feature = "serde")] 179 | { 180 | let bytes = serde_json::to_vec(url).unwrap(); 181 | let new_url: Url = serde_json::from_slice(&bytes).unwrap(); 182 | passed &= test_eq_eprint(url.to_string(), &new_url.to_string(), name, comment); 183 | } 184 | 185 | passed 186 | } 187 | 188 | trait JsonExt { 189 | fn take_key(&mut self, key: &str) -> Option<Value>; 190 | fn string(self) -> String; 191 | fn maybe_string(self) -> Option<String>; 192 | fn take_string(&mut self, key: &str) -> String; 193 | } 194 | 195 | impl JsonExt for Value { 196 | fn take_key(&mut self, key: &str) -> Option<Value> { 197 | self.as_object_mut().unwrap().remove(key) 198 | } 199 | 200 | fn string(self) -> String { 201 | self.maybe_string().expect("") 202 | } 203 | 204 | fn maybe_string(self) -> Option<String> { 205 | match self { 206 | Value::String(s) => Some(s), 207 | Value::Null => None, 208 | _ => panic!("Not a Value::String or Value::Null"), 209 | } 210 | } 211 | 212 | fn take_string(&mut self, key: &str) -> String { 213 | self.take_key(key).unwrap().string() 214 | } 215 | } 216 | 217 | fn get<'a>(url: &'a Url, attr: &str) -> &'a str { 218 | match attr { 219 | "href" => quirks::href(url), 220 | "protocol" => quirks::protocol(url), 221 | "username" => quirks::username(url), 222 | "password" => quirks::password(url), 223 | "hostname" => quirks::hostname(url), 224 | "host" => quirks::host(url), 225 | "port" => quirks::port(url), 226 | "pathname" => quirks::pathname(url), 227 | "search" => quirks::search(url), 228 | "hash" => quirks::hash(url), 229 | _ => unreachable!(), 230 | } 231 | } 232 | 233 | #[allow(clippy::unit_arg)] 234 | fn set<'a>(url: &'a mut Url, attr: &str, new: &str) { 235 | let _ = match attr { 236 | "protocol" => quirks::set_protocol(url, new), 237 | "username" => quirks::set_username(url, new), 238 | "password" => quirks::set_password(url, new), 239 | "hostname" => quirks::set_hostname(url, new), 240 | "host" => quirks::set_host(url, new), 241 | "port" => quirks::set_port(url, new), 242 | "pathname" => Ok(quirks::set_pathname(url, new)), 243 | "search" => Ok(quirks::set_search(url, new)), 244 | "hash" => Ok(quirks::set_hash(url, new)), 245 | _ => unreachable!(), 246 | }; 247 | } 248 | 249 | fn test_eq_eprint(expected: String, actual: &str,
name: &str, comment: Option<&str>) -> bool { 250 | if expected == actual { 251 | return true; 252 | } 253 | eprint_failure( 254 | format!("expected: {}\n actual: {}", expected, actual), 255 | name, 256 | comment, 257 | ); 258 | false 259 | } 260 | 261 | fn eprint_failure(err: String, name: &str, comment: Option<&str>) { 262 | eprintln!(" test: {}\n{}", name, err); 263 | if let Some(comment) = comment { 264 | eprintln!("{}\n", comment); 265 | } else { 266 | eprintln!(); 267 | } 268 | } 269 | 270 | const ATTRIBS: &[&str] = &[ 271 | "href", "protocol", "username", "password", "host", "hostname", "port", "pathname", "search", 272 | "hash", 273 | ]; 274 | -------------------------------------------------------------------------------- /url/src/path_segments.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | use crate::parser::{self, to_u32, SchemeType}; 10 | use crate::Url; 11 | use std::str; 12 | 13 | /// Exposes methods to manipulate the path of a URL that is not cannot-be-a-base. 14 | /// 15 | /// The path always starts with a `/` slash, and is made of slash-separated segments. 16 | /// There is always at least one segment (which may be the empty string). 17 | /// 18 | /// Examples: 19 | /// 20 | /// ```rust 21 | /// use url::Url; 22 | /// # use std::error::Error; 23 | /// 24 | /// # fn run() -> Result<(), Box<dyn Error>> { 25 | /// let mut url = Url::parse("mailto:me@example.com")?; 26 | /// assert!(url.path_segments_mut().is_err()); 27 | /// 28 | /// let mut url = Url::parse("http://example.net/foo/index.html")?; 29 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 30 | /// .pop().push("img").push("2/100%.png"); 31 | /// assert_eq!(url.as_str(), "http://example.net/foo/img/2%2F100%25.png"); 32 | /// # Ok(()) 33 | /// # } 34 | /// # run().unwrap(); 35 | /// ``` 36 | #[derive(Debug)] 37 | pub struct PathSegmentsMut<'a> { 38 | url: &'a mut Url, 39 | after_first_slash: usize, 40 | after_path: String, 41 | old_after_path_position: u32, 42 | } 43 | 44 | // Not re-exported outside the crate 45 | pub fn new(url: &mut Url) -> PathSegmentsMut<'_> { 46 | let after_path = url.take_after_path(); 47 | let old_after_path_position = to_u32(url.serialization.len()).unwrap(); 48 | // Special URLs always have a non-empty path 49 | if SchemeType::from(url.scheme()).is_special() { 50 | debug_assert!(url.byte_at(url.path_start) == b'/'); 51 | } else { 52 | debug_assert!( 53 | url.serialization.len() == url.path_start as usize 54 | || url.byte_at(url.path_start) == b'/' 55 | ); 56 | } 57 | PathSegmentsMut { 58 | after_first_slash: url.path_start as usize + "/".len(), 59 | url, 60 | old_after_path_position, 61 | after_path, 62 | } 63 | } 64 | 65 | impl<'a> Drop for PathSegmentsMut<'a> { 66 | fn drop(&mut self) { 67 | self.url 68 | .restore_after_path(self.old_after_path_position, &self.after_path) 69 | } 70 | } 71 | 72 | impl<'a> PathSegmentsMut<'a> { 73 | /// Remove all segments in the path, leaving the minimal `url.path() == "/"`. 74 | /// 75 | /// Returns `&mut Self` so that method calls can be chained.
76 | /// 77 | /// Example: 78 | /// 79 | /// ```rust 80 | /// use url::Url; 81 | /// # use std::error::Error; 82 | /// 83 | /// # fn run() -> Result<(), Box<dyn Error>> { 84 | /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; 85 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 86 | /// .clear().push("logout"); 87 | /// assert_eq!(url.as_str(), "https://github.com/logout"); 88 | /// # Ok(()) 89 | /// # } 90 | /// # run().unwrap(); 91 | /// ``` 92 | pub fn clear(&mut self) -> &mut Self { 93 | self.url.serialization.truncate(self.after_first_slash); 94 | self 95 | } 96 | 97 | /// Remove the last segment of this URL’s path if it is empty, 98 | /// except if there was only one segment to begin with. 99 | /// 100 | /// In other words, remove one trailing slash from the path, if any, 101 | /// unless it is also the initial slash (so this does nothing if `url.path() == "/"`). 102 | /// 103 | /// Returns `&mut Self` so that method calls can be chained. 104 | /// 105 | /// Example: 106 | /// 107 | /// ```rust 108 | /// use url::Url; 109 | /// # use std::error::Error; 110 | /// 111 | /// # fn run() -> Result<(), Box<dyn Error>> { 112 | /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; 113 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 114 | /// .push("pulls"); 115 | /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url//pulls"); 116 | /// 117 | /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; 118 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 119 | /// .pop_if_empty().push("pulls"); 120 | /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls"); 121 | /// # Ok(()) 122 | /// # } 123 | /// # run().unwrap(); 124 | /// ``` 125 | pub fn pop_if_empty(&mut self) -> &mut Self { 126 | if self.after_first_slash >= self.url.serialization.len() { 127 | return self; 128 | } 129 | if self.url.serialization[self.after_first_slash..].ends_with('/') { 130 | self.url.serialization.pop(); 131 | } 132 | self 133 | } 134 | 135 | /// Remove the last segment of this URL’s path. 136 | /// 137 | /// If the path only has one segment, make it empty such that `url.path() == "/"`. 138 | /// 139 | /// Returns `&mut Self` so that method calls can be chained. 140 | pub fn pop(&mut self) -> &mut Self { 141 | if self.after_first_slash >= self.url.serialization.len() { 142 | return self; 143 | } 144 | let last_slash = self.url.serialization[self.after_first_slash..] 145 | .rfind('/') 146 | .unwrap_or(0); 147 | self.url 148 | .serialization 149 | .truncate(self.after_first_slash + last_slash); 150 | self 151 | } 152 | 153 | /// Append the given segment at the end of this URL’s path. 154 | /// 155 | /// See the documentation for `.extend()`. 156 | /// 157 | /// Returns `&mut Self` so that method calls can be chained. 158 | pub fn push(&mut self, segment: &str) -> &mut Self { 159 | self.extend(Some(segment)) 160 | } 161 | 162 | /// Append each segment from the given iterator at the end of this URL’s path. 163 | /// 164 | /// Each segment is percent-encoded like in `Url::parse` or `Url::join`, 165 | /// except that `%` and `/` characters are also encoded (to `%25` and `%2F`). 166 | /// This is unlike `Url::parse`, where `%` is left as-is in case some of the input 167 | /// is already percent-encoded, and `/` denotes a path segment separator. 168 | /// 169 | /// Note that, in addition to slashes between new segments, 170 | /// this always adds a slash between the existing path and the new segments, 171 | /// *except* if the existing path is `"/"`.
172 | /// If the previous last segment was empty (that is, if the path had a trailing slash), 173 | /// the path after `.extend()` will contain two consecutive slashes. 174 | /// If that is undesired, call `.pop_if_empty()` first. 175 | /// 176 | /// To obtain a behavior similar to `Url::join`, call `.pop()` unconditionally first. 177 | /// 178 | /// Returns `&mut Self` so that method calls can be chained. 179 | /// 180 | /// Example: 181 | /// 182 | /// ```rust 183 | /// use url::Url; 184 | /// # use std::error::Error; 185 | /// 186 | /// # fn run() -> Result<(), Box<dyn Error>> { 187 | /// let mut url = Url::parse("https://github.com/")?; 188 | /// let org = "servo"; 189 | /// let repo = "rust-url"; 190 | /// let issue_number = "188"; 191 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 192 | /// .extend(&[org, repo, "issues", issue_number]); 193 | /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/issues/188"); 194 | /// # Ok(()) 195 | /// # } 196 | /// # run().unwrap(); 197 | /// ``` 198 | /// 199 | /// In order to make sure that parsing the serialization of a URL gives the same URL, 200 | /// a segment is ignored if it is `"."` or `".."`: 201 | /// 202 | /// ```rust 203 | /// use url::Url; 204 | /// # use std::error::Error; 205 | /// 206 | /// # fn run() -> Result<(), Box<dyn Error>> { 207 | /// let mut url = Url::parse("https://github.com/servo")?; 208 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 209 | /// .extend(&["..", "rust-url", ".", "pulls"]); 210 | /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls"); 211 | /// # Ok(()) 212 | /// # } 213 | /// # run().unwrap(); 214 | /// ``` 215 | pub fn extend<I>(&mut self, segments: I) -> &mut Self 216 | where 217 | I: IntoIterator, 218 | I::Item: AsRef<str>, 219 | { 220 | let scheme_type = SchemeType::from(self.url.scheme()); 221 | let path_start = self.url.path_start as usize; 222 | self.url.mutate(|parser| { 223 | parser.context = parser::Context::PathSegmentSetter; 224 | for segment in segments { 225 | let segment = segment.as_ref(); 226 | if matches!(segment, "." | "..") { 227 | continue; 228 | } 229 | if parser.serialization.len() > path_start + 1 230 | // Non-special URLs' paths might still be empty 231 | || parser.serialization.len() == path_start 232 | { 233 | parser.serialization.push('/'); 234 | } 235 | let mut has_host = true; // FIXME account for this? 236 | parser.parse_path( 237 | scheme_type, 238 | &mut has_host, 239 | path_start, 240 | parser::Input::new(segment), 241 | ); 242 | } 243 | }); 244 | self 245 | } 246 | } 247 | -------------------------------------------------------------------------------- /url/src/quirks.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | //! Getters and setters for URL components implemented per https://url.spec.whatwg.org/#api 10 | //! 11 | //! Unless you need to be interoperable with web browsers, 12 | //! you probably want to use the `Url` methods instead.
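//!
//! A small sketch of how these functions mirror the JavaScript `URL` API, as
//! exercised by this crate's own tests in `tests/data.rs` (the URL value here
//! is illustrative):
//!
//! ```rust
//! use url::{quirks, Url};
//!
//! let mut url = Url::parse("http://example.com/path?q=1").unwrap();
//! assert_eq!(quirks::protocol(&url), "http:");
//! assert_eq!(quirks::search(&url), "?q=1");
//! quirks::set_hash(&mut url, "#top");
//! assert_eq!(quirks::hash(&url), "#top");
//! ```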
13 | 14 | use crate::parser::{default_port, Context, Input, Parser, SchemeType}; 15 | use crate::{Host, ParseError, Position, Url}; 16 | 17 | /// https://url.spec.whatwg.org/#dom-url-domaintoascii 18 | pub fn domain_to_ascii(domain: &str) -> String { 19 | match Host::parse(domain) { 20 | Ok(Host::Domain(domain)) => domain, 21 | _ => String::new(), 22 | } 23 | } 24 | 25 | /// https://url.spec.whatwg.org/#dom-url-domaintounicode 26 | #[cfg(feature = "idna")] 27 | pub fn domain_to_unicode(domain: &str) -> String { 28 | match Host::parse(domain) { 29 | Ok(Host::Domain(ref domain)) => { 30 | let (unicode, _errors) = idna::domain_to_unicode(domain); 31 | unicode 32 | } 33 | _ => String::new(), 34 | } 35 | } 36 | 37 | /// Getter for https://url.spec.whatwg.org/#dom-url-href 38 | pub fn href(url: &Url) -> &str { 39 | url.as_str() 40 | } 41 | 42 | /// Setter for https://url.spec.whatwg.org/#dom-url-href 43 | pub fn set_href(url: &mut Url, value: &str) -> Result<(), ParseError> { 44 | *url = Url::parse(value)?; 45 | Ok(()) 46 | } 47 | 48 | /// Getter for https://url.spec.whatwg.org/#dom-url-origin 49 | pub fn origin(url: &Url) -> String { 50 | url.origin().ascii_serialization() 51 | } 52 | 53 | /// Getter for https://url.spec.whatwg.org/#dom-url-protocol 54 | #[inline] 55 | pub fn protocol(url: &Url) -> &str { 56 | &url.as_str()[..url.scheme().len() + ":".len()] 57 | } 58 | 59 | /// Setter for https://url.spec.whatwg.org/#dom-url-protocol 60 | #[allow(clippy::result_unit_err)] 61 | pub fn set_protocol(url: &mut Url, mut new_protocol: &str) -> Result<(), ()> { 62 | // The scheme state in the spec ignores everything after the first `:`, 63 | // but `set_scheme` errors if there is more. 64 | if let Some(position) = new_protocol.find(':') { 65 | new_protocol = &new_protocol[..position]; 66 | } 67 | url.set_scheme(new_protocol) 68 | } 69 | 70 | /// Getter for https://url.spec.whatwg.org/#dom-url-username 71 | #[inline] 72 | pub fn username(url: &Url) -> &str { 73 | url.username() 74 | } 75 | 76 | /// Setter for https://url.spec.whatwg.org/#dom-url-username 77 | #[allow(clippy::result_unit_err)] 78 | pub fn set_username(url: &mut Url, new_username: &str) -> Result<(), ()> { 79 | url.set_username(new_username) 80 | } 81 | 82 | /// Getter for https://url.spec.whatwg.org/#dom-url-password 83 | #[inline] 84 | pub fn password(url: &Url) -> &str { 85 | url.password().unwrap_or("") 86 | } 87 | 88 | /// Setter for https://url.spec.whatwg.org/#dom-url-password 89 | #[allow(clippy::result_unit_err)] 90 | pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> { 91 | url.set_password(if new_password.is_empty() { 92 | None 93 | } else { 94 | Some(new_password) 95 | }) 96 | } 97 | 98 | /// Getter for https://url.spec.whatwg.org/#dom-url-host 99 | #[inline] 100 | pub fn host(url: &Url) -> &str { 101 | &url[Position::BeforeHost..Position::AfterPort] 102 | } 103 | 104 | /// Setter for https://url.spec.whatwg.org/#dom-url-host 105 | #[allow(clippy::result_unit_err)] 106 | pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { 107 | // If context object’s url’s cannot-be-a-base-URL flag is set, then return. 
108 | if url.cannot_be_a_base() { 109 | return Err(()); 110 | } 111 | // Host parsing rules are strict, 112 | // We don't want to trim the input 113 | let input = Input::no_trim(new_host); 114 | let host; 115 | let opt_port; 116 | { 117 | let scheme = url.scheme(); 118 | let scheme_type = SchemeType::from(scheme); 119 | if scheme_type == SchemeType::File && new_host.is_empty() { 120 | url.set_host_internal(Host::Domain(String::new()), None); 121 | return Ok(()); 122 | } 123 | 124 | if let Ok((h, remaining)) = Parser::parse_host(input, scheme_type) { 125 | host = h; 126 | opt_port = if let Some(remaining) = remaining.split_prefix(':') { 127 | if remaining.is_empty() { 128 | None 129 | } else { 130 | Parser::parse_port(remaining, || default_port(scheme), Context::Setter) 131 | .ok() 132 | .map(|(port, _remaining)| port) 133 | } 134 | } else { 135 | None 136 | }; 137 | } else { 138 | return Err(()); 139 | } 140 | } 141 | // Make sure we won't set an empty host to a url with a username or a port 142 | if host == Host::Domain("".to_string()) 143 | && (!username(url).is_empty() || matches!(opt_port, Some(Some(_))) || url.port().is_some()) 144 | { 145 | return Err(()); 146 | } 147 | url.set_host_internal(host, opt_port); 148 | Ok(()) 149 | } 150 | 151 | /// Getter for https://url.spec.whatwg.org/#dom-url-hostname 152 | #[inline] 153 | pub fn hostname(url: &Url) -> &str { 154 | url.host_str().unwrap_or("") 155 | } 156 | 157 | /// Setter for https://url.spec.whatwg.org/#dom-url-hostname 158 | #[allow(clippy::result_unit_err)] 159 | pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { 160 | if url.cannot_be_a_base() { 161 | return Err(()); 162 | } 163 | // Host parsing rules are strict we don't want to trim the input 164 | let input = Input::no_trim(new_hostname); 165 | let scheme_type = SchemeType::from(url.scheme()); 166 | if scheme_type == SchemeType::File && new_hostname.is_empty() { 167 | url.set_host_internal(Host::Domain(String::new()), None); 168 | return Ok(()); 169 | } 170 | 171 | if let Ok((host, _remaining)) = Parser::parse_host(input, scheme_type) { 172 | if let Host::Domain(h) = &host { 173 | if h.is_empty() { 174 | // Empty host on special not file url 175 | if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile 176 | // Port with an empty host 177 | ||!port(url).is_empty() 178 | // Empty host that includes credentials 179 | || !url.username().is_empty() 180 | || !url.password().unwrap_or("").is_empty() 181 | { 182 | return Err(()); 183 | } 184 | } 185 | } 186 | url.set_host_internal(host, None); 187 | Ok(()) 188 | } else { 189 | Err(()) 190 | } 191 | } 192 | 193 | /// Getter for https://url.spec.whatwg.org/#dom-url-port 194 | #[inline] 195 | pub fn port(url: &Url) -> &str { 196 | &url[Position::BeforePort..Position::AfterPort] 197 | } 198 | 199 | /// Setter for https://url.spec.whatwg.org/#dom-url-port 200 | #[allow(clippy::result_unit_err)] 201 | pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> { 202 | let result; 203 | { 204 | // has_host implies !cannot_be_a_base 205 | let scheme = url.scheme(); 206 | if !url.has_host() || url.host() == Some(Host::Domain("")) || scheme == "file" { 207 | return Err(()); 208 | } 209 | result = Parser::parse_port( 210 | Input::new(new_port), 211 | || default_port(scheme), 212 | Context::Setter, 213 | ) 214 | } 215 | if let Ok((new_port, _remaining)) = result { 216 | url.set_port_internal(new_port); 217 | Ok(()) 218 | } else { 219 | Err(()) 220 | } 221 | } 222 | 223 | /// Getter for 
https://url.spec.whatwg.org/#dom-url-pathname
224 | #[inline]
225 | pub fn pathname(url: &Url) -> &str {
226 |     url.path()
227 | }
228 | 
229 | /// Setter for https://url.spec.whatwg.org/#dom-url-pathname
230 | pub fn set_pathname(url: &mut Url, new_pathname: &str) {
231 |     if url.cannot_be_a_base() {
232 |         return;
233 |     }
234 |     if new_pathname.starts_with('/')
235 |         || (SchemeType::from(url.scheme()).is_special()
236 |             // `\` is a segment delimiter for "special" URLs
237 |             && new_pathname.starts_with('\\'))
238 |     {
239 |         url.set_path(new_pathname)
240 |     } else {
241 |         let mut path_to_set = String::from("/");
242 |         path_to_set.push_str(new_pathname);
243 |         url.set_path(&path_to_set)
244 |     }
245 | }
246 | 
247 | /// Getter for https://url.spec.whatwg.org/#dom-url-search
248 | pub fn search(url: &Url) -> &str {
249 |     trim(&url[Position::AfterPath..Position::AfterQuery])
250 | }
251 | 
252 | /// Setter for https://url.spec.whatwg.org/#dom-url-search
253 | pub fn set_search(url: &mut Url, new_search: &str) {
254 |     url.set_query(match new_search {
255 |         "" => None,
256 |         _ if new_search.starts_with('?') => Some(&new_search[1..]),
257 |         _ => Some(new_search),
258 |     })
259 | }
260 | 
261 | /// Getter for https://url.spec.whatwg.org/#dom-url-hash
262 | pub fn hash(url: &Url) -> &str {
263 |     trim(&url[Position::AfterQuery..])
264 | }
265 | 
266 | /// Setter for https://url.spec.whatwg.org/#dom-url-hash
267 | pub fn set_hash(url: &mut Url, new_hash: &str) {
268 |     url.set_fragment(match new_hash {
269 |         // If the given value is the empty string,
270 |         // then set context object’s url’s fragment to null and return.
271 |         "" => None,
272 |         // Let input be the given value with a single leading U+0023 (#) removed, if any.
273 |         _ if new_hash.starts_with('#') => Some(&new_hash[1..]),
274 |         _ => Some(new_hash),
275 |     })
276 | }
277 | 
278 | fn trim(s: &str) -> &str {
279 |     if s.len() == 1 {
280 |         ""
281 |     } else {
282 |         s
283 |     }
284 | }
285 | 
-------------------------------------------------------------------------------- /idna/src/punycode.rs: --------------------------------------------------------------------------------
1 | // Copyright 2013 The rust-url developers.
2 | //
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6 | // option. This file may not be copied, modified, or distributed
7 | // except according to those terms.
8 | 
9 | //! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.
10 | //!
11 | //! Since Punycode fundamentally works on unicode code points,
12 | //! `encode` and `decode` take and return slices and vectors of `char`.
13 | //! `encode_str` and `decode_to_string` provide convenience wrappers
14 | //! that convert from and to Rust’s UTF-8 based `str` and `String` types.
15 | 
16 | use std::char;
17 | use std::u32;
18 | 
19 | // Bootstring parameters for Punycode
20 | static BASE: u32 = 36;
21 | static T_MIN: u32 = 1;
22 | static T_MAX: u32 = 26;
23 | static SKEW: u32 = 38;
24 | static DAMP: u32 = 700;
25 | static INITIAL_BIAS: u32 = 72;
26 | static INITIAL_N: u32 = 0x80;
27 | static DELIMITER: char = '-';
28 | 
29 | #[inline]
30 | fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 {
31 |     delta /= if first_time { DAMP } else { 2 };
32 |     delta += delta / num_points;
33 |     let mut k = 0;
34 |     while delta > ((BASE - T_MIN) * T_MAX) / 2 {
35 |         delta /= BASE - T_MIN;
36 |         k += BASE;
37 |     }
38 |     k + (((BASE - T_MIN + 1) * delta) / (delta + SKEW))
39 | }
40 | 
41 | /// Convert Punycode to a Unicode `String`.
42 | ///
43 | /// This is a convenience wrapper around `decode`.
44 | #[inline]
45 | pub fn decode_to_string(input: &str) -> Option<String> {
46 |     decode(input).map(|chars| chars.into_iter().collect())
47 | }
48 | 
49 | /// Convert Punycode to Unicode.
50 | ///
51 | /// Return None on malformed input or overflow.
52 | /// Overflow can only happen on inputs that take more than
53 | /// 63 encoded bytes, the DNS limit on domain name labels.
54 | pub fn decode(input: &str) -> Option<Vec<char>> {
55 |     Some(Decoder::default().decode(input).ok()?.collect())
56 | }
57 | 
58 | #[derive(Default)]
59 | pub(crate) struct Decoder {
60 |     insertions: Vec<(usize, char)>,
61 | }
62 | 
63 | impl Decoder {
64 |     /// Split the input iterator and return a Vec with insertions of encoded characters
65 |     pub(crate) fn decode<'a>(&'a mut self, input: &'a str) -> Result<Decode<'a>, ()> {
66 |         self.insertions.clear();
67 |         // Handle "basic" (ASCII) code points.
68 |         // They are encoded as-is before the last delimiter, if any.
69 |         let (base, input) = match input.rfind(DELIMITER) {
70 |             None => ("", input),
71 |             Some(position) => (
72 |                 &input[..position],
73 |                 if position > 0 {
74 |                     &input[position + 1..]
75 |                 } else {
76 |                     input
77 |                 },
78 |             ),
79 |         };
80 | 
81 |         if !base.is_ascii() {
82 |             return Err(());
83 |         }
84 | 
85 |         let base_len = base.len();
86 |         let mut length = base_len as u32;
87 |         let mut code_point = INITIAL_N;
88 |         let mut bias = INITIAL_BIAS;
89 |         let mut i = 0;
90 |         let mut iter = input.bytes();
91 |         loop {
92 |             let previous_i = i;
93 |             let mut weight = 1;
94 |             let mut k = BASE;
95 |             let mut byte = match iter.next() {
96 |                 None => break,
97 |                 Some(byte) => byte,
98 |             };
99 | 
100 |             // Decode a generalized variable-length integer into delta,
101 |             // which gets added to i.
102 |             loop {
103 |                 let digit = match byte {
104 |                     byte @ b'0'..=b'9' => byte - b'0' + 26,
105 |                     byte @ b'A'..=b'Z' => byte - b'A',
106 |                     byte @ b'a'..=b'z' => byte - b'a',
107 |                     _ => return Err(()),
108 |                 } as u32;
109 |                 if digit > (u32::MAX - i) / weight {
110 |                     return Err(()); // Overflow
111 |                 }
112 |                 i += digit * weight;
113 |                 let t = if k <= bias {
114 |                     T_MIN
115 |                 } else if k >= bias + T_MAX {
116 |                     T_MAX
117 |                 } else {
118 |                     k - bias
119 |                 };
120 |                 if digit < t {
121 |                     break;
122 |                 }
123 |                 if weight > u32::MAX / (BASE - t) {
124 |                     return Err(()); // Overflow
125 |                 }
126 |                 weight *= BASE - t;
127 |                 k += BASE;
128 |                 byte = match iter.next() {
129 |                     None => return Err(()), // End of input before the end of this delta
130 |                     Some(byte) => byte,
131 |                 };
132 |             }
133 | 
134 |             bias = adapt(i - previous_i, length + 1, previous_i == 0);
135 |             if i / (length + 1) > u32::MAX - code_point {
136 |                 return Err(()); // Overflow
137 |             }
138 | 
139 |             // i was supposed to wrap around from length+1 to 0,
140 |             // incrementing code_point each time.
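// (Worked example: with length == 3 and i == 9, i wraps past the end
// twice, so code_point advances by 9 / 4 == 2 and the final insertion
// index is 9 % 4 == 1.)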
141 |             code_point += i / (length + 1);
142 |             i %= length + 1;
143 |             let c = match char::from_u32(code_point) {
144 |                 Some(c) => c,
145 |                 None => return Err(()),
146 |             };
147 | 
148 |             // Move earlier insertions farther out in the string
149 |             for (idx, _) in &mut self.insertions {
150 |                 if *idx >= i as usize {
151 |                     *idx += 1;
152 |                 }
153 |             }
154 |             self.insertions.push((i as usize, c));
155 |             length += 1;
156 |             i += 1;
157 |         }
158 | 
159 |         self.insertions.sort_by_key(|(i, _)| *i);
160 |         Ok(Decode {
161 |             base: base.chars(),
162 |             insertions: &self.insertions,
163 |             inserted: 0,
164 |             position: 0,
165 |             len: base_len + self.insertions.len(),
166 |         })
167 |     }
168 | }
169 | 
170 | pub(crate) struct Decode<'a> {
171 |     base: std::str::Chars<'a>,
172 |     pub(crate) insertions: &'a [(usize, char)],
173 |     inserted: usize,
174 |     position: usize,
175 |     len: usize,
176 | }
177 | 
178 | impl<'a> Iterator for Decode<'a> {
179 |     type Item = char;
180 | 
181 |     fn next(&mut self) -> Option<Self::Item> {
182 |         loop {
183 |             match self.insertions.get(self.inserted) {
184 |                 Some((pos, c)) if *pos == self.position => {
185 |                     self.inserted += 1;
186 |                     self.position += 1;
187 |                     return Some(*c);
188 |                 }
189 |                 _ => {}
190 |             }
191 |             if let Some(c) = self.base.next() {
192 |                 self.position += 1;
193 |                 return Some(c);
194 |             } else if self.inserted >= self.insertions.len() {
195 |                 return None;
196 |             }
197 |         }
198 |     }
199 | 
200 |     fn size_hint(&self) -> (usize, Option<usize>) {
201 |         let len = self.len - self.position;
202 |         (len, Some(len))
203 |     }
204 | }
205 | 
206 | impl<'a> ExactSizeIterator for Decode<'a> {
207 |     fn len(&self) -> usize {
208 |         self.len - self.position
209 |     }
210 | }
211 | 
212 | /// Convert a Unicode `str` to Punycode.
213 | ///
214 | /// This is a convenience wrapper around `encode`.
215 | #[inline]
216 | pub fn encode_str(input: &str) -> Option<String> {
217 |     let mut buf = String::with_capacity(input.len());
218 |     encode_into(input.chars(), &mut buf).ok().map(|()| buf)
219 | }
220 | 
221 | /// Convert Unicode to Punycode.
222 | ///
223 | /// Return None on overflow, which can only happen on inputs that would take more than
224 | /// 63 encoded bytes, the DNS limit on domain name labels.
225 | pub fn encode(input: &[char]) -> Option<String> {
226 |     let mut buf = String::with_capacity(input.len());
227 |     encode_into(input.iter().copied(), &mut buf)
228 |         .ok()
229 |         .map(|()| buf)
230 | }
231 | 
232 | pub(crate) fn encode_into<I>(input: I, output: &mut String) -> Result<(), ()>
233 | where
234 |     I: Iterator<Item = char> + Clone,
235 | {
236 |     // Handle "basic" (ASCII) code points. They are encoded as-is.
237 |     let (mut input_length, mut basic_length) = (0, 0);
238 |     for c in input.clone() {
239 |         input_length += 1;
240 |         if c.is_ascii() {
241 |             output.push(c);
242 |             basic_length += 1;
243 |         }
244 |     }
245 | 
246 |     if basic_length > 0 {
247 |         output.push('-')
248 |     }
249 |     let mut code_point = INITIAL_N;
250 |     let mut delta = 0;
251 |     let mut bias = INITIAL_BIAS;
252 |     let mut processed = basic_length;
253 |     while processed < input_length {
254 |         // All code points < code_point have been handled already.
255 |         // Find the next larger one.
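// (The `unwrap()` below cannot fail: `processed < input_length` guarantees
// at least one code point is still unprocessed, and by the invariant above
// every unprocessed code point is >= `code_point`.)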
256 |         let min_code_point = input
257 |             .clone()
258 |             .map(|c| c as u32)
259 |             .filter(|&c| c >= code_point)
260 |             .min()
261 |             .unwrap();
262 |         if min_code_point - code_point > (u32::MAX - delta) / (processed + 1) {
263 |             return Err(()); // Overflow
264 |         }
265 |         // Increase delta to advance the decoder’s <code_point, i> state to <min_code_point, 0>
266 |         delta += (min_code_point - code_point) * (processed + 1);
267 |         code_point = min_code_point;
268 |         for c in input.clone() {
269 |             let c = c as u32;
270 |             if c < code_point {
271 |                 delta += 1;
272 |                 if delta == 0 {
273 |                     return Err(()); // Overflow
274 |                 }
275 |             }
276 |             if c == code_point {
277 |                 // Represent delta as a generalized variable-length integer:
278 |                 let mut q = delta;
279 |                 let mut k = BASE;
280 |                 loop {
281 |                     let t = if k <= bias {
282 |                         T_MIN
283 |                     } else if k >= bias + T_MAX {
284 |                         T_MAX
285 |                     } else {
286 |                         k - bias
287 |                     };
288 |                     if q < t {
289 |                         break;
290 |                     }
291 |                     let value = t + ((q - t) % (BASE - t));
292 |                     output.push(value_to_digit(value));
293 |                     q = (q - t) / (BASE - t);
294 |                     k += BASE;
295 |                 }
296 |                 output.push(value_to_digit(q));
297 |                 bias = adapt(delta, processed + 1, processed == basic_length);
298 |                 delta = 0;
299 |                 processed += 1;
300 |             }
301 |         }
302 |         delta += 1;
303 |         code_point += 1;
304 |     }
305 |     Ok(())
306 | }
307 | 
308 | #[inline]
309 | fn value_to_digit(value: u32) -> char {
310 |     match value {
311 |         0..=25 => (value as u8 + b'a') as char,       // a..z
312 |         26..=35 => (value as u8 - 26 + b'0') as char, // 0..9
313 |         _ => panic!(),
314 |     }
315 | }
316 | 
-------------------------------------------------------------------------------- /LICENSE-APACHE: --------------------------------------------------------------------------------
1 |                               Apache License
2 |                         Version 2.0, January 2004
3 |                      http://www.apache.org/licenses/
4 | 
5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 |    1. Definitions.
8 | 
9 |       "License" shall mean the terms and conditions for use, reproduction,
10 |       and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 |       "Licensor" shall mean the copyright owner or entity authorized by
13 |       the copyright owner that is granting the License.
14 | 
15 |       "Legal Entity" shall mean the union of the acting entity and all
16 |       other entities that control, are controlled by, or are under common
17 |       control with that entity. For the purposes of this definition,
18 |       "control" means (i) the power, direct or indirect, to cause the
19 |       direction or management of such entity, whether by contract or
20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 |       outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 |       "You" (or "Your") shall mean an individual or Legal Entity
24 |       exercising permissions granted by this License.
25 | 
26 |       "Source" form shall mean the preferred form for making modifications,
27 |       including but not limited to software source code, documentation
28 |       source, and configuration files.
29 | 
30 |       "Object" form shall mean any form resulting from mechanical
31 |       transformation or translation of a Source form, including but
32 |       not limited to compiled object code, generated documentation,
33 |       and conversions to other media types.
34 | 
35 |       "Work" shall mean the work of authorship, whether in Source or
36 |       Object form, made available under the License, as indicated by a
37 |       copyright notice that is included in or attached to the work
38 |       (an example is provided in the Appendix below).
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /form_urlencoded/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
-------------------------------------------------------------------------------- /data-url/src/lib.rs: --------------------------------------------------------------------------------
1 | //! Processing of `data:` URLs according to the Fetch Standard:
2 | //! <https://fetch.spec.whatwg.org/#data-urls>
3 | //! but starting from a string rather than a parsed URL to avoid extra copies.
4 | //!
5 | //! ```rust
6 | //! use data_url::{DataUrl, mime};
7 | //!
8 | //! let url = DataUrl::process("data:,Hello%20World!").unwrap();
9 | //! let (body, fragment) = url.decode_to_vec().unwrap();
10 | //!
11 | //! assert_eq!(url.mime_type().type_, "text");
12 | //! assert_eq!(url.mime_type().subtype, "plain");
13 | //! assert_eq!(url.mime_type().get_parameter("charset"), Some("US-ASCII"));
14 | //! assert_eq!(body, b"Hello World!");
15 | //! assert!(fragment.is_none());
16 | //! ```
17 | 
18 | macro_rules! require {
19 |     ($condition: expr) => {
20 |         if !$condition {
21 |             return None;
22 |         }
23 |     };
24 | }
25 | 
26 | pub mod forgiving_base64;
27 | pub mod mime;
28 | 
29 | pub struct DataUrl<'a> {
30 |     mime_type: mime::Mime,
31 |     base64: bool,
32 |     encoded_body_plus_fragment: &'a str,
33 | }
34 | 
35 | #[derive(Debug)]
36 | pub enum DataUrlError {
37 |     NotADataUrl,
38 |     NoComma,
39 | }
40 | 
41 | impl<'a> DataUrl<'a> {
42 |     /// <https://fetch.spec.whatwg.org/#data-url-processor>
43 |     /// but starting from a string rather than a parsed `Url`, to avoid extra string copies.
44 |     pub fn process(input: &'a str) -> Result<Self, DataUrlError> {
45 |         use crate::DataUrlError::*;
46 | 
47 |         let after_colon = pretend_parse_data_url(input).ok_or(NotADataUrl)?;
48 | 
49 |         let (from_colon_to_comma, encoded_body_plus_fragment) =
50 |             find_comma_before_fragment(after_colon).ok_or(NoComma)?;
51 | 
52 |         let (mime_type, base64) = parse_header(from_colon_to_comma);
53 | 
54 |         Ok(DataUrl {
55 |             mime_type,
56 |             base64,
57 |             encoded_body_plus_fragment,
58 |         })
59 |     }
60 | 
61 |     pub fn mime_type(&self) -> &mime::Mime {
62 |         &self.mime_type
63 |     }
64 | 
65 |     /// Streaming-decode the data URL’s body to `write_body_bytes`,
66 |     /// and return the URL’s fragment identifier if it has one.
67 |     pub fn decode<F, E>(
68 |         &self,
69 |         write_body_bytes: F,
70 |     ) -> Result<Option<FragmentIdentifier<'a>>, forgiving_base64::DecodeError<E>>
71 |     where
72 |         F: FnMut(&[u8]) -> Result<(), E>,
73 |     {
74 |         if self.base64 {
75 |             decode_with_base64(self.encoded_body_plus_fragment, write_body_bytes)
76 |         } else {
77 |             decode_without_base64(self.encoded_body_plus_fragment, write_body_bytes)
78 |                 .map_err(forgiving_base64::DecodeError::WriteError)
79 |         }
80 |     }
81 | 
82 |     /// Return the decoded body, and the URL’s fragment identifier if it has one.
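    ///
    /// A minimal usage sketch (the `data:` URL below is illustrative):
    ///
    /// ```rust
    /// use data_url::DataUrl;
    ///
    /// let url = DataUrl::process("data:;base64,SGVsbG8=#frag").unwrap();
    /// let (body, fragment) = url.decode_to_vec().unwrap();
    /// assert_eq!(body, b"Hello");
    /// assert_eq!(fragment.unwrap().to_percent_encoded(), "frag");
    /// ```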
83 |     pub fn decode_to_vec(
84 |         &self,
85 |     ) -> Result<(Vec<u8>, Option<FragmentIdentifier<'a>>), forgiving_base64::InvalidBase64> {
86 |         let mut body = Vec::new();
87 |         let fragment = self.decode(|bytes| {
88 |             body.extend_from_slice(bytes);
89 |             Ok(())
90 |         })?;
91 |         Ok((body, fragment))
92 |     }
93 | }
94 | 
95 | /// The URL’s fragment identifier (after `#`)
96 | pub struct FragmentIdentifier<'a>(&'a str);
97 | 
98 | impl<'a> FragmentIdentifier<'a> {
99 |     /// Like in a parsed URL
100 |     pub fn to_percent_encoded(&self) -> String {
101 |         let mut string = String::new();
102 |         for byte in self.0.bytes() {
103 |             match byte {
104 |                 // Ignore ASCII tabs or newlines like the URL parser would
105 |                 b'\t' | b'\n' | b'\r' => continue,
106 |                 // https://url.spec.whatwg.org/#fragment-percent-encode-set
107 |                 b'\0'..=b' ' | b'"' | b'<' | b'>' | b'`' | b'\x7F'..=b'\xFF' => {
108 |                     percent_encode(byte, &mut string)
109 |                 }
110 |                 // Printable ASCII
111 |                 _ => string.push(byte as char),
112 |             }
113 |         }
114 |         string
115 |     }
116 | }
117 | 
118 | /// Similar to <https://url.spec.whatwg.org/#concept-basic-url-parser>
119 | /// followed by <https://url.spec.whatwg.org/#concept-url-serializer>
120 | ///
121 | /// * `None`: not a data URL.
122 | ///
123 | /// * `Some(s)`: sort of the result of serialization, except:
124 | ///
125 | ///   - `data:` prefix removed
126 | ///   - The fragment is included
127 | ///   - Other components are **not** UTF-8 percent-encoded
128 | ///   - ASCII tabs and newlines in the middle are **not** removed
129 | fn pretend_parse_data_url(input: &str) -> Option<&str> {
130 |     // Trim C0 control or space
131 |     let left_trimmed = input.trim_start_matches(|ch| ch <= ' ');
132 | 
133 |     let mut bytes = left_trimmed.bytes();
134 |     {
135 |         // Ignore ASCII tabs or newlines like the URL parser would
136 |         let mut iter = bytes
137 |             .by_ref()
138 |             .filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r'));
139 |         require!(iter.next()?.to_ascii_lowercase() == b'd');
140 |         require!(iter.next()?.to_ascii_lowercase() == b'a');
141 |         require!(iter.next()?.to_ascii_lowercase() == b't');
142 |         require!(iter.next()?.to_ascii_lowercase() == b'a');
143 |         require!(iter.next()?
== b':'); 144 | } 145 | let bytes_consumed = left_trimmed.len() - bytes.len(); 146 | let after_colon = &left_trimmed[bytes_consumed..]; 147 | 148 | // Trim C0 control or space 149 | Some(after_colon.trim_end_matches(|ch| ch <= ' ')) 150 | } 151 | 152 | fn find_comma_before_fragment(after_colon: &str) -> Option<(&str, &str)> { 153 | for (i, byte) in after_colon.bytes().enumerate() { 154 | if byte == b',' { 155 | return Some((&after_colon[..i], &after_colon[i + 1..])); 156 | } 157 | if byte == b'#' { 158 | break; 159 | } 160 | } 161 | None 162 | } 163 | 164 | fn parse_header(from_colon_to_comma: &str) -> (mime::Mime, bool) { 165 | // "Strip leading and trailing ASCII whitespace" 166 | // \t, \n, and \r would have been filtered by the URL parser 167 | // \f percent-encoded by the URL parser 168 | // space is the only remaining ASCII whitespace 169 | let trimmed = from_colon_to_comma.trim_matches(|c| matches!(c, ' ' | '\t' | '\n' | '\r')); 170 | 171 | let without_base64_suffix = remove_base64_suffix(trimmed); 172 | let base64 = without_base64_suffix.is_some(); 173 | let mime_type = without_base64_suffix.unwrap_or(trimmed); 174 | 175 | let mut string = String::new(); 176 | if mime_type.starts_with(';') { 177 | string.push_str("text/plain") 178 | } 179 | let mut in_query = false; 180 | for byte in mime_type.bytes() { 181 | match byte { 182 | // Ignore ASCII tabs or newlines like the URL parser would 183 | b'\t' | b'\n' | b'\r' => continue, 184 | 185 | // https://url.spec.whatwg.org/#c0-control-percent-encode-set 186 | b'\0'..=b'\x1F' | b'\x7F'..=b'\xFF' => percent_encode(byte, &mut string), 187 | 188 | // Bytes other than the C0 percent-encode set that are percent-encoded 189 | // by the URL parser in the query state. 190 | // '#' is also in that list but cannot occur here 191 | // since it indicates the start of the URL’s fragment. 192 | b' ' | b'"' | b'<' | b'>' if in_query => percent_encode(byte, &mut string), 193 | 194 | b'?' => { 195 | in_query = true; 196 | string.push('?') 197 | } 198 | 199 | // Printable ASCII 200 | _ => string.push(byte as char), 201 | } 202 | } 203 | 204 | // FIXME: does Mime::from_str match the MIME Sniffing Standard’s parsing algorithm? 205 | // 206 | let mime_type = string.parse().unwrap_or_else(|_| mime::Mime { 207 | type_: String::from("text"), 208 | subtype: String::from("plain"), 209 | parameters: vec![(String::from("charset"), String::from("US-ASCII"))], 210 | }); 211 | 212 | (mime_type, base64) 213 | } 214 | 215 | /// None: no base64 suffix 216 | #[allow(clippy::skip_while_next)] 217 | fn remove_base64_suffix(s: &str) -> Option<&str> { 218 | let mut bytes = s.bytes(); 219 | { 220 | // Ignore ASCII tabs or newlines like the URL parser would 221 | let iter = bytes 222 | .by_ref() 223 | .filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r')); 224 | 225 | // Search from the end 226 | let mut iter = iter.rev(); 227 | 228 | require!(iter.next()? == b'4'); 229 | require!(iter.next()? == b'6'); 230 | require!(iter.next()?.to_ascii_lowercase() == b'e'); 231 | require!(iter.next()?.to_ascii_lowercase() == b's'); 232 | require!(iter.next()?.to_ascii_lowercase() == b'a'); 233 | require!(iter.next()?.to_ascii_lowercase() == b'b'); 234 | require!(iter.skip_while(|&byte| byte == b' ').next()? 
== b';');
235 |     }
236 |     Some(&s[..bytes.len()])
237 | }
238 | 
239 | fn percent_encode(byte: u8, string: &mut String) {
240 |     const HEX_UPPER: [u8; 16] = *b"0123456789ABCDEF";
241 |     string.push('%');
242 |     string.push(HEX_UPPER[(byte >> 4) as usize] as char);
243 |     string.push(HEX_UPPER[(byte & 0x0f) as usize] as char);
244 | }
245 | 
246 | /// This is <https://url.spec.whatwg.org/#string-percent-decode> while also:
247 | ///
248 | /// * Ignoring ASCII tab or newlines
249 | /// * Stopping at the first '#' (which indicates the start of the fragment)
250 | ///
251 | /// Anything that would have been UTF-8 percent-encoded by the URL parser
252 | /// would be percent-decoded here.
253 | /// We skip that round-trip and pass it through unchanged.
254 | fn decode_without_base64<F, E>(
255 |     encoded_body_plus_fragment: &str,
256 |     mut write_bytes: F,
257 | ) -> Result<Option<FragmentIdentifier<'_>>, E>
258 | where
259 |     F: FnMut(&[u8]) -> Result<(), E>,
260 | {
261 |     let bytes = encoded_body_plus_fragment.as_bytes();
262 |     let mut slice_start = 0;
263 |     for (i, &byte) in bytes.iter().enumerate() {
264 |         // We only need to look for 5 different "special" byte values.
265 |         // For everything else we make slices as large as possible, borrowing the input,
266 |         // in order to make fewer write_all() calls.
267 |         if matches!(byte, b'%' | b'#' | b'\t' | b'\n' | b'\r') {
268 |             // Write everything (if anything) "non-special" we’ve accumulated
269 |             // before this special byte
270 |             if i > slice_start {
271 |                 write_bytes(&bytes[slice_start..i])?;
272 |             }
273 |             // Then deal with the special byte.
274 |             match byte {
275 |                 b'%' => {
276 |                     let l = bytes.get(i + 2).and_then(|&b| (b as char).to_digit(16));
277 |                     let h = bytes.get(i + 1).and_then(|&b| (b as char).to_digit(16));
278 |                     if let (Some(h), Some(l)) = (h, l) {
279 |                         // '%' followed by two ASCII hex digits
280 |                         let one_byte = h as u8 * 0x10 + l as u8;
281 |                         write_bytes(&[one_byte])?;
282 |                         slice_start = i + 3;
283 |                     } else {
284 |                         // Do nothing. Leave slice_start unchanged.
285 |                         // The % sign will be part of the next slice.
286 |                     }
287 |                 }
288 | 
289 |                 b'#' => {
290 |                     let fragment_start = i + 1;
291 |                     let fragment = &encoded_body_plus_fragment[fragment_start..];
292 |                     return Ok(Some(FragmentIdentifier(fragment)));
293 |                 }
294 | 
295 |                 // Skip over '\t' | '\n' | '\r'
296 |                 _ => slice_start = i + 1,
297 |             }
298 |         }
299 |     }
300 |     write_bytes(&bytes[slice_start..])?;
301 |     Ok(None)
302 | }
303 | 
304 | /// `decode_without_base64()` composed with
305 | /// <https://infra.spec.whatwg.org/#isomorphic-decode> composed with
306 | /// <https://infra.spec.whatwg.org/#forgiving-base64-decode>.
307 | fn decode_with_base64<F, E>(
308 |     encoded_body_plus_fragment: &str,
309 |     write_bytes: F,
310 | ) -> Result<Option<FragmentIdentifier<'_>>, forgiving_base64::DecodeError<E>>
311 | where
312 |     F: FnMut(&[u8]) -> Result<(), E>,
313 | {
314 |     let mut decoder = forgiving_base64::Decoder::new(write_bytes);
315 |     let fragment = decode_without_base64(encoded_body_plus_fragment, |bytes| decoder.feed(bytes))?;
316 |     decoder.finish()?;
317 |     Ok(fragment)
318 | }
319 | 
-------------------------------------------------------------------------------- /data-url/tests/mime-types.json: --------------------------------------------------------------------------------
1 | [
2 |   "Basics",
3 |   {
4 |     "input": "text/html;charset=gbk",
5 |     "output": "text/html;charset=gbk",
6 |     "navigable": true,
7 |     "encoding": "GBK"
8 |   },
9 |   {
10 |     "input": "TEXT/HTML;CHARSET=GBK",
11 |     "output": "text/html;charset=GBK",
12 |     "navigable": true,
13 |     "encoding": "GBK"
14 |   },
15 |   "Legacy comment syntax",
16 |   {
17 |     "input": "text/html;charset=gbk(",
18 |     "output": "text/html;charset=\"gbk(\"",
19 |     "navigable": true,
20 |     "encoding": null
21 |   },
22 |   {
23 |     "input": "text/html;x=(;charset=gbk",
24 |     "output": "text/html;x=\"(\";charset=gbk",
25 |     "navigable": true,
26 |     "encoding": "GBK"
27 |   },
28 |   "Duplicate parameter",
29 |   {
30 |     "input": "text/html;charset=gbk;charset=windows-1255",
31 |     "output": "text/html;charset=gbk",
32 |     "navigable": true,
33 |     "encoding": "GBK"
34 |   },
35 |   {
36 |     "input": "text/html;charset=();charset=GBK",
37 |     "output": "text/html;charset=\"()\"",
38 |     "navigable": true,
39 |     "encoding": null
40 |   },
41 |   "Spaces",
42 |   {
43 |     "input": "text/html;charset =gbk",
44 |     "output": "text/html",
45 |     "navigable": true,
46 |     "encoding": null
47 |   },
48 |   {
49 |     "input": "text/html ;charset=gbk",
50 |     "output": "text/html;charset=gbk",
51 |     "navigable": true,
52 |     "encoding": "GBK"
53 |   },
54 |   {
55 |     "input": "text/html; charset=gbk",
56 |     "output": "text/html;charset=gbk",
57 |     "navigable": true,
58 |     "encoding": "GBK"
59 |   },
60 |   {
61 |     "input": "text/html;charset= gbk",
62 |     "output": "text/html;charset=\" gbk\"",
63 |     "navigable": true,
64 |     "encoding": "GBK"
65 |   },
66 |   {
67 |     "input": "text/html;charset= \"gbk\"",
68 |     "output": "text/html;charset=\" \\\"gbk\\\"\"",
69 |     "navigable": true,
70 |     "encoding": null
71 |   },
72 |   "0x0B and 0x0C",
73 |   {
74 |     "input": "text/html;charset=\u000Bgbk",
75 |     "output": "text/html",
76 |     "navigable": true,
77 |     "encoding": null
78 |   },
79 |   {
80 |     "input": "text/html;charset=\u000Cgbk",
81 |     "output": "text/html",
82 |     "navigable": true,
83 |     "encoding": null
84 |   },
85 |   {
86 |     "input": "text/html;\u000Bcharset=gbk",
87 |     "output": "text/html",
88 |     "navigable": true,
89 |     "encoding": null
90 |   },
91 |   {
92 |     "input": "text/html;\u000Ccharset=gbk",
93 |     "output": "text/html",
94 |     "navigable": true,
95 |     "encoding": null
96 |   },
97 |   "Single quotes are a token, not a delimiter",
98 |   {
99 |     "input": "text/html;charset='gbk'",
100 |     "output": "text/html;charset='gbk'",
101 |     "navigable": true,
102 |     "encoding": null
103 |   },
104 |   {
105 |     "input": "text/html;charset='gbk",
106 |     "output": "text/html;charset='gbk",
107 |     "navigable": true,
108 |     "encoding": null
109 |   },
110 |   {
111 |     "input": "text/html;charset=gbk'",
112 |     "output": "text/html;charset=gbk'",
113 |     "navigable": true,
114 |     "encoding": null
115 |   },
116 |   {
117 |     "input": "text/html;charset=';charset=GBK",
118 |     "output": "text/html;charset='",
119 |     "navigable": true,
120 |     "encoding": null
121 |   },
122 |   "Invalid
parameters", 123 | { 124 | "input": "text/html;test;charset=gbk", 125 | "output": "text/html;charset=gbk", 126 | "navigable": true, 127 | "encoding": "GBK" 128 | }, 129 | { 130 | "input": "text/html;test=;charset=gbk", 131 | "output": "text/html;charset=gbk", 132 | "navigable": true, 133 | "encoding": "GBK" 134 | }, 135 | { 136 | "input": "text/html;';charset=gbk", 137 | "output": "text/html;charset=gbk", 138 | "navigable": true, 139 | "encoding": "GBK" 140 | }, 141 | { 142 | "input": "text/html;\";charset=gbk", 143 | "output": "text/html;charset=gbk", 144 | "navigable": true, 145 | "encoding": "GBK" 146 | }, 147 | { 148 | "input": "text/html ; ; charset=gbk", 149 | "output": "text/html;charset=gbk", 150 | "navigable": true, 151 | "encoding": "GBK" 152 | }, 153 | { 154 | "input": "text/html;;;;charset=gbk", 155 | "output": "text/html;charset=gbk", 156 | "navigable": true, 157 | "encoding": "GBK" 158 | }, 159 | { 160 | "input": "text/html;charset= \"\u007F;charset=GBK", 161 | "output": "text/html;charset=GBK", 162 | "navigable": true, 163 | "encoding": "GBK" 164 | }, 165 | { 166 | "input": "text/html;charset=\"\u007F;charset=foo\";charset=GBK", 167 | "output": "text/html;charset=GBK", 168 | "navigable": true, 169 | "encoding": "GBK" 170 | }, 171 | "Double quotes", 172 | { 173 | "input": "text/html;charset=\"gbk\"", 174 | "output": "text/html;charset=gbk", 175 | "navigable": true, 176 | "encoding": "GBK" 177 | }, 178 | { 179 | "input": "text/html;charset=\"gbk", 180 | "output": "text/html;charset=gbk", 181 | "navigable": true, 182 | "encoding": "GBK" 183 | }, 184 | { 185 | "input": "text/html;charset=gbk\"", 186 | "output": "text/html;charset=\"gbk\\\"\"", 187 | "navigable": true, 188 | "encoding": null 189 | }, 190 | { 191 | "input": "text/html;charset=\" gbk\"", 192 | "output": "text/html;charset=\" gbk\"", 193 | "navigable": true, 194 | "encoding": "GBK" 195 | }, 196 | { 197 | "input": "text/html;charset=\"gbk \"", 198 | "output": "text/html;charset=\"gbk \"", 199 | "navigable": true, 200 | "encoding": "GBK" 201 | }, 202 | { 203 | "input": "text/html;charset=\"\\ gbk\"", 204 | "output": "text/html;charset=\" gbk\"", 205 | "navigable": true, 206 | "encoding": "GBK" 207 | }, 208 | { 209 | "input": "text/html;charset=\"\\g\\b\\k\"", 210 | "output": "text/html;charset=gbk", 211 | "navigable": true, 212 | "encoding": "GBK" 213 | }, 214 | { 215 | "input": "text/html;charset=\"gbk\"x", 216 | "output": "text/html;charset=gbk", 217 | "navigable": true, 218 | "encoding": "GBK" 219 | }, 220 | { 221 | "input": "text/html;charset=\"\";charset=GBK", 222 | "output": "text/html;charset=\"\"", 223 | "navigable": true, 224 | "encoding": null 225 | }, 226 | { 227 | "input": "text/html;charset=\";charset=GBK", 228 | "output": "text/html;charset=\";charset=GBK\"", 229 | "navigable": true, 230 | "encoding": null 231 | }, 232 | "Unexpected code points", 233 | { 234 | "input": "text/html;charset={gbk}", 235 | "output": "text/html;charset=\"{gbk}\"", 236 | "navigable": true, 237 | "encoding": null 238 | }, 239 | "Parameter name longer than 127", 240 | { 241 | "input": "text/html;0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789=x;charset=gbk", 242 | "output": "text/html;0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789=x;charset=gbk", 243 | "navigable": true, 244 | "encoding": "GBK" 245 | }, 246 | "type/subtype longer than 127", 247 | { 248 | "input": 
"0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789/0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789", 249 | "output": "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789/0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" 250 | }, 251 | "Invalid names", 252 | { 253 | "input": "text/html;a]=bar;b[=bar;c=bar", 254 | "output": "text/html;c=bar" 255 | }, 256 | "Semicolons in value", 257 | { 258 | "input": "text/html;valid=\";\";foo=bar", 259 | "output": "text/html;valid=\";\";foo=bar" 260 | }, 261 | { 262 | "input": "text/html;in]valid=\";asd=foo\";foo=bar", 263 | "output": "text/html;foo=bar" 264 | }, 265 | "Valid", 266 | { 267 | "input": "!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz;!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz=!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", 268 | "output": "!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz/!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz;!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz=!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" 269 | }, 270 | { 271 | "input": "x/x;x=\"\t !\\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A0\u00A1\u00A2\u00A3\u00A4\u00A5\u00A6\u00A7\u00A8\u00A9\u00AA\u00AB\u00AC\u00AD\u00AE\u00AF\u00B0\u00B1\u00B2\u00B3\u00B4\u00B5\u00B6\u00B7\u00B8\u00B9\u00BA\u00BB\u00BC\u00BD\u00BE\u00BF\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5\u00C6\u00C7\u00C8\u00C9\u00CA\u00CB\u00CC\u00CD\u00CE\u00CF\u00D0\u00D1\u00D2\u00D3\u00D4\u00D5\u00D6\u00D7\u00D8\u00D9\u00DA\u00DB\u00DC\u00DD\u00DE\u00DF\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5\u00E6\u00E7\u00E8\u00E9\u00EA\u00EB\u00EC\u00ED\u00EE\u00EF\u00F0\u00F1\u00F2\u00F3\u00F4\u00F5\u00F6\u00F7\u00F8\u00F9\u00FA\u00FB\u00FC\u00FD\u00FE\u00FF\"", 272 | "output": "x/x;x=\"\t !\\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A0\u00A1\u00A2\u00A3\u00A4\u00A5\u00A6\u00A7\u00A8\u00A9\u00AA\u00AB\u00AC\u00AD\u00AE\u00AF\u00B0\u00B1\u00B2\u00B3\u00B4\u00B5\u00B6\u00B7\u00B8\u00B9\u00BA\u00BB\u00BC\u00BD\u00BE\u00BF\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5\u00C6\u00C7\u00C8\u00C9\u00CA\u00CB\u00CC\u00CD\u00CE\u00CF\u00D0\u00D1\u00D2\u00D3\u00D4\u00D5\u00D6\u00D7\u00D8\u00D9\u00DA\u00DB\u00DC\u00DD\u00DE\u00DF\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5\u00E6\u00E7\u00E8\u00E9\u00EA\u00EB\u00EC\u00ED\u00EE\u00EF\u00F0\u00F1\u00F2\u00F3\u00F4\u00F5\u00F6\u00F7\u00F8\u00F9\u00FA\u00FB\u00FC\u00FD\u00FE\u00FF\"" 273 | }, 274 | "End-of-file handling", 275 | { 276 | "input": "x/x;test", 277 | "output": "x/x" 278 | }, 279 | { 280 | 
"input": "x/x;test=\"\\", 281 | "output": "x/x;test=\"\\\\\"" 282 | }, 283 | "Whitespace (not handled by generated-mime-types.json or above)", 284 | { 285 | "input": "x/x;x= ", 286 | "output": "x/x" 287 | }, 288 | { 289 | "input": "x/x;x=\t", 290 | "output": "x/x" 291 | }, 292 | { 293 | "input": "x/x\n\r\t ;x=x", 294 | "output": "x/x;x=x" 295 | }, 296 | { 297 | "input": "\n\r\t x/x;x=x\n\r\t ", 298 | "output": "x/x;x=x" 299 | }, 300 | { 301 | "input": "x/x;\n\r\t x=x\n\r\t ;x=y", 302 | "output": "x/x;x=x" 303 | }, 304 | "Latin1", 305 | { 306 | "input": "text/html;test=\u00FF;charset=gbk", 307 | "output": "text/html;test=\"\u00FF\";charset=gbk", 308 | "navigable": true, 309 | "encoding": "GBK" 310 | }, 311 | ">Latin1", 312 | { 313 | "input": "x/x;test=\uFFFD;x=x", 314 | "output": "x/x;x=x" 315 | }, 316 | "Failure", 317 | { 318 | "input": "\u000Bx/x", 319 | "output": null 320 | }, 321 | { 322 | "input": "\u000Cx/x", 323 | "output": null 324 | }, 325 | { 326 | "input": "x/x\u000B", 327 | "output": null 328 | }, 329 | { 330 | "input": "x/x\u000C", 331 | "output": null 332 | }, 333 | { 334 | "input": "", 335 | "output": null 336 | }, 337 | { 338 | "input": "\t", 339 | "output": null 340 | }, 341 | { 342 | "input": "/", 343 | "output": null 344 | }, 345 | { 346 | "input": "bogus", 347 | "output": null 348 | }, 349 | { 350 | "input": "bogus/", 351 | "output": null 352 | }, 353 | { 354 | "input": "bogus/ ", 355 | "output": null 356 | }, 357 | { 358 | "input": "bogus/bogus/;", 359 | "output": null 360 | }, 361 | { 362 | "input": "", 363 | "output": null 364 | }, 365 | { 366 | "input": "(/)", 367 | "output": null 368 | }, 369 | { 370 | "input": "ÿ/ÿ", 371 | "output": null 372 | }, 373 | { 374 | "input": "text/html(;doesnot=matter", 375 | "output": null 376 | }, 377 | { 378 | "input": "{/}", 379 | "output": null 380 | }, 381 | { 382 | "input": "\u0100/\u0100", 383 | "output": null 384 | }, 385 | { 386 | "input": "text /html", 387 | "output": null 388 | }, 389 | { 390 | "input": "text/ html", 391 | "output": null 392 | }, 393 | { 394 | "input": "\"text/html\"", 395 | "output": null 396 | } 397 | ] 398 | -------------------------------------------------------------------------------- /UPGRADING.md: -------------------------------------------------------------------------------- 1 | # Upgrade guide 2 | 3 | This guide contains steps for upgrading crates in this project between major 4 | versions. 5 | 6 | ## Upgrading from url 1.x to 2.1+ 7 | 8 | * The minimum supported Rust version is now v1.33.0. Verify that you can bump 9 | your library or application to the same MSRV. 10 | 11 | * `Url` no longer implements `std::net::ToSocketAddrs`. You will instead need to 12 | explicitly call `socket_addrs` to convert your `Url` to a type that implements 13 | `ToSocketAddrs`. 14 | 15 | Note that v2.0 removed support for `std::net::ToSocketAddrs` with no 16 | replacement; the `socket_addrs` method was not added until v2.1. 
17 | 
18 | Before upgrading:
19 | 
20 | ```rust
21 | let url = Url::parse("http://github.com:80").unwrap();
22 | let stream = TcpStream::connect(url).unwrap();
23 | ```
24 | 
25 | After upgrading:
26 | 
27 | ```rust
28 | let url = Url::parse("http://github.com:80").unwrap();
29 | let addrs = url.socket_addrs(|| None).unwrap();
30 | let stream = TcpStream::connect(&*addrs).unwrap();
31 | ```
32 | 
33 | Before upgrading:
34 | 
35 | ```rust
36 | let url = Url::parse("socks5://localhost").unwrap();
37 | let stream = TcpStream::connect(url.with_default_port(|url| match url.scheme() {
38 |     "socks5" => Ok(1080),
39 |     _ => Err(()),
40 | })).unwrap();
41 | ```
42 | 
43 | After upgrading:
44 | 
45 | ```rust
46 | let url = Url::parse("socks5://localhost").unwrap();
47 | let stream = TcpStream::connect(&*url.socket_addrs(|| match url.scheme() {
48 |     "socks5" => Some(1080),
49 |     _ => None,
50 | }).unwrap()).unwrap();
51 | ```
52 | 
53 | * `url_serde` is no longer required to use `Url` with Serde 1.x. Remove
54 | references to `url_serde` and enable the `serde` feature instead.
55 | 
56 | ```toml
57 | # Cargo.toml
58 | [dependencies]
59 | url = { version = "2.0", features = ["serde"] }
60 | ```
61 | 
62 | * The `idna` and `percent_encoding` crates are no longer exported by the `url`
63 | crate. Depend on those crates directly instead. See below for additional
64 | breaking changes in the percent-encoding crate.
65 | 
66 | Before upgrading:
67 | 
68 | ```rust
69 | use url::percent_encoding::percent_decode;
70 | ```
71 | 
72 | After upgrading:
73 | 
74 | ```rust
75 | use percent_encoding::percent_decode;
76 | ```
77 | 
78 | ## Upgrading from percent-encoding 1.x to 2.x
79 | 
80 | * Prepackaged encoding sets, like `QUERY_ENCODE_SET` and
81 | `PATH_SEGMENT_ENCODE_SET`, are no longer provided. You
82 | will need to read the specifications relevant to your domain and construct
83 | your own encoding sets by using the `percent_encoding::AsciiSet` builder
84 | methods on either of the base encoding sets, `percent_encoding::CONTROLS` or
85 | `percent_encoding::NON_ALPHANUMERIC`.
86 | 
87 | Before upgrading:
88 | 
89 | ```rust
90 | use percent_encoding::QUERY_ENCODE_SET;
91 | 
92 | percent_encoding::utf8_percent_encode(value, QUERY_ENCODE_SET);
93 | ```
94 | 
95 | After upgrading:
96 | 
97 | ```rust
98 | /// https://url.spec.whatwg.org/#query-state
99 | const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
100 | 
101 | percent_encoding::utf8_percent_encode(value, QUERY);
102 | ```
103 | 
104 | 
105 | ## Upgrading from url 0.x to 1.x
106 | 
107 | * The fields of `Url` are now private because the `Url` constructor, parser,
108 | and setters maintain invariants that could be violated if you were to set the fields directly.
109 | Instead of accessing, for example, `url.scheme`, use the getter method, such as `url.scheme()`.
110 | Instead of assigning directly to a field, for example `url.scheme = "https".to_string()`,
111 | use the setter method, such as `url.set_scheme("https").unwrap()`.
112 | (Some setters validate the new value and return a `Result` that must be used.)
113 | 
114 | * The methods of `Url` now return `&str` instead of `String`,
115 | thus reducing allocations and making serialization cheap.
116 | 
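Together, these two changes look like this in practice (an editor's sketch; the URL and scheme values are illustrative):

```rust
let mut url = Url::parse("http://example.com/").unwrap();
assert_eq!(url.scheme(), "http"); // getter instead of reading a `scheme` field
url.set_scheme("https").unwrap(); // setter; its `Result` must be used
assert_eq!(url.as_str(), "https://example.com/");
```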
117 | * The `path()` method on `url::Url` instances used to return `Option<&[String]>`;
118 | now it returns `&str`.
119 | If you would like functionality more similar to the old behavior of `path()`,
120 | use `path_segments()`, which returns `Option<str::Split<char>>`.
121 | 
122 | Before upgrading:
123 | 
124 | ```rust
125 | let issue_list_url = Url::parse(
126 |     "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
127 | ).unwrap();
128 | assert_eq!(issue_list_url.path(), Some(&["rust-lang".to_string(),
129 |     "rust".to_string(),
130 |     "issues".to_string()][..]));
131 | ```
132 | 
133 | After upgrading:
134 | 
135 | ```rust
136 | let issue_list_url = Url::parse(
137 |     "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
138 | ).unwrap();
139 | assert_eq!(issue_list_url.path(), "/rust-lang/rust/issues");
140 | assert_eq!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()),
141 |     Some(vec!["rust-lang", "rust", "issues"]));
142 | ```
143 | 
144 | * The `path_mut()` method on `url::Url` instances that allowed modification of a URL's path
145 | has been replaced by `path_segments_mut()`.
146 | 
147 | Before upgrading:
148 | 
149 | ```rust
150 | let mut url = Url::parse("https://github.com/rust-lang/rust").unwrap();
151 | url.path_mut().unwrap().push("issues");
152 | ```
153 | 
154 | After upgrading:
155 | 
156 | ```rust
157 | let mut url = Url::parse("https://github.com/rust-lang/rust").unwrap();
158 | url.path_segments_mut().unwrap().push("issues");
159 | ```
160 | 
161 | * The `domain_mut()` method on `url::Url` instances that allowed modification of a URL's domain
162 | has been replaced by `set_host()` and `set_ip_host()`.
163 | 
164 | * The `host()` method on `url::Url` instances used to return `Option<&Host>`;
165 | now it returns `Option<Host<&str>>`.
166 | The `serialize_host()` method that returned `Option<String>`
167 | has been replaced by the `host_str()` method that returns `Option<&str>`.
168 | 
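For example, replacing a host now looks like this (an editor's sketch; the host names are illustrative):

```rust
let mut url = Url::parse("https://example.com/").unwrap();
assert_eq!(url.host_str(), Some("example.com"));
url.set_host(Some("example.org")).unwrap();
assert_eq!(url.as_str(), "https://example.org/");
```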
169 | * The `serialize()` method on `url::Url` instances that returned `String`
170 | has been replaced by an `as_str()` method that returns `&str`.
171 | 
172 | Before upgrading:
173 | 
174 | ```rust
175 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
176 | assert_eq!(this_document.serialize(), "http://servo.github.io/rust-url/url/index.html".to_string());
177 | ```
178 | 
179 | After upgrading:
180 | 
181 | ```rust
182 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
183 | assert_eq!(this_document.as_str(), "http://servo.github.io/rust-url/url/index.html");
184 | ```
185 | 
186 | * `url::UrlParser` has been replaced by `url::Url::parse()` and `url::Url::join()`.
187 | 
188 | Before upgrading:
189 | 
190 | ```rust
191 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
192 | let css_url = UrlParser::new().base_url(&this_document).parse("../main.css").unwrap();
193 | assert_eq!(css_url.serialize(), "http://servo.github.io/rust-url/main.css".to_string());
194 | ```
195 | 
196 | After upgrading:
197 | 
198 | ```rust
199 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
200 | let css_url = this_document.join("../main.css").unwrap();
201 | assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
202 | ```
203 | 
204 | * `url::parse_path()` and `url::UrlParser::parse_path()` have been removed without replacement.
205 | As a workaround, you can give a base URL that you then ignore to `url::Url::parse()`.
206 | 
207 | Before upgrading:
208 | 
209 | ```rust
210 | let (path, query, fragment) = url::parse_path("/foo/bar/../baz?q=42").unwrap();
211 | assert_eq!(path, vec!["foo".to_string(), "baz".to_string()]);
212 | assert_eq!(query, Some("q=42".to_string()));
213 | assert_eq!(fragment, None);
214 | ```
215 | 
216 | After upgrading:
217 | 
218 | ```rust
219 | let base = Url::parse("http://example.com").unwrap();
220 | let with_path = base.join("/foo/bar/../baz?q=42").unwrap();
221 | assert_eq!(with_path.path(), "/foo/baz");
222 | assert_eq!(with_path.query(), Some("q=42"));
223 | assert_eq!(with_path.fragment(), None);
224 | ```
225 | 
226 | * The `url::form_urlencoded::serialize()` method
227 | has been replaced with the `url::form_urlencoded::Serializer` struct.
228 | Instead of calling `serialize()` with key/value pairs,
229 | create a new `Serializer` with a new string,
230 | call the `extend_pairs()` method on the `Serializer` instance with the key/value pairs as the argument,
231 | then call `finish()`.
232 | 
233 | Before upgrading:
234 | 
235 | ```rust
236 | let form = url::form_urlencoded::serialize(form.iter().map(|(k, v)| {
237 |     (&k[..], &v[..])
238 | }));
239 | ```
240 | 
241 | After upgrading:
242 | 
243 | ```rust
244 | let form = url::form_urlencoded::Serializer::new(String::new()).extend_pairs(
245 |     form.iter().map(|(k, v)| { (&k[..], &v[..]) })
246 | ).finish();
247 | ```
248 | 
249 | * The `set_query_from_pairs()` method on `url::Url` instances that took key/value pairs
250 | has been replaced with `query_pairs_mut()`, which allows you to modify the `url::Url`'s query pairs.
251 | 
252 | Before upgrading:
253 | 
254 | ```rust
255 | let mut url = Url::parse("https://duckduckgo.com/").unwrap();
256 | let pairs = vec![
257 |     ("q", "test"),
258 |     ("ia", "images"),
259 | ];
260 | url.set_query_from_pairs(pairs.iter().map(|&(k, v)| {
261 |     (&k[..], &v[..])
262 | }));
263 | ```
264 | 
265 | After upgrading:
266 | 
267 | ```rust
268 | let mut url = Url::parse("https://duckduckgo.com/").unwrap();
269 | let pairs = vec![
270 |     ("q", "test"),
271 |     ("ia", "images"),
272 | ];
273 | url.query_pairs_mut().clear().extend_pairs(
274 |     pairs.iter().map(|&(k, v)| { (&k[..], &v[..]) })
275 | );
276 | ```
277 | 
278 | * `url::SchemeData`, its variants `Relative` and `NonRelative`,
279 | and the struct `url::RelativeSchemeData` have been removed.
280 | Instead of matching on these variants
281 | to determine if you have a URL in a relative scheme such as HTTP
282 | versus a URL in a non-relative scheme such as data,
283 | use the `cannot_be_a_base()` method to determine which kind you have.
284 | 
285 | Before upgrading:
286 | 
287 | ```rust
288 | match url.scheme_data {
289 |     url::SchemeData::Relative(..) => {}
290 |     url::SchemeData::NonRelative(..) => {
291 |         return Err(human(format!("`{}` must have relative scheme \
292 |             data: {}", field, url)))
293 |     }
294 | }
295 | ```
296 | 
297 | After upgrading:
298 | 
299 | ```rust
300 | if url.cannot_be_a_base() {
301 |     return Err(human(format!("`{}` must have relative scheme \
302 |         data: {}", field, url)))
303 | }
304 | ```
305 | 
306 | * The function `url::whatwg_scheme_type_mapper()`, the `SchemeType` enum,
307 | and the `scheme_type_mapper()` method on `url::UrlParser` instances have been removed.
308 | `SchemeType` had a method for getting the `default_port()`;
309 | to replicate this functionality, use the method `port_or_known_default()` on `url::Url` instances.
310 | The `port_or_default()` method on `url::Url` instances has been removed;
311 | use `port_or_known_default()` instead.
312 | 
313 | Before upgrading:
314 | 
315 | ```rust
316 | let port = match whatwg_scheme_type_mapper(&url.scheme) {
317 |     SchemeType::Relative(port) => port,
318 |     _ => return Err(format!("Invalid special scheme: `{}`",
319 |         raw_url.scheme)),
320 | };
321 | ```
322 | 
323 | After upgrading:
324 | 
325 | ```rust
326 | let port = match url.port_or_known_default() {
327 |     Some(port) => port,
328 |     _ => return Err(format!("Invalid special scheme: `{}`",
329 |         url.scheme())),
330 | };
331 | ```
332 | 
333 | * The following formatting utilities have been removed without replacement;
334 | look at their linked previous implementations
335 | if you would like to replicate the functionality in your code:
336 |   * [`url::format::PathFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL24)
337 |   * [`url::format::UserInfoFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL50)
338 |   * [`url::format::UrlNoFragmentFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL70)
339 | 
340 | * `url::percent_encoding::percent_decode()` used to have a return type of `Vec<u8>`;
341 | now it returns an iterator of decoded `u8` bytes that also implements `Into<Cow<[u8]>>`.
342 | Use `.into()` to get a `Cow<[u8]>`, then `.into_owned()` to obtain a `Vec<u8>`.
343 | (`.collect()` also works but might not be as efficient.)
344 | 
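For example (an editor's sketch; the input bytes are illustrative):

```rust
use url::percent_encoding::percent_decode;

// `percent_decode` now yields the decoded bytes one at a time.
let decoded: Vec<u8> = percent_decode(b"%23first").collect();
assert_eq!(decoded, b"#first".to_vec());
```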
345 | * The `url::percent_encoding::EncodeSet` struct and constant instances
346 | used with `url::percent_encoding::percent_encode()`
347 | have been changed to structs that implement the trait `url::percent_encoding::EncodeSet`.
348 |   * `SIMPLE_ENCODE_SET`, `QUERY_ENCODE_SET`, `DEFAULT_ENCODE_SET`,
349 | and `USERINFO_ENCODE_SET` have the same behavior.
350 |   * `USERNAME_ENCODE_SET` and `PASSWORD_ENCODE_SET` have been removed;
351 | use `USERINFO_ENCODE_SET` instead.
352 |   * `HTTP_VALUE_ENCODE_SET` has been removed;
353 | an implementation of it in the new types can be found [in hyper's source](
354 | https://github.com/hyperium/hyper/blob/67436c5bf615cf5a55a71e32b788afef5985570e/src/header/parsing.rs#L131-L138)
355 | if you need to replicate this functionality in your code.
356 |   * `FORM_URLENCODED_ENCODE_SET` has been removed;
357 | instead, use the functionality in `url::form_urlencoded`.
358 |   * `PATH_SEGMENT_ENCODE_SET` has been added for use on '/'-separated path segments.
359 | 
360 | * `url::percent_encoding::percent_decode_to()` has been removed.
361 | Use `url::percent_encoding::percent_decode()`, which returns an iterator.
362 | You can then use the iterator’s `collect()` method
363 | or give it to some data structure’s `extend()` method.
364 | * A number of `ParseError` variants have changed.
365 | [See the documentation for the current set](http://servo.github.io/rust-url/url/enum.ParseError.html).
366 | * `url::OpaqueOrigin::new()` and `url::Origin::UID(OpaqueOrigin)`
367 | have been replaced by `url::Origin::new_opaque()` and `url::Origin::Opaque(OpaqueOrigin)`, respectively.
368 | 
--------------------------------------------------------------------------------
/form_urlencoded/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2013-2016 The rust-url developers.
2 | //
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 | // option. This file may not be copied, modified, or distributed
7 | // except according to those terms.
8 | 
9 | //! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
10 | //! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
11 | //! as used by HTML forms.
12 | //!
13 | //! Converts between a string (such as a URL’s query string)
14 | //! and a sequence of (name, value) pairs.
15 | 
16 | use percent_encoding::{percent_decode, percent_encode_byte};
17 | use std::borrow::{Borrow, Cow};
18 | use std::str;
19 | 
20 | /// Convert a byte string in the `application/x-www-form-urlencoded` syntax
21 | /// into an iterator of (name, value) pairs.
22 | ///
23 | /// Use `parse(input.as_bytes())` to parse a `&str` string.
24 | ///
25 | /// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
26 | /// converted to `[("#first", "%try%")]`.
27 | #[inline]
28 | pub fn parse(input: &[u8]) -> Parse<'_> {
29 |     Parse { input }
30 | }
31 | /// The return type of `parse()`.
32 | #[derive(Copy, Clone)]
33 | pub struct Parse<'a> {
34 |     input: &'a [u8],
35 | }
36 | 
37 | impl<'a> Iterator for Parse<'a> {
38 |     type Item = (Cow<'a, str>, Cow<'a, str>);
39 | 
40 |     fn next(&mut self) -> Option<Self::Item> {
41 |         loop {
42 |             if self.input.is_empty() {
43 |                 return None;
44 |             }
45 |             let mut split2 = self.input.splitn(2, |&b| b == b'&');
46 |             let sequence = split2.next().unwrap();
47 |             self.input = split2.next().unwrap_or(&[][..]);
48 |             if sequence.is_empty() {
49 |                 continue;
50 |             }
51 |             let mut split2 = sequence.splitn(2, |&b| b == b'=');
52 |             let name = split2.next().unwrap();
53 |             let value = split2.next().unwrap_or(&[][..]);
54 |             return Some((decode(name), decode(value)));
55 |         }
56 |     }
57 | }
58 | 
59 | fn decode(input: &[u8]) -> Cow<'_, str> {
60 |     let replaced = replace_plus(input);
61 |     decode_utf8_lossy(match percent_decode(&replaced).into() {
62 |         Cow::Owned(vec) => Cow::Owned(vec),
63 |         Cow::Borrowed(_) => replaced,
64 |     })
65 | }
66 | 
67 | /// Replace b'+' with b' '
68 | fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> {
69 |     match input.iter().position(|&b| b == b'+') {
70 |         None => Cow::Borrowed(input),
71 |         Some(first_position) => {
72 |             let mut replaced = input.to_owned();
73 |             replaced[first_position] = b' ';
74 |             for byte in &mut replaced[first_position + 1..] {
75 |                 if *byte == b'+' {
76 |                     *byte = b' ';
77 |                 }
78 |             }
79 |             Cow::Owned(replaced)
80 |         }
81 |     }
82 | }
83 | 
84 | impl<'a> Parse<'a> {
85 |     /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow`.
86 |     pub fn into_owned(self) -> ParseIntoOwned<'a> {
87 |         ParseIntoOwned { inner: self }
88 |     }
89 | }
90 | 
91 | /// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow`.
92 | pub struct ParseIntoOwned<'a> {
93 |     inner: Parse<'a>,
94 | }
95 | 
96 | impl<'a> Iterator for ParseIntoOwned<'a> {
97 |     type Item = (String, String);
98 | 
99 |     fn next(&mut self) -> Option<Self::Item> {
100 |         self.inner
101 |             .next()
102 |             .map(|(k, v)| (k.into_owned(), v.into_owned()))
103 |     }
104 | }
105 | 
106 | /// The [`application/x-www-form-urlencoded` byte serializer](
107 | /// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
108 | ///
109 | /// Return an iterator of `&str` slices.
110 | pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
111 |     ByteSerialize { bytes: input }
112 | }
113 | 
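// Editor's note: a usage sketch for `byte_serialize` (not part of the original
// source; the input bytes are illustrative). Spaces become '+', and other
// non-alphanumeric bytes are percent-encoded:
//
//     let serialized: String = byte_serialize(b"foo bar?").collect();
//     assert_eq!(serialized, "foo+bar%3F");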
114 | /// Return value of `byte_serialize()`.
115 | #[derive(Debug)]
116 | pub struct ByteSerialize<'a> {
117 |     bytes: &'a [u8],
118 | }
119 | 
120 | fn byte_serialized_unchanged(byte: u8) -> bool {
121 |     matches!(byte, b'*' | b'-' | b'.' | b'0' ..= b'9' | b'A' ..= b'Z' | b'_' | b'a' ..= b'z')
122 | }
123 | 
124 | impl<'a> Iterator for ByteSerialize<'a> {
125 |     type Item = &'a str;
126 | 
127 |     fn next(&mut self) -> Option<&'a str> {
128 |         if let Some((&first, tail)) = self.bytes.split_first() {
129 |             if !byte_serialized_unchanged(first) {
130 |                 self.bytes = tail;
131 |                 return Some(if first == b' ' {
132 |                     "+"
133 |                 } else {
134 |                     percent_encode_byte(first)
135 |                 });
136 |             }
137 |             let position = tail.iter().position(|&b| !byte_serialized_unchanged(b));
138 |             let (unchanged_slice, remaining) = match position {
139 |                 // 1 for first_byte + i unchanged in tail
140 |                 Some(i) => self.bytes.split_at(1 + i),
141 |                 None => (self.bytes, &[][..]),
142 |             };
143 |             self.bytes = remaining;
144 |             // This unsafe is appropriate because we have already checked these
145 |             // bytes in byte_serialized_unchanged, which checks for a subset
146 |             // of UTF-8. So we know these bytes are valid UTF-8, and doing
147 |             // another UTF-8 check would be wasteful.
148 |             Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
149 |         } else {
150 |             None
151 |         }
152 |     }
153 | 
154 |     fn size_hint(&self) -> (usize, Option<usize>) {
155 |         if self.bytes.is_empty() {
156 |             (0, Some(0))
157 |         } else {
158 |             (1, Some(self.bytes.len()))
159 |         }
160 |     }
161 | }
162 | 
163 | /// The [`application/x-www-form-urlencoded` serializer](
164 | /// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
165 | pub struct Serializer<'a, T: Target> {
166 |     target: Option<T>,
167 |     start_position: usize,
168 |     encoding: EncodingOverride<'a>,
169 | }
170 | 
171 | pub trait Target {
172 |     fn as_mut_string(&mut self) -> &mut String;
173 |     fn finish(self) -> Self::Finished;
174 |     type Finished;
175 | }
176 | 
177 | impl Target for String {
178 |     fn as_mut_string(&mut self) -> &mut String {
179 |         self
180 |     }
181 |     fn finish(self) -> Self {
182 |         self
183 |     }
184 |     type Finished = Self;
185 | }
186 | 
187 | impl<'a> Target for &'a mut String {
188 |     fn as_mut_string(&mut self) -> &mut String {
189 |         &mut **self
190 |     }
191 |     fn finish(self) -> Self {
192 |         self
193 |     }
194 |     type Finished = Self;
195 | }
196 | 
197 | impl<'a, T: Target> Serializer<'a, T> {
198 |     /// Create a new `application/x-www-form-urlencoded` serializer for the given target.
199 |     ///
200 |     /// If the target is non-empty,
201 |     /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
202 |     pub fn new(target: T) -> Self {
203 |         Self::for_suffix(target, 0)
204 |     }
205 | 
206 |     /// Create a new `application/x-www-form-urlencoded` serializer
207 |     /// for a suffix of the given target.
208 |     ///
209 |     /// If that suffix is non-empty,
210 |     /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
211 |     pub fn for_suffix(mut target: T, start_position: usize) -> Self {
212 |         if target.as_mut_string().len() < start_position {
213 |             panic!(
214 |                 "invalid length {} for target of length {}",
215 |                 start_position,
216 |                 target.as_mut_string().len()
217 |             );
218 |         }
219 | 
220 |         Serializer {
221 |             target: Some(target),
222 |             start_position,
223 |             encoding: None,
224 |         }
225 |     }
226 | 
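    // Editor's note: a usage sketch for `for_suffix` (not part of the original
    // source; the URL is illustrative). The serialized suffix starts right
    // after the `?`:
    //
    //     let mut out = "https://example.com/?".to_string();
    //     let start = out.len();
    //     Serializer::for_suffix(&mut out, start).append_pair("q", "rust");
    //     assert_eq!(out, "https://example.com/?q=rust");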
227 |     /// Remove any existing name/value pair.
228 |     ///
229 |     /// Panics if called after `.finish()`.
230 |     pub fn clear(&mut self) -> &mut Self {
231 |         string(&mut self.target).truncate(self.start_position);
232 |         self
233 |     }
234 | 
235 |     /// Set the character encoding to be used for names and values before percent-encoding.
236 |     pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self {
237 |         self.encoding = new;
238 |         self
239 |     }
240 | 
241 |     /// Serialize and append a name/value pair.
242 |     ///
243 |     /// Panics if called after `.finish()`.
244 |     pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
245 |         append_pair(
246 |             string(&mut self.target),
247 |             self.start_position,
248 |             self.encoding,
249 |             name,
250 |             value,
251 |         );
252 |         self
253 |     }
254 | 
255 |     /// Serialize and append a parameter name without any value.
256 |     ///
257 |     /// Panics if called after `.finish()`.
258 |     pub fn append_key_only(&mut self, name: &str) -> &mut Self {
259 |         append_key_only(
260 |             string(&mut self.target),
261 |             self.start_position,
262 |             self.encoding,
263 |             name,
264 |         );
265 |         self
266 |     }
267 | 
268 |     /// Serialize and append a number of name/value pairs.
269 |     ///
270 |     /// This simply calls `append_pair` repeatedly.
271 |     /// This can be more convenient, so the user doesn’t need to introduce a block
272 |     /// to limit the scope of `Serializer`’s borrow of its string.
273 |     ///
274 |     /// Panics if called after `.finish()`.
275 |     pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
276 |     where
277 |         I: IntoIterator,
278 |         I::Item: Borrow<(K, V)>,
279 |         K: AsRef<str>,
280 |         V: AsRef<str>,
281 |     {
282 |         {
283 |             let string = string(&mut self.target);
284 |             for pair in iter {
285 |                 let &(ref k, ref v) = pair.borrow();
286 |                 append_pair(
287 |                     string,
288 |                     self.start_position,
289 |                     self.encoding,
290 |                     k.as_ref(),
291 |                     v.as_ref(),
292 |                 );
293 |             }
294 |         }
295 |         self
296 |     }
297 | 
298 |     /// Serialize and append a number of names without values.
299 |     ///
300 |     /// This simply calls `append_key_only` repeatedly.
301 |     /// This can be more convenient, so the user doesn’t need to introduce a block
302 |     /// to limit the scope of `Serializer`’s borrow of its string.
303 |     ///
304 |     /// Panics if called after `.finish()`.
305 |     pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self
306 |     where
307 |         I: IntoIterator,
308 |         I::Item: Borrow<K>,
309 |         K: AsRef<str>,
310 |     {
311 |         {
312 |             let string = string(&mut self.target);
313 |             for key in iter {
314 |                 let k = key.borrow().as_ref();
315 |                 append_key_only(string, self.start_position, self.encoding, k);
316 |             }
317 |         }
318 |         self
319 |     }
320 | 
321 |     /// If this serializer was constructed with a string, take and return that string.
322 |     ///
323 |     /// ```rust
324 |     /// use form_urlencoded;
325 |     /// let encoded: String = form_urlencoded::Serializer::new(String::new())
326 |     ///     .append_pair("foo", "bar & baz")
327 |     ///     .append_pair("saison", "Été+hiver")
328 |     ///     .finish();
329 |     /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
330 |     /// ```
331 |     ///
332 |     /// Panics if called more than once.
333 | pub fn finish(&mut self) -> T::Finished { 334 | self.target 335 | .take() 336 | .expect("url::form_urlencoded::Serializer double finish") 337 | .finish() 338 | } 339 | } 340 | 341 | fn append_separator_if_needed(string: &mut String, start_position: usize) { 342 | if string.len() > start_position { 343 | string.push('&') 344 | } 345 | } 346 | 347 | fn string(target: &mut Option) -> &mut String { 348 | target 349 | .as_mut() 350 | .expect("url::form_urlencoded::Serializer finished") 351 | .as_mut_string() 352 | } 353 | 354 | fn append_pair( 355 | string: &mut String, 356 | start_position: usize, 357 | encoding: EncodingOverride<'_>, 358 | name: &str, 359 | value: &str, 360 | ) { 361 | append_separator_if_needed(string, start_position); 362 | append_encoded(name, string, encoding); 363 | string.push('='); 364 | append_encoded(value, string, encoding); 365 | } 366 | 367 | fn append_key_only( 368 | string: &mut String, 369 | start_position: usize, 370 | encoding: EncodingOverride, 371 | name: &str, 372 | ) { 373 | append_separator_if_needed(string, start_position); 374 | append_encoded(name, string, encoding); 375 | } 376 | 377 | fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) { 378 | string.extend(byte_serialize(&encode(encoding, s))) 379 | } 380 | 381 | pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> { 382 | if let Some(o) = encoding_override { 383 | return o(input); 384 | } 385 | input.as_bytes().into() 386 | } 387 | 388 | pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> { 389 | // Note: This function is duplicated in `percent_encoding/lib.rs`. 390 | match input { 391 | Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), 392 | Cow::Owned(bytes) => { 393 | match String::from_utf8_lossy(&bytes) { 394 | Cow::Borrowed(utf8) => { 395 | // If from_utf8_lossy returns a Cow::Borrowed, then we can 396 | // be sure our original bytes were valid UTF-8. This is because 397 | // if the bytes were invalid UTF-8 from_utf8_lossy would have 398 | // to allocate a new owned string to back the Cow so it could 399 | // replace invalid bytes with a placeholder. 400 | 401 | // First we do a debug_assert to confirm our description above. 402 | let raw_utf8: *const [u8] = utf8.as_bytes(); 403 | debug_assert!(raw_utf8 == &*bytes as *const [u8]); 404 | 405 | // Given we know the original input bytes are valid UTF-8, 406 | // and we have ownership of those bytes, we re-use them and 407 | // return a Cow::Owned here. 408 | Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }) 409 | } 410 | Cow::Owned(s) => Cow::Owned(s), 411 | } 412 | } 413 | } 414 | } 415 | 416 | pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>; 417 | --------------------------------------------------------------------------------