├── idna ├── LICENSE-MIT ├── LICENSE-APACHE ├── Cargo.toml ├── tests │ ├── tests.rs │ ├── punycode.rs │ ├── punycode_tests.json │ ├── unit.rs │ └── uts46.rs ├── benches │ └── all.rs └── src │ ├── lib.rs │ ├── make_uts46_mapping_table.py │ └── punycode.rs ├── url ├── LICENSE-MIT ├── LICENSE-APACHE ├── fuzz │ ├── .gitignore │ ├── Cargo.toml │ └── fuzz_targets │ │ └── parse.rs ├── benches │ └── parse_url.rs ├── Cargo.toml ├── tests │ ├── debugger_visualizer.rs │ └── data.rs └── src │ ├── origin.rs │ ├── slicing.rs │ ├── path_segments.rs │ └── quirks.rs ├── data-url ├── LICENSE-MIT ├── LICENSE-APACHE ├── Cargo.toml ├── src │ ├── make_base64_decode_table.py │ ├── forgiving_base64.rs │ ├── mime.rs │ └── lib.rs ├── README.md └── tests │ ├── base64.json │ ├── data-urls.json │ ├── wpt.rs │ └── mime-types.json ├── percent_encoding ├── LICENSE-MIT ├── LICENSE-APACHE └── Cargo.toml ├── .gitignore ├── Cargo.toml ├── deny.toml ├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── workflows │ ├── codecov.yml │ └── main.yml ├── form_urlencoded ├── Cargo.toml ├── LICENSE-MIT ├── LICENSE-APACHE └── src │ └── lib.rs ├── SECURITY.md ├── README.md ├── LICENSE-MIT ├── debug_metadata ├── url.natvis └── README.md ├── LICENSE-APACHE └── UPGRADING.md /idna/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /url/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /data-url/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /idna/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /url/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /data-url/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /percent_encoding/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /percent_encoding/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | /.cargo/config 4 | -------------------------------------------------------------------------------- /url/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target 3 | corpus 4 | artifacts 5 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["url", "form_urlencoded", "idna", "percent_encoding", "data-url"] 3 | 
-------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | [licenses] 2 | allow-osi-fsf-free = "either" 3 | copyleft = "warn" 4 | private = { ignore = true } 5 | -------------------------------------------------------------------------------- /percent_encoding/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "percent-encoding" 3 | version = "2.1.0" 4 | authors = ["The rust-url developers"] 5 | description = "Percent encoding and decoding" 6 | repository = "https://github.com/servo/rust-url/" 7 | license = "MIT OR Apache-2.0" 8 | edition = "2018" 9 | rust-version = "1.51" 10 | 11 | [features] 12 | default = ["alloc"] 13 | alloc = [] 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | - [ ] Note that this crate implements the [URL Standard](https://url.spec.whatwg.org/) not RFC 1738 or RFC 3986 11 | 12 | **Describe the bug** 13 | A clear and concise description of what the bug is. Include code snippets if possible. 14 | -------------------------------------------------------------------------------- /url/fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "url-fuzz" 4 | version = "0.0.1" 5 | authors = ["Automatically generated"] 6 | publish = false 7 | 8 | [package.metadata] 9 | cargo-fuzz = true 10 | 11 | [dependencies] 12 | libfuzzer-sys = "0.4.0" 13 | 14 | [dependencies.url] 15 | path = ".." 16 | 17 | [[bin]] 18 | name = "parse" 19 | path = "fuzz_targets/parse.rs" 20 | 21 | [workspace] 22 | members = ["."] 23 | -------------------------------------------------------------------------------- /url/fuzz/fuzz_targets/parse.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | #[macro_use] extern crate libfuzzer_sys; 3 | extern crate url; 4 | use std::str; 5 | 6 | fuzz_target!(|data: &[u8]| { 7 | if let Ok(utf8) = str::from_utf8(data) { 8 | if let Ok(parsed) = url::Url::parse(utf8) { 9 | let as_str = parsed.as_str(); 10 | assert_eq!(parsed, url::Url::parse(as_str).unwrap()); 11 | } 12 | } 13 | }); 14 | -------------------------------------------------------------------------------- /form_urlencoded/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "form_urlencoded" 3 | version = "1.0.1" 4 | authors = ["The rust-url developers"] 5 | description = "Parser and serializer for the application/x-www-form-urlencoded syntax, as used by HTML forms." 
6 | repository = "https://github.com/servo/rust-url" 7 | license = "MIT OR Apache-2.0" 8 | edition = "2018" 9 | rust-version = "1.51" 10 | 11 | [lib] 12 | test = false 13 | 14 | [dependencies] 15 | percent-encoding = { version = "2.1.0", path = "../percent_encoding" } 16 | -------------------------------------------------------------------------------- /data-url/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "data-url" 3 | version = "0.1.1" 4 | authors = ["Simon Sapin "] 5 | description = "Processing of data: URL according to WHATWG’s Fetch Standard" 6 | repository = "https://github.com/servo/rust-url" 7 | license = "MIT OR Apache-2.0" 8 | edition = "2018" 9 | autotests = false 10 | rust-version = "1.51" 11 | 12 | [dev-dependencies] 13 | tester = "0.9" 14 | serde = {version = "1.0", features = ["derive"]} 15 | serde_json = "1.0" 16 | 17 | [lib] 18 | test = false 19 | 20 | [[test]] 21 | name = "wpt" 22 | harness = false 23 | -------------------------------------------------------------------------------- /data-url/src/make_base64_decode_table.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate the BASE64_DECODE_TABLE constant. See its doc-comment. 3 | """ 4 | 5 | import string 6 | 7 | # https://tools.ietf.org/html/rfc4648#section-4 8 | alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+/" 9 | assert len(alphabet) == 64 10 | 11 | reverse_table = [-1] * 256 12 | for i, symbol in enumerate(alphabet): 13 | reverse_table[ord(symbol)] = i 14 | 15 | print("[") 16 | per_line = 16 17 | for line in range(0, 256, per_line): 18 | print(" " + "".join(" %2s," % value for value in reverse_table[line:][:per_line])) 19 | print("]") 20 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | | Version | Supported | 6 | | ------- | ------------------ | 7 | | 2.2.x | :white_check_mark: | 8 | 9 | ## Reporting a Vulnerability 10 | 11 | The most secure way to report a vulnerability while this crate is maintained by a Mozilla contributor is by reporting a bug on the Bugzilla issue tracker using this [link](https://bugzilla.mozilla.org/enter_bug.cgi?product=Core&component=Networking&groups=network-core-security&short_desc=[rust-url]%20DESCRIPTION&bug_type=defect). 12 | 13 | Alternatively, you may send an email to valentin AT mozilla DOT com. 
14 | -------------------------------------------------------------------------------- /url/benches/parse_url.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate bencher; 3 | 4 | use bencher::{black_box, Bencher}; 5 | 6 | use url::Url; 7 | 8 | fn short(bench: &mut Bencher) { 9 | let url = "https://example.com/bench"; 10 | 11 | bench.bytes = url.len() as u64; 12 | bench.iter(|| black_box(url).parse::<Url>().unwrap()); 13 | } 14 | 15 | fn long(bench: &mut Bencher) { 16 | let url = "https://example.com/parkbench?tre=es&st=uff"; 17 | 18 | bench.bytes = url.len() as u64; 19 | bench.iter(|| black_box(url).parse::<Url>().unwrap()); 20 | } 21 | 22 | benchmark_group!(benches, short, long); 23 | benchmark_main!(benches); 24 | -------------------------------------------------------------------------------- /idna/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "idna" 3 | version = "0.2.3" 4 | authors = ["The rust-url developers"] 5 | description = "IDNA (Internationalizing Domain Names in Applications) and Punycode." 6 | repository = "https://github.com/servo/rust-url/" 7 | license = "MIT OR Apache-2.0" 8 | autotests = false 9 | edition = "2018" 10 | rust-version = "1.51" 11 | 12 | [lib] 13 | doctest = false 14 | 15 | [[test]] 16 | name = "tests" 17 | harness = false 18 | 19 | [[test]] 20 | name = "unit" 21 | 22 | [dev-dependencies] 23 | assert_matches = "1.3" 24 | bencher = "0.1" 25 | tester = "0.9" 26 | serde_json = "1.0" 27 | 28 | [dependencies] 29 | unicode-bidi = "0.3" 30 | unicode-normalization = "0.1.17" 31 | 32 | [[bench]] 33 | name = "all" 34 | harness = false 35 | -------------------------------------------------------------------------------- /idna/tests/tests.rs: -------------------------------------------------------------------------------- 1 | use tester as test; 2 | 3 | mod punycode; 4 | mod uts46; 5 | 6 | fn main() { 7 | let mut tests = Vec::new(); 8 | { 9 | let mut add_test = |name, run| { 10 | tests.push(test::TestDescAndFn { 11 | desc: test::TestDesc { 12 | name: test::DynTestName(name), 13 | ignore: false, 14 | should_panic: test::ShouldPanic::No, 15 | allow_fail: false, 16 | test_type: test::TestType::Unknown, 17 | }, 18 | testfn: run, 19 | }) 20 | }; 21 | punycode::collect_tests(&mut add_test); 22 | uts46::collect_tests(&mut add_test); 23 | } 24 | test::test_main(&std::env::args().collect::<Vec<_>>(), tests, None) 25 | } 26 | -------------------------------------------------------------------------------- /.github/workflows/codecov.yml: -------------------------------------------------------------------------------- 1 | name: Coverage 2 | 3 | on: 4 | push: 5 | branches: ["master"] 6 | pull_request: 7 | 8 | jobs: 9 | coverage: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v1 13 | - uses: actions-rs/toolchain@v1 14 | with: 15 | profile: minimal 16 | toolchain: stable 17 | override: true 18 | - uses: actions-rs/tarpaulin@v0.1 19 | - uses: codecov/codecov-action@v2.1.0 20 | # A codecov token is not needed for public repos if the repo is linked 21 | # on codecov.io.
See https://docs.codecov.com/docs/frequently-asked-questions#where-is-the-repository-upload-token-found 22 | - uses: actions/upload-artifact@v1 23 | with: 24 | name: code-coverage-report 25 | path: cobertura.xml 26 | -------------------------------------------------------------------------------- /data-url/README.md: -------------------------------------------------------------------------------- 1 | # data-url 2 | 3 | [![crates.io](https://img.shields.io/crates/v/data-url.svg)](https://crates.io/crates/data-url) 4 | [![docs.rs](https://img.shields.io/badge/docs.rs-%F0%9F%93%84-blue.svg)](https://docs.rs/data-url/) 5 | 6 | Processing of `data:` URLs in Rust according to the Fetch Standard: 7 | <https://fetch.spec.whatwg.org/#data-urls> 8 | but starting from a string rather than a parsed URL to avoid extra copies. 9 | 10 | ```rust 11 | use data_url::{DataUrl, mime}; 12 | 13 | let url = DataUrl::process("data:,Hello%20World!").unwrap(); 14 | let (body, fragment) = url.decode_to_vec().unwrap(); 15 | 16 | assert_eq!(url.mime_type().type_, "text"); 17 | assert_eq!(url.mime_type().subtype, "plain"); 18 | assert_eq!(url.mime_type().get_parameter("charset"), Some("US-ASCII")); 19 | assert_eq!(body, b"Hello World!"); 20 | assert!(fragment.is_none()); 21 | ``` 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | rust-url 2 | ======== 3 | 4 | [![Build status](https://github.com/servo/rust-url/workflows/CI/badge.svg)](https://github.com/servo/rust-url/actions?query=workflow%3ACI) 5 | [![Coverage](https://codecov.io/gh/servo/rust-url/branch/master/graph/badge.svg)](https://codecov.io/gh/servo/rust-url) 6 | [![Chat](https://img.shields.io/badge/chat-%23rust--url:mozilla.org-%2346BC99?logo=Matrix)](https://matrix.to/#/#rust-url:mozilla.org) 7 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE-MIT) 8 | [![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE-APACHE) 9 | 10 | URL library for Rust, based on the [URL Standard](https://url.spec.whatwg.org/). 11 | 12 | [Documentation](https://docs.rs/url/) 13 | 14 | Please see [UPGRADING.md](https://github.com/servo/rust-url/blob/master/UPGRADING.md) if you are upgrading from a previous version. 15 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2022 The rust-url developers 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /form_urlencoded/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2016 The rust-url developers 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /idna/benches/all.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate bencher; 3 | extern crate idna; 4 | 5 | use bencher::{black_box, Bencher}; 6 | use idna::Config; 7 | 8 | fn to_unicode_puny_label(bench: &mut Bencher) { 9 | let encoded = "abc.xn--mgbcm"; 10 | let config = Config::default(); 11 | bench.iter(|| config.to_unicode(black_box(encoded))); 12 | } 13 | 14 | fn to_unicode_ascii(bench: &mut Bencher) { 15 | let encoded = "example.com"; 16 | let config = Config::default(); 17 | bench.iter(|| config.to_unicode(black_box(encoded))); 18 | } 19 | 20 | fn to_unicode_merged_label(bench: &mut Bencher) { 21 | let encoded = "Beispiel.xn--vermgensberater-ctb"; 22 | let config = Config::default(); 23 | bench.iter(|| config.to_unicode(black_box(encoded))); 24 | } 25 | 26 | fn to_ascii_puny_label(bench: &mut Bencher) { 27 | let encoded = "abc.ابج"; 28 | let config = Config::default(); 29 | bench.iter(|| config.to_ascii(black_box(encoded))); 30 | } 31 | 32 | fn to_ascii_simple(bench: &mut Bencher) { 33 | let encoded = "example.com"; 34 | let config = Config::default(); 35 | bench.iter(|| config.to_ascii(black_box(encoded))); 36 | } 37 | 38 | fn to_ascii_merged(bench: &mut Bencher) { 39 | let encoded = "beispiel.vermögensberater"; 40 | let config = Config::default(); 41 | bench.iter(|| config.to_ascii(black_box(encoded))); 42 | } 43 | 44 | benchmark_group!( 45 | benches, 46 | to_unicode_puny_label, 47 | to_unicode_ascii, 48 | to_unicode_merged_label, 49 | to_ascii_puny_label, 50 | to_ascii_simple, 51 | to_ascii_merged, 52 | ); 53 | benchmark_main!(benches); 54 | -------------------------------------------------------------------------------- /url/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | 3 | name = "url" 4 | # When updating version, also modify html_root_url in the lib.rs 5 | version = "2.2.2" 6 | authors = ["The rust-url developers"] 7 | 8 | description = "URL library for Rust, based on the WHATWG URL Standard" 9 | documentation = "https://docs.rs/url" 10 | repository = "https://github.com/servo/rust-url" 11 | readme = "../README.md" 12 | keywords = ["url", "parser"] 13 | categories = ["parser-implementations", "web-programming", "encoding"] 14 | license = "MIT OR Apache-2.0" 15 | include = ["src/**/*", "LICENSE-*", "README.md", "tests/**"] 16 | edition = "2018" 17 | rust-version = "1.51" 18 | 19 | [badges] 20 | travis-ci = { repository = "servo/rust-url" } 21 | appveyor = { repository = "Manishearth/rust-url" } 22 | 23 | [dev-dependencies] 24 | serde_json = "1.0" 25 | bencher = "0.1" 26 | # To test debugger visualizers defined for the url crate such as url.natvis 27 | debugger_test = "0.1" 28 | debugger_test_parser = "0.1" 29 | 30 | [dependencies] 31 | form_urlencoded = { version = "1.0.0", path = "../form_urlencoded" } 32 | idna = { version = "0.2.0", path = "../idna", optional = true } 33 | percent-encoding = { version = "2.1.0", path = "../percent_encoding" } 34 | serde = {version = "1.0", optional = true, features = ["derive"]} 35 | 36 | [features] 37 | default = ["idna"] 38 | # UNSTABLE FEATURES (requires Rust nightly) 39 | # Enable to use the #[debugger_visualizer] attribute. 40 | debugger_visualizer = [] 41 | 42 | [[bench]] 43 | name = "parse_url" 44 | path = "benches/parse_url.rs" 45 | harness = false 46 | 47 | [[test]] 48 | name = "debugger_visualizer" 49 | path = "tests/debugger_visualizer.rs" 50 | required-features = ["debugger_visualizer"] 51 | test = false 52 | -------------------------------------------------------------------------------- /data-url/tests/base64.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["", []], 3 | ["abcd", [105, 183, 29]], 4 | [" abcd", [105, 183, 29]], 5 | ["abcd ", [105, 183, 29]], 6 | [" abcd===", null], 7 | ["abcd=== ", null], 8 | ["abcd ===", null], 9 | ["a", null], 10 | ["ab", [105]], 11 | ["abc", [105, 183]], 12 | ["abcde", null], 13 | ["𐀀", null], 14 | ["=", null], 15 | ["==", null], 16 | ["===", null], 17 | ["====", null], 18 | ["=====", null], 19 | ["a=", null], 20 | ["a==", null], 21 | ["a===", null], 22 | ["a====", null], 23 | ["a=====", null], 24 | ["ab=", null], 25 | ["ab==", [105]], 26 | ["ab===", null], 27 | ["ab====", null], 28 | ["ab=====", null], 29 | ["abc=", [105, 183]], 30 | ["abc==", null], 31 | ["abc===", null], 32 | ["abc====", null], 33 | ["abc=====", null], 34 | ["abcd=", null], 35 | ["abcd==", null], 36 | ["abcd===", null], 37 | ["abcd====", null], 38 | ["abcd=====", null], 39 | ["abcde=", null], 40 | ["abcde==", null], 41 | ["abcde===", null], 42 | ["abcde====", null], 43 | ["abcde=====", null], 44 | ["=a", null], 45 | ["=a=", null], 46 | ["a=b", null], 47 | ["a=b=", null], 48 | ["ab=c", null], 49 | ["ab=c=", null], 50 | ["abc=d", null], 51 | ["abc=d=", null], 52 | ["ab\u000Bcd", null], 53 | ["ab\u3000cd", null], 54 | ["ab\u3001cd", null], 55 | ["ab\tcd", [105, 183, 29]], 56 | ["ab\ncd", [105, 183, 29]], 57 | ["ab\fcd", [105, 183, 29]], 58 | ["ab\rcd", [105, 183, 29]], 59 | ["ab cd", [105, 183, 29]], 60 | ["ab\u00a0cd", null], 61 | ["ab\t\n\f\r cd", [105, 183, 29]], 62 | [" \t\n\f\r ab\t\n\f\r cd\t\n\f\r ", [105, 183, 29]], 63 | ["ab\t\n\f\r =\t\n\f\r 
=\t\n\f\r ", [105]], 64 | ["A", null], 65 | ["/A", [252]], 66 | ["//A", [255, 240]], 67 | ["///A", [255, 255, 192]], 68 | ["////A", null], 69 | ["/", null], 70 | ["A/", [3]], 71 | ["AA/", [0, 15]], 72 | ["AAAA/", null], 73 | ["AAA/", [0, 0, 63]], 74 | ["\u0000nonsense", null], 75 | ["abcd\u0000nonsense", null], 76 | ["YQ", [97]], 77 | ["YR", [97]], 78 | ["~~", null], 79 | ["..", null], 80 | ["--", null], 81 | ["__", null] 82 | ] 83 | -------------------------------------------------------------------------------- /debug_metadata/url.natvis: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {serialization} 6 | 7 | 8 | {(char*)(ptr()),[scheme_end]s8} 9 | 10 | 11 | 12 | {(char*)(ptr()+(scheme_end + 3)),[((username_end)-(scheme_end + 3))]s8} 13 | 14 | 15 | {(char*)(ptr()+host_start),[host_end-host_start]s8} 16 | 17 | 18 | {port.variant1.value.__0,d} 19 | 20 | 21 | {(char*)(ptr()+path_start),[(serialization.vec.len-path_start)]s8} 22 | {(char*)(ptr()+path_start),[(query_start.variant1.value.__0-path_start)]s8} 23 | {(char*)(ptr()+path_start),[(fragment_start.variant1.value.__0-path_start)]s8} 24 | 25 | 26 | {(char*)(ptr()+query_start.variant1.value.__0+1),[((serialization.vec.len)-(query_start.variant1.value.__0+1))]s8} 27 | {(char*)(ptr()+query_start.variant1.value.__0+1),[((fragment_start.variant1.value.__0)-(query_start.variant1.value.__0+1))]s8} 28 | 29 | 30 | {(char*)(ptr()+fragment_start.variant1.value.__0+1),[(serialization.vec.len-fragment_start.variant1.value.__0-1)]s8} 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /idna/tests/punycode.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 
8 | 9 | use crate::test::TestFn; 10 | use idna::punycode::{decode, encode_str}; 11 | use serde_json::map::Map; 12 | use serde_json::Value; 13 | use std::str::FromStr; 14 | 15 | fn one_test(decoded: &str, encoded: &str) { 16 | match decode(encoded) { 17 | None => panic!("Decoding {} failed.", encoded), 18 | Some(result) => { 19 | let result = result.into_iter().collect::<String>(); 20 | assert!( 21 | result == decoded, 22 | "Incorrect decoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", 23 | encoded, 24 | result, 25 | decoded 26 | ) 27 | } 28 | } 29 | 30 | match encode_str(decoded) { 31 | None => panic!("Encoding {} failed.", decoded), 32 | Some(result) => assert!( 33 | result == encoded, 34 | "Incorrect encoding of \"{}\":\n \"{}\"\n!= \"{}\"\n", 35 | decoded, 36 | result, 37 | encoded 38 | ), 39 | } 40 | } 41 | 42 | fn get_string<'a>(map: &'a Map<String, Value>, key: &str) -> &'a str { 43 | match map.get(&key.to_string()) { 44 | Some(&Value::String(ref s)) => s, 45 | None => "", 46 | _ => panic!(), 47 | } 48 | } 49 | 50 | pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) { 51 | match Value::from_str(include_str!("punycode_tests.json")) { 52 | Ok(Value::Array(tests)) => { 53 | for (i, test) in tests.into_iter().enumerate() { 54 | match test { 55 | Value::Object(o) => { 56 | let test_name = { 57 | let desc = get_string(&o, "description"); 58 | if desc.is_empty() { 59 | format!("Punycode {}", i + 1) 60 | } else { 61 | format!("Punycode {}: {}", i + 1, desc) 62 | } 63 | }; 64 | add_test( 65 | test_name, 66 | TestFn::DynTestFn(Box::new(move || { 67 | one_test(get_string(&o, "decoded"), get_string(&o, "encoded")) 68 | })), 69 | ) 70 | } 71 | _ => panic!(), 72 | } 73 | } 74 | } 75 | other => panic!("{:?}", other), 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: ["master"] 6 | pull_request: 7 | 8 | env: 9 | CARGO_NET_GIT_FETCH_WITH_CLI: true 10 | 11 | jobs: 12 | Test: 13 | strategy: 14 | matrix: 15 | os: [ubuntu-latest, macos-latest, windows-latest] 16 | rust: [1.51.0, stable, beta, nightly] 17 | exclude: 18 | - os: macos-latest 19 | rust: 1.51.0 20 | - os: windows-latest 21 | rust: 1.51.0 22 | - os: macos-latest 23 | rust: beta 24 | - os: windows-latest 25 | rust: beta 26 | - os: macos-latest 27 | rust: nightly 28 | 29 | runs-on: ${{ matrix.os }} 30 | 31 | steps: 32 | - uses: actions/checkout@v2 33 | - uses: actions-rs/toolchain@v1 34 | with: 35 | profile: minimal 36 | toolchain: ${{ matrix.rust }} 37 | override: true 38 | - uses: actions-rs/cargo@v1 39 | with: 40 | command: build 41 | args: --all-targets 42 | # Run tests 43 | - name: Run tests 44 | uses: actions-rs/cargo@v1 45 | with: 46 | command: test 47 | # Run tests enabling the serde feature 48 | - name: Run tests with the serde feature 49 | uses: actions-rs/cargo@v1 50 | with: 51 | command: test 52 | args: --features "url/serde" 53 | # The #[debugger_visualizer] attribute is currently gated behind an unstable feature flag. 54 | # In order to test the visualizers for the url crate, they have to be tested on a nightly build.
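# These tests are also limited to windows-latest below, since only Natvis (Windows-only) visualizers exist so far; see debug_metadata/README.md.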
55 | - name: Run debugger_visualizer tests 56 | if: | 57 | matrix.os == 'windows-latest' && 58 | matrix.rust == 'nightly' 59 | uses: actions-rs/cargo@v1 60 | with: 61 | command: test 62 | args: --test debugger_visualizer --features "url/serde,url/debugger_visualizer" -- --test-threads=1 63 | 64 | WASM: 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v2 68 | - uses: actions-rs/toolchain@v1 69 | with: 70 | profile: minimal 71 | toolchain: stable 72 | target: wasm32-unknown-unknown 73 | override: true 74 | - uses: actions-rs/cargo@v1 75 | with: 76 | command: build 77 | args: --target wasm32-unknown-unknown 78 | 79 | Lint: 80 | runs-on: ubuntu-latest 81 | steps: 82 | - uses: actions/checkout@v2 83 | - uses: actions-rs/toolchain@v1 84 | with: 85 | profile: minimal 86 | toolchain: stable 87 | override: true 88 | components: rustfmt, clippy 89 | - uses: actions-rs/cargo@v1 90 | with: 91 | command: fmt 92 | args: --all -- --check 93 | - uses: actions-rs/cargo@v1 94 | if: always() 95 | with: 96 | command: clippy 97 | args: --workspace --all-targets -- -D warnings 98 | 99 | Audit: 100 | runs-on: ubuntu-latest 101 | steps: 102 | - uses: actions/checkout@v1 103 | - uses: EmbarkStudios/cargo-deny-action@v1 104 | -------------------------------------------------------------------------------- /idna/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | //! This Rust crate implements IDNA 10 | //! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna). 11 | //! 12 | //! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing* 13 | //! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/) 14 | //! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492). 15 | //! 16 | //! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction): 17 | //! 18 | //! > Initially, domain names were restricted to ASCII characters. 19 | //! > A system was introduced in 2003 for internationalized domain names (IDN). 20 | //! > This system is called Internationalizing Domain Names for Applications, 21 | //! > or IDNA2003 for short. 22 | //! > This mechanism supports IDNs by means of a client software transformation 23 | //! > into a format known as Punycode. 24 | //! > A revision of IDNA was approved in 2010 (IDNA2008). 25 | //! > This revision has a number of incompatibilities with IDNA2003. 26 | //! > 27 | //! > The incompatibilities force implementers of client software, 28 | //! > such as browsers and emailers, 29 | //! > to face difficult choices during the transition period 30 | //! > as registries shift from IDNA2003 to IDNA2008. 31 | //! > This document specifies a mechanism 32 | //! > that minimizes the impact of this transition for client software, 33 | //! > allowing client software to access domains that are valid under either system. 34 | 35 | #[cfg(test)] 36 | #[macro_use] 37 | extern crate assert_matches; 38 | 39 | pub mod punycode; 40 | mod uts46; 41 | 42 | pub use crate::uts46::{Config, Errors, Idna}; 43 | 44 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm. 
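/// (For example, "bücher.de" maps to "xn--bcher-kva.de"; compare the "bücher" case in idna/tests/punycode_tests.json.)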
45 | /// 46 | /// Return the ASCII representation of a domain name, 47 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) 48 | /// and using Punycode as necessary. 49 | /// 50 | /// This process may fail. 51 | pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> { 52 | Config::default().to_ascii(domain) 53 | } 54 | 55 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm, 56 | /// with the `beStrict` flag set. 57 | pub fn domain_to_ascii_strict(domain: &str) -> Result<String, uts46::Errors> { 58 | Config::default() 59 | .use_std3_ascii_rules(true) 60 | .verify_dns_length(true) 61 | .to_ascii(domain) 62 | } 63 | 64 | /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. 65 | /// 66 | /// Return the Unicode representation of a domain name, 67 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) 68 | /// and decoding Punycode as necessary. 69 | /// 70 | /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) 71 | /// but always returns a string for the mapped domain. 72 | pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { 73 | Config::default().to_unicode(domain) 74 | } 75 | -------------------------------------------------------------------------------- /url/tests/debugger_visualizer.rs: -------------------------------------------------------------------------------- 1 | use debugger_test::debugger_test; 2 | use url::Url; 3 | 4 | #[inline(never)] 5 | fn __break() {} 6 | 7 | #[debugger_test( 8 | debugger = "cdb", 9 | commands = " 10 | .nvlist 11 | 12 | dx base_url 13 | 14 | dx url_with_non_special_scheme 15 | 16 | dx url_with_user_pass_port_query_fragments 17 | 18 | dx url_blob 19 | 20 | dx url_with_base 21 | 22 | dx url_with_base_replaced 23 | 24 | dx url_with_comma", 25 | expected_statements = r#" 26 | pattern:debugger_visualizer-.*\.exe \(embedded NatVis ".*-[0-9]+\.natvis"\) 27 | 28 | base_url : "http://example.org/foo/bar" [Type: url::Url] 29 | [] [Type: url::Url] 30 | [scheme] : "http" 31 | [host] : "example.org" 32 | [path] : "/foo/bar" 33 | 34 | url_with_non_special_scheme : "non-special://test/x" [Type: url::Url] 35 | [] [Type: url::Url] 36 | [scheme] : "non-special" 37 | [host] : "test" 38 | [path] : "/x" 39 | 40 | url_with_user_pass_port_query_fragments : "http://user:pass@foo:21/bar;par?b#c" [Type: url::Url] 41 | [] [Type: url::Url] 42 | [scheme] : "http" 43 | [username] : "user" 44 | [host] : "foo" 45 | [port] : 21 46 | [path] : "/bar;par" 47 | [query] : "b" 48 | [fragment] : "c" 49 | 50 | url_blob : "blob:https://example.com:443/" [Type: url::Url] 51 | [] [Type: url::Url] 52 | [scheme] : "blob" 53 | [path] : "https://example.com:443/" 54 | 55 | url_with_base : "http://example.org/a%2fc" [Type: url::Url] 56 | [] [Type: url::Url] 57 | [scheme] : "http" 58 | [host] : "example.org" 59 | [path] : "/a%2fc" 60 | 61 | url_with_base_replaced : "http://[::7f00:1]/" [Type: url::Url] 62 | [] [Type: url::Url] 63 | [scheme] : "http" 64 | [host] : "[::7f00:1]" 65 | [path] : "/" 66 | 67 | url_with_comma : "data:text/html,test#test" [Type: url::Url] 68 | [] [Type: url::Url] 69 | [scheme] : "data" 70 | [path] : "text/html,test" 71 | [fragment] : "test" 72 | "# 73 | )] 74 | fn test_url_visualizer() { 75 | // Copied from https://github.com/web-platform-tests/wpt/blob/master/url/ 76 | let base_url = Url::parse("http://example.org/foo/bar").unwrap(); 77 | assert_eq!(base_url.as_str(), "http://example.org/foo/bar"); 78 | 79 |
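// The empty username and password of "non-special://:@test/x" are dropped on serialization, as the assertion below shows.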
let url_with_non_special_scheme = Url::parse("non-special://:@test/x").unwrap(); 80 | assert_eq!(url_with_non_special_scheme.as_str(), "non-special://test/x"); 81 | 82 | let url_with_user_pass_port_query_fragments = 83 | Url::parse("http://user:pass@foo:21/bar;par?b#c").unwrap(); 84 | assert_eq!( 85 | url_with_user_pass_port_query_fragments.as_str(), 86 | "http://user:pass@foo:21/bar;par?b#c" 87 | ); 88 | 89 | let url_blob = Url::parse("blob:https://example.com:443/").unwrap(); 90 | assert_eq!(url_blob.as_str(), "blob:https://example.com:443/"); 91 | 92 | let url_with_base = base_url.join("/a%2fc").unwrap(); 93 | assert_eq!(url_with_base.as_str(), "http://example.org/a%2fc"); 94 | 95 | let url_with_base_replaced = base_url.join("http://[::127.0.0.1]").unwrap(); 96 | assert_eq!(url_with_base_replaced.as_str(), "http://[::7f00:1]/"); 97 | 98 | let url_with_comma = base_url.join("data:text/html,test#test").unwrap(); 99 | assert_eq!(url_with_comma.as_str(), "data:text/html,test#test"); 100 | 101 | __break(); 102 | } 103 | -------------------------------------------------------------------------------- /url/src/origin.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | use crate::host::Host; 10 | use crate::parser::default_port; 11 | use crate::Url; 12 | use std::sync::atomic::{AtomicUsize, Ordering}; 13 | 14 | pub fn url_origin(url: &Url) -> Origin { 15 | let scheme = url.scheme(); 16 | match scheme { 17 | "blob" => { 18 | let result = Url::parse(url.path()); 19 | match result { 20 | Ok(ref url) => url_origin(url), 21 | Err(_) => Origin::new_opaque(), 22 | } 23 | } 24 | "ftp" | "http" | "https" | "ws" | "wss" => Origin::Tuple( 25 | scheme.to_owned(), 26 | url.host().unwrap().to_owned(), 27 | url.port_or_known_default().unwrap(), 28 | ), 29 | // TODO: Figure out what to do if the scheme is a file 30 | "file" => Origin::new_opaque(), 31 | _ => Origin::new_opaque(), 32 | } 33 | } 34 | 35 | /// The origin of a URL 36 | /// 37 | /// Two URLs with the same origin are considered 38 | /// to originate from the same entity and can therefore trust 39 | /// each other. 40 | /// 41 | /// The origin is determined based on the scheme as follows: 42 | /// 43 | /// - If the scheme is "blob" the origin is the origin of the 44 | /// URL contained in the path component. If parsing fails, 45 | /// it is an opaque origin. 46 | /// - If the scheme is "ftp", "http", "https", "ws", or "wss", 47 | /// then the origin is a tuple of the scheme, host, and port. 48 | /// - If the scheme is anything else, the origin is opaque, meaning 49 | /// the URL does not have the same origin as any other URL. 50 | /// 51 | /// For more information see 52 | #[derive(PartialEq, Eq, Hash, Clone, Debug)] 53 | pub enum Origin { 54 | /// A globally unique identifier 55 | Opaque(OpaqueOrigin), 56 | 57 | /// Consists of the URL's scheme, host and port 58 | Tuple(String, Host<String>, u16), 59 | } 60 | 61 | impl Origin { 62 | /// Creates a new opaque origin that is only equal to itself.
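/// (Uniqueness comes from the process-wide atomic counter below: each call takes the next value, so no two opaque origins compare equal.)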
63 | pub fn new_opaque() -> Origin { 64 | static COUNTER: AtomicUsize = AtomicUsize::new(0); 65 | Origin::Opaque(OpaqueOrigin(COUNTER.fetch_add(1, Ordering::SeqCst))) 66 | } 67 | 68 | /// Return whether this origin is a (scheme, host, port) tuple 69 | /// (as opposed to an opaque origin). 70 | pub fn is_tuple(&self) -> bool { 71 | matches!(*self, Origin::Tuple(..)) 72 | } 73 | 74 | /// 75 | pub fn ascii_serialization(&self) -> String { 76 | match *self { 77 | Origin::Opaque(_) => "null".to_owned(), 78 | Origin::Tuple(ref scheme, ref host, port) => { 79 | if default_port(scheme) == Some(port) { 80 | format!("{}://{}", scheme, host) 81 | } else { 82 | format!("{}://{}:{}", scheme, host, port) 83 | } 84 | } 85 | } 86 | } 87 | 88 | /// 89 | #[cfg(feature = "idna")] 90 | pub fn unicode_serialization(&self) -> String { 91 | match *self { 92 | Origin::Opaque(_) => "null".to_owned(), 93 | Origin::Tuple(ref scheme, ref host, port) => { 94 | let host = match *host { 95 | Host::Domain(ref domain) => { 96 | let (domain, _errors) = idna::domain_to_unicode(domain); 97 | Host::Domain(domain) 98 | } 99 | _ => host.clone(), 100 | }; 101 | if default_port(scheme) == Some(port) { 102 | format!("{}://{}", scheme, host) 103 | } else { 104 | format!("{}://{}:{}", scheme, host, port) 105 | } 106 | } 107 | } 108 | } 109 | } 110 | 111 | /// Opaque identifier for URLs that have file or other schemes 112 | #[derive(Eq, PartialEq, Hash, Clone, Debug)] 113 | pub struct OpaqueOrigin(usize); 114 | -------------------------------------------------------------------------------- /idna/tests/punycode_tests.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "description": "These tests are copied from https://github.com/bestiejs/punycode.js/blob/master/tests/tests.js , used under the MIT license.", 4 | "decoded": "", 5 | "encoded": "" 6 | }, 7 | { 8 | "description": "a single basic code point", 9 | "decoded": "Bach", 10 | "encoded": "Bach-" 11 | }, 12 | { 13 | "description": "a single non-ASCII character", 14 | "decoded": "\u00FC", 15 | "encoded": "tda" 16 | }, 17 | { 18 | "description": "multiple non-ASCII characters", 19 | "decoded": "\u00FC\u00EB\u00E4\u00F6\u2665", 20 | "encoded": "4can8av2009b" 21 | }, 22 | { 23 | "description": "mix of ASCII and non-ASCII characters", 24 | "decoded": "b\u00FCcher", 25 | "encoded": "bcher-kva" 26 | }, 27 | { 28 | "description": "long string with both ASCII and non-ASCII characters", 29 | "decoded": "Willst du die Bl\u00FCthe des fr\u00FChen, die Fr\u00FCchte des sp\u00E4teren Jahres", 30 | "encoded": "Willst du die Blthe des frhen, die Frchte des spteren Jahres-x9e96lkal" 31 | }, 32 | { 33 | "description": "Arabic (Egyptian)", 34 | "decoded": "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F", 35 | "encoded": "egbpdaj6bu4bxfgehfvwxn" 36 | }, 37 | { 38 | "description": "Chinese (simplified)", 39 | "decoded": "\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2d\u6587", 40 | "encoded": "ihqwcrb4cv8a8dqg056pqjye" 41 | }, 42 | { 43 | "description": "Chinese (traditional)", 44 | "decoded": "\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587", 45 | "encoded": "ihqwctvzc91f659drss3x8bo0yb" 46 | }, 47 | { 48 | "description": "Czech", 49 | "decoded": "Pro\u010Dprost\u011Bnemluv\u00ED\u010Desky", 50 | "encoded": "Proprostnemluvesky-uyb24dma41a" 51 | }, 52 | { 53 | "description": "Hebrew", 54 | "decoded": 
"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2\u05D1\u05E8\u05D9\u05EA", 55 | "encoded": "4dbcagdahymbxekheh6e0a7fei0b" 56 | }, 57 | { 58 | "description": "Hindi (Devanagari)", 59 | "decoded": "\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947\u0939\u0948\u0902", 60 | "encoded": "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd" 61 | }, 62 | { 63 | "description": "Japanese (kanji and hiragana)", 64 | "decoded": "\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B", 65 | "encoded": "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa" 66 | }, 67 | { 68 | "description": "Korean (Hangul syllables)", 69 | "decoded": "\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C", 70 | "encoded": "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c" 71 | }, 72 | { 73 | "description": "Russian (Cyrillic)", 74 | "decoded": "\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A\u0438", 75 | "encoded": "b1abfaaepdrnnbgefbadotcwatmq2g4l" 76 | }, 77 | { 78 | "description": "Spanish", 79 | "decoded": "Porqu\u00E9nopuedensimplementehablarenEspa\u00F1ol", 80 | "encoded": "PorqunopuedensimplementehablarenEspaol-fmd56a" 81 | }, 82 | { 83 | "description": "Vietnamese", 84 | "decoded": "T\u1EA1isaoh\u1ECDkh\u00F4ngth\u1EC3ch\u1EC9n\u00F3iti\u1EBFngVi\u1EC7t", 85 | "encoded": "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g" 86 | }, 87 | { 88 | "decoded": "3\u5E74B\u7D44\u91D1\u516B\u5148\u751F", 89 | "encoded": "3B-ww4c5e180e575a65lsy2b" 90 | }, 91 | { 92 | "decoded": "\u5B89\u5BA4\u5948\u7F8E\u6075-with-SUPER-MONKEYS", 93 | "encoded": "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n" 94 | }, 95 | { 96 | "decoded": "Hello-Another-Way-\u305D\u308C\u305E\u308C\u306E\u5834\u6240", 97 | "encoded": "Hello-Another-Way--fc4qua05auwb3674vfr0b" 98 | }, 99 | { 100 | "decoded": "\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B2", 101 | "encoded": "2-u9tlzr9756bt3uc0v" 102 | }, 103 | { 104 | "decoded": "Maji\u3067Koi\u3059\u308B5\u79D2\u524D", 105 | "encoded": "MajiKoi5-783gue6qz075azm5e" 106 | }, 107 | { 108 | "decoded": "\u30D1\u30D5\u30A3\u30FCde\u30EB\u30F3\u30D0", 109 | "encoded": "de-jg4avhby1noc0d" 110 | }, 111 | { 112 | "decoded": "\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067", 113 | "encoded": "d9juau41awczczp" 114 | }, 115 | { 116 | "description": "ASCII string that breaks the existing rules for host-name labels (It's not a realistic example for IDNA, because IDNA never encodes pure ASCII labels.)", 117 | "decoded": "-> $1.00 <-", 118 | "encoded": "-> $1.00 <--" 119 | } 120 | ] 121 | -------------------------------------------------------------------------------- /idna/tests/unit.rs: -------------------------------------------------------------------------------- 1 | use assert_matches::assert_matches; 2 | use unicode_normalization::char::is_combining_mark; 3 | 4 | /// https://github.com/servo/rust-url/issues/373 5 | #[test] 6 | fn test_punycode_prefix_with_length_check() { 7 | let config = idna::Config::default() 8 | .verify_dns_length(true) 9 | .check_hyphens(true) 10 | .use_std3_ascii_rules(true); 11 | 12 | assert!(config.to_ascii("xn--").is_err()); 13 | assert!(config.to_ascii("xn---").is_err()); 14 | 
assert!(config.to_ascii("xn-----").is_err()); 15 | assert!(config.to_ascii("xn--.").is_err()); 16 | assert!(config.to_ascii("xn--...").is_err()); 17 | assert!(config.to_ascii(".xn--").is_err()); 18 | assert!(config.to_ascii("...xn--").is_err()); 19 | assert!(config.to_ascii("xn--.xn--").is_err()); 20 | assert!(config.to_ascii("xn--.example.org").is_err()); 21 | } 22 | 23 | /// https://github.com/servo/rust-url/issues/373 24 | #[test] 25 | fn test_punycode_prefix_without_length_check() { 26 | let config = idna::Config::default() 27 | .verify_dns_length(false) 28 | .check_hyphens(true) 29 | .use_std3_ascii_rules(true); 30 | 31 | assert_eq!(config.to_ascii("xn--").unwrap(), ""); 32 | assert!(config.to_ascii("xn---").is_err()); 33 | assert!(config.to_ascii("xn-----").is_err()); 34 | assert_eq!(config.to_ascii("xn--.").unwrap(), "."); 35 | assert_eq!(config.to_ascii("xn--...").unwrap(), "..."); 36 | assert_eq!(config.to_ascii(".xn--").unwrap(), "."); 37 | assert_eq!(config.to_ascii("...xn--").unwrap(), "..."); 38 | assert_eq!(config.to_ascii("xn--.xn--").unwrap(), "."); 39 | assert_eq!(config.to_ascii("xn--.example.org").unwrap(), ".example.org"); 40 | } 41 | 42 | // http://www.unicode.org/reports/tr46/#Table_Example_Processing 43 | #[test] 44 | fn test_examples() { 45 | let mut codec = idna::Idna::default(); 46 | let mut out = String::new(); 47 | 48 | assert_matches!(codec.to_unicode("Bloß.de", &mut out), Ok(())); 49 | assert_eq!(out, "bloß.de"); 50 | 51 | out.clear(); 52 | assert_matches!(codec.to_unicode("xn--blo-7ka.de", &mut out), Ok(())); 53 | assert_eq!(out, "bloß.de"); 54 | 55 | out.clear(); 56 | assert_matches!(codec.to_unicode("u\u{308}.com", &mut out), Ok(())); 57 | assert_eq!(out, "ü.com"); 58 | 59 | out.clear(); 60 | assert_matches!(codec.to_unicode("xn--tda.com", &mut out), Ok(())); 61 | assert_eq!(out, "ü.com"); 62 | 63 | out.clear(); 64 | assert_matches!(codec.to_unicode("xn--u-ccb.com", &mut out), Err(_)); 65 | 66 | out.clear(); 67 | assert_matches!(codec.to_unicode("a⒈com", &mut out), Err(_)); 68 | 69 | out.clear(); 70 | assert_matches!(codec.to_unicode("xn--a-ecp.ru", &mut out), Err(_)); 71 | 72 | out.clear(); 73 | assert_matches!(codec.to_unicode("xn--0.pt", &mut out), Err(_)); 74 | 75 | out.clear(); 76 | assert_matches!(codec.to_unicode("日本語。JP", &mut out), Ok(())); 77 | assert_eq!(out, "日本語.jp"); 78 | 79 | out.clear(); 80 | assert_matches!(codec.to_unicode("☕.us", &mut out), Ok(())); 81 | assert_eq!(out, "☕.us"); 82 | } 83 | 84 | #[test] 85 | fn test_v5() { 86 | let config = idna::Config::default() 87 | .verify_dns_length(true) 88 | .use_std3_ascii_rules(true); 89 | 90 | // IdnaTest:784 蔏。𑰺 91 | assert!(is_combining_mark('\u{11C3A}')); 92 | assert!(config.to_ascii("\u{11C3A}").is_err()); 93 | assert!(config.to_ascii("\u{850f}.\u{11C3A}").is_err()); 94 | assert!(config.to_ascii("\u{850f}\u{ff61}\u{11C3A}").is_err()); 95 | } 96 | 97 | #[test] 98 | fn test_v8_bidi_rules() { 99 | let config = idna::Config::default() 100 | .verify_dns_length(true) 101 | .use_std3_ascii_rules(true); 102 | 103 | assert_eq!(config.to_ascii("abc").unwrap(), "abc"); 104 | assert_eq!(config.to_ascii("123").unwrap(), "123"); 105 | assert_eq!(config.to_ascii("אבּג").unwrap(), "xn--kdb3bdf"); 106 | assert_eq!(config.to_ascii("ابج").unwrap(), "xn--mgbcm"); 107 | assert_eq!(config.to_ascii("abc.ابج").unwrap(), "abc.xn--mgbcm"); 108 | assert_eq!(config.to_ascii("אבּג.ابج").unwrap(), "xn--kdb3bdf.xn--mgbcm"); 109 | 110 | // Bidi domain names cannot start with digits 111 | 
assert!(config.to_ascii("0a.\u{05D0}").is_err()); 112 | assert!(config.to_ascii("0à.\u{05D0}").is_err()); 113 | 114 | // Bidi chars may be punycode-encoded 115 | assert!(config.to_ascii("xn--0ca24w").is_err()); 116 | } 117 | 118 | #[test] 119 | fn emoji_domains() { 120 | // HOT BEVERAGE is allowed here... 121 | let config = idna::Config::default() 122 | .verify_dns_length(true) 123 | .use_std3_ascii_rules(true); 124 | assert_eq!(config.to_ascii("☕.com").unwrap(), "xn--53h.com"); 125 | 126 | // ... but not here 127 | let config = idna::Config::default() 128 | .verify_dns_length(true) 129 | .use_std3_ascii_rules(true) 130 | .use_idna_2008_rules(true); 131 | let error = format!("{:?}", config.to_ascii("☕.com").unwrap_err()); 132 | assert!(error.contains("disallowed_in_idna_2008")); 133 | } 134 | 135 | #[test] 136 | fn unicode_before_delimiter() { 137 | let config = idna::Config::default(); 138 | assert!(config.to_ascii("xn--f\u{34a}-PTP").is_err()); 139 | } 140 | -------------------------------------------------------------------------------- /data-url/tests/data-urls.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["data://test/,X", 3 | "text/plain;charset=US-ASCII", 4 | [88]], 5 | ["data://test:test/,X", 6 | null], 7 | ["data:,X", 8 | "text/plain;charset=US-ASCII", 9 | [88]], 10 | ["data:", 11 | null], 12 | ["data:text/html", 13 | null], 14 | ["data:text/html ;charset=x ", 15 | null], 16 | ["data:,", 17 | "text/plain;charset=US-ASCII", 18 | []], 19 | ["data:,X#X", 20 | "text/plain;charset=US-ASCII", 21 | [88]], 22 | ["data:,%FF", 23 | "text/plain;charset=US-ASCII", 24 | [255]], 25 | ["data:text/plain,X", 26 | "text/plain", 27 | [88]], 28 | ["data:text/plain ,X", 29 | "text/plain", 30 | [88]], 31 | ["data:text/plain%20,X", 32 | "text/plain%20", 33 | [88]], 34 | ["data:text/plain\f,X", 35 | "text/plain%0c", 36 | [88]], 37 | ["data:text/plain%0C,X", 38 | "text/plain%0c", 39 | [88]], 40 | ["data:text/plain;,X", 41 | "text/plain", 42 | [88]], 43 | ["data:;x=x;charset=x,X", 44 | "text/plain;x=x;charset=x", 45 | [88]], 46 | ["data:;x=x,X", 47 | "text/plain;x=x", 48 | [88]], 49 | ["data:text/plain;charset=windows-1252,%C2%B1", 50 | "text/plain;charset=windows-1252", 51 | [194, 177]], 52 | ["data:text/plain;Charset=UTF-8,%C2%B1", 53 | "text/plain;charset=UTF-8", 54 | [194, 177]], 55 | ["data:text/plain;charset=windows-1252,áñçə💩", 56 | "text/plain;charset=windows-1252", 57 | [195, 161, 195, 177, 195, 167, 201, 153, 240, 159, 146, 169]], 58 | ["data:text/plain;charset=UTF-8,áñçə💩", 59 | "text/plain;charset=UTF-8", 60 | [195, 161, 195, 177, 195, 167, 201, 153, 240, 159, 146, 169]], 61 | ["data:image/gif,%C2%B1", 62 | "image/gif", 63 | [194, 177]], 64 | ["data:IMAGE/gif,%C2%B1", 65 | "image/gif", 66 | [194, 177]], 67 | ["data:IMAGE/gif;hi=x,%C2%B1", 68 | "image/gif;hi=x", 69 | [194, 177]], 70 | ["data:IMAGE/gif;CHARSET=x,%C2%B1", 71 | "image/gif;charset=x", 72 | [194, 177]], 73 | ["data: ,%FF", 74 | "text/plain;charset=US-ASCII", 75 | [255]], 76 | ["data:%20,%FF", 77 | "text/plain;charset=US-ASCII", 78 | [255]], 79 | ["data:\f,%FF", 80 | "text/plain;charset=US-ASCII", 81 | [255]], 82 | ["data:%1F,%FF", 83 | "text/plain;charset=US-ASCII", 84 | [255]], 85 | ["data:\u0000,%FF", 86 | "text/plain;charset=US-ASCII", 87 | [255]], 88 | ["data:%00,%FF", 89 | "text/plain;charset=US-ASCII", 90 | [255]], 91 | ["data:text/html ,X", 92 | "text/html", 93 | [88]], 94 | ["data:text / html,X", 95 | "text/plain;charset=US-ASCII", 96 | [88]], 97 | ["data:†,X", 98 | 
"text/plain;charset=US-ASCII", 99 | [88]], 100 | ["data:†/†,X", 101 | "%e2%80%a0/%e2%80%a0", 102 | [88]], 103 | ["data:X,X", 104 | "text/plain;charset=US-ASCII", 105 | [88]], 106 | ["data:image/png,X X", 107 | "image/png", 108 | [88, 32, 88]], 109 | ["data:application/javascript,X X", 110 | "application/javascript", 111 | [88, 32, 88]], 112 | ["data:application/xml,X X", 113 | "application/xml", 114 | [88, 32, 88]], 115 | ["data:text/javascript,X X", 116 | "text/javascript", 117 | [88, 32, 88]], 118 | ["data:text/plain,X X", 119 | "text/plain", 120 | [88, 32, 88]], 121 | ["data:unknown/unknown,X X", 122 | "unknown/unknown", 123 | [88, 32, 88]], 124 | ["data:text/plain;a=\",\",X", 125 | "text/plain;a=\"\"", 126 | [34, 44, 88]], 127 | ["data:text/plain;a=%2C,X", 128 | "text/plain;a=%2C", 129 | [88]], 130 | ["data:;base64;base64,WA", 131 | "text/plain", 132 | [88]], 133 | ["data:x/x;base64;base64,WA", 134 | "x/x", 135 | [88]], 136 | ["data:x/x;base64;charset=x,WA", 137 | "x/x;charset=x", 138 | [87, 65]], 139 | ["data:x/x;base64;charset=x;base64,WA", 140 | "x/x;charset=x", 141 | [88]], 142 | ["data:x/x;base64;base64x,WA", 143 | "x/x", 144 | [87, 65]], 145 | ["data:;base64,W%20A", 146 | "text/plain;charset=US-ASCII", 147 | [88]], 148 | ["data:;base64,W%0CA", 149 | "text/plain;charset=US-ASCII", 150 | [88]], 151 | ["data:x;base64x,WA", 152 | "text/plain;charset=US-ASCII", 153 | [87, 65]], 154 | ["data:x;base64;x,WA", 155 | "text/plain;charset=US-ASCII", 156 | [87, 65]], 157 | ["data:x;base64=x,WA", 158 | "text/plain;charset=US-ASCII", 159 | [87, 65]], 160 | ["data:; base64,WA", 161 | "text/plain;charset=US-ASCII", 162 | [88]], 163 | ["data:; base64,WA", 164 | "text/plain;charset=US-ASCII", 165 | [88]], 166 | ["data: ;charset=x ; base64,WA", 167 | "text/plain;charset=x", 168 | [88]], 169 | ["data:;base64;,WA", 170 | "text/plain", 171 | [87, 65]], 172 | ["data:;base64 ,WA", 173 | "text/plain;charset=US-ASCII", 174 | [88]], 175 | ["data:;base64 ,WA", 176 | "text/plain;charset=US-ASCII", 177 | [88]], 178 | ["data:;base 64,WA", 179 | "text/plain", 180 | [87, 65]], 181 | ["data:;BASe64,WA", 182 | "text/plain;charset=US-ASCII", 183 | [88]], 184 | ["data:;%62ase64,WA", 185 | "text/plain", 186 | [87, 65]], 187 | ["data:%3Bbase64,WA", 188 | "text/plain;charset=US-ASCII", 189 | [87, 65]], 190 | ["data:;charset=x,X", 191 | "text/plain;charset=x", 192 | [88]], 193 | ["data:; charset=x,X", 194 | "text/plain;charset=x", 195 | [88]], 196 | ["data:;charset =x,X", 197 | "text/plain", 198 | [88]], 199 | ["data:;charset= x,X", 200 | "text/plain;charset=\" x\"", 201 | [88]], 202 | ["data:;charset=,X", 203 | "text/plain", 204 | [88]], 205 | ["data:;charset,X", 206 | "text/plain", 207 | [88]], 208 | ["data:;charset=\"x\",X", 209 | "text/plain;charset=x", 210 | [88]], 211 | ["data:;CHARSET=\"X\",X", 212 | "text/plain;charset=X", 213 | [88]] 214 | ] 215 | -------------------------------------------------------------------------------- /debug_metadata/README.md: -------------------------------------------------------------------------------- 1 | ## Debugger Visualizers 2 | 3 | Many languages and debuggers enable developers to control how a type is 4 | displayed in a debugger. These are called "debugger visualizations" or "debugger 5 | views". 6 | 7 | The Windows debuggers (WinDbg\CDB) support defining custom debugger visualizations using 8 | the `Natvis` framework. To use Natvis, developers write XML documents using the natvis 9 | schema that describe how debugger types should be displayed with the `.natvis` extension. 
(See: https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects?view=vs-2019) 11 | The Natvis files provide patterns, which match type names, and a description of how to display 12 | those types. 13 | 14 | The Natvis schema can be found either online (See: https://code.visualstudio.com/docs/cpp/natvis#_schema) 15 | or locally at `<VS Installation Folder>\Xml\Schemas\1033\natvis.xsd`. 16 | 17 | The GNU debugger (GDB) supports defining custom debugger views using Pretty Printers. 18 | Pretty printers are written as Python scripts that describe how a type should be displayed 19 | when loaded up in GDB/LLDB. (See: https://sourceware.org/gdb/onlinedocs/gdb/Pretty-Printing.html#Pretty-Printing) 20 | The pretty printers provide patterns, which match type names, and for matching 21 | types, describe how to display those types. (For writing a pretty printer, see: https://sourceware.org/gdb/onlinedocs/gdb/Writing-a-Pretty_002dPrinter.html#Writing-a-Pretty_002dPrinter). 22 | 23 | ### Embedding Visualizers 24 | 25 | Through the use of the currently unstable `#[debugger_visualizer]` attribute, the `url` 26 | crate can embed debugger visualizers into the crate metadata. 27 | 28 | Currently the two types of visualizers supported are Natvis and Pretty printers. 29 | 30 | For Natvis files, when linking an executable with a crate that includes Natvis files, 31 | the MSVC linker will embed the contents of all Natvis files into the generated `PDB`. 32 | 33 | For pretty printers, the compiler will encode the contents of the pretty printer 34 | in the `.debug_gdb_scripts` section of the generated `ELF`. 35 | 36 | ### Testing Visualizers 37 | 38 | The `url` crate supports testing debugger visualizers defined for this crate. The entry point for 39 | these tests is `tests/debugger_visualizer.rs`. These tests are defined using the `debugger_test` and 40 | `debugger_test_parser` crates. The `debugger_test` crate is a proc macro crate which defines a 41 | single proc macro attribute, `#[debugger_test]`. For more detailed information about this crate, 42 | see https://crates.io/crates/debugger_test. The CI pipeline for the `url` crate has been updated 43 | to run the debugger visualizer tests to ensure debugger visualizers do not become broken/stale. 44 | 45 | The `#[debugger_test]` proc macro attribute may only be used on test functions and will run the 46 | function under the debugger specified by the `debugger` meta item. 47 | 48 | This proc macro attribute has 3 required values: 49 | 50 | 1. The first required meta item, `debugger`, takes a string value which specifies the debugger to launch. 51 | 2. The second required meta item, `commands`, takes a string containing a new line (`\n`) separated list of debugger 52 | commands to run. 53 | 3. The third required meta item, `expected_statements`, takes a string containing a new line (`\n`) separated list of 54 | statements that must exist in the debugger output. Pattern matching through regular expressions is also 55 | supported by using the `pattern:` prefix for each expected statement.
56 | 57 | #### Example: 58 | 59 | ```rust 60 | #[debugger_test( 61 | debugger = "cdb", 62 | commands = "command1\ncommand2\ncommand3", 63 | expected_statements = "statement1\nstatement2\nstatement3")] 64 | fn test() { 65 | 66 | } 67 | ``` 68 | 69 | Using a multiline string is also supported, with a single debugger command/expected statement per line: 70 | 71 | ```rust 72 | #[debugger_test( 73 | debugger = "cdb", 74 | commands = " 75 | command1 76 | command2 77 | command3", 78 | expected_statements = " 79 | statement1 80 | pattern:statement[0-9]+ 81 | statement3")] 82 | fn test() { 83 | 84 | } 85 | ``` 86 | 87 | In the example above, the second expected statement uses pattern matching through a regular expression 88 | by using the `pattern:` prefix. 89 | 90 | #### Testing Locally 91 | 92 | Currently, only Natvis visualizations have been defined for the `url` crate via `debug_metadata/url.natvis`, 93 | which means the `tests/debugger_visualizer.rs` tests need to be run on Windows using the `*-pc-windows-msvc` targets. 94 | To run these tests locally, first ensure the debugging tools for Windows are installed, or install them by following 95 | the steps listed at [Debugging Tools for Windows](https://docs.microsoft.com/en-us/windows-hardware/drivers/debugger/). 96 | Once the debugging tools have been installed, the tests can be run in the same manner as they are in the CI 97 | pipeline. 98 | 99 | #### Note 100 | 101 | The debugger visualizer tests in `tests/debugger_visualizer.rs` need to be run consecutively, 102 | not in parallel. This can be achieved by passing the flag `--test-threads=1` to the test binary. This is due to 103 | how the debugger tests are run. Each test marked with the `#[debugger_test]` attribute launches a debugger 104 | and attaches it to the current test process. If tests run in parallel, a test may try to attach 105 | a debugger to a process which already has a debugger attached, causing the test to fail.
106 | 107 | For example: 108 | 109 | ``` 110 | cargo test --test debugger_visualizer --features debugger_visualizer -- --test-threads=1 111 | ``` 112 | -------------------------------------------------------------------------------- /data-url/tests/wpt.rs: -------------------------------------------------------------------------------- 1 | use tester as test; 2 | 3 | #[macro_use] 4 | extern crate serde; 5 | 6 | fn run_data_url( 7 | input: String, 8 | expected_mime: Option<String>, 9 | expected_body: Option<Vec<u8>>, 10 | expected_panic: bool, 11 | ) { 12 | let priorhook = std::panic::take_hook(); 13 | std::panic::set_hook(Box::new(move |p| { 14 | if !expected_panic { 15 | priorhook(p); 16 | } 17 | })); 18 | let url = data_url::DataUrl::process(&input); 19 | if let Some(expected_mime) = expected_mime { 20 | let url = url.unwrap(); 21 | let (body, _) = url.decode_to_vec().unwrap(); 22 | if expected_mime.is_empty() { 23 | assert_eq!(url.mime_type().to_string(), "text/plain;charset=US-ASCII") 24 | } else { 25 | assert_eq!(url.mime_type().to_string(), expected_mime) 26 | } 27 | if let Some(expected_body) = expected_body { 28 | assert_eq!(body, expected_body) 29 | } 30 | } else if let Ok(url) = url { 31 | assert!(url.decode_to_vec().is_err(), "{:?}", url.mime_type()) 32 | } 33 | } 34 | 35 | fn collect_data_url<F>(add_test: &mut F) 36 | where 37 | F: FnMut(String, bool, test::TestFn), 38 | { 39 | let known_failures = ["data://test:test/,X"]; 40 | 41 | #[derive(Deserialize)] 42 | #[serde(untagged)] 43 | enum TestCase { 44 | Two(String, Option<String>), 45 | Three(String, Option<String>, Vec<u8>), 46 | } 47 | 48 | let v: Vec<TestCase> = serde_json::from_str(include_str!("data-urls.json")).unwrap(); 49 | for test in v { 50 | let (input, expected_mime, expected_body) = match test { 51 | TestCase::Two(i, m) => (i, m, None), 52 | TestCase::Three(i, m, b) => (i, m, Some(b)), 53 | }; 54 | let should_panic = known_failures.contains(&&*input); 55 | add_test( 56 | format!("data: URL {:?}", input), 57 | should_panic, 58 | test::TestFn::DynTestFn(Box::new(move || { 59 | run_data_url(input, expected_mime, expected_body, should_panic) 60 | })), 61 | ); 62 | } 63 | } 64 | 65 | fn run_base64(input: String, expected: Option<Vec<u8>>) { 66 | let result = data_url::forgiving_base64::decode_to_vec(input.as_bytes()); 67 | match (result, expected) { 68 | (Ok(bytes), Some(expected)) => assert_eq!(bytes, expected), 69 | (Ok(bytes), None) => panic!("Expected error, got {:?}", bytes), 70 | (Err(e), Some(expected)) => panic!("Expected {:?}, got error {:?}", expected, e), 71 | (Err(_), None) => {} 72 | } 73 | } 74 | 75 | fn collect_base64<F>(add_test: &mut F) 76 | where 77 | F: FnMut(String, bool, test::TestFn), 78 | { 79 | let known_failures = []; 80 | 81 | let v: Vec<(String, Option<Vec<u8>>)> = 82 | serde_json::from_str(include_str!("base64.json")).unwrap(); 83 | for (input, expected) in v { 84 | let should_panic = known_failures.contains(&&*input); 85 | add_test( 86 | format!("base64 {:?}", input), 87 | should_panic, 88 | test::TestFn::DynTestFn(Box::new(move || run_base64(input, expected))), 89 | ); 90 | } 91 | } 92 | 93 | fn run_mime(input: String, expected: Option<String>) { 94 | let result = input.parse::<data_url::mime::Mime>(); 95 | match (result, expected) { 96 | (Ok(mime), Some(expected)) => assert_eq!(mime.to_string(), expected), 97 | (Ok(mime), None) => panic!("Expected error, got {:?}", mime), 98 | (Err(e), Some(expected)) => panic!("Expected {:?}, got error {:?}", expected, e), 99 | (Err(_), None) => {} 100 | } 101 | } 102 | 103 | fn collect_mime<F>(add_test: &mut F) 104 | where 105 | F: FnMut(String, bool,
test::TestFn), 106 | { 107 | let known_failures = []; 108 | 109 | #[derive(Deserialize)] 110 | #[serde(untagged)] 111 | enum Entry { 112 | Comment(String), 113 | TestCase { 114 | input: String, 115 | output: Option<String>, 116 | }, 117 | } 118 | 119 | let v: Vec<Entry> = serde_json::from_str(include_str!("mime-types.json")).unwrap(); 120 | let v2: Vec<Entry> = serde_json::from_str(include_str!("generated-mime-types.json")).unwrap(); 121 | let entries = v.into_iter().chain(v2); 122 | 123 | let mut last_comment = None; 124 | for entry in entries { 125 | let (input, expected) = match entry { 126 | Entry::TestCase { input, output } => (input, output), 127 | Entry::Comment(s) => { 128 | last_comment = Some(s); 129 | continue; 130 | } 131 | }; 132 | 133 | let should_panic = known_failures.contains(&&*input); 134 | add_test( 135 | if let Some(ref s) = last_comment { 136 | format!("MIME type {:?} {:?}", s, input) 137 | } else { 138 | format!("MIME type {:?}", input) 139 | }, 140 | should_panic, 141 | test::TestFn::DynTestFn(Box::new(move || run_mime(input, expected))), 142 | ); 143 | } 144 | } 145 | 146 | fn main() { 147 | let mut tests = Vec::new(); 148 | { 149 | let mut add_one = |name: String, should_panic: bool, run: test::TestFn| { 150 | let desc = test::TestDesc { 151 | name: test::DynTestName(name), 152 | ignore: false, 153 | should_panic: match should_panic { 154 | true => test::ShouldPanic::Yes, 155 | false => test::ShouldPanic::No, 156 | }, 157 | allow_fail: false, 158 | test_type: test::TestType::Unknown, 159 | }; 160 | tests.push(test::TestDescAndFn { desc, testfn: run }) 161 | }; 162 | collect_data_url(&mut add_one); 163 | collect_base64(&mut add_one); 164 | collect_mime(&mut add_one); 165 | } 166 | test::test_main(&std::env::args().collect::<Vec<_>>(), tests, None) 167 | } 168 | -------------------------------------------------------------------------------- /idna/src/make_uts46_mapping_table.py: -------------------------------------------------------------------------------- 1 | # Copyright 2013-2014 The rust-url developers. 2 | # 3 | # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 | # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 | # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 | # option. This file may not be copied, modified, or distributed 7 | # except according to those terms. 8 | 9 | # Run as: python make_uts46_mapping_table.py IdnaMappingTable.txt > uts46_mapping_table.rs 10 | # You can get the latest idna table from 11 | # http://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt 12 | 13 | import collections 14 | import itertools 15 | 16 | print('''\ 17 | // Copyright 2013-2020 The rust-url developers. 18 | // 19 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 20 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 21 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 22 | // option. This file may not be copied, modified, or distributed 23 | // except according to those terms.
24 | 25 | // Generated by make_idna_table.py 26 | ''') 27 | 28 | txt = open("IdnaMappingTable.txt") 29 | 30 | def escape_char(c): 31 | return "\\u{%x}" % ord(c[0]) 32 | 33 | def char(s): 34 | return chr(int(s, 16)) 35 | 36 | strtab = collections.OrderedDict() 37 | strtab_offset = 0 38 | 39 | def strtab_slice(s): 40 | global strtab, strtab_offset 41 | 42 | if s in strtab: 43 | return strtab[s] 44 | else: 45 | utf8_len = len(s.encode('utf8')) 46 | c = (strtab_offset, utf8_len) 47 | strtab[s] = c 48 | strtab_offset += utf8_len 49 | return c 50 | 51 | def rust_slice(s): 52 | start = s[0] 53 | length = s[1] 54 | start_lo = start & 0xff 55 | start_hi = start >> 8 56 | assert length <= 255 57 | assert start_hi <= 255 58 | return "(StringTableSlice { byte_start_lo: %d, byte_start_hi: %d, byte_len: %d })" % (start_lo, start_hi, length) 59 | 60 | ranges = [] 61 | 62 | for line in txt: 63 | # remove comments 64 | line, _, _ = line.partition('#') 65 | # skip empty lines 66 | if len(line.strip()) == 0: 67 | continue 68 | fields = line.split(';') 69 | if fields[0].strip() == 'D800..DFFF': 70 | continue # Surrogates don't occur in Rust strings. 71 | first, _, last = fields[0].strip().partition('..') 72 | if not last: 73 | last = first 74 | mapping = fields[1].strip().replace('_', ' ').title().replace(' ', '') 75 | unicode_str = None 76 | if len(fields) > 2: 77 | if fields[2].strip(): 78 | unicode_str = u''.join(char(c) for c in fields[2].strip().split(' ')) 79 | elif mapping == "Deviation": 80 | unicode_str = u'' 81 | 82 | if len(fields) > 3: 83 | assert fields[3].strip() in ('NV8', 'XV8'), fields[3] 84 | assert mapping == 'Valid', mapping 85 | mapping = 'DisallowedIdna2008' 86 | 87 | ranges.append((first, last, mapping, unicode_str)) 88 | 89 | def mergeable_key(r): 90 | mapping = r[2] 91 | 92 | # These types have associated data, so we should not merge them. 93 | if mapping in ('Mapped', 'Deviation', 'DisallowedStd3Mapped'): 94 | return r 95 | assert mapping in ('Valid', 'Ignored', 'Disallowed', 'DisallowedStd3Valid', 'DisallowedIdna2008') 96 | return mapping 97 | 98 | grouped_ranges = itertools.groupby(ranges, key=mergeable_key) 99 | 100 | optimized_ranges = [] 101 | 102 | for (k, g) in grouped_ranges: 103 | group = list(g) 104 | if len(group) == 1: 105 | optimized_ranges.append(group[0]) 106 | continue 107 | # Assert that nothing in the group has an associated unicode string. 108 | for g in group: 109 | if g[3] is not None and len(g[3]) > 2: 110 | assert not g[3][2].strip() 111 | # Assert that consecutive members of the group don't leave gaps in 112 | # the codepoint space. 113 | a, b = itertools.tee(group) 114 | next(b, None) 115 | for (g1, g2) in zip(a, b): 116 | last_char = int(g1[1], 16) 117 | next_char = int(g2[0], 16) 118 | if last_char + 1 == next_char: 119 | continue 120 | # There's a gap where surrogates would appear, but we don't have to 121 | # worry about that gap, as surrogates never appear in Rust strings. 122 | # Assert we're seeing the surrogate case here. 
123 | assert last_char == 0xd7ff 124 | assert next_char == 0xe000 125 | optimized_ranges.append((group[0][0], group[-1][1]) + group[0][2:]) 126 | 127 | def is_single_char_range(r): 128 | (first, last, _, _) = r 129 | return first == last 130 | 131 | # We can reduce the size of the character range table and the index table to about 1/4 132 | # by merging runs of single character ranges and using character offsets from the start 133 | # of that range to retrieve the correct `Mapping` value 134 | def merge_single_char_ranges(ranges): 135 | current = [] 136 | for r in ranges: 137 | if not current or is_single_char_range(current[-1]) and is_single_char_range(r): 138 | current.append(r) 139 | continue 140 | if len(current) != 0: 141 | ret = current 142 | current = [r] 143 | yield ret 144 | continue 145 | current.append(r) 146 | ret = current 147 | current = [] 148 | yield ret 149 | yield current 150 | 151 | optimized_ranges = list(merge_single_char_ranges(optimized_ranges)) 152 | 153 | SINGLE_MARKER = 1 << 15 154 | 155 | print("static TABLE: &[(char, u16)] = &[") 156 | 157 | offset = 0 158 | for ranges in optimized_ranges: 159 | assert offset < SINGLE_MARKER 160 | 161 | block_len = len(ranges) 162 | single = SINGLE_MARKER if block_len == 1 else 0 163 | index = offset | single 164 | offset += block_len 165 | 166 | start = escape_char(char(ranges[0][0])) 167 | print(" ('%s', %s)," % (start, index)) 168 | 169 | print("];\n") 170 | 171 | print("static MAPPING_TABLE: &[Mapping] = &[") 172 | 173 | for ranges in optimized_ranges: 174 | for (first, last, mapping, unicode_str) in ranges: 175 | if unicode_str is not None: 176 | mapping += rust_slice(strtab_slice(unicode_str)) 177 | print(" %s," % mapping) 178 | 179 | print("];\n") 180 | 181 | def escape_str(s): 182 | return [escape_char(c) for c in s] 183 | 184 | print("static STRING_TABLE: &str = \"%s\";" 185 | % '\\\n '.join(itertools.chain(*[escape_str(s) for s in strtab.keys()]))) 186 | -------------------------------------------------------------------------------- /data-url/src/forgiving_base64.rs: -------------------------------------------------------------------------------- 1 | //! 
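//!
//! A minimal usage sketch (an illustrative example; the module's public path,
//! `data_url::forgiving_base64`, is the one used by this crate's own WPT test
//! harness). ASCII whitespace in the input is skipped and trailing `=`
//! padding is accepted, per the forgiving decode algorithm implemented below:
//!
//! ```rust
//! use data_url::forgiving_base64::decode_to_vec;
//!
//! // Whitespace is ignored, so "aG VsbG8=" decodes the same as "aGVsbG8=".
//! let bytes = decode_to_vec(b"aG VsbG8=").unwrap();
//! assert_eq!(bytes, b"hello".to_vec());
//! ```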
2 | 3 | #[derive(Debug)] 4 | pub struct InvalidBase64(InvalidBase64Details); 5 | 6 | #[derive(Debug)] 7 | enum InvalidBase64Details { 8 | UnexpectedSymbol(u8), 9 | AlphabetSymbolAfterPadding, 10 | LoneAlphabetSymbol, 11 | Padding, 12 | } 13 | 14 | #[derive(Debug)] 15 | pub enum DecodeError<E> { 16 | InvalidBase64(InvalidBase64), 17 | WriteError(E), 18 | } 19 | 20 | impl<E> From<InvalidBase64Details> for DecodeError<E> { 21 | fn from(e: InvalidBase64Details) -> Self { 22 | DecodeError::InvalidBase64(InvalidBase64(e)) 23 | } 24 | } 25 | 26 | pub(crate) enum Impossible {} 27 | 28 | impl From<DecodeError<Impossible>> for InvalidBase64 { 29 | fn from(e: DecodeError<Impossible>) -> Self { 30 | match e { 31 | DecodeError::InvalidBase64(e) => e, 32 | DecodeError::WriteError(e) => match e {}, 33 | } 34 | } 35 | } 36 | 37 | /// `input` is assumed to be in an ASCII-compatible encoding 38 | pub fn decode_to_vec(input: &[u8]) -> Result<Vec<u8>, InvalidBase64> { 39 | let mut v = Vec::new(); 40 | { 41 | let mut decoder = Decoder::new(|bytes| { 42 | v.extend_from_slice(bytes); 43 | Ok(()) 44 | }); 45 | decoder.feed(input)?; 46 | decoder.finish()?; 47 | } 48 | Ok(v) 49 | } 50 | 51 | /// 52 | pub struct Decoder<F, E> 53 | where 54 | F: FnMut(&[u8]) -> Result<(), E>, 55 | { 56 | write_bytes: F, 57 | bit_buffer: u32, 58 | buffer_bit_length: u8, 59 | padding_symbols: u8, 60 | } 61 | 62 | impl<F, E> Decoder<F, E> 63 | where 64 | F: FnMut(&[u8]) -> Result<(), E>, 65 | { 66 | pub fn new(write_bytes: F) -> Self { 67 | Self { 68 | write_bytes, 69 | bit_buffer: 0, 70 | buffer_bit_length: 0, 71 | padding_symbols: 0, 72 | } 73 | } 74 | 75 | /// Feed to the decoder partial input in an ASCII-compatible encoding 76 | pub fn feed(&mut self, input: &[u8]) -> Result<(), DecodeError<E>> { 77 | for &byte in input.iter() { 78 | let value = BASE64_DECODE_TABLE[byte as usize]; 79 | if value < 0 { 80 | // A character that’s not part of the alphabet 81 | 82 | // Remove ASCII whitespace 83 | if matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'\x0C') { 84 | continue; 85 | } 86 | 87 | if byte == b'=' { 88 | self.padding_symbols = self.padding_symbols.saturating_add(1); 89 | continue; 90 | } 91 | 92 | return Err(InvalidBase64Details::UnexpectedSymbol(byte).into()); 93 | } 94 | if self.padding_symbols > 0 { 95 | return Err(InvalidBase64Details::AlphabetSymbolAfterPadding.into()); 96 | } 97 | self.bit_buffer <<= 6; 98 | self.bit_buffer |= value as u32; 99 | // 18 before incrementing means we’ve just reached 24 100 | if self.buffer_bit_length < 18 { 101 | self.buffer_bit_length += 6; 102 | } else { 103 | // We’ve accumulated four times 6 bits, which equals three times 8 bits. 104 | let byte_buffer = [ 105 | (self.bit_buffer >> 16) as u8, 106 | (self.bit_buffer >> 8) as u8, 107 | self.bit_buffer as u8, 108 | ]; 109 | (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?; 110 | self.buffer_bit_length = 0; 111 | // No need to reset bit_buffer, 112 | // since next time we’re only gonna read relevant bits. 113 | } 114 | } 115 | Ok(()) 116 | } 117 | 118 | /// Call this to signal the end of the input 119 | pub fn finish(mut self) -> Result<(), DecodeError<E>> { 120 | match (self.buffer_bit_length, self.padding_symbols) { 121 | (0, 0) => { 122 | // A multiple of four of alphabet symbols, and nothing else. 123 | } 124 | (12, 2) | (12, 0) => { 125 | // A multiple of four of alphabet symbols, followed by two more symbols, 126 | // optionally followed by two padding characters (which make a total multiple of four).
127 | let byte_buffer = [(self.bit_buffer >> 4) as u8]; 128 | (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?; 129 | } 130 | (18, 1) | (18, 0) => { 131 | // A multiple of four of alphabet symbols, followed by three more symbols, 132 | // optionally followed by one padding character (which make a total multiple of four). 133 | let byte_buffer = [(self.bit_buffer >> 10) as u8, (self.bit_buffer >> 2) as u8]; 134 | (self.write_bytes)(&byte_buffer).map_err(DecodeError::WriteError)?; 135 | } 136 | (6, _) => return Err(InvalidBase64Details::LoneAlphabetSymbol.into()), 137 | _ => return Err(InvalidBase64Details::Padding.into()), 138 | } 139 | Ok(()) 140 | } 141 | } 142 | 143 | /// Generated by `make_base64_decode_table.py` based on "Table 1: The Base 64 Alphabet" 144 | /// at 145 | /// 146 | /// Array indices are the byte value of symbols. 147 | /// Array values are their positions in the base64 alphabet, 148 | /// or -1 for symbols not in the alphabet. 149 | /// The position contributes 6 bits to the decoded bytes. 150 | #[rustfmt::skip] 151 | const BASE64_DECODE_TABLE: [i8; 256] = [ 152 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 153 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 154 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 155 | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, 156 | -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 157 | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, 158 | -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 159 | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, 160 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 161 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 162 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 163 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 164 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 165 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 166 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 167 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 168 | ]; 169 | -------------------------------------------------------------------------------- /url/src/slicing.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | use crate::Url; 10 | use std::ops::{Index, Range, RangeFrom, RangeFull, RangeTo}; 11 | 12 | impl Index<RangeFull> for Url { 13 | type Output = str; 14 | fn index(&self, _: RangeFull) -> &str { 15 | &self.serialization 16 | } 17 | } 18 | 19 | impl Index<RangeFrom<Position>> for Url { 20 | type Output = str; 21 | fn index(&self, range: RangeFrom<Position>) -> &str { 22 | &self.serialization[self.index(range.start)..] 23 | } 24 | } 25 | 26 | impl Index<RangeTo<Position>> for Url { 27 | type Output = str; 28 | fn index(&self, range: RangeTo<Position>) -> &str { 29 | &self.serialization[..self.index(range.end)] 30 | } 31 | } 32 | 33 | impl Index<Range<Position>> for Url { 34 | type Output = str; 35 | fn index(&self, range: Range<Position>) -> &str { 36 | &self.serialization[self.index(range.start)..self.index(range.end)] 37 | } 38 | } 39 | 40 | /// Indicates a position within a URL based on its components.
41 | /// 42 | /// A range of positions can be used for slicing `Url`: 43 | /// 44 | /// ```rust 45 | /// # use url::{Url, Position}; 46 | /// # fn something(some_url: Url) { 47 | /// let serialization: &str = &some_url[..]; 48 | /// let serialization_without_fragment: &str = &some_url[..Position::AfterQuery]; 49 | /// let authority: &str = &some_url[Position::BeforeUsername..Position::AfterPort]; 50 | /// let data_url_payload: &str = &some_url[Position::BeforePath..Position::AfterQuery]; 51 | /// let scheme_relative: &str = &some_url[Position::BeforeUsername..]; 52 | /// # } 53 | /// ``` 54 | /// 55 | /// In a pseudo-grammar (where `[`…`]?` makes a sub-sequence optional), 56 | /// URL components and delimiters that separate them are: 57 | /// 58 | /// ```notrust 59 | /// url = 60 | /// scheme ":" 61 | /// [ "//" [ username [ ":" password ]? "@" ]? host [ ":" port ]? ]? 62 | /// path [ "?" query ]? [ "#" fragment ]? 63 | /// ``` 64 | /// 65 | /// When a given component is not present, 66 | /// its "before" and "after" positions are the same 67 | /// (so that `&some_url[BeforeFoo..AfterFoo]` is the empty string) 68 | /// and component ordering is preserved 69 | /// (so that a missing query "is between" a path and a fragment). 70 | /// 71 | /// The end of a component and the start of the next are either the same or separated 72 | /// by a delimiter. 73 | /// (Note that the initial `/` of a path is considered part of the path here, not a delimiter.) 74 | /// For example, `&url[..BeforeFragment]` would include a `#` delimiter (if present in `url`), 75 | /// so `&url[..AfterQuery]` might be desired instead. 76 | /// 77 | /// `BeforeScheme` and `AfterFragment` are always the start and end of the entire URL, 78 | /// so `&url[BeforeScheme..X]` is the same as `&url[..X]` 79 | /// and `&url[X..AfterFragment]` is the same as `&url[X..]`.
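///
/// A few more concrete examples (the URL below is illustrative; each
/// assertion follows from the component positions described above):
///
/// ```rust
/// use url::{Url, Position};
///
/// let url = Url::parse("https://user:pw@example.com:8080/a/b?q=1#frag").unwrap();
/// assert_eq!(&url[Position::BeforeHost..Position::AfterHost], "example.com");
/// assert_eq!(&url[Position::BeforePath..Position::AfterQuery], "/a/b?q=1");
/// assert_eq!(&url[..Position::AfterPort], "https://user:pw@example.com:8080");
/// ```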
80 | #[derive(Copy, Clone, Debug)] 81 | pub enum Position { 82 | BeforeScheme, 83 | AfterScheme, 84 | BeforeUsername, 85 | AfterUsername, 86 | BeforePassword, 87 | AfterPassword, 88 | BeforeHost, 89 | AfterHost, 90 | BeforePort, 91 | AfterPort, 92 | BeforePath, 93 | AfterPath, 94 | BeforeQuery, 95 | AfterQuery, 96 | BeforeFragment, 97 | AfterFragment, 98 | } 99 | 100 | impl Url { 101 | #[inline] 102 | fn index(&self, position: Position) -> usize { 103 | match position { 104 | Position::BeforeScheme => 0, 105 | 106 | Position::AfterScheme => self.scheme_end as usize, 107 | 108 | Position::BeforeUsername => { 109 | if self.has_authority() { 110 | self.scheme_end as usize + "://".len() 111 | } else { 112 | debug_assert!(self.byte_at(self.scheme_end) == b':'); 113 | debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end); 114 | self.scheme_end as usize + ":".len() 115 | } 116 | } 117 | 118 | Position::AfterUsername => self.username_end as usize, 119 | 120 | Position::BeforePassword => { 121 | if self.has_authority() && self.byte_at(self.username_end) == b':' { 122 | self.username_end as usize + ":".len() 123 | } else { 124 | debug_assert!(self.username_end == self.host_start); 125 | self.username_end as usize 126 | } 127 | } 128 | 129 | Position::AfterPassword => { 130 | if self.has_authority() && self.byte_at(self.username_end) == b':' { 131 | debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@'); 132 | self.host_start as usize - "@".len() 133 | } else { 134 | debug_assert!(self.username_end == self.host_start); 135 | self.host_start as usize 136 | } 137 | } 138 | 139 | Position::BeforeHost => self.host_start as usize, 140 | 141 | Position::AfterHost => self.host_end as usize, 142 | 143 | Position::BeforePort => { 144 | if self.port.is_some() { 145 | debug_assert!(self.byte_at(self.host_end) == b':'); 146 | self.host_end as usize + ":".len() 147 | } else { 148 | self.host_end as usize 149 | } 150 | } 151 | 152 | Position::AfterPort => self.path_start as usize, 153 | 154 | Position::BeforePath => self.path_start as usize, 155 | 156 | Position::AfterPath => match (self.query_start, self.fragment_start) { 157 | (Some(q), _) => q as usize, 158 | (None, Some(f)) => f as usize, 159 | (None, None) => self.serialization.len(), 160 | }, 161 | 162 | Position::BeforeQuery => match (self.query_start, self.fragment_start) { 163 | (Some(q), _) => { 164 | debug_assert!(self.byte_at(q) == b'?'); 165 | q as usize + "?".len() 166 | } 167 | (None, Some(f)) => f as usize, 168 | (None, None) => self.serialization.len(), 169 | }, 170 | 171 | Position::AfterQuery => match self.fragment_start { 172 | None => self.serialization.len(), 173 | Some(f) => f as usize, 174 | }, 175 | 176 | Position::BeforeFragment => match self.fragment_start { 177 | Some(f) => { 178 | debug_assert!(self.byte_at(f) == b'#'); 179 | f as usize + "#".len() 180 | } 181 | None => self.serialization.len(), 182 | }, 183 | 184 | Position::AfterFragment => self.serialization.len(), 185 | } 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /idna/tests/uts46.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2013-2014 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 
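// The IdnaTestV2.txt lines parsed below consist of semicolon-separated
// fields in this order:
//
//   source; toUnicode; toUnicodeStatus; toAsciiN; toAsciiNStatus; toAsciiT; toAsciiTStatus
//
// An empty field inherits its value from an earlier column, which is what the
// fall-back logic in `collect_tests` implements. An illustrative (not
// verbatim) data line:
//
//   faß.de; ; ; xn--fa-hia.de; ; fass.de;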
8 | 9 | use crate::test::TestFn; 10 | use std::char; 11 | use std::fmt::Write; 12 | 13 | use idna::Errors; 14 | 15 | pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) { 16 | // https://www.unicode.org/Public/idna/13.0.0/IdnaTestV2.txt 17 | for (i, line) in include_str!("IdnaTestV2.txt").lines().enumerate() { 18 | if line.is_empty() || line.starts_with('#') { 19 | continue; 20 | } 21 | 22 | // Remove comments 23 | let line = match line.find('#') { 24 | Some(index) => &line[0..index], 25 | None => line, 26 | }; 27 | 28 | let mut pieces = line.split(';').map(|x| x.trim()).collect::<Vec<&str>>(); 29 | let source = unescape(pieces.remove(0)); 30 | 31 | // ToUnicode 32 | let mut to_unicode = unescape(pieces.remove(0)); 33 | if to_unicode.is_empty() { 34 | to_unicode = source.clone(); 35 | } 36 | let to_unicode_status = status(pieces.remove(0)); 37 | 38 | // ToAsciiN 39 | let to_ascii_n = pieces.remove(0); 40 | let to_ascii_n = if to_ascii_n.is_empty() { 41 | to_unicode.clone() 42 | } else { 43 | to_ascii_n.to_owned() 44 | }; 45 | let to_ascii_n_status = pieces.remove(0); 46 | let to_ascii_n_status = if to_ascii_n_status.is_empty() { 47 | to_unicode_status.clone() 48 | } else { 49 | status(to_ascii_n_status) 50 | }; 51 | 52 | // ToAsciiT 53 | let to_ascii_t = pieces.remove(0); 54 | let to_ascii_t = if to_ascii_t.is_empty() { 55 | to_ascii_n.clone() 56 | } else { 57 | to_ascii_t.to_owned() 58 | }; 59 | let to_ascii_t_status = pieces.remove(0); 60 | let to_ascii_t_status = if to_ascii_t_status.is_empty() { 61 | to_ascii_n_status.clone() 62 | } else { 63 | status(to_ascii_t_status) 64 | }; 65 | 66 | let test_name = format!("UTS #46 line {}", i + 1); 67 | add_test( 68 | test_name, 69 | TestFn::DynTestFn(Box::new(move || { 70 | let config = idna::Config::default() 71 | .use_std3_ascii_rules(true) 72 | .verify_dns_length(true) 73 | .check_hyphens(true); 74 | 75 | // http://unicode.org/reports/tr46/#Deviations 76 | // applications that perform IDNA2008 lookup are not required to check 77 | // for these contexts, so we skip all tests annotated with C* 78 | 79 | // Everybody ignores V2 80 | // https://github.com/servo/rust-url/pull/240 81 | // https://github.com/whatwg/url/issues/53#issuecomment-181528158 82 | // http://www.unicode.org/review/pri317/ 83 | 84 | // "The special error codes X3 and X4_2 are now returned where a toASCII error code 85 | // was formerly being generated in toUnicode due to an empty label." 86 | // This is not implemented yet, so we skip toUnicode X4_2 tests for now, too.
87 | 88 | let (to_unicode_value, to_unicode_result) = 89 | config.transitional_processing(false).to_unicode(&source); 90 | let to_unicode_result = to_unicode_result.map(|()| to_unicode_value); 91 | check( 92 | &source, 93 | (&to_unicode, &to_unicode_status), 94 | to_unicode_result, 95 | |e| e.starts_with('C') || e == "V2" || e == "X4_2", 96 | ); 97 | 98 | let to_ascii_n_result = config.transitional_processing(false).to_ascii(&source); 99 | check( 100 | &source, 101 | (&to_ascii_n, &to_ascii_n_status), 102 | to_ascii_n_result, 103 | |e| e.starts_with('C') || e == "V2", 104 | ); 105 | 106 | let to_ascii_t_result = config.transitional_processing(true).to_ascii(&source); 107 | check( 108 | &source, 109 | (&to_ascii_t, &to_ascii_t_status), 110 | to_ascii_t_result, 111 | |e| e.starts_with('C') || e == "V2", 112 | ); 113 | })), 114 | ) 115 | } 116 | } 117 | 118 | #[allow(clippy::redundant_clone)] 119 | fn check<F>(source: &str, expected: (&str, &[&str]), actual: Result<String, Errors>, ignore: F) 120 | where 121 | F: Fn(&str) -> bool, 122 | { 123 | if !expected.1.is_empty() { 124 | if !expected.1.iter().copied().any(ignore) { 125 | let res = actual.ok(); 126 | assert_eq!( 127 | res.clone(), 128 | None, 129 | "Expected error {:?}. result: {} | source: {}", 130 | expected.1, 131 | res.unwrap(), 132 | source, 133 | ); 134 | } 135 | } else { 136 | assert!( 137 | actual.is_ok(), 138 | "Couldn't parse {} | error: {:?}", 139 | source, 140 | actual.err().unwrap(), 141 | ); 142 | assert_eq!(actual.unwrap(), expected.0, "source: {}", source); 143 | } 144 | } 145 | 146 | fn unescape(input: &str) -> String { 147 | let mut output = String::new(); 148 | let mut chars = input.chars(); 149 | loop { 150 | match chars.next() { 151 | None => return output, 152 | Some(c) => { 153 | if c == '\\' { 154 | match chars.next().unwrap() { 155 | '\\' => output.push('\\'), 156 | 'u' => { 157 | let c1 = chars.next().unwrap().to_digit(16).unwrap(); 158 | let c2 = chars.next().unwrap().to_digit(16).unwrap(); 159 | let c3 = chars.next().unwrap().to_digit(16).unwrap(); 160 | let c4 = chars.next().unwrap().to_digit(16).unwrap(); 161 | match char::from_u32(((c1 * 16 + c2) * 16 + c3) * 16 + c4) { 162 | Some(c) => output.push(c), 163 | None => { 164 | write!(&mut output, "\\u{:X}{:X}{:X}{:X}", c1, c2, c3, c4) 165 | .expect("Could not write to output"); 166 | } 167 | }; 168 | } 169 | _ => panic!("Invalid test data input"), 170 | } 171 | } else { 172 | output.push(c); 173 | } 174 | } 175 | } 176 | } 177 | } 178 | 179 | fn status(status: &str) -> Vec<&str> { 180 | if status.is_empty() || status == "[]" { 181 | return Vec::new(); 182 | } 183 | 184 | let mut result = status.split(", ").collect::<Vec<_>>(); 185 | assert!(result[0].starts_with('[')); 186 | result[0] = &result[0][1..]; 187 | 188 | let idx = result.len() - 1; 189 | let last = &mut result[idx]; 190 | assert!(last.ends_with(']')); 191 | *last = &last[..last.len() - 1]; 192 | 193 | result 194 | } 195 | -------------------------------------------------------------------------------- /data-url/src/mime.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{self, Write}; 2 | use std::str::FromStr; 3 | 4 | /// 5 | #[derive(Debug, PartialEq, Eq)] 6 | pub struct Mime { 7 | pub type_: String, 8 | pub subtype: String, 9 | /// (name, value) 10 | pub parameters: Vec<(String, String)>, 11 | } 12 | 13 | impl Mime { 14 | pub fn get_parameter<P>(&self, name: &P) -> Option<&str> 15 | where 16 | P: ?Sized + PartialEq<str>, 17 | { 18 | self.parameters 19 | .iter() 20 | .find(|&&(ref n, _)| name == &**n) 21 | .map(|&(_, ref v)| &**v) 22 | } 23 | } 24 | 25 | #[derive(Debug)] 26 | pub struct MimeParsingError(()); 27 | 28 | /// 29 | impl FromStr for Mime { 30 | type Err = MimeParsingError; 31 | 32 | fn from_str(s: &str) -> Result<Self, Self::Err> { 33 | parse(s).ok_or(MimeParsingError(())) 34 | } 35 | } 36 | 37 | fn parse(s: &str) -> Option<Mime> { 38 | let trimmed = s.trim_matches(http_whitespace); 39 | 40 | let (type_, rest) = split2(trimmed, '/'); 41 | require!(only_http_token_code_points(type_) && !type_.is_empty()); 42 | 43 | let (subtype, rest) = split2(rest?, ';'); 44 | let subtype = subtype.trim_end_matches(http_whitespace); 45 | require!(only_http_token_code_points(subtype) && !subtype.is_empty()); 46 | 47 | let mut parameters = Vec::new(); 48 | if let Some(rest) = rest { 49 | parse_parameters(rest, &mut parameters) 50 | } 51 | 52 | Some(Mime { 53 | type_: type_.to_ascii_lowercase(), 54 | subtype: subtype.to_ascii_lowercase(), 55 | parameters, 56 | }) 57 | } 58 | 59 | fn split2(s: &str, separator: char) -> (&str, Option<&str>) { 60 | let mut iter = s.splitn(2, separator); 61 | let first = iter.next().unwrap(); 62 | (first, iter.next()) 63 | } 64 | 65 | fn parse_parameters(s: &str, parameters: &mut Vec<(String, String)>) { 66 | let mut semicolon_separated = s.split(';'); 67 | 68 | while let Some(piece) = semicolon_separated.next() { 69 | let piece = piece.trim_start_matches(http_whitespace); 70 | let (name, value) = split2(piece, '='); 71 | // We cannot early-return on an invalid name here, because the value 72 | // parsing later may consume more semicolon-separated pieces. 73 | let name_valid = 74 | !name.is_empty() && only_http_token_code_points(name) && !contains(parameters, name); 75 | if let Some(value) = value { 76 | let value = if let Some(stripped) = value.strip_prefix('"') { 77 | let max_len = stripped.len().saturating_sub(1); // without end quote 78 | let mut unescaped_value = String::with_capacity(max_len); 79 | let mut chars = stripped.chars(); 80 | 'until_closing_quote: loop { 81 | while let Some(c) = chars.next() { 82 | match c { 83 | '"' => break 'until_closing_quote, 84 | '\\' => unescaped_value.push(chars.next().unwrap_or_else(|| { 85 | semicolon_separated 86 | .next() 87 | .map(|piece| { 88 | // A semicolon inside a quoted value is not a separator 89 | // for the next parameter, but part of the value. 90 | chars = piece.chars(); 91 | ';' 92 | }) 93 | .unwrap_or('\\') 94 | })), 95 | _ => unescaped_value.push(c), 96 | } 97 | } 98 | if let Some(piece) = semicolon_separated.next() { 99 | // A semicolon inside a quoted value is not a separator 100 | // for the next parameter, but part of the value.
101 | unescaped_value.push(';'); 102 | chars = piece.chars() 103 | } else { 104 | break; 105 | } 106 | } 107 | if !name_valid || !valid_value(value) { 108 | continue; 109 | } 110 | unescaped_value 111 | } else { 112 | let value = value.trim_end_matches(http_whitespace); 113 | if value.is_empty() { 114 | continue; 115 | } 116 | if !name_valid || !valid_value(value) { 117 | continue; 118 | } 119 | value.to_owned() 120 | }; 121 | parameters.push((name.to_ascii_lowercase(), value)) 122 | } 123 | } 124 | } 125 | 126 | fn contains(parameters: &[(String, String)], name: &str) -> bool { 127 | parameters.iter().any(|&(ref n, _)| n == name) 128 | } 129 | 130 | fn valid_value(s: &str) -> bool { 131 | s.chars().all(|c| { 132 | // 133 | matches!(c, '\t' | ' '..='~' | '\u{80}'..='\u{FF}') 134 | }) 135 | } 136 | 137 | /// 138 | impl fmt::Display for Mime { 139 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 140 | f.write_str(&self.type_)?; 141 | f.write_str("/")?; 142 | f.write_str(&self.subtype)?; 143 | for &(ref name, ref value) in &self.parameters { 144 | f.write_str(";")?; 145 | f.write_str(name)?; 146 | f.write_str("=")?; 147 | if only_http_token_code_points(value) && !value.is_empty() { 148 | f.write_str(value)? 149 | } else { 150 | f.write_str("\"")?; 151 | for c in value.chars() { 152 | if c == '"' || c == '\\' { 153 | f.write_str("\\")? 154 | } 155 | f.write_char(c)? 156 | } 157 | f.write_str("\"")? 158 | } 159 | } 160 | Ok(()) 161 | } 162 | } 163 | 164 | fn http_whitespace(c: char) -> bool { 165 | matches!(c, ' ' | '\t' | '\n' | '\r') 166 | } 167 | 168 | fn only_http_token_code_points(s: &str) -> bool { 169 | s.bytes().all(|byte| IS_HTTP_TOKEN[byte as usize]) 170 | } 171 | 172 | macro_rules! byte_map { 173 | ($($flag:expr,)*) => ([ 174 | $($flag != 0,)* 175 | ]) 176 | } 177 | 178 | // Copied from https://github.com/hyperium/mime/blob/v0.3.5/src/parse.rs#L293 179 | #[rustfmt::skip] 180 | static IS_HTTP_TOKEN: [bool; 256] = byte_map![ 181 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 182 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 183 | 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 184 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 185 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 186 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 187 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 188 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 189 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 190 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 191 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 193 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 195 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 196 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 197 | ]; 198 | -------------------------------------------------------------------------------- /url/tests/data.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2013-2014 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 or the MIT license 5 | // , at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | //! 
Data-driven tests 10 | 11 | use std::str::FromStr; 12 | 13 | use serde_json::Value; 14 | use url::{quirks, Url}; 15 | 16 | #[test] 17 | fn urltestdata() { 18 | #[cfg(not(feature = "idna"))] 19 | let idna_skip_inputs = [ 20 | "http://www.foo。bar.com", 21 | "http://Go.com", 22 | "http://你好你好", 23 | "https://faß.ExAmPlE/", 24 | "http://0Xc0.0250.01", 25 | "ftp://%e2%98%83", 26 | "https://%e2%98%83", 27 | "file://a\u{ad}b/p", 28 | "file://a%C2%ADb/p", 29 | "http://GOO\u{200b}\u{2060}\u{feff}goo.com", 30 | ]; 31 | 32 | // Copied from https://github.com/web-platform-tests/wpt/blob/master/url/ 33 | let mut json = Value::from_str(include_str!("urltestdata.json")) 34 | .expect("JSON parse error in urltestdata.json"); 35 | 36 | let mut passed = true; 37 | for entry in json.as_array_mut().unwrap() { 38 | if entry.is_string() { 39 | continue; // ignore comments 40 | } 41 | 42 | let maybe_base = entry 43 | .take_key("base") 44 | .expect("missing base key") 45 | .maybe_string(); 46 | let input = entry.take_string("input"); 47 | let failure = entry.take_key("failure").is_some(); 48 | 49 | #[cfg(not(feature = "idna"))] 50 | { 51 | if idna_skip_inputs.contains(&input.as_str()) { 52 | continue; 53 | } 54 | } 55 | 56 | let res = if let Some(base) = maybe_base { 57 | let base = match Url::parse(&base) { 58 | Ok(base) => base, 59 | Err(_) if failure => continue, 60 | Err(message) => { 61 | eprint_failure( 62 | format!(" failed: error parsing base {:?}: {}", base, message), 63 | &format!("parse base for {:?}", input), 64 | None, 65 | ); 66 | passed = false; 67 | continue; 68 | } 69 | }; 70 | base.join(&input) 71 | } else { 72 | Url::parse(&input) 73 | }; 74 | 75 | let url = match (res, failure) { 76 | (Ok(url), false) => url, 77 | (Err(_), true) => continue, 78 | (Err(message), false) => { 79 | eprint_failure( 80 | format!(" failed: {}", message), 81 | &format!("parse URL for {:?}", input), 82 | None, 83 | ); 84 | passed = false; 85 | continue; 86 | } 87 | (Ok(_), true) => { 88 | eprint_failure( 89 | format!(" failed: expected parse error for URL {:?}", input), 90 | &format!("parse URL for {:?}", input), 91 | None, 92 | ); 93 | passed = false; 94 | continue; 95 | } 96 | }; 97 | 98 | passed &= check_invariants(&url, &format!("invariants for {:?}", input), None); 99 | 100 | for &attr in ATTRIBS { 101 | passed &= test_eq_eprint( 102 | entry.take_string(attr), 103 | get(&url, attr), 104 | &format!("{:?} - {}", input, attr), 105 | None, 106 | ); 107 | } 108 | 109 | if let Some(expected_origin) = entry.take_key("origin").map(|s| s.string()) { 110 | passed &= test_eq_eprint( 111 | expected_origin, 112 | &quirks::origin(&url), 113 | &format!("origin for {:?}", input), 114 | None, 115 | ); 116 | } 117 | } 118 | 119 | assert!(passed) 120 | } 121 | 122 | #[test] 123 | fn setters_tests() { 124 | let mut json = Value::from_str(include_str!("setters_tests.json")) 125 | .expect("JSON parse error in setters_tests.json"); 126 | 127 | let mut passed = true; 128 | for &attr in ATTRIBS { 129 | if attr == "href" { 130 | continue; 131 | } 132 | 133 | let mut tests = json.take_key(attr).unwrap(); 134 | for mut test in tests.as_array_mut().unwrap().drain(..) 
{ 135 | let comment = test.take_key("comment").map(|s| s.string()); 136 | #[cfg(not(feature = "idna"))] 137 | { 138 | if let Some(comment) = comment.as_ref() { 139 | if comment.starts_with("IDNA Nontransitional_Processing") { 140 | continue; 141 | } 142 | } 143 | } 144 | let href = test.take_string("href"); 145 | let new_value = test.take_string("new_value"); 146 | let name = format!("{:?}.{} = {:?}", href, attr, new_value); 147 | let mut expected = test.take_key("expected").unwrap(); 148 | 149 | let mut url = Url::parse(&href).unwrap(); 150 | let comment_ref = comment.as_deref(); 151 | passed &= check_invariants(&url, &name, comment_ref); 152 | set(&mut url, attr, &new_value); 153 | 154 | for attr in ATTRIBS { 155 | if let Some(value) = expected.take_key(attr) { 156 | passed &= test_eq_eprint(value.string(), get(&url, attr), &name, comment_ref); 157 | }; 158 | } 159 | 160 | passed &= check_invariants(&url, &name, comment_ref); 161 | } 162 | } 163 | 164 | assert!(passed); 165 | } 166 | 167 | fn check_invariants(url: &Url, name: &str, comment: Option<&str>) -> bool { 168 | let mut passed = true; 169 | if let Err(e) = url.check_invariants() { 170 | passed = false; 171 | eprint_failure( 172 | format!(" failed: invariants checked -> {:?}", e), 173 | name, 174 | comment, 175 | ); 176 | } 177 | 178 | #[cfg(feature = "serde")] 179 | { 180 | let bytes = serde_json::to_vec(url).unwrap(); 181 | let new_url: Url = serde_json::from_slice(&bytes).unwrap(); 182 | passed &= test_eq_eprint(url.to_string(), &new_url.to_string(), name, comment); 183 | } 184 | 185 | passed 186 | } 187 | 188 | trait JsonExt { 189 | fn take_key(&mut self, key: &str) -> Option<Value>; 190 | fn string(self) -> String; 191 | fn maybe_string(self) -> Option<String>; 192 | fn take_string(&mut self, key: &str) -> String; 193 | } 194 | 195 | impl JsonExt for Value { 196 | fn take_key(&mut self, key: &str) -> Option<Value> { 197 | self.as_object_mut().unwrap().remove(key) 198 | } 199 | 200 | fn string(self) -> String { 201 | self.maybe_string().expect("") 202 | } 203 | 204 | fn maybe_string(self) -> Option<String> { 205 | match self { 206 | Value::String(s) => Some(s), 207 | Value::Null => None, 208 | _ => panic!("Not a Value::String or Value::Null"), 209 | } 210 | } 211 | 212 | fn take_string(&mut self, key: &str) -> String { 213 | self.take_key(key).unwrap().string() 214 | } 215 | } 216 | 217 | fn get<'a>(url: &'a Url, attr: &str) -> &'a str { 218 | match attr { 219 | "href" => quirks::href(url), 220 | "protocol" => quirks::protocol(url), 221 | "username" => quirks::username(url), 222 | "password" => quirks::password(url), 223 | "hostname" => quirks::hostname(url), 224 | "host" => quirks::host(url), 225 | "port" => quirks::port(url), 226 | "pathname" => quirks::pathname(url), 227 | "search" => quirks::search(url), 228 | "hash" => quirks::hash(url), 229 | _ => unreachable!(), 230 | } 231 | } 232 | 233 | #[allow(clippy::unit_arg)] 234 | fn set<'a>(url: &'a mut Url, attr: &str, new: &str) { 235 | let _ = match attr { 236 | "protocol" => quirks::set_protocol(url, new), 237 | "username" => quirks::set_username(url, new), 238 | "password" => quirks::set_password(url, new), 239 | "hostname" => quirks::set_hostname(url, new), 240 | "host" => quirks::set_host(url, new), 241 | "port" => quirks::set_port(url, new), 242 | "pathname" => Ok(quirks::set_pathname(url, new)), 243 | "search" => Ok(quirks::set_search(url, new)), 244 | "hash" => Ok(quirks::set_hash(url, new)), 245 | _ => unreachable!(), 246 | }; 247 | } 248 | 249 | fn test_eq_eprint(expected: String, actual: &str,
name: &str, comment: Option<&str>) -> bool { 250 | if expected == actual { 251 | return true; 252 | } 253 | eprint_failure( 254 | format!("expected: {}\n actual: {}", expected, actual), 255 | name, 256 | comment, 257 | ); 258 | false 259 | } 260 | 261 | fn eprint_failure(err: String, name: &str, comment: Option<&str>) { 262 | eprintln!(" test: {}\n{}", name, err); 263 | if let Some(comment) = comment { 264 | eprintln!("{}\n", comment); 265 | } else { 266 | eprintln!(); 267 | } 268 | } 269 | 270 | const ATTRIBS: &[&str] = &[ 271 | "href", "protocol", "username", "password", "host", "hostname", "port", "pathname", "search", 272 | "hash", 273 | ]; 274 | -------------------------------------------------------------------------------- /url/src/path_segments.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | use crate::parser::{self, to_u32, SchemeType}; 10 | use crate::Url; 11 | use std::str; 12 | 13 | /// Exposes methods to manipulate the path of a URL that is not cannot-be-a-base. 14 | /// 15 | /// The path always starts with a `/` slash, and is made of slash-separated segments. 16 | /// There is always at least one segment (which may be the empty string). 17 | /// 18 | /// Examples: 19 | /// 20 | /// ```rust 21 | /// use url::Url; 22 | /// # use std::error::Error; 23 | /// 24 | /// # fn run() -> Result<(), Box<dyn Error>> { 25 | /// let mut url = Url::parse("mailto:me@example.com")?; 26 | /// assert!(url.path_segments_mut().is_err()); 27 | /// 28 | /// let mut url = Url::parse("http://example.net/foo/index.html")?; 29 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 30 | /// .pop().push("img").push("2/100%.png"); 31 | /// assert_eq!(url.as_str(), "http://example.net/foo/img/2%2F100%25.png"); 32 | /// # Ok(()) 33 | /// # } 34 | /// # run().unwrap(); 35 | /// ``` 36 | #[derive(Debug)] 37 | pub struct PathSegmentsMut<'a> { 38 | url: &'a mut Url, 39 | after_first_slash: usize, 40 | after_path: String, 41 | old_after_path_position: u32, 42 | } 43 | 44 | // Not re-exported outside the crate 45 | pub fn new(url: &mut Url) -> PathSegmentsMut<'_> { 46 | let after_path = url.take_after_path(); 47 | let old_after_path_position = to_u32(url.serialization.len()).unwrap(); 48 | // Special URLs always have a non-empty path 49 | if SchemeType::from(url.scheme()).is_special() { 50 | debug_assert!(url.byte_at(url.path_start) == b'/'); 51 | } else { 52 | debug_assert!( 53 | url.serialization.len() == url.path_start as usize 54 | || url.byte_at(url.path_start) == b'/' 55 | ); 56 | } 57 | PathSegmentsMut { 58 | after_first_slash: url.path_start as usize + "/".len(), 59 | url, 60 | old_after_path_position, 61 | after_path, 62 | } 63 | } 64 | 65 | impl<'a> Drop for PathSegmentsMut<'a> { 66 | fn drop(&mut self) { 67 | self.url 68 | .restore_after_path(self.old_after_path_position, &self.after_path) 69 | } 70 | } 71 | 72 | impl<'a> PathSegmentsMut<'a> { 73 | /// Remove all segments in the path, leaving the minimal `url.path() == "/"`. 74 | /// 75 | /// Returns `&mut Self` so that method calls can be chained.
76 | /// 77 | /// Example: 78 | /// 79 | /// ```rust 80 | /// use url::Url; 81 | /// # use std::error::Error; 82 | /// 83 | /// # fn run() -> Result<(), Box<dyn Error>> { 84 | /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; 85 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 86 | /// .clear().push("logout"); 87 | /// assert_eq!(url.as_str(), "https://github.com/logout"); 88 | /// # Ok(()) 89 | /// # } 90 | /// # run().unwrap(); 91 | /// ``` 92 | pub fn clear(&mut self) -> &mut Self { 93 | self.url.serialization.truncate(self.after_first_slash); 94 | self 95 | } 96 | 97 | /// Remove the last segment of this URL’s path if it is empty, 98 | /// except if there was only one segment to begin with. 99 | /// 100 | /// In other words, remove one trailing slash from the path, if any, 101 | /// unless it is also the initial slash (so this does nothing if `url.path() == "/"`). 102 | /// 103 | /// Returns `&mut Self` so that method calls can be chained. 104 | /// 105 | /// Example: 106 | /// 107 | /// ```rust 108 | /// use url::Url; 109 | /// # use std::error::Error; 110 | /// 111 | /// # fn run() -> Result<(), Box<dyn Error>> { 112 | /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; 113 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 114 | /// .push("pulls"); 115 | /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url//pulls"); 116 | /// 117 | /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; 118 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 119 | /// .pop_if_empty().push("pulls"); 120 | /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls"); 121 | /// # Ok(()) 122 | /// # } 123 | /// # run().unwrap(); 124 | /// ``` 125 | pub fn pop_if_empty(&mut self) -> &mut Self { 126 | if self.after_first_slash >= self.url.serialization.len() { 127 | return self; 128 | } 129 | if self.url.serialization[self.after_first_slash..].ends_with('/') { 130 | self.url.serialization.pop(); 131 | } 132 | self 133 | } 134 | 135 | /// Remove the last segment of this URL’s path. 136 | /// 137 | /// If the path only has one segment, make it empty such that `url.path() == "/"`. 138 | /// 139 | /// Returns `&mut Self` so that method calls can be chained. 140 | pub fn pop(&mut self) -> &mut Self { 141 | if self.after_first_slash >= self.url.serialization.len() { 142 | return self; 143 | } 144 | let last_slash = self.url.serialization[self.after_first_slash..] 145 | .rfind('/') 146 | .unwrap_or(0); 147 | self.url 148 | .serialization 149 | .truncate(self.after_first_slash + last_slash); 150 | self 151 | } 152 | 153 | /// Append the given segment at the end of this URL’s path. 154 | /// 155 | /// See the documentation for `.extend()`. 156 | /// 157 | /// Returns `&mut Self` so that method calls can be chained. 158 | pub fn push(&mut self, segment: &str) -> &mut Self { 159 | self.extend(Some(segment)) 160 | } 161 | 162 | /// Append each segment from the given iterator at the end of this URL’s path. 163 | /// 164 | /// Each segment is percent-encoded like in `Url::parse` or `Url::join`, 165 | /// except that `%` and `/` characters are also encoded (to `%25` and `%2F`). 166 | /// This is unlike `Url::parse`, where `%` is left as-is in case some of the input 167 | /// is already percent-encoded, and `/` denotes a path segment separator. 168 | /// 169 | /// Note that, in addition to slashes between new segments, 170 | /// this always adds a slash between the existing path and the new segments, 171 | /// *except* if the existing path is `"/"`.
172 | /// If the previous last segment was empty (that is, if the path had a trailing slash), 173 | /// the path after `.extend()` will contain two consecutive slashes. 174 | /// If that is undesired, call `.pop_if_empty()` first. 175 | /// 176 | /// To obtain a behavior similar to `Url::join`, call `.pop()` unconditionally first. 177 | /// 178 | /// Returns `&mut Self` so that method calls can be chained. 179 | /// 180 | /// Example: 181 | /// 182 | /// ```rust 183 | /// use url::Url; 184 | /// # use std::error::Error; 185 | /// 186 | /// # fn run() -> Result<(), Box<dyn Error>> { 187 | /// let mut url = Url::parse("https://github.com/")?; 188 | /// let org = "servo"; 189 | /// let repo = "rust-url"; 190 | /// let issue_number = "188"; 191 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 192 | /// .extend(&[org, repo, "issues", issue_number]); 193 | /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/issues/188"); 194 | /// # Ok(()) 195 | /// # } 196 | /// # run().unwrap(); 197 | /// ``` 198 | /// 199 | /// In order to make sure that parsing the serialization of a URL gives the same URL, 200 | /// a segment is ignored if it is `"."` or `".."`: 201 | /// 202 | /// ```rust 203 | /// use url::Url; 204 | /// # use std::error::Error; 205 | /// 206 | /// # fn run() -> Result<(), Box<dyn Error>> { 207 | /// let mut url = Url::parse("https://github.com/servo")?; 208 | /// url.path_segments_mut().map_err(|_| "cannot be base")? 209 | /// .extend(&["..", "rust-url", ".", "pulls"]); 210 | /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls"); 211 | /// # Ok(()) 212 | /// # } 213 | /// # run().unwrap(); 214 | /// ``` 215 | pub fn extend<I>(&mut self, segments: I) -> &mut Self 216 | where 217 | I: IntoIterator, 218 | I::Item: AsRef<str>, 219 | { 220 | let scheme_type = SchemeType::from(self.url.scheme()); 221 | let path_start = self.url.path_start as usize; 222 | self.url.mutate(|parser| { 223 | parser.context = parser::Context::PathSegmentSetter; 224 | for segment in segments { 225 | let segment = segment.as_ref(); 226 | if matches!(segment, "." | "..") { 227 | continue; 228 | } 229 | if parser.serialization.len() > path_start + 1 230 | // Non-special URLs' paths might still be empty 231 | || parser.serialization.len() == path_start 232 | { 233 | parser.serialization.push('/'); 234 | } 235 | let mut has_host = true; // FIXME account for this? 236 | parser.parse_path( 237 | scheme_type, 238 | &mut has_host, 239 | path_start, 240 | parser::Input::new(segment), 241 | ); 242 | } 243 | }); 244 | self 245 | } 246 | } 247 | -------------------------------------------------------------------------------- /url/src/quirks.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2016 The rust-url developers. 2 | // 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 | // option. This file may not be copied, modified, or distributed 7 | // except according to those terms. 8 | 9 | //! Getters and setters for URL components implemented per https://url.spec.whatwg.org/#api 10 | //! 11 | //! Unless you need to be interoperable with web browsers, 12 | //! you probably want to use the `Url` methods instead.
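//!
//! A small sketch of how these functions mirror the JavaScript `URL` API, as
//! exercised by this crate's own tests in `tests/data.rs` (the URL value here
//! is illustrative):
//!
//! ```rust
//! use url::{quirks, Url};
//!
//! let mut url = Url::parse("http://example.com/path?q=1").unwrap();
//! assert_eq!(quirks::protocol(&url), "http:");
//! assert_eq!(quirks::search(&url), "?q=1");
//! quirks::set_hash(&mut url, "#top");
//! assert_eq!(quirks::hash(&url), "#top");
//! ```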
13 | 14 | use crate::parser::{default_port, Context, Input, Parser, SchemeType}; 15 | use crate::{Host, ParseError, Position, Url}; 16 | 17 | /// https://url.spec.whatwg.org/#dom-url-domaintoascii 18 | pub fn domain_to_ascii(domain: &str) -> String { 19 | match Host::parse(domain) { 20 | Ok(Host::Domain(domain)) => domain, 21 | _ => String::new(), 22 | } 23 | } 24 | 25 | /// https://url.spec.whatwg.org/#dom-url-domaintounicode 26 | #[cfg(feature = "idna")] 27 | pub fn domain_to_unicode(domain: &str) -> String { 28 | match Host::parse(domain) { 29 | Ok(Host::Domain(ref domain)) => { 30 | let (unicode, _errors) = idna::domain_to_unicode(domain); 31 | unicode 32 | } 33 | _ => String::new(), 34 | } 35 | } 36 | 37 | /// Getter for https://url.spec.whatwg.org/#dom-url-href 38 | pub fn href(url: &Url) -> &str { 39 | url.as_str() 40 | } 41 | 42 | /// Setter for https://url.spec.whatwg.org/#dom-url-href 43 | pub fn set_href(url: &mut Url, value: &str) -> Result<(), ParseError> { 44 | *url = Url::parse(value)?; 45 | Ok(()) 46 | } 47 | 48 | /// Getter for https://url.spec.whatwg.org/#dom-url-origin 49 | pub fn origin(url: &Url) -> String { 50 | url.origin().ascii_serialization() 51 | } 52 | 53 | /// Getter for https://url.spec.whatwg.org/#dom-url-protocol 54 | #[inline] 55 | pub fn protocol(url: &Url) -> &str { 56 | &url.as_str()[..url.scheme().len() + ":".len()] 57 | } 58 | 59 | /// Setter for https://url.spec.whatwg.org/#dom-url-protocol 60 | #[allow(clippy::result_unit_err)] 61 | pub fn set_protocol(url: &mut Url, mut new_protocol: &str) -> Result<(), ()> { 62 | // The scheme state in the spec ignores everything after the first `:`, 63 | // but `set_scheme` errors if there is more. 64 | if let Some(position) = new_protocol.find(':') { 65 | new_protocol = &new_protocol[..position]; 66 | } 67 | url.set_scheme(new_protocol) 68 | } 69 | 70 | /// Getter for https://url.spec.whatwg.org/#dom-url-username 71 | #[inline] 72 | pub fn username(url: &Url) -> &str { 73 | url.username() 74 | } 75 | 76 | /// Setter for https://url.spec.whatwg.org/#dom-url-username 77 | #[allow(clippy::result_unit_err)] 78 | pub fn set_username(url: &mut Url, new_username: &str) -> Result<(), ()> { 79 | url.set_username(new_username) 80 | } 81 | 82 | /// Getter for https://url.spec.whatwg.org/#dom-url-password 83 | #[inline] 84 | pub fn password(url: &Url) -> &str { 85 | url.password().unwrap_or("") 86 | } 87 | 88 | /// Setter for https://url.spec.whatwg.org/#dom-url-password 89 | #[allow(clippy::result_unit_err)] 90 | pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> { 91 | url.set_password(if new_password.is_empty() { 92 | None 93 | } else { 94 | Some(new_password) 95 | }) 96 | } 97 | 98 | /// Getter for https://url.spec.whatwg.org/#dom-url-host 99 | #[inline] 100 | pub fn host(url: &Url) -> &str { 101 | &url[Position::BeforeHost..Position::AfterPort] 102 | } 103 | 104 | /// Setter for https://url.spec.whatwg.org/#dom-url-host 105 | #[allow(clippy::result_unit_err)] 106 | pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { 107 | // If context object’s url’s cannot-be-a-base-URL flag is set, then return. 
108 | if url.cannot_be_a_base() { 109 | return Err(()); 110 | } 111 | // Host parsing rules are strict, 112 | // We don't want to trim the input 113 | let input = Input::no_trim(new_host); 114 | let host; 115 | let opt_port; 116 | { 117 | let scheme = url.scheme(); 118 | let scheme_type = SchemeType::from(scheme); 119 | if scheme_type == SchemeType::File && new_host.is_empty() { 120 | url.set_host_internal(Host::Domain(String::new()), None); 121 | return Ok(()); 122 | } 123 | 124 | if let Ok((h, remaining)) = Parser::parse_host(input, scheme_type) { 125 | host = h; 126 | opt_port = if let Some(remaining) = remaining.split_prefix(':') { 127 | if remaining.is_empty() { 128 | None 129 | } else { 130 | Parser::parse_port(remaining, || default_port(scheme), Context::Setter) 131 | .ok() 132 | .map(|(port, _remaining)| port) 133 | } 134 | } else { 135 | None 136 | }; 137 | } else { 138 | return Err(()); 139 | } 140 | } 141 | // Make sure we won't set an empty host to a url with a username or a port 142 | if host == Host::Domain("".to_string()) 143 | && (!username(url).is_empty() || matches!(opt_port, Some(Some(_))) || url.port().is_some()) 144 | { 145 | return Err(()); 146 | } 147 | url.set_host_internal(host, opt_port); 148 | Ok(()) 149 | } 150 | 151 | /// Getter for https://url.spec.whatwg.org/#dom-url-hostname 152 | #[inline] 153 | pub fn hostname(url: &Url) -> &str { 154 | url.host_str().unwrap_or("") 155 | } 156 | 157 | /// Setter for https://url.spec.whatwg.org/#dom-url-hostname 158 | #[allow(clippy::result_unit_err)] 159 | pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { 160 | if url.cannot_be_a_base() { 161 | return Err(()); 162 | } 163 | // Host parsing rules are strict we don't want to trim the input 164 | let input = Input::no_trim(new_hostname); 165 | let scheme_type = SchemeType::from(url.scheme()); 166 | if scheme_type == SchemeType::File && new_hostname.is_empty() { 167 | url.set_host_internal(Host::Domain(String::new()), None); 168 | return Ok(()); 169 | } 170 | 171 | if let Ok((host, _remaining)) = Parser::parse_host(input, scheme_type) { 172 | if let Host::Domain(h) = &host { 173 | if h.is_empty() { 174 | // Empty host on special not file url 175 | if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile 176 | // Port with an empty host 177 | ||!port(url).is_empty() 178 | // Empty host that includes credentials 179 | || !url.username().is_empty() 180 | || !url.password().unwrap_or("").is_empty() 181 | { 182 | return Err(()); 183 | } 184 | } 185 | } 186 | url.set_host_internal(host, None); 187 | Ok(()) 188 | } else { 189 | Err(()) 190 | } 191 | } 192 | 193 | /// Getter for https://url.spec.whatwg.org/#dom-url-port 194 | #[inline] 195 | pub fn port(url: &Url) -> &str { 196 | &url[Position::BeforePort..Position::AfterPort] 197 | } 198 | 199 | /// Setter for https://url.spec.whatwg.org/#dom-url-port 200 | #[allow(clippy::result_unit_err)] 201 | pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> { 202 | let result; 203 | { 204 | // has_host implies !cannot_be_a_base 205 | let scheme = url.scheme(); 206 | if !url.has_host() || url.host() == Some(Host::Domain("")) || scheme == "file" { 207 | return Err(()); 208 | } 209 | result = Parser::parse_port( 210 | Input::new(new_port), 211 | || default_port(scheme), 212 | Context::Setter, 213 | ) 214 | } 215 | if let Ok((new_port, _remaining)) = result { 216 | url.set_port_internal(new_port); 217 | Ok(()) 218 | } else { 219 | Err(()) 220 | } 221 | } 222 | 223 | /// Getter for 
https://url.spec.whatwg.org/#dom-url-pathname
224 | #[inline]
225 | pub fn pathname(url: &Url) -> &str {
226 |     url.path()
227 | }
228 | 
229 | /// Setter for https://url.spec.whatwg.org/#dom-url-pathname
230 | pub fn set_pathname(url: &mut Url, new_pathname: &str) {
231 |     if url.cannot_be_a_base() {
232 |         return;
233 |     }
234 |     if new_pathname.starts_with('/')
235 |         || (SchemeType::from(url.scheme()).is_special()
236 |             // `\` is a segment delimiter for "special" URLs
237 |             && new_pathname.starts_with('\\'))
238 |     {
239 |         url.set_path(new_pathname)
240 |     } else {
241 |         let mut path_to_set = String::from("/");
242 |         path_to_set.push_str(new_pathname);
243 |         url.set_path(&path_to_set)
244 |     }
245 | }
246 | 
247 | /// Getter for https://url.spec.whatwg.org/#dom-url-search
248 | pub fn search(url: &Url) -> &str {
249 |     trim(&url[Position::AfterPath..Position::AfterQuery])
250 | }
251 | 
252 | /// Setter for https://url.spec.whatwg.org/#dom-url-search
253 | pub fn set_search(url: &mut Url, new_search: &str) {
254 |     url.set_query(match new_search {
255 |         "" => None,
256 |         _ if new_search.starts_with('?') => Some(&new_search[1..]),
257 |         _ => Some(new_search),
258 |     })
259 | }
260 | 
261 | /// Getter for https://url.spec.whatwg.org/#dom-url-hash
262 | pub fn hash(url: &Url) -> &str {
263 |     trim(&url[Position::AfterQuery..])
264 | }
265 | 
266 | /// Setter for https://url.spec.whatwg.org/#dom-url-hash
267 | pub fn set_hash(url: &mut Url, new_hash: &str) {
268 |     url.set_fragment(match new_hash {
269 |         // If the given value is the empty string,
270 |         // then set context object’s url’s fragment to null and return.
271 |         "" => None,
272 |         // Let input be the given value with a single leading U+0023 (#) removed, if any.
273 |         _ if new_hash.starts_with('#') => Some(&new_hash[1..]),
274 |         _ => Some(new_hash),
275 |     })
276 | }
277 | 
278 | fn trim(s: &str) -> &str {
279 |     if s.len() == 1 {
280 |         ""
281 |     } else {
282 |         s
283 |     }
284 | }
285 | 
-------------------------------------------------------------------------------- /idna/src/punycode.rs: --------------------------------------------------------------------------------
1 | // Copyright 2013 The rust-url developers.
2 | //
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6 | // option. This file may not be copied, modified, or distributed
7 | // except according to those terms.
8 | 
9 | //! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.
10 | //!
11 | //! Since Punycode fundamentally works on unicode code points,
12 | //! `encode` and `decode` take and return slices and vectors of `char`.
13 | //! `encode_str` and `decode_to_string` provide convenience wrappers
14 | //! that convert from and to Rust’s UTF-8 based `str` and `String` types.
15 | 
16 | use std::char;
17 | use std::u32;
18 | 
19 | // Bootstring parameters for Punycode
20 | static BASE: u32 = 36;
21 | static T_MIN: u32 = 1;
22 | static T_MAX: u32 = 26;
23 | static SKEW: u32 = 38;
24 | static DAMP: u32 = 700;
25 | static INITIAL_BIAS: u32 = 72;
26 | static INITIAL_N: u32 = 0x80;
27 | static DELIMITER: char = '-';
28 | 
29 | #[inline]
30 | fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 {
31 |     delta /= if first_time { DAMP } else { 2 };
32 |     delta += delta / num_points;
33 |     let mut k = 0;
34 |     while delta > ((BASE - T_MIN) * T_MAX) / 2 {
35 |         delta /= BASE - T_MIN;
36 |         k += BASE;
37 |     }
38 |     k + (((BASE - T_MIN + 1) * delta) / (delta + SKEW))
39 | }
40 | 
41 | /// Convert Punycode to a Unicode `String`.
42 | ///
43 | /// This is a convenience wrapper around `decode`.
44 | #[inline]
45 | pub fn decode_to_string(input: &str) -> Option<String> {
46 |     decode(input).map(|chars| chars.into_iter().collect())
47 | }
48 | 
49 | /// Convert Punycode to Unicode.
50 | ///
51 | /// Return None on malformed input or overflow.
52 | /// Overflow can only happen on inputs that take more than
53 | /// 63 encoded bytes, the DNS limit on domain name labels.
54 | pub fn decode(input: &str) -> Option<Vec<char>> {
55 |     Some(Decoder::default().decode(input).ok()?.collect())
56 | }
57 | 
58 | #[derive(Default)]
59 | pub(crate) struct Decoder {
60 |     insertions: Vec<(usize, char)>,
61 | }
62 | 
63 | impl Decoder {
64 |     /// Split the input iterator and return a Vec with insertions of encoded characters
65 |     pub(crate) fn decode<'a>(&'a mut self, input: &'a str) -> Result<Decode<'a>, ()> {
66 |         self.insertions.clear();
67 |         // Handle "basic" (ASCII) code points.
68 |         // They are encoded as-is before the last delimiter, if any.
69 |         let (base, input) = match input.rfind(DELIMITER) {
70 |             None => ("", input),
71 |             Some(position) => (
72 |                 &input[..position],
73 |                 if position > 0 {
74 |                     &input[position + 1..]
75 |                 } else {
76 |                     input
77 |                 },
78 |             ),
79 |         };
80 | 
81 |         if !base.is_ascii() {
82 |             return Err(());
83 |         }
84 | 
85 |         let base_len = base.len();
86 |         let mut length = base_len as u32;
87 |         let mut code_point = INITIAL_N;
88 |         let mut bias = INITIAL_BIAS;
89 |         let mut i = 0;
90 |         let mut iter = input.bytes();
91 |         loop {
92 |             let previous_i = i;
93 |             let mut weight = 1;
94 |             let mut k = BASE;
95 |             let mut byte = match iter.next() {
96 |                 None => break,
97 |                 Some(byte) => byte,
98 |             };
99 | 
100 |             // Decode a generalized variable-length integer into delta,
101 |             // which gets added to i.
102 |             loop {
103 |                 let digit = match byte {
104 |                     byte @ b'0'..=b'9' => byte - b'0' + 26,
105 |                     byte @ b'A'..=b'Z' => byte - b'A',
106 |                     byte @ b'a'..=b'z' => byte - b'a',
107 |                     _ => return Err(()),
108 |                 } as u32;
109 |                 if digit > (u32::MAX - i) / weight {
110 |                     return Err(()); // Overflow
111 |                 }
112 |                 i += digit * weight;
113 |                 let t = if k <= bias {
114 |                     T_MIN
115 |                 } else if k >= bias + T_MAX {
116 |                     T_MAX
117 |                 } else {
118 |                     k - bias
119 |                 };
120 |                 if digit < t {
121 |                     break;
122 |                 }
123 |                 if weight > u32::MAX / (BASE - t) {
124 |                     return Err(()); // Overflow
125 |                 }
126 |                 weight *= BASE - t;
127 |                 k += BASE;
128 |                 byte = match iter.next() {
129 |                     None => return Err(()), // End of input before the end of this delta
130 |                     Some(byte) => byte,
131 |                 };
132 |             }
133 | 
134 |             bias = adapt(i - previous_i, length + 1, previous_i == 0);
135 |             if i / (length + 1) > u32::MAX - code_point {
136 |                 return Err(()); // Overflow
137 |             }
138 | 
139 |             // i was supposed to wrap around from length+1 to 0,
140 |             // incrementing code_point each time.
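// (Worked example: with length == 3 and i == 9, i wraps past the end
// twice, so code_point advances by 9 / 4 == 2 and the final insertion
// index is 9 % 4 == 1.)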
141 |             code_point += i / (length + 1);
142 |             i %= length + 1;
143 |             let c = match char::from_u32(code_point) {
144 |                 Some(c) => c,
145 |                 None => return Err(()),
146 |             };
147 | 
148 |             // Move earlier insertions farther out in the string
149 |             for (idx, _) in &mut self.insertions {
150 |                 if *idx >= i as usize {
151 |                     *idx += 1;
152 |                 }
153 |             }
154 |             self.insertions.push((i as usize, c));
155 |             length += 1;
156 |             i += 1;
157 |         }
158 | 
159 |         self.insertions.sort_by_key(|(i, _)| *i);
160 |         Ok(Decode {
161 |             base: base.chars(),
162 |             insertions: &self.insertions,
163 |             inserted: 0,
164 |             position: 0,
165 |             len: base_len + self.insertions.len(),
166 |         })
167 |     }
168 | }
169 | 
170 | pub(crate) struct Decode<'a> {
171 |     base: std::str::Chars<'a>,
172 |     pub(crate) insertions: &'a [(usize, char)],
173 |     inserted: usize,
174 |     position: usize,
175 |     len: usize,
176 | }
177 | 
178 | impl<'a> Iterator for Decode<'a> {
179 |     type Item = char;
180 | 
181 |     fn next(&mut self) -> Option<Self::Item> {
182 |         loop {
183 |             match self.insertions.get(self.inserted) {
184 |                 Some((pos, c)) if *pos == self.position => {
185 |                     self.inserted += 1;
186 |                     self.position += 1;
187 |                     return Some(*c);
188 |                 }
189 |                 _ => {}
190 |             }
191 |             if let Some(c) = self.base.next() {
192 |                 self.position += 1;
193 |                 return Some(c);
194 |             } else if self.inserted >= self.insertions.len() {
195 |                 return None;
196 |             }
197 |         }
198 |     }
199 | 
200 |     fn size_hint(&self) -> (usize, Option<usize>) {
201 |         let len = self.len - self.position;
202 |         (len, Some(len))
203 |     }
204 | }
205 | 
206 | impl<'a> ExactSizeIterator for Decode<'a> {
207 |     fn len(&self) -> usize {
208 |         self.len - self.position
209 |     }
210 | }
211 | 
212 | /// Convert a Unicode `str` to Punycode.
213 | ///
214 | /// This is a convenience wrapper around `encode`.
215 | #[inline]
216 | pub fn encode_str(input: &str) -> Option<String> {
217 |     let mut buf = String::with_capacity(input.len());
218 |     encode_into(input.chars(), &mut buf).ok().map(|()| buf)
219 | }
220 | 
221 | /// Convert Unicode to Punycode.
222 | ///
223 | /// Return None on overflow, which can only happen on inputs that would take more than
224 | /// 63 encoded bytes, the DNS limit on domain name labels.
225 | pub fn encode(input: &[char]) -> Option<String> {
226 |     let mut buf = String::with_capacity(input.len());
227 |     encode_into(input.iter().copied(), &mut buf)
228 |         .ok()
229 |         .map(|()| buf)
230 | }
231 | 
232 | pub(crate) fn encode_into<I>(input: I, output: &mut String) -> Result<(), ()>
233 | where
234 |     I: Iterator<Item = char> + Clone,
235 | {
236 |     // Handle "basic" (ASCII) code points. They are encoded as-is.
237 |     let (mut input_length, mut basic_length) = (0, 0);
238 |     for c in input.clone() {
239 |         input_length += 1;
240 |         if c.is_ascii() {
241 |             output.push(c);
242 |             basic_length += 1;
243 |         }
244 |     }
245 | 
246 |     if basic_length > 0 {
247 |         output.push('-')
248 |     }
249 |     let mut code_point = INITIAL_N;
250 |     let mut delta = 0;
251 |     let mut bias = INITIAL_BIAS;
252 |     let mut processed = basic_length;
253 |     while processed < input_length {
254 |         // All code points < code_point have been handled already.
255 |         // Find the next larger one.
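// (The `unwrap()` below cannot fail: `processed < input_length` guarantees
// at least one code point is still unprocessed, and by the invariant above
// every unprocessed code point is >= `code_point`.)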
256 |         let min_code_point = input
257 |             .clone()
258 |             .map(|c| c as u32)
259 |             .filter(|&c| c >= code_point)
260 |             .min()
261 |             .unwrap();
262 |         if min_code_point - code_point > (u32::MAX - delta) / (processed + 1) {
263 |             return Err(()); // Overflow
264 |         }
265 |         // Increase delta to advance the decoder’s <code_point, i> state to <min_code_point, 0>
266 |         delta += (min_code_point - code_point) * (processed + 1);
267 |         code_point = min_code_point;
268 |         for c in input.clone() {
269 |             let c = c as u32;
270 |             if c < code_point {
271 |                 delta += 1;
272 |                 if delta == 0 {
273 |                     return Err(()); // Overflow
274 |                 }
275 |             }
276 |             if c == code_point {
277 |                 // Represent delta as a generalized variable-length integer:
278 |                 let mut q = delta;
279 |                 let mut k = BASE;
280 |                 loop {
281 |                     let t = if k <= bias {
282 |                         T_MIN
283 |                     } else if k >= bias + T_MAX {
284 |                         T_MAX
285 |                     } else {
286 |                         k - bias
287 |                     };
288 |                     if q < t {
289 |                         break;
290 |                     }
291 |                     let value = t + ((q - t) % (BASE - t));
292 |                     output.push(value_to_digit(value));
293 |                     q = (q - t) / (BASE - t);
294 |                     k += BASE;
295 |                 }
296 |                 output.push(value_to_digit(q));
297 |                 bias = adapt(delta, processed + 1, processed == basic_length);
298 |                 delta = 0;
299 |                 processed += 1;
300 |             }
301 |         }
302 |         delta += 1;
303 |         code_point += 1;
304 |     }
305 |     Ok(())
306 | }
307 | 
308 | #[inline]
309 | fn value_to_digit(value: u32) -> char {
310 |     match value {
311 |         0..=25 => (value as u8 + b'a') as char,       // a..z
312 |         26..=35 => (value as u8 - 26 + b'0') as char, // 0..9
313 |         _ => panic!(),
314 |     }
315 | }
316 | 
-------------------------------------------------------------------------------- /LICENSE-APACHE: --------------------------------------------------------------------------------
1 |                               Apache License
2 |                         Version 2.0, January 2004
3 |                      http://www.apache.org/licenses/
4 | 
5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 |    1. Definitions.
8 | 
9 |       "License" shall mean the terms and conditions for use, reproduction,
10 |       and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 |       "Licensor" shall mean the copyright owner or entity authorized by
13 |       the copyright owner that is granting the License.
14 | 
15 |       "Legal Entity" shall mean the union of the acting entity and all
16 |       other entities that control, are controlled by, or are under common
17 |       control with that entity. For the purposes of this definition,
18 |       "control" means (i) the power, direct or indirect, to cause the
19 |       direction or management of such entity, whether by contract or
20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 |       outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 |       "You" (or "Your") shall mean an individual or Legal Entity
24 |       exercising permissions granted by this License.
25 | 
26 |       "Source" form shall mean the preferred form for making modifications,
27 |       including but not limited to software source code, documentation
28 |       source, and configuration files.
29 | 
30 |       "Object" form shall mean any form resulting from mechanical
31 |       transformation or translation of a Source form, including but
32 |       not limited to compiled object code, generated documentation,
33 |       and conversions to other media types.
34 | 
35 |       "Work" shall mean the work of authorship, whether in Source or
36 |       Object form, made available under the License, as indicated by a
37 |       copyright notice that is included in or attached to the work
38 |       (an example is provided in the Appendix below).
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /form_urlencoded/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
-------------------------------------------------------------------------------- /data-url/src/lib.rs: --------------------------------------------------------------------------------
1 | //! Processing of `data:` URLs according to the Fetch Standard:
2 | //! <https://fetch.spec.whatwg.org/#data-urls>
3 | //! but starting from a string rather than a parsed URL to avoid extra copies.
4 | //!
5 | //! ```rust
6 | //! use data_url::{DataUrl, mime};
7 | //!
8 | //! let url = DataUrl::process("data:,Hello%20World!").unwrap();
9 | //! let (body, fragment) = url.decode_to_vec().unwrap();
10 | //!
11 | //! assert_eq!(url.mime_type().type_, "text");
12 | //! assert_eq!(url.mime_type().subtype, "plain");
13 | //! assert_eq!(url.mime_type().get_parameter("charset"), Some("US-ASCII"));
14 | //! assert_eq!(body, b"Hello World!");
15 | //! assert!(fragment.is_none());
16 | //! ```
17 | 
18 | macro_rules! require {
19 |     ($condition: expr) => {
20 |         if !$condition {
21 |             return None;
22 |         }
23 |     };
24 | }
25 | 
26 | pub mod forgiving_base64;
27 | pub mod mime;
28 | 
29 | pub struct DataUrl<'a> {
30 |     mime_type: mime::Mime,
31 |     base64: bool,
32 |     encoded_body_plus_fragment: &'a str,
33 | }
34 | 
35 | #[derive(Debug)]
36 | pub enum DataUrlError {
37 |     NotADataUrl,
38 |     NoComma,
39 | }
40 | 
41 | impl<'a> DataUrl<'a> {
42 |     /// <https://fetch.spec.whatwg.org/#data-url-processor>
43 |     /// but starting from a string rather than a parsed `Url`, to avoid extra string copies.
44 |     pub fn process(input: &'a str) -> Result<Self, DataUrlError> {
45 |         use crate::DataUrlError::*;
46 | 
47 |         let after_colon = pretend_parse_data_url(input).ok_or(NotADataUrl)?;
48 | 
49 |         let (from_colon_to_comma, encoded_body_plus_fragment) =
50 |             find_comma_before_fragment(after_colon).ok_or(NoComma)?;
51 | 
52 |         let (mime_type, base64) = parse_header(from_colon_to_comma);
53 | 
54 |         Ok(DataUrl {
55 |             mime_type,
56 |             base64,
57 |             encoded_body_plus_fragment,
58 |         })
59 |     }
60 | 
61 |     pub fn mime_type(&self) -> &mime::Mime {
62 |         &self.mime_type
63 |     }
64 | 
65 |     /// Streaming-decode the data URL’s body to `write_body_bytes`,
66 |     /// and return the URL’s fragment identifier if it has one.
67 |     pub fn decode<F, E>(
68 |         &self,
69 |         write_body_bytes: F,
70 |     ) -> Result<Option<FragmentIdentifier<'a>>, forgiving_base64::DecodeError<E>>
71 |     where
72 |         F: FnMut(&[u8]) -> Result<(), E>,
73 |     {
74 |         if self.base64 {
75 |             decode_with_base64(self.encoded_body_plus_fragment, write_body_bytes)
76 |         } else {
77 |             decode_without_base64(self.encoded_body_plus_fragment, write_body_bytes)
78 |                 .map_err(forgiving_base64::DecodeError::WriteError)
79 |         }
80 |     }
81 | 
82 |     /// Return the decoded body, and the URL’s fragment identifier if it has one.
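    ///
    /// A minimal usage sketch (the `data:` URL below is illustrative):
    ///
    /// ```rust
    /// use data_url::DataUrl;
    ///
    /// let url = DataUrl::process("data:;base64,SGVsbG8=#frag").unwrap();
    /// let (body, fragment) = url.decode_to_vec().unwrap();
    /// assert_eq!(body, b"Hello");
    /// assert_eq!(fragment.unwrap().to_percent_encoded(), "frag");
    /// ```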
83 |     pub fn decode_to_vec(
84 |         &self,
85 |     ) -> Result<(Vec<u8>, Option<FragmentIdentifier<'a>>), forgiving_base64::InvalidBase64> {
86 |         let mut body = Vec::new();
87 |         let fragment = self.decode(|bytes| {
88 |             body.extend_from_slice(bytes);
89 |             Ok(())
90 |         })?;
91 |         Ok((body, fragment))
92 |     }
93 | }
94 | 
95 | /// The URL’s fragment identifier (after `#`)
96 | pub struct FragmentIdentifier<'a>(&'a str);
97 | 
98 | impl<'a> FragmentIdentifier<'a> {
99 |     /// Like in a parsed URL
100 |     pub fn to_percent_encoded(&self) -> String {
101 |         let mut string = String::new();
102 |         for byte in self.0.bytes() {
103 |             match byte {
104 |                 // Ignore ASCII tabs or newlines like the URL parser would
105 |                 b'\t' | b'\n' | b'\r' => continue,
106 |                 // https://url.spec.whatwg.org/#fragment-percent-encode-set
107 |                 b'\0'..=b' ' | b'"' | b'<' | b'>' | b'`' | b'\x7F'..=b'\xFF' => {
108 |                     percent_encode(byte, &mut string)
109 |                 }
110 |                 // Printable ASCII
111 |                 _ => string.push(byte as char),
112 |             }
113 |         }
114 |         string
115 |     }
116 | }
117 | 
118 | /// Similar to <https://url.spec.whatwg.org/#concept-basic-url-parser>
119 | /// followed by <https://url.spec.whatwg.org/#concept-url-serializer>
120 | ///
121 | /// * `None`: not a data URL.
122 | ///
123 | /// * `Some(s)`: sort of the result of serialization, except:
124 | ///
125 | ///   - `data:` prefix removed
126 | ///   - The fragment is included
127 | ///   - Other components are **not** UTF-8 percent-encoded
128 | ///   - ASCII tabs and newlines in the middle are **not** removed
129 | fn pretend_parse_data_url(input: &str) -> Option<&str> {
130 |     // Trim C0 control or space
131 |     let left_trimmed = input.trim_start_matches(|ch| ch <= ' ');
132 | 
133 |     let mut bytes = left_trimmed.bytes();
134 |     {
135 |         // Ignore ASCII tabs or newlines like the URL parser would
136 |         let mut iter = bytes
137 |             .by_ref()
138 |             .filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r'));
139 |         require!(iter.next()?.to_ascii_lowercase() == b'd');
140 |         require!(iter.next()?.to_ascii_lowercase() == b'a');
141 |         require!(iter.next()?.to_ascii_lowercase() == b't');
142 |         require!(iter.next()?.to_ascii_lowercase() == b'a');
143 |         require!(iter.next()?
== b':'); 144 | } 145 | let bytes_consumed = left_trimmed.len() - bytes.len(); 146 | let after_colon = &left_trimmed[bytes_consumed..]; 147 | 148 | // Trim C0 control or space 149 | Some(after_colon.trim_end_matches(|ch| ch <= ' ')) 150 | } 151 | 152 | fn find_comma_before_fragment(after_colon: &str) -> Option<(&str, &str)> { 153 | for (i, byte) in after_colon.bytes().enumerate() { 154 | if byte == b',' { 155 | return Some((&after_colon[..i], &after_colon[i + 1..])); 156 | } 157 | if byte == b'#' { 158 | break; 159 | } 160 | } 161 | None 162 | } 163 | 164 | fn parse_header(from_colon_to_comma: &str) -> (mime::Mime, bool) { 165 | // "Strip leading and trailing ASCII whitespace" 166 | // \t, \n, and \r would have been filtered by the URL parser 167 | // \f percent-encoded by the URL parser 168 | // space is the only remaining ASCII whitespace 169 | let trimmed = from_colon_to_comma.trim_matches(|c| matches!(c, ' ' | '\t' | '\n' | '\r')); 170 | 171 | let without_base64_suffix = remove_base64_suffix(trimmed); 172 | let base64 = without_base64_suffix.is_some(); 173 | let mime_type = without_base64_suffix.unwrap_or(trimmed); 174 | 175 | let mut string = String::new(); 176 | if mime_type.starts_with(';') { 177 | string.push_str("text/plain") 178 | } 179 | let mut in_query = false; 180 | for byte in mime_type.bytes() { 181 | match byte { 182 | // Ignore ASCII tabs or newlines like the URL parser would 183 | b'\t' | b'\n' | b'\r' => continue, 184 | 185 | // https://url.spec.whatwg.org/#c0-control-percent-encode-set 186 | b'\0'..=b'\x1F' | b'\x7F'..=b'\xFF' => percent_encode(byte, &mut string), 187 | 188 | // Bytes other than the C0 percent-encode set that are percent-encoded 189 | // by the URL parser in the query state. 190 | // '#' is also in that list but cannot occur here 191 | // since it indicates the start of the URL’s fragment. 192 | b' ' | b'"' | b'<' | b'>' if in_query => percent_encode(byte, &mut string), 193 | 194 | b'?' => { 195 | in_query = true; 196 | string.push('?') 197 | } 198 | 199 | // Printable ASCII 200 | _ => string.push(byte as char), 201 | } 202 | } 203 | 204 | // FIXME: does Mime::from_str match the MIME Sniffing Standard’s parsing algorithm? 205 | // 206 | let mime_type = string.parse().unwrap_or_else(|_| mime::Mime { 207 | type_: String::from("text"), 208 | subtype: String::from("plain"), 209 | parameters: vec![(String::from("charset"), String::from("US-ASCII"))], 210 | }); 211 | 212 | (mime_type, base64) 213 | } 214 | 215 | /// None: no base64 suffix 216 | #[allow(clippy::skip_while_next)] 217 | fn remove_base64_suffix(s: &str) -> Option<&str> { 218 | let mut bytes = s.bytes(); 219 | { 220 | // Ignore ASCII tabs or newlines like the URL parser would 221 | let iter = bytes 222 | .by_ref() 223 | .filter(|&byte| !matches!(byte, b'\t' | b'\n' | b'\r')); 224 | 225 | // Search from the end 226 | let mut iter = iter.rev(); 227 | 228 | require!(iter.next()? == b'4'); 229 | require!(iter.next()? == b'6'); 230 | require!(iter.next()?.to_ascii_lowercase() == b'e'); 231 | require!(iter.next()?.to_ascii_lowercase() == b's'); 232 | require!(iter.next()?.to_ascii_lowercase() == b'a'); 233 | require!(iter.next()?.to_ascii_lowercase() == b'b'); 234 | require!(iter.skip_while(|&byte| byte == b' ').next()? 
== b';');
235 |     }
236 |     Some(&s[..bytes.len()])
237 | }
238 | 
239 | fn percent_encode(byte: u8, string: &mut String) {
240 |     const HEX_UPPER: [u8; 16] = *b"0123456789ABCDEF";
241 |     string.push('%');
242 |     string.push(HEX_UPPER[(byte >> 4) as usize] as char);
243 |     string.push(HEX_UPPER[(byte & 0x0f) as usize] as char);
244 | }
245 | 
246 | /// This is <https://url.spec.whatwg.org/#string-percent-decode> while also:
247 | ///
248 | /// * Ignoring ASCII tab or newlines
249 | /// * Stopping at the first '#' (which indicates the start of the fragment)
250 | ///
251 | /// Anything that would have been UTF-8 percent-encoded by the URL parser
252 | /// would be percent-decoded here.
253 | /// We skip that round-trip and pass it through unchanged.
254 | fn decode_without_base64<F, E>(
255 |     encoded_body_plus_fragment: &str,
256 |     mut write_bytes: F,
257 | ) -> Result<Option<FragmentIdentifier<'_>>, E>
258 | where
259 |     F: FnMut(&[u8]) -> Result<(), E>,
260 | {
261 |     let bytes = encoded_body_plus_fragment.as_bytes();
262 |     let mut slice_start = 0;
263 |     for (i, &byte) in bytes.iter().enumerate() {
264 |         // We only need to look for 5 different "special" byte values.
265 |         // For everything else we make slices as large as possible, borrowing the input,
266 |         // in order to make fewer write_all() calls.
267 |         if matches!(byte, b'%' | b'#' | b'\t' | b'\n' | b'\r') {
268 |             // Write everything (if anything) "non-special" we’ve accumulated
269 |             // before this special byte
270 |             if i > slice_start {
271 |                 write_bytes(&bytes[slice_start..i])?;
272 |             }
273 |             // Then deal with the special byte.
274 |             match byte {
275 |                 b'%' => {
276 |                     let l = bytes.get(i + 2).and_then(|&b| (b as char).to_digit(16));
277 |                     let h = bytes.get(i + 1).and_then(|&b| (b as char).to_digit(16));
278 |                     if let (Some(h), Some(l)) = (h, l) {
279 |                         // '%' followed by two ASCII hex digits
280 |                         let one_byte = h as u8 * 0x10 + l as u8;
281 |                         write_bytes(&[one_byte])?;
282 |                         slice_start = i + 3;
283 |                     } else {
284 |                         // Do nothing. Leave slice_start unchanged.
285 |                         // The % sign will be part of the next slice.
286 |                     }
287 |                 }
288 | 
289 |                 b'#' => {
290 |                     let fragment_start = i + 1;
291 |                     let fragment = &encoded_body_plus_fragment[fragment_start..];
292 |                     return Ok(Some(FragmentIdentifier(fragment)));
293 |                 }
294 | 
295 |                 // Skip over '\t' | '\n' | '\r'
296 |                 _ => slice_start = i + 1,
297 |             }
298 |         }
299 |     }
300 |     write_bytes(&bytes[slice_start..])?;
301 |     Ok(None)
302 | }
303 | 
304 | /// `decode_without_base64()` composed with
305 | /// <https://infra.spec.whatwg.org/#isomorphic-decode> composed with
306 | /// <https://infra.spec.whatwg.org/#forgiving-base64-decode>.
307 | fn decode_with_base64<F, E>(
308 |     encoded_body_plus_fragment: &str,
309 |     write_bytes: F,
310 | ) -> Result<Option<FragmentIdentifier<'_>>, forgiving_base64::DecodeError<E>>
311 | where
312 |     F: FnMut(&[u8]) -> Result<(), E>,
313 | {
314 |     let mut decoder = forgiving_base64::Decoder::new(write_bytes);
315 |     let fragment = decode_without_base64(encoded_body_plus_fragment, |bytes| decoder.feed(bytes))?;
316 |     decoder.finish()?;
317 |     Ok(fragment)
318 | }
319 | 
-------------------------------------------------------------------------------- /data-url/tests/mime-types.json: --------------------------------------------------------------------------------
1 | [
2 |   "Basics",
3 |   {
4 |     "input": "text/html;charset=gbk",
5 |     "output": "text/html;charset=gbk",
6 |     "navigable": true,
7 |     "encoding": "GBK"
8 |   },
9 |   {
10 |     "input": "TEXT/HTML;CHARSET=GBK",
11 |     "output": "text/html;charset=GBK",
12 |     "navigable": true,
13 |     "encoding": "GBK"
14 |   },
15 |   "Legacy comment syntax",
16 |   {
17 |     "input": "text/html;charset=gbk(",
18 |     "output": "text/html;charset=\"gbk(\"",
19 |     "navigable": true,
20 |     "encoding": null
21 |   },
22 |   {
23 |     "input": "text/html;x=(;charset=gbk",
24 |     "output": "text/html;x=\"(\";charset=gbk",
25 |     "navigable": true,
26 |     "encoding": "GBK"
27 |   },
28 |   "Duplicate parameter",
29 |   {
30 |     "input": "text/html;charset=gbk;charset=windows-1255",
31 |     "output": "text/html;charset=gbk",
32 |     "navigable": true,
33 |     "encoding": "GBK"
34 |   },
35 |   {
36 |     "input": "text/html;charset=();charset=GBK",
37 |     "output": "text/html;charset=\"()\"",
38 |     "navigable": true,
39 |     "encoding": null
40 |   },
41 |   "Spaces",
42 |   {
43 |     "input": "text/html;charset =gbk",
44 |     "output": "text/html",
45 |     "navigable": true,
46 |     "encoding": null
47 |   },
48 |   {
49 |     "input": "text/html ;charset=gbk",
50 |     "output": "text/html;charset=gbk",
51 |     "navigable": true,
52 |     "encoding": "GBK"
53 |   },
54 |   {
55 |     "input": "text/html; charset=gbk",
56 |     "output": "text/html;charset=gbk",
57 |     "navigable": true,
58 |     "encoding": "GBK"
59 |   },
60 |   {
61 |     "input": "text/html;charset= gbk",
62 |     "output": "text/html;charset=\" gbk\"",
63 |     "navigable": true,
64 |     "encoding": "GBK"
65 |   },
66 |   {
67 |     "input": "text/html;charset= \"gbk\"",
68 |     "output": "text/html;charset=\" \\\"gbk\\\"\"",
69 |     "navigable": true,
70 |     "encoding": null
71 |   },
72 |   "0x0B and 0x0C",
73 |   {
74 |     "input": "text/html;charset=\u000Bgbk",
75 |     "output": "text/html",
76 |     "navigable": true,
77 |     "encoding": null
78 |   },
79 |   {
80 |     "input": "text/html;charset=\u000Cgbk",
81 |     "output": "text/html",
82 |     "navigable": true,
83 |     "encoding": null
84 |   },
85 |   {
86 |     "input": "text/html;\u000Bcharset=gbk",
87 |     "output": "text/html",
88 |     "navigable": true,
89 |     "encoding": null
90 |   },
91 |   {
92 |     "input": "text/html;\u000Ccharset=gbk",
93 |     "output": "text/html",
94 |     "navigable": true,
95 |     "encoding": null
96 |   },
97 |   "Single quotes are a token, not a delimiter",
98 |   {
99 |     "input": "text/html;charset='gbk'",
100 |     "output": "text/html;charset='gbk'",
101 |     "navigable": true,
102 |     "encoding": null
103 |   },
104 |   {
105 |     "input": "text/html;charset='gbk",
106 |     "output": "text/html;charset='gbk",
107 |     "navigable": true,
108 |     "encoding": null
109 |   },
110 |   {
111 |     "input": "text/html;charset=gbk'",
112 |     "output": "text/html;charset=gbk'",
113 |     "navigable": true,
114 |     "encoding": null
115 |   },
116 |   {
117 |     "input": "text/html;charset=';charset=GBK",
118 |     "output": "text/html;charset='",
119 |     "navigable": true,
120 |     "encoding": null
121 |   },
122 |   "Invalid
parameters", 123 | { 124 | "input": "text/html;test;charset=gbk", 125 | "output": "text/html;charset=gbk", 126 | "navigable": true, 127 | "encoding": "GBK" 128 | }, 129 | { 130 | "input": "text/html;test=;charset=gbk", 131 | "output": "text/html;charset=gbk", 132 | "navigable": true, 133 | "encoding": "GBK" 134 | }, 135 | { 136 | "input": "text/html;';charset=gbk", 137 | "output": "text/html;charset=gbk", 138 | "navigable": true, 139 | "encoding": "GBK" 140 | }, 141 | { 142 | "input": "text/html;\";charset=gbk", 143 | "output": "text/html;charset=gbk", 144 | "navigable": true, 145 | "encoding": "GBK" 146 | }, 147 | { 148 | "input": "text/html ; ; charset=gbk", 149 | "output": "text/html;charset=gbk", 150 | "navigable": true, 151 | "encoding": "GBK" 152 | }, 153 | { 154 | "input": "text/html;;;;charset=gbk", 155 | "output": "text/html;charset=gbk", 156 | "navigable": true, 157 | "encoding": "GBK" 158 | }, 159 | { 160 | "input": "text/html;charset= \"\u007F;charset=GBK", 161 | "output": "text/html;charset=GBK", 162 | "navigable": true, 163 | "encoding": "GBK" 164 | }, 165 | { 166 | "input": "text/html;charset=\"\u007F;charset=foo\";charset=GBK", 167 | "output": "text/html;charset=GBK", 168 | "navigable": true, 169 | "encoding": "GBK" 170 | }, 171 | "Double quotes", 172 | { 173 | "input": "text/html;charset=\"gbk\"", 174 | "output": "text/html;charset=gbk", 175 | "navigable": true, 176 | "encoding": "GBK" 177 | }, 178 | { 179 | "input": "text/html;charset=\"gbk", 180 | "output": "text/html;charset=gbk", 181 | "navigable": true, 182 | "encoding": "GBK" 183 | }, 184 | { 185 | "input": "text/html;charset=gbk\"", 186 | "output": "text/html;charset=\"gbk\\\"\"", 187 | "navigable": true, 188 | "encoding": null 189 | }, 190 | { 191 | "input": "text/html;charset=\" gbk\"", 192 | "output": "text/html;charset=\" gbk\"", 193 | "navigable": true, 194 | "encoding": "GBK" 195 | }, 196 | { 197 | "input": "text/html;charset=\"gbk \"", 198 | "output": "text/html;charset=\"gbk \"", 199 | "navigable": true, 200 | "encoding": "GBK" 201 | }, 202 | { 203 | "input": "text/html;charset=\"\\ gbk\"", 204 | "output": "text/html;charset=\" gbk\"", 205 | "navigable": true, 206 | "encoding": "GBK" 207 | }, 208 | { 209 | "input": "text/html;charset=\"\\g\\b\\k\"", 210 | "output": "text/html;charset=gbk", 211 | "navigable": true, 212 | "encoding": "GBK" 213 | }, 214 | { 215 | "input": "text/html;charset=\"gbk\"x", 216 | "output": "text/html;charset=gbk", 217 | "navigable": true, 218 | "encoding": "GBK" 219 | }, 220 | { 221 | "input": "text/html;charset=\"\";charset=GBK", 222 | "output": "text/html;charset=\"\"", 223 | "navigable": true, 224 | "encoding": null 225 | }, 226 | { 227 | "input": "text/html;charset=\";charset=GBK", 228 | "output": "text/html;charset=\";charset=GBK\"", 229 | "navigable": true, 230 | "encoding": null 231 | }, 232 | "Unexpected code points", 233 | { 234 | "input": "text/html;charset={gbk}", 235 | "output": "text/html;charset=\"{gbk}\"", 236 | "navigable": true, 237 | "encoding": null 238 | }, 239 | "Parameter name longer than 127", 240 | { 241 | "input": "text/html;0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789=x;charset=gbk", 242 | "output": "text/html;0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789=x;charset=gbk", 243 | "navigable": true, 244 | "encoding": "GBK" 245 | }, 246 | "type/subtype longer than 127", 247 | { 248 | "input": 
"0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789/0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789", 249 | "output": "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789/0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" 250 | }, 251 | "Invalid names", 252 | { 253 | "input": "text/html;a]=bar;b[=bar;c=bar", 254 | "output": "text/html;c=bar" 255 | }, 256 | "Semicolons in value", 257 | { 258 | "input": "text/html;valid=\";\";foo=bar", 259 | "output": "text/html;valid=\";\";foo=bar" 260 | }, 261 | { 262 | "input": "text/html;in]valid=\";asd=foo\";foo=bar", 263 | "output": "text/html;foo=bar" 264 | }, 265 | "Valid", 266 | { 267 | "input": "!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz/!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz;!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz=!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", 268 | "output": "!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz/!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz;!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz=!#$%&'*+-.^_`|~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" 269 | }, 270 | { 271 | "input": "x/x;x=\"\t !\\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A0\u00A1\u00A2\u00A3\u00A4\u00A5\u00A6\u00A7\u00A8\u00A9\u00AA\u00AB\u00AC\u00AD\u00AE\u00AF\u00B0\u00B1\u00B2\u00B3\u00B4\u00B5\u00B6\u00B7\u00B8\u00B9\u00BA\u00BB\u00BC\u00BD\u00BE\u00BF\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5\u00C6\u00C7\u00C8\u00C9\u00CA\u00CB\u00CC\u00CD\u00CE\u00CF\u00D0\u00D1\u00D2\u00D3\u00D4\u00D5\u00D6\u00D7\u00D8\u00D9\u00DA\u00DB\u00DC\u00DD\u00DE\u00DF\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5\u00E6\u00E7\u00E8\u00E9\u00EA\u00EB\u00EC\u00ED\u00EE\u00EF\u00F0\u00F1\u00F2\u00F3\u00F4\u00F5\u00F6\u00F7\u00F8\u00F9\u00FA\u00FB\u00FC\u00FD\u00FE\u00FF\"", 272 | "output": "x/x;x=\"\t !\\\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A0\u00A1\u00A2\u00A3\u00A4\u00A5\u00A6\u00A7\u00A8\u00A9\u00AA\u00AB\u00AC\u00AD\u00AE\u00AF\u00B0\u00B1\u00B2\u00B3\u00B4\u00B5\u00B6\u00B7\u00B8\u00B9\u00BA\u00BB\u00BC\u00BD\u00BE\u00BF\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5\u00C6\u00C7\u00C8\u00C9\u00CA\u00CB\u00CC\u00CD\u00CE\u00CF\u00D0\u00D1\u00D2\u00D3\u00D4\u00D5\u00D6\u00D7\u00D8\u00D9\u00DA\u00DB\u00DC\u00DD\u00DE\u00DF\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5\u00E6\u00E7\u00E8\u00E9\u00EA\u00EB\u00EC\u00ED\u00EE\u00EF\u00F0\u00F1\u00F2\u00F3\u00F4\u00F5\u00F6\u00F7\u00F8\u00F9\u00FA\u00FB\u00FC\u00FD\u00FE\u00FF\"" 273 | }, 274 | "End-of-file handling", 275 | { 276 | "input": "x/x;test", 277 | "output": "x/x" 278 | }, 279 | { 280 | 
"input": "x/x;test=\"\\", 281 | "output": "x/x;test=\"\\\\\"" 282 | }, 283 | "Whitespace (not handled by generated-mime-types.json or above)", 284 | { 285 | "input": "x/x;x= ", 286 | "output": "x/x" 287 | }, 288 | { 289 | "input": "x/x;x=\t", 290 | "output": "x/x" 291 | }, 292 | { 293 | "input": "x/x\n\r\t ;x=x", 294 | "output": "x/x;x=x" 295 | }, 296 | { 297 | "input": "\n\r\t x/x;x=x\n\r\t ", 298 | "output": "x/x;x=x" 299 | }, 300 | { 301 | "input": "x/x;\n\r\t x=x\n\r\t ;x=y", 302 | "output": "x/x;x=x" 303 | }, 304 | "Latin1", 305 | { 306 | "input": "text/html;test=\u00FF;charset=gbk", 307 | "output": "text/html;test=\"\u00FF\";charset=gbk", 308 | "navigable": true, 309 | "encoding": "GBK" 310 | }, 311 | ">Latin1", 312 | { 313 | "input": "x/x;test=\uFFFD;x=x", 314 | "output": "x/x;x=x" 315 | }, 316 | "Failure", 317 | { 318 | "input": "\u000Bx/x", 319 | "output": null 320 | }, 321 | { 322 | "input": "\u000Cx/x", 323 | "output": null 324 | }, 325 | { 326 | "input": "x/x\u000B", 327 | "output": null 328 | }, 329 | { 330 | "input": "x/x\u000C", 331 | "output": null 332 | }, 333 | { 334 | "input": "", 335 | "output": null 336 | }, 337 | { 338 | "input": "\t", 339 | "output": null 340 | }, 341 | { 342 | "input": "/", 343 | "output": null 344 | }, 345 | { 346 | "input": "bogus", 347 | "output": null 348 | }, 349 | { 350 | "input": "bogus/", 351 | "output": null 352 | }, 353 | { 354 | "input": "bogus/ ", 355 | "output": null 356 | }, 357 | { 358 | "input": "bogus/bogus/;", 359 | "output": null 360 | }, 361 | { 362 | "input": "", 363 | "output": null 364 | }, 365 | { 366 | "input": "(/)", 367 | "output": null 368 | }, 369 | { 370 | "input": "ÿ/ÿ", 371 | "output": null 372 | }, 373 | { 374 | "input": "text/html(;doesnot=matter", 375 | "output": null 376 | }, 377 | { 378 | "input": "{/}", 379 | "output": null 380 | }, 381 | { 382 | "input": "\u0100/\u0100", 383 | "output": null 384 | }, 385 | { 386 | "input": "text /html", 387 | "output": null 388 | }, 389 | { 390 | "input": "text/ html", 391 | "output": null 392 | }, 393 | { 394 | "input": "\"text/html\"", 395 | "output": null 396 | } 397 | ] 398 | -------------------------------------------------------------------------------- /UPGRADING.md: -------------------------------------------------------------------------------- 1 | # Upgrade guide 2 | 3 | This guide contains steps for upgrading crates in this project between major 4 | versions. 5 | 6 | ## Upgrading from url 1.x to 2.1+ 7 | 8 | * The minimum supported Rust version is now v1.33.0. Verify that you can bump 9 | your library or application to the same MSRV. 10 | 11 | * `Url` no longer implements `std::net::ToSocketAddrs`. You will instead need to 12 | explicitly call `socket_addrs` to convert your `Url` to a type that implements 13 | `ToSocketAddrs`. 14 | 15 | Note that v2.0 removed support for `std::net::ToSocketAddrs` with no 16 | replacement; the `socket_addrs` method was not added until v2.1. 
17 | 
18 | Before upgrading:
19 | 
20 | ```rust
21 | let url = Url::parse("http://github.com:80").unwrap();
22 | let stream = TcpStream::connect(url).unwrap();
23 | ```
24 | 
25 | After upgrading:
26 | 
27 | ```rust
28 | let url = Url::parse("http://github.com:80").unwrap();
29 | let addrs = url.socket_addrs(|| None).unwrap();
30 | let stream = TcpStream::connect(&*addrs).unwrap();
31 | ```
32 | 
33 | Before upgrading:
34 | 
35 | ```rust
36 | let url = Url::parse("socks5://localhost").unwrap();
37 | let stream = TcpStream::connect(url.with_default_port(|url| match url.scheme() {
38 |     "socks5" => Ok(1080),
39 |     _ => Err(()),
40 | })).unwrap();
41 | ```
42 | 
43 | After upgrading:
44 | 
45 | ```rust
46 | let url = Url::parse("socks5://localhost").unwrap();
47 | let stream = TcpStream::connect(&*url.socket_addrs(|| match url.scheme() {
48 |     "socks5" => Some(1080),
49 |     _ => None,
50 | }).unwrap()).unwrap();
51 | ```
52 | 
53 | * `url_serde` is no longer required to use `Url` with Serde 1.x. Remove
54 | references to `url_serde` and enable the `serde` feature instead.
55 | 
56 | ```toml
57 | # Cargo.toml
58 | [dependencies]
59 | url = { version = "2.0", features = ["serde"] }
60 | ```
61 | 
62 | * The `idna` and `percent_encoding` crates are no longer exported by the `url`
63 | crate. Depend on those crates directly instead. See below for additional
64 | breaking changes in the percent-encoding crate.
65 | 
66 | Before upgrading:
67 | 
68 | ```rust
69 | use url::percent_encoding::percent_decode;
70 | ```
71 | 
72 | After upgrading:
73 | 
74 | ```rust
75 | use percent_encoding::percent_decode;
76 | ```
77 | 
78 | ## Upgrading from percent-encoding 1.x to 2.x
79 | 
80 | * Prepackaged encoding sets, like `QUERY_ENCODE_SET` and
81 | `PATH_SEGMENT_ENCODE_SET`, are no longer provided. You
82 | will need to read the specifications relevant to your domain and construct
83 | your own encoding sets by using the `percent_encoding::AsciiSet` builder
84 | methods on either of the base encoding sets, `percent_encoding::CONTROLS` or
85 | `percent_encoding::NON_ALPHANUMERIC`.
86 | 
87 | Before upgrading:
88 | 
89 | ```rust
90 | use percent_encoding::QUERY_ENCODE_SET;
91 | 
92 | percent_encoding::utf8_percent_encode(value, QUERY_ENCODE_SET);
93 | ```
94 | 
95 | After upgrading:
96 | 
97 | ```rust
98 | /// https://url.spec.whatwg.org/#query-state
99 | const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
100 | 
101 | percent_encoding::utf8_percent_encode(value, QUERY);
102 | ```
103 | 
104 | 
105 | ## Upgrading from url 0.x to 1.x
106 | 
107 | * The fields of `Url` are now private because the `Url` constructor, parser,
108 | and setters maintain invariants that could be violated if you were to set the fields directly.
109 | Instead of accessing, for example, `url.scheme`, use the getter method, such as `url.scheme()`.
110 | Instead of assigning directly to a field, for example `url.scheme = "https".to_string()`,
111 | use the setter method, such as `url.set_scheme("https").unwrap()`.
112 | (Some setters validate the new value and return a `Result` that must be used.)
113 | 
114 | * The methods of `Url` now return `&str` instead of `String`,
115 | thus reducing allocations and making serialization cheap.
116 | 
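Together, these two changes look like this in practice (an editor's sketch; the URL and scheme values are illustrative):

```rust
let mut url = Url::parse("http://example.com/").unwrap();
assert_eq!(url.scheme(), "http"); // getter instead of reading a `scheme` field
url.set_scheme("https").unwrap(); // setter; its `Result` must be used
assert_eq!(url.as_str(), "https://example.com/");
```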
117 | * The `path()` method on `url::Url` instances used to return `Option<&[String]>`;
118 | now it returns `&str`.
119 | If you would like functionality more similar to the old behavior of `path()`,
120 | use `path_segments()`, which returns `Option<str::Split<char>>`.
121 | 
122 | Before upgrading:
123 | 
124 | ```rust
125 | let issue_list_url = Url::parse(
126 |     "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
127 | ).unwrap();
128 | assert_eq!(issue_list_url.path(), Some(&["rust-lang".to_string(),
129 |     "rust".to_string(),
130 |     "issues".to_string()][..]));
131 | ```
132 | 
133 | After upgrading:
134 | 
135 | ```rust
136 | let issue_list_url = Url::parse(
137 |     "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
138 | ).unwrap();
139 | assert_eq!(issue_list_url.path(), "/rust-lang/rust/issues");
140 | assert_eq!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()),
141 |     Some(vec!["rust-lang", "rust", "issues"]));
142 | ```
143 | 
144 | * The `path_mut()` method on `url::Url` instances that allowed modification of a URL's path
145 | has been replaced by `path_segments_mut()`.
146 | 
147 | Before upgrading:
148 | 
149 | ```rust
150 | let mut url = Url::parse("https://github.com/rust-lang/rust").unwrap();
151 | url.path_mut().unwrap().push("issues");
152 | ```
153 | 
154 | After upgrading:
155 | 
156 | ```rust
157 | let mut url = Url::parse("https://github.com/rust-lang/rust").unwrap();
158 | url.path_segments_mut().unwrap().push("issues");
159 | ```
160 | 
161 | * The `domain_mut()` method on `url::Url` instances that allowed modification of a URL's domain
162 | has been replaced by `set_host()` and `set_ip_host()`.
163 | 
164 | * The `host()` method on `url::Url` instances used to return `Option<&Host>`;
165 | now it returns `Option<Host<&str>>`.
166 | The `serialize_host()` method that returned `Option<String>`
167 | has been replaced by the `host_str()` method that returns `Option<&str>`.
168 | 
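For example, replacing a host now looks like this (an editor's sketch; the host names are illustrative):

```rust
let mut url = Url::parse("https://example.com/").unwrap();
assert_eq!(url.host_str(), Some("example.com"));
url.set_host(Some("example.org")).unwrap();
assert_eq!(url.as_str(), "https://example.org/");
```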
169 | * The `serialize()` method on `url::Url` instances that returned `String`
170 | has been replaced by an `as_str()` method that returns `&str`.
171 | 
172 | Before upgrading:
173 | 
174 | ```rust
175 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
176 | assert_eq!(this_document.serialize(), "http://servo.github.io/rust-url/url/index.html".to_string());
177 | ```
178 | 
179 | After upgrading:
180 | 
181 | ```rust
182 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
183 | assert_eq!(this_document.as_str(), "http://servo.github.io/rust-url/url/index.html");
184 | ```
185 | 
186 | * `url::UrlParser` has been replaced by `url::Url::parse()` and `url::Url::join()`.
187 | 
188 | Before upgrading:
189 | 
190 | ```rust
191 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
192 | let css_url = UrlParser::new().base_url(&this_document).parse("../main.css").unwrap();
193 | assert_eq!(css_url.serialize(), "http://servo.github.io/rust-url/main.css".to_string());
194 | ```
195 | 
196 | After upgrading:
197 | 
198 | ```rust
199 | let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
200 | let css_url = this_document.join("../main.css").unwrap();
201 | assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
202 | ```
203 | 
204 | * `url::parse_path()` and `url::UrlParser::parse_path()` have been removed without replacement.
205 | As a workaround, you can give a base URL that you then ignore to `url::Url::parse()`.
206 | 
207 | Before upgrading:
208 | 
209 | ```rust
210 | let (path, query, fragment) = url::parse_path("/foo/bar/../baz?q=42").unwrap();
211 | assert_eq!(path, vec!["foo".to_string(), "baz".to_string()]);
212 | assert_eq!(query, Some("q=42".to_string()));
213 | assert_eq!(fragment, None);
214 | ```
215 | 
216 | After upgrading:
217 | 
218 | ```rust
219 | let base = Url::parse("http://example.com").unwrap();
220 | let with_path = base.join("/foo/bar/../baz?q=42").unwrap();
221 | assert_eq!(with_path.path(), "/foo/baz");
222 | assert_eq!(with_path.query(), Some("q=42"));
223 | assert_eq!(with_path.fragment(), None);
224 | ```
225 | 
226 | * The `url::form_urlencoded::serialize()` method
227 | has been replaced with the `url::form_urlencoded::Serializer` struct.
228 | Instead of calling `serialize()` with key/value pairs,
229 | create a new `Serializer` with a new string,
230 | call the `extend_pairs()` method on the `Serializer` instance with the key/value pairs as the argument,
231 | then call `finish()`.
232 | 
233 | Before upgrading:
234 | 
235 | ```rust
236 | let form = url::form_urlencoded::serialize(form.iter().map(|(k, v)| {
237 |     (&k[..], &v[..])
238 | }));
239 | ```
240 | 
241 | After upgrading:
242 | 
243 | ```rust
244 | let form = url::form_urlencoded::Serializer::new(String::new()).extend_pairs(
245 |     form.iter().map(|(k, v)| { (&k[..], &v[..]) })
246 | ).finish();
247 | ```
248 | 
249 | * The `set_query_from_pairs()` method on `url::Url` instances that took key/value pairs
250 | has been replaced with `query_pairs_mut()`, which allows you to modify the `url::Url`'s query pairs.
251 | 
252 | Before upgrading:
253 | 
254 | ```rust
255 | let mut url = Url::parse("https://duckduckgo.com/").unwrap();
256 | let pairs = vec![
257 |     ("q", "test"),
258 |     ("ia", "images"),
259 | ];
260 | url.set_query_from_pairs(pairs.iter().map(|&(k, v)| {
261 |     (&k[..], &v[..])
262 | }));
263 | ```
264 | 
265 | After upgrading:
266 | 
267 | ```rust
268 | let mut url = Url::parse("https://duckduckgo.com/").unwrap();
269 | let pairs = vec![
270 |     ("q", "test"),
271 |     ("ia", "images"),
272 | ];
273 | url.query_pairs_mut().clear().extend_pairs(
274 |     pairs.iter().map(|&(k, v)| { (&k[..], &v[..]) })
275 | );
276 | ```
277 | 
278 | * `url::SchemeData`, its variants `Relative` and `NonRelative`,
279 | and the struct `url::RelativeSchemeData` have been removed.
280 | Instead of matching on these variants
281 | to determine if you have a URL in a relative scheme such as HTTP
282 | versus a URL in a non-relative scheme such as data,
283 | use the `cannot_be_a_base()` method to determine which kind you have.
284 | 
285 | Before upgrading:
286 | 
287 | ```rust
288 | match url.scheme_data {
289 |     url::SchemeData::Relative(..) => {}
290 |     url::SchemeData::NonRelative(..) => {
291 |         return Err(human(format!("`{}` must have relative scheme \
292 |             data: {}", field, url)))
293 |     }
294 | }
295 | ```
296 | 
297 | After upgrading:
298 | 
299 | ```rust
300 | if url.cannot_be_a_base() {
301 |     return Err(human(format!("`{}` must have relative scheme \
302 |         data: {}", field, url)))
303 | }
304 | ```
305 | 
306 | * The function `url::whatwg_scheme_type_mapper()`, the `SchemeType` enum,
307 | and the `scheme_type_mapper()` method on `url::UrlParser` instances have been removed.
308 | `SchemeType` had a method for getting the `default_port()`;
309 | to replicate this functionality, use the method `port_or_known_default()` on `url::Url` instances.
310 | The `port_or_default()` method on `url::Url` instances has been removed;
311 | use `port_or_known_default()` instead.
312 | 
313 | Before upgrading:
314 | 
315 | ```rust
316 | let port = match whatwg_scheme_type_mapper(&url.scheme) {
317 |     SchemeType::Relative(port) => port,
318 |     _ => return Err(format!("Invalid special scheme: `{}`",
319 |         raw_url.scheme)),
320 | };
321 | ```
322 | 
323 | After upgrading:
324 | 
325 | ```rust
326 | let port = match url.port_or_known_default() {
327 |     Some(port) => port,
328 |     _ => return Err(format!("Invalid special scheme: `{}`",
329 |         url.scheme())),
330 | };
331 | ```
332 | 
333 | * The following formatting utilities have been removed without replacement;
334 | look at their linked previous implementations
335 | if you would like to replicate the functionality in your code:
336 |   * [`url::format::PathFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL24)
337 |   * [`url::format::UserInfoFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL50)
338 |   * [`url::format::UrlNoFragmentFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL70)
339 | 
340 | * `url::percent_encoding::percent_decode()` used to have a return type of `Vec<u8>`;
341 | now it returns an iterator of decoded `u8` bytes that also implements `Into<Cow<[u8]>>`.
342 | Use `.into()` to get a `Cow<[u8]>`, then `.into_owned()` to obtain a `Vec<u8>`.
343 | (`.collect()` also works but might not be as efficient.)
344 | 
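For example (an editor's sketch; the input bytes are illustrative):

```rust
use url::percent_encoding::percent_decode;

// `percent_decode` now yields the decoded bytes one at a time.
let decoded: Vec<u8> = percent_decode(b"%23first").collect();
assert_eq!(decoded, b"#first".to_vec());
```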
345 | * The `url::percent_encoding::EncodeSet` struct and constant instances
346 | used with `url::percent_encoding::percent_encode()`
347 | have been changed to structs that implement the trait `url::percent_encoding::EncodeSet`.
348 |   * `SIMPLE_ENCODE_SET`, `QUERY_ENCODE_SET`, `DEFAULT_ENCODE_SET`,
349 | and `USERINFO_ENCODE_SET` have the same behavior.
350 |   * `USERNAME_ENCODE_SET` and `PASSWORD_ENCODE_SET` have been removed;
351 | use `USERINFO_ENCODE_SET` instead.
352 |   * `HTTP_VALUE_ENCODE_SET` has been removed;
353 | an implementation of it in the new types can be found [in hyper's source](
354 | https://github.com/hyperium/hyper/blob/67436c5bf615cf5a55a71e32b788afef5985570e/src/header/parsing.rs#L131-L138)
355 | if you need to replicate this functionality in your code.
356 |   * `FORM_URLENCODED_ENCODE_SET` has been removed;
357 | instead, use the functionality in `url::form_urlencoded`.
358 |   * `PATH_SEGMENT_ENCODE_SET` has been added for use on '/'-separated path segments.
359 | 
360 | * `url::percent_encoding::percent_decode_to()` has been removed.
361 | Use `url::percent_encoding::percent_decode()`, which returns an iterator.
362 | You can then use the iterator’s `collect()` method
363 | or give it to some data structure’s `extend()` method.
364 | * A number of `ParseError` variants have changed.
365 | [See the documentation for the current set](http://servo.github.io/rust-url/url/enum.ParseError.html).
366 | * `url::OpaqueOrigin::new()` and `url::Origin::UID(OpaqueOrigin)`
367 | have been replaced by `url::Origin::new_opaque()` and `url::Origin::Opaque(OpaqueOrigin)`, respectively.
368 | 
--------------------------------------------------------------------------------
/form_urlencoded/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2013-2016 The rust-url developers.
2 | //
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 | // option. This file may not be copied, modified, or distributed
7 | // except according to those terms.
8 | 
9 | //! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
10 | //! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
11 | //! as used by HTML forms.
12 | //!
13 | //! Converts between a string (such as a URL’s query string)
14 | //! and a sequence of (name, value) pairs.
15 | 
16 | use percent_encoding::{percent_decode, percent_encode_byte};
17 | use std::borrow::{Borrow, Cow};
18 | use std::str;
19 | 
20 | /// Convert a byte string in the `application/x-www-form-urlencoded` syntax
21 | /// into an iterator of (name, value) pairs.
22 | ///
23 | /// Use `parse(input.as_bytes())` to parse a `&str` string.
24 | ///
25 | /// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
26 | /// converted to `[("#first", "%try%")]`.
27 | #[inline]
28 | pub fn parse(input: &[u8]) -> Parse<'_> {
29 |     Parse { input }
30 | }
31 | /// The return type of `parse()`.
32 | #[derive(Copy, Clone)]
33 | pub struct Parse<'a> {
34 |     input: &'a [u8],
35 | }
36 | 
37 | impl<'a> Iterator for Parse<'a> {
38 |     type Item = (Cow<'a, str>, Cow<'a, str>);
39 | 
40 |     fn next(&mut self) -> Option<Self::Item> {
41 |         loop {
42 |             if self.input.is_empty() {
43 |                 return None;
44 |             }
45 |             let mut split2 = self.input.splitn(2, |&b| b == b'&');
46 |             let sequence = split2.next().unwrap();
47 |             self.input = split2.next().unwrap_or(&[][..]);
48 |             if sequence.is_empty() {
49 |                 continue;
50 |             }
51 |             let mut split2 = sequence.splitn(2, |&b| b == b'=');
52 |             let name = split2.next().unwrap();
53 |             let value = split2.next().unwrap_or(&[][..]);
54 |             return Some((decode(name), decode(value)));
55 |         }
56 |     }
57 | }
58 | 
59 | fn decode(input: &[u8]) -> Cow<'_, str> {
60 |     let replaced = replace_plus(input);
61 |     decode_utf8_lossy(match percent_decode(&replaced).into() {
62 |         Cow::Owned(vec) => Cow::Owned(vec),
63 |         Cow::Borrowed(_) => replaced,
64 |     })
65 | }
66 | 
67 | /// Replace b'+' with b' '
68 | fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> {
69 |     match input.iter().position(|&b| b == b'+') {
70 |         None => Cow::Borrowed(input),
71 |         Some(first_position) => {
72 |             let mut replaced = input.to_owned();
73 |             replaced[first_position] = b' ';
74 |             for byte in &mut replaced[first_position + 1..] {
75 |                 if *byte == b'+' {
76 |                     *byte = b' ';
77 |                 }
78 |             }
79 |             Cow::Owned(replaced)
80 |         }
81 |     }
82 | }
83 | 
84 | impl<'a> Parse<'a> {
85 |     /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow`.
86 |     pub fn into_owned(self) -> ParseIntoOwned<'a> {
87 |         ParseIntoOwned { inner: self }
88 |     }
89 | }
90 | 
91 | /// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow`.
92 | pub struct ParseIntoOwned<'a> {
93 |     inner: Parse<'a>,
94 | }
95 | 
96 | impl<'a> Iterator for ParseIntoOwned<'a> {
97 |     type Item = (String, String);
98 | 
99 |     fn next(&mut self) -> Option<Self::Item> {
100 |         self.inner
101 |             .next()
102 |             .map(|(k, v)| (k.into_owned(), v.into_owned()))
103 |     }
104 | }
105 | 
106 | /// The [`application/x-www-form-urlencoded` byte serializer](
107 | /// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
108 | ///
109 | /// Return an iterator of `&str` slices.
110 | pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
111 |     ByteSerialize { bytes: input }
112 | }
113 | 
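// Editor's note: a usage sketch for `byte_serialize` (not part of the original
// source; the input bytes are illustrative). Spaces become '+', and other
// non-alphanumeric bytes are percent-encoded:
//
//     let serialized: String = byte_serialize(b"foo bar?").collect();
//     assert_eq!(serialized, "foo+bar%3F");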
114 | /// Return value of `byte_serialize()`.
115 | #[derive(Debug)]
116 | pub struct ByteSerialize<'a> {
117 |     bytes: &'a [u8],
118 | }
119 | 
120 | fn byte_serialized_unchanged(byte: u8) -> bool {
121 |     matches!(byte, b'*' | b'-' | b'.' | b'0' ..= b'9' | b'A' ..= b'Z' | b'_' | b'a' ..= b'z')
122 | }
123 | 
124 | impl<'a> Iterator for ByteSerialize<'a> {
125 |     type Item = &'a str;
126 | 
127 |     fn next(&mut self) -> Option<&'a str> {
128 |         if let Some((&first, tail)) = self.bytes.split_first() {
129 |             if !byte_serialized_unchanged(first) {
130 |                 self.bytes = tail;
131 |                 return Some(if first == b' ' {
132 |                     "+"
133 |                 } else {
134 |                     percent_encode_byte(first)
135 |                 });
136 |             }
137 |             let position = tail.iter().position(|&b| !byte_serialized_unchanged(b));
138 |             let (unchanged_slice, remaining) = match position {
139 |                 // 1 for first_byte + i unchanged in tail
140 |                 Some(i) => self.bytes.split_at(1 + i),
141 |                 None => (self.bytes, &[][..]),
142 |             };
143 |             self.bytes = remaining;
144 |             // This unsafe is appropriate because we have already checked these
145 |             // bytes in byte_serialized_unchanged, which checks for a subset
146 |             // of UTF-8. So we know these bytes are valid UTF-8, and doing
147 |             // another UTF-8 check would be wasteful.
148 |             Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
149 |         } else {
150 |             None
151 |         }
152 |     }
153 | 
154 |     fn size_hint(&self) -> (usize, Option<usize>) {
155 |         if self.bytes.is_empty() {
156 |             (0, Some(0))
157 |         } else {
158 |             (1, Some(self.bytes.len()))
159 |         }
160 |     }
161 | }
162 | 
163 | /// The [`application/x-www-form-urlencoded` serializer](
164 | /// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
165 | pub struct Serializer<'a, T: Target> {
166 |     target: Option<T>,
167 |     start_position: usize,
168 |     encoding: EncodingOverride<'a>,
169 | }
170 | 
171 | pub trait Target {
172 |     fn as_mut_string(&mut self) -> &mut String;
173 |     fn finish(self) -> Self::Finished;
174 |     type Finished;
175 | }
176 | 
177 | impl Target for String {
178 |     fn as_mut_string(&mut self) -> &mut String {
179 |         self
180 |     }
181 |     fn finish(self) -> Self {
182 |         self
183 |     }
184 |     type Finished = Self;
185 | }
186 | 
187 | impl<'a> Target for &'a mut String {
188 |     fn as_mut_string(&mut self) -> &mut String {
189 |         &mut **self
190 |     }
191 |     fn finish(self) -> Self {
192 |         self
193 |     }
194 |     type Finished = Self;
195 | }
196 | 
197 | impl<'a, T: Target> Serializer<'a, T> {
198 |     /// Create a new `application/x-www-form-urlencoded` serializer for the given target.
199 |     ///
200 |     /// If the target is non-empty,
201 |     /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
202 |     pub fn new(target: T) -> Self {
203 |         Self::for_suffix(target, 0)
204 |     }
205 | 
206 |     /// Create a new `application/x-www-form-urlencoded` serializer
207 |     /// for a suffix of the given target.
208 |     ///
209 |     /// If that suffix is non-empty,
210 |     /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
211 |     pub fn for_suffix(mut target: T, start_position: usize) -> Self {
212 |         if target.as_mut_string().len() < start_position {
213 |             panic!(
214 |                 "invalid length {} for target of length {}",
215 |                 start_position,
216 |                 target.as_mut_string().len()
217 |             );
218 |         }
219 | 
220 |         Serializer {
221 |             target: Some(target),
222 |             start_position,
223 |             encoding: None,
224 |         }
225 |     }
226 | 
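    // Editor's note: a usage sketch for `for_suffix` (not part of the original
    // source; the URL is illustrative). The serialized suffix starts right
    // after the `?`:
    //
    //     let mut out = "https://example.com/?".to_string();
    //     let start = out.len();
    //     Serializer::for_suffix(&mut out, start).append_pair("q", "rust");
    //     assert_eq!(out, "https://example.com/?q=rust");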
227 |     /// Remove any existing name/value pair.
228 |     ///
229 |     /// Panics if called after `.finish()`.
230 |     pub fn clear(&mut self) -> &mut Self {
231 |         string(&mut self.target).truncate(self.start_position);
232 |         self
233 |     }
234 | 
235 |     /// Set the character encoding to be used for names and values before percent-encoding.
236 |     pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self {
237 |         self.encoding = new;
238 |         self
239 |     }
240 | 
241 |     /// Serialize and append a name/value pair.
242 |     ///
243 |     /// Panics if called after `.finish()`.
244 |     pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
245 |         append_pair(
246 |             string(&mut self.target),
247 |             self.start_position,
248 |             self.encoding,
249 |             name,
250 |             value,
251 |         );
252 |         self
253 |     }
254 | 
255 |     /// Serialize and append a parameter name without any value.
256 |     ///
257 |     /// Panics if called after `.finish()`.
258 |     pub fn append_key_only(&mut self, name: &str) -> &mut Self {
259 |         append_key_only(
260 |             string(&mut self.target),
261 |             self.start_position,
262 |             self.encoding,
263 |             name,
264 |         );
265 |         self
266 |     }
267 | 
268 |     /// Serialize and append a number of name/value pairs.
269 |     ///
270 |     /// This simply calls `append_pair` repeatedly.
271 |     /// This can be more convenient, so the user doesn’t need to introduce a block
272 |     /// to limit the scope of `Serializer`’s borrow of its string.
273 |     ///
274 |     /// Panics if called after `.finish()`.
275 |     pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
276 |     where
277 |         I: IntoIterator,
278 |         I::Item: Borrow<(K, V)>,
279 |         K: AsRef<str>,
280 |         V: AsRef<str>,
281 |     {
282 |         {
283 |             let string = string(&mut self.target);
284 |             for pair in iter {
285 |                 let &(ref k, ref v) = pair.borrow();
286 |                 append_pair(
287 |                     string,
288 |                     self.start_position,
289 |                     self.encoding,
290 |                     k.as_ref(),
291 |                     v.as_ref(),
292 |                 );
293 |             }
294 |         }
295 |         self
296 |     }
297 | 
298 |     /// Serialize and append a number of names without values.
299 |     ///
300 |     /// This simply calls `append_key_only` repeatedly.
301 |     /// This can be more convenient, so the user doesn’t need to introduce a block
302 |     /// to limit the scope of `Serializer`’s borrow of its string.
303 |     ///
304 |     /// Panics if called after `.finish()`.
305 |     pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self
306 |     where
307 |         I: IntoIterator,
308 |         I::Item: Borrow<K>,
309 |         K: AsRef<str>,
310 |     {
311 |         {
312 |             let string = string(&mut self.target);
313 |             for key in iter {
314 |                 let k = key.borrow().as_ref();
315 |                 append_key_only(string, self.start_position, self.encoding, k);
316 |             }
317 |         }
318 |         self
319 |     }
320 | 
321 |     /// If this serializer was constructed with a string, take and return that string.
322 |     ///
323 |     /// ```rust
324 |     /// use form_urlencoded;
325 |     /// let encoded: String = form_urlencoded::Serializer::new(String::new())
326 |     ///     .append_pair("foo", "bar & baz")
327 |     ///     .append_pair("saison", "Été+hiver")
328 |     ///     .finish();
329 |     /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
330 |     /// ```
331 |     ///
332 |     /// Panics if called more than once.
333 | pub fn finish(&mut self) -> T::Finished { 334 | self.target 335 | .take() 336 | .expect("url::form_urlencoded::Serializer double finish") 337 | .finish() 338 | } 339 | } 340 | 341 | fn append_separator_if_needed(string: &mut String, start_position: usize) { 342 | if string.len() > start_position { 343 | string.push('&') 344 | } 345 | } 346 | 347 | fn string(target: &mut Option) -> &mut String { 348 | target 349 | .as_mut() 350 | .expect("url::form_urlencoded::Serializer finished") 351 | .as_mut_string() 352 | } 353 | 354 | fn append_pair( 355 | string: &mut String, 356 | start_position: usize, 357 | encoding: EncodingOverride<'_>, 358 | name: &str, 359 | value: &str, 360 | ) { 361 | append_separator_if_needed(string, start_position); 362 | append_encoded(name, string, encoding); 363 | string.push('='); 364 | append_encoded(value, string, encoding); 365 | } 366 | 367 | fn append_key_only( 368 | string: &mut String, 369 | start_position: usize, 370 | encoding: EncodingOverride, 371 | name: &str, 372 | ) { 373 | append_separator_if_needed(string, start_position); 374 | append_encoded(name, string, encoding); 375 | } 376 | 377 | fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) { 378 | string.extend(byte_serialize(&encode(encoding, s))) 379 | } 380 | 381 | pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> { 382 | if let Some(o) = encoding_override { 383 | return o(input); 384 | } 385 | input.as_bytes().into() 386 | } 387 | 388 | pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> { 389 | // Note: This function is duplicated in `percent_encoding/lib.rs`. 390 | match input { 391 | Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), 392 | Cow::Owned(bytes) => { 393 | match String::from_utf8_lossy(&bytes) { 394 | Cow::Borrowed(utf8) => { 395 | // If from_utf8_lossy returns a Cow::Borrowed, then we can 396 | // be sure our original bytes were valid UTF-8. This is because 397 | // if the bytes were invalid UTF-8 from_utf8_lossy would have 398 | // to allocate a new owned string to back the Cow so it could 399 | // replace invalid bytes with a placeholder. 400 | 401 | // First we do a debug_assert to confirm our description above. 402 | let raw_utf8: *const [u8] = utf8.as_bytes(); 403 | debug_assert!(raw_utf8 == &*bytes as *const [u8]); 404 | 405 | // Given we know the original input bytes are valid UTF-8, 406 | // and we have ownership of those bytes, we re-use them and 407 | // return a Cow::Owned here. 408 | Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }) 409 | } 410 | Cow::Owned(s) => Cow::Owned(s), 411 | } 412 | } 413 | } 414 | } 415 | 416 | pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>; 417 | --------------------------------------------------------------------------------