├── .gitignore ├── rustfmt.toml ├── Cargo.toml ├── .builds └── linux.yml ├── LICENSE-MIT ├── README.md ├── src ├── scheme.rs ├── lib.rs └── tests.rs └── LICENSE-APACHE /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | format_code_in_doc_comments = true 2 | match_block_trailing_comma = true 3 | condense_wildcard_suffixes = true 4 | use_field_init_shorthand = true 5 | overflow_delimited_expr = true 6 | use_small_heuristics = "Max" 7 | normalize_comments = true 8 | reorder_impl_items = true 9 | use_try_shorthand = true 10 | newline_style = "Unix" 11 | format_strings = true 12 | wrap_comments = true 13 | comment_width = 100 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Christian Duerr "] 3 | repository = "https://github.com/alacritty/urlocator.git" 4 | description = "Locate URLs in character streams" 5 | documentation = "https://docs.rs/urlocator" 6 | keywords = ["stream", "url", "parser"] 7 | categories = ["parsing", "no-std"] 8 | exclude = ["/.travis.yml"] 9 | license = "MIT/Apache-2.0" 10 | readme = "README.md" 11 | name = "urlocator" 12 | version = "0.1.4" 13 | edition = "2018" 14 | 15 | [features] 16 | nightly = [] 17 | -------------------------------------------------------------------------------- /.builds/linux.yml: -------------------------------------------------------------------------------- 1 | image: archlinux 2 | sources: 3 | - https://github.com/alacritty/urlocator 4 | tasks: 5 | - rustup: | 6 | curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable --profile minimal -c clippy 7 | - stable: | 8 | cd urlocator 9 | $HOME/.cargo/bin/cargo +stable test 10 | - clippy: | 11 | cd urlocator 12 | $HOME/.cargo/bin/cargo +stable clippy 13 | - rustfmt: | 14 | $HOME/.cargo/bin/rustup toolchain install nightly -c rustfmt 15 | cd urlocator 16 | $HOME/.cargo/bin/cargo +nightly fmt -- --check 17 | - 1-36-0: | 18 | $HOME/.cargo/bin/rustup toolchain install --profile minimal 1.36.0 19 | cd urlocator 20 | rm Cargo.lock 21 | $HOME/.cargo/bin/cargo +1.36.0 test 22 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Christian Duerr, The Alacritty Project Contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # URL Locator 2 | 3 | This library provides a streaming parser for locating URLs. 4 | 5 | Instead of returning the URL itself, this library will only return the length of the URL and 6 | the offset from the current parsing position. 7 | 8 | The length and offset counts follow the example of Rust's standard library's `char` type and are 9 | based on unicode scalar values instead of graphemes. 10 | 11 | ### Usage 12 | 13 | This crate is available on [crates.io](https://crates.io/crates/urlocator) and can be used by 14 | adding `urlocator` to your dependencies in your project's Cargo.toml: 15 | 16 | ```toml 17 | [dependencies] 18 | urlocator = "0.1.4" 19 | ``` 20 | 21 | ### Example: URL boundaries 22 | 23 | By keeping track of the current parser position, it is possible to locate the boundaries of a 24 | URL in a character stream: 25 | 26 | ```rust 27 | use urlocator::{UrlLocator, UrlLocation}; 28 | 29 | // Boundaries: 10-v v-28 30 | let input = "[example](https://example.org)"; 31 | 32 | let mut locator = UrlLocator::new(); 33 | 34 | let (mut start, mut end) = (0, 0); 35 | 36 | for (i, c) in input.chars().enumerate() { 37 | if let UrlLocation::Url(length, end_offset) = locator.advance(c) { 38 | start = 1 + i - length as usize; 39 | end = i - end_offset as usize; 40 | } 41 | } 42 | 43 | assert_eq!(start, 10); 44 | assert_eq!(end, 28); 45 | ``` 46 | 47 | ### Examlpe: Counting URLs 48 | 49 | By checking for the return state of the parser, it is possible to determine exactly when a URL 50 | has been broken. Using this, you can count the number of URLs in a stream: 51 | 52 | ```rust 53 | use urlocator::{UrlLocator, UrlLocation}; 54 | 55 | let input = "https://example.org/1 https://rust-lang.org/二 https://example.com/Ⅲ"; 56 | 57 | let mut locator = UrlLocator::new(); 58 | 59 | let mut url_count = 0; 60 | let mut reset = true; 61 | 62 | for c in input.chars() { 63 | match locator.advance(c) { 64 | UrlLocation::Url(_, _) if reset => { 65 | url_count += 1; 66 | reset = false; 67 | } 68 | UrlLocation::Reset => reset = true, 69 | _ => (), 70 | } 71 | } 72 | 73 | assert_eq!(url_count, 3); 74 | ``` 75 | -------------------------------------------------------------------------------- /src/scheme.rs: -------------------------------------------------------------------------------- 1 | macro_rules! schemes { 2 | ($name:ident { 3 | $([$state:ident, $($match:literal)|+ => $result:ident]$(,)?)* 4 | } 5 | [$($complete:ident),*]) => ( 6 | #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] 7 | pub enum $name { 8 | $($result,)* 9 | COMPLETE, 10 | INVALID, 11 | RESET, 12 | } 13 | 14 | impl Default for $name { 15 | #[inline] 16 | fn default() -> Self { 17 | $name::RESET 18 | } 19 | } 20 | 21 | impl $name { 22 | #[inline] 23 | pub fn advance(self, c: char) -> Self { 24 | match (self, c) { 25 | $($(($name::$state, $match))|+ => $name::$result,)* 26 | $(($name::$complete, ':') => $name::COMPLETE,)* 27 | (_, 'a'..='z') | (_, 'A'..='Z') => $name::INVALID, 28 | _ => $name::RESET, 29 | } 30 | } 31 | } 32 | ) 33 | } 34 | 35 | schemes! { 36 | SchemeState { 37 | [RESET, 'h'|'H' => H], 38 | [H, 't'|'T' => HT], 39 | [HT, 't'|'T' => HTT], 40 | [HTT, 'p'|'P' => HTTP], 41 | [HTTP, 's'|'S' => HTTPS], 42 | [RESET, 'f'|'F' => F], 43 | [F, 't'|'T' => FT], 44 | [FT, 'p'|'P' => FTP], 45 | [F, 'i'|'I' => FI], 46 | [FI, 'l'|'L' => FIL], 47 | [FIL, 'e'|'E' => FILE], 48 | [RESET, 'm'|'M' => M], 49 | [M, 'a'|'A' => MA], 50 | [MA, 'i'|'I' => MAI], 51 | [MAI, 'l'|'L' => MAIL], 52 | [MAIL, 't'|'T' => MAILT], 53 | [MAILT, 'o'|'O' => MAILTO], 54 | [RESET, 'n'|'N' => N], 55 | [N, 'e'|'E' => NE], 56 | [NE, 'w'|'W' => NEW], 57 | [NEW, 's'|'S' => NEWS], 58 | [RESET, 'g'|'G' => G], 59 | [G, 'e'|'E' => GE], 60 | [GE, 'm'|'M' => GEM], 61 | [GEM, 'i'|'I' => GEMI], 62 | [GEMI, 'n'|'N' => GEMIN], 63 | [GEMIN, 'i'|'I' => GEMINI], 64 | [G, 'i'|'I' => GI], 65 | [GI, 't'|'T' => GIT], 66 | [G, 'o'|'O' => GO], 67 | [GO, 'p'|'P' => GOP], 68 | [GOP, 'h'|'H' => GOPH], 69 | [GOPH, 'e'|'E' => GOPHE], 70 | [GOPHE, 'r'|'R' => GOPHER], 71 | [RESET, 's'|'S' => S], 72 | [S, 's'|'S' => SS], 73 | [SS, 'h'|'H' => SSH], 74 | } 75 | 76 | [HTTP, HTTPS, FTP, FILE, MAILTO, NEWS, GEMINI, GIT, GOPHER, SSH] 77 | } 78 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # URL Locator 2 | //! 3 | //! This library provides a streaming parser for locating URLs. 4 | //! 5 | //! Instead of returning the URL itself, this library will only return the length of the URL and 6 | //! the offset from the current parsing position. 7 | //! 8 | //! The length and offset counts follow the example of Rust's standard library's [`char`] type and 9 | //! are based on unicode scalar values instead of graphemes. 10 | //! 11 | //! # Usage 12 | //! 13 | //! This crate is available on [crates.io](https://crates.io/crates/urlocator) and can be used by 14 | //! adding `urlocator` to your dependencies in your project's Cargo.toml: 15 | //! 16 | //! ```toml 17 | //! [dependencies] 18 | //! urlocator = "0.1.4" 19 | //! ``` 20 | //! 21 | //! # Example: URL boundaries 22 | //! 23 | //! By keeping track of the current parser position, it is possible to locate the boundaries of a 24 | //! URL in a character stream: 25 | //! 26 | //! ```rust 27 | //! # use urlocator::{UrlLocator, UrlLocation}; 28 | //! // Boundaries: 10-v v-28 29 | //! let input = "[example](https://example.org)"; 30 | //! 31 | //! let mut locator = UrlLocator::new(); 32 | //! 33 | //! let (mut start, mut end) = (0, 0); 34 | //! 35 | //! for (i, c) in input.chars().enumerate() { 36 | //! if let UrlLocation::Url(length, end_offset) = locator.advance(c) { 37 | //! start = 1 + i - length as usize; 38 | //! end = i - end_offset as usize; 39 | //! } 40 | //! } 41 | //! 42 | //! assert_eq!(start, 10); 43 | //! assert_eq!(end, 28); 44 | //! ``` 45 | //! 46 | //! # Examlpe: Counting URLs 47 | //! 48 | //! By checking for the return state of the parser, it is possible to determine exactly when a URL 49 | //! has been broken. Using this, you can count the number of URLs in a stream: 50 | //! 51 | //! ```rust 52 | //! # use urlocator::{UrlLocator, UrlLocation}; 53 | //! let input = "https://example.org/1 https://rust-lang.org/二 https://example.com/Ⅲ"; 54 | //! 55 | //! let mut locator = UrlLocator::new(); 56 | //! 57 | //! let mut url_count = 0; 58 | //! let mut reset = true; 59 | //! 60 | //! for c in input.chars() { 61 | //! match locator.advance(c) { 62 | //! UrlLocation::Url(..) if reset => { 63 | //! url_count += 1; 64 | //! reset = false; 65 | //! }, 66 | //! UrlLocation::Reset => reset = true, 67 | //! _ => (), 68 | //! } 69 | //! } 70 | //! 71 | //! assert_eq!(url_count, 3); 72 | //! ``` 73 | 74 | #![cfg_attr(all(test, feature = "nightly"), feature(test))] 75 | #![cfg_attr(not(test), no_std)] 76 | 77 | mod scheme; 78 | #[cfg(test)] 79 | mod tests; 80 | 81 | use scheme::SchemeState; 82 | 83 | /// Position of the URL parser. 84 | #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] 85 | pub enum UrlLocation { 86 | /// Current location is the end of a valid URL. 87 | Url(u16, u16), 88 | /// Current location is possibly a URL scheme. 89 | Scheme, 90 | /// Last advancement has reset the URL parser. 91 | Reset, 92 | } 93 | 94 | /// URL parser positional state. 95 | #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] 96 | enum State { 97 | /// Parsing the URL scheme. 98 | Scheme(SchemeState), 99 | /// Parsing a valid URL. 100 | Url, 101 | } 102 | 103 | impl Default for State { 104 | #[inline] 105 | fn default() -> Self { 106 | State::Scheme(SchemeState::default()) 107 | } 108 | } 109 | 110 | /// URL parser. 111 | #[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)] 112 | pub struct UrlLocator { 113 | state: State, 114 | 115 | illegal_end_chars: u16, 116 | len: u16, 117 | 118 | open_parentheses: u8, 119 | open_brackets: u8, 120 | } 121 | 122 | impl UrlLocator { 123 | /// Create a new parser. 124 | #[inline] 125 | pub fn new() -> Self { 126 | Self::default() 127 | } 128 | 129 | /// Advance the parser by one char. 130 | /// 131 | /// # Example 132 | /// 133 | /// ```rust 134 | /// # use urlocator::{UrlLocator, UrlLocation}; 135 | /// let mut locator = UrlLocator::new(); 136 | /// 137 | /// let location = locator.advance('h'); 138 | /// 139 | /// assert_eq!(location, UrlLocation::Scheme); 140 | /// ``` 141 | #[inline] 142 | pub fn advance(&mut self, c: char) -> UrlLocation { 143 | self.len += 1; 144 | 145 | match self.state { 146 | State::Scheme(state) => self.advance_scheme(state, c), 147 | State::Url => self.advance_url(c), 148 | } 149 | } 150 | 151 | #[inline] 152 | fn advance_scheme(&mut self, state: SchemeState, c: char) -> UrlLocation { 153 | self.state = match state.advance(c) { 154 | SchemeState::RESET => return self.reset(), 155 | SchemeState::COMPLETE => State::Url, 156 | state => State::Scheme(state), 157 | }; 158 | 159 | UrlLocation::Scheme 160 | } 161 | 162 | #[inline] 163 | fn advance_url(&mut self, c: char) -> UrlLocation { 164 | if Self::is_illegal_at_end(c) { 165 | self.illegal_end_chars += 1; 166 | } else { 167 | self.illegal_end_chars = 0; 168 | } 169 | 170 | self.url(c) 171 | } 172 | 173 | #[inline] 174 | fn url(&mut self, c: char) -> UrlLocation { 175 | match c { 176 | '(' => self.open_parentheses += 1, 177 | '[' => self.open_brackets += 1, 178 | ')' => { 179 | if self.open_parentheses == 0 { 180 | return self.reset(); 181 | } else { 182 | self.open_parentheses -= 1; 183 | } 184 | }, 185 | ']' => { 186 | if self.open_brackets == 0 { 187 | return self.reset(); 188 | } else { 189 | self.open_brackets -= 1; 190 | } 191 | }, 192 | // Illegal URL characters 193 | '\u{00}'..='\u{1F}' 194 | | '\u{7F}'..='\u{9F}' 195 | | '<' 196 | | '>' 197 | | '"' 198 | | ' ' 199 | | '{'..='}' 200 | | '\\' 201 | | '^' 202 | | '⟨' 203 | | '⟩' 204 | | '`' => return self.reset(), 205 | _ => (), 206 | } 207 | 208 | self.state = State::Url; 209 | 210 | UrlLocation::Url(self.len - self.illegal_end_chars, self.illegal_end_chars) 211 | } 212 | 213 | #[inline] 214 | fn is_illegal_at_end(c: char) -> bool { 215 | match c { 216 | '.' | ',' | ':' | ';' | '?' | '!' | '(' | '[' | '\'' => true, 217 | _ => false, 218 | } 219 | } 220 | 221 | #[inline] 222 | fn reset(&mut self) -> UrlLocation { 223 | *self = Self::default(); 224 | UrlLocation::Reset 225 | } 226 | } 227 | -------------------------------------------------------------------------------- /src/tests.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use crate::{SchemeState, UrlLocation, UrlLocator}; 4 | 5 | #[test] 6 | fn advance_schemes() { 7 | let state = SchemeState::RESET; 8 | 9 | let state = state.advance('h'); 10 | assert_eq!(state, SchemeState::H); 11 | let state = state.advance('x'); 12 | assert_eq!(state, SchemeState::INVALID); 13 | let state = state.advance(' '); 14 | assert_eq!(state, SchemeState::RESET); 15 | 16 | let state = state.advance('h'); 17 | assert_eq!(state, SchemeState::H); 18 | let state = state.advance('t'); 19 | assert_eq!(state, SchemeState::HT); 20 | let state = state.advance('T'); 21 | assert_eq!(state, SchemeState::HTT); 22 | let state = state.advance('p'); 23 | assert_eq!(state, SchemeState::HTTP); 24 | let state = state.advance('S'); 25 | assert_eq!(state, SchemeState::HTTPS); 26 | let state = state.advance(':'); 27 | assert_eq!(state, SchemeState::COMPLETE); 28 | } 29 | 30 | #[test] 31 | fn boundaries() { 32 | assert_eq!(max_len("before https://example.org after"), Some(19)); 33 | 34 | assert_eq!(position("before https://example.org after"), (7, 6)); 35 | assert_eq!(position("before https://example.org"), (7, 0)); 36 | assert_eq!(position("https://example.org after"), (0, 6)); 37 | assert_eq!(position("https://example.org/test'ing;"), (0, 1)); 38 | } 39 | 40 | #[test] 41 | fn exclude_end() { 42 | assert_eq!(max_len("https://example.org/test\u{00}ing"), Some(24)); 43 | assert_eq!(max_len("https://example.org/test\u{1F}ing"), Some(24)); 44 | assert_eq!(max_len("https://example.org/test\u{7F}ing"), Some(24)); 45 | assert_eq!(max_len("https://example.org/test\u{9F}ing"), Some(24)); 46 | assert_eq!(max_len("https://example.org/test\ting"), Some(24)); 47 | assert_eq!(max_len("https://example.org/test ing"), Some(24)); 48 | assert_eq!(max_len("https://example.org/test?ing"), Some(28)); 49 | assert_eq!(max_len("https://example.org/.,;:(!?"), Some(20)); 50 | assert_eq!(max_len("https://example.org/"), Some(20)); 51 | } 52 | 53 | #[test] 54 | fn exclude_start() { 55 | assert_eq!(max_len("complicated:https://example.org"), Some(19)); 56 | assert_eq!(max_len("\u{2502}https://example.org"), Some(19)); 57 | assert_eq!(max_len("test.https://example.org"), Some(19)); 58 | assert_eq!(max_len("https://sub.example.org"), Some(23)); 59 | assert_eq!(max_len(",https://example.org"), Some(19)); 60 | } 61 | 62 | #[test] 63 | fn url_unicode() { 64 | assert_eq!(max_len("https://xn--example-2b07f.org"), Some(29)); 65 | assert_eq!(max_len("https://example.org/\u{2008A}"), Some(21)); 66 | assert_eq!(max_len("https://example.org/\u{f17c}"), Some(21)); 67 | assert_eq!(max_len("https://üñîçøðé.com/ä"), Some(21)); 68 | } 69 | 70 | #[test] 71 | fn url_schemes() { 72 | assert_eq!(max_len("invalidscheme://example.org"), None); 73 | assert_eq!(max_len("makefile://example.org"), None); 74 | assert_eq!(max_len("mailto://example.org"), Some(20)); 75 | assert_eq!(max_len("gemini://example.org"), Some(20)); 76 | assert_eq!(max_len("gopher://example.org"), Some(20)); 77 | assert_eq!(max_len("https://example.org"), Some(19)); 78 | assert_eq!(max_len("http://example.org"), Some(18)); 79 | assert_eq!(max_len("news://example.org"), Some(18)); 80 | assert_eq!(max_len("file://example.org"), Some(18)); 81 | assert_eq!(max_len("git://example.org"), Some(17)); 82 | assert_eq!(max_len("ssh://example.org"), Some(17)); 83 | assert_eq!(max_len("ftp://example.org"), Some(17)); 84 | } 85 | 86 | #[test] 87 | fn url_matching_chars() { 88 | assert_eq!(max_len("(https://example.org/test(ing)/?)"), Some(30)); 89 | assert_eq!(max_len("(https://example.org/test(ing))"), Some(29)); 90 | assert_eq!(max_len("https://example.org/test(ing)"), Some(29)); 91 | assert_eq!(max_len("((https://example.org))"), Some(19)); 92 | assert_eq!(max_len(")https://example.org("), Some(19)); 93 | assert_eq!(max_len("https://example.org)"), Some(19)); 94 | assert_eq!(max_len("https://example.org("), Some(19)); 95 | 96 | assert_eq!(max_len("https://[2001:db8:a0b:12f0::1]:80"), Some(33)); 97 | assert_eq!(max_len("([(https://example.org/test(ing))])"), Some(29)); 98 | assert_eq!(max_len("https://example.org/]()"), Some(20)); 99 | assert_eq!(max_len("[https://example.org]"), Some(19)); 100 | 101 | assert_eq!(max_len("https://example.org/tester's_dream"), Some(34)); 102 | assert_eq!(max_len("'https://example.org/test'ing'/'"), Some(30)); 103 | assert_eq!(max_len("https://example.org/test'ing'/"), Some(30)); 104 | assert_eq!(max_len("'https://example.org'"), Some(19)); 105 | 106 | assert_eq!(max_len("\"https://example.org\""), Some(19)); 107 | assert_eq!(max_len("\"https://example.org"), Some(19)); 108 | 109 | assert_eq!(max_len("⟨https://example.org⟩"), Some(19)); 110 | assert_eq!(max_len("⟩https://example.org⟨"), Some(19)); 111 | } 112 | 113 | #[test] 114 | fn markdown() { 115 | let input = "[test](https://example.org)"; 116 | let mut result_map = HashMap::new(); 117 | result_map.insert(25, UrlLocation::Url(19, 0)); 118 | result_map.insert(26, UrlLocation::Reset); 119 | exact_url_match(input, result_map); 120 | 121 | let input = "[https://example.org](test)"; 122 | let mut result_map = HashMap::new(); 123 | result_map.insert(19, UrlLocation::Url(19, 0)); 124 | result_map.insert(20, UrlLocation::Reset); 125 | exact_url_match(input, result_map); 126 | 127 | let input = "[https://example.org](https://example.org/longer)"; 128 | let mut result_map = HashMap::new(); 129 | result_map.insert(19, UrlLocation::Url(19, 0)); 130 | result_map.insert(20, UrlLocation::Reset); 131 | result_map.insert(47, UrlLocation::Url(26, 0)); 132 | result_map.insert(48, UrlLocation::Reset); 133 | exact_url_match(input, result_map); 134 | } 135 | 136 | #[test] 137 | fn file() { 138 | assert_eq!(max_len("file:///test.rs:13:9"), Some(20)); 139 | assert_eq!(max_len("file:///test"), Some(12)); 140 | assert_eq!(max_len("file://test"), Some(11)); 141 | assert_eq!(max_len("file:/test"), Some(10)); 142 | assert_eq!(max_len("file:test"), Some(9)); 143 | } 144 | 145 | #[test] 146 | fn multiple_urls() { 147 | let input = "https://example.org https://example.com/test"; 148 | let mut result_map = HashMap::new(); 149 | result_map.insert(18, UrlLocation::Url(19, 0)); 150 | result_map.insert(19, UrlLocation::Reset); 151 | result_map.insert(43, UrlLocation::Url(24, 0)); 152 | exact_url_match(input, result_map); 153 | } 154 | 155 | #[test] 156 | fn parser_states() { 157 | let input = " https://example.org test ;"; 158 | let mut result_map = HashMap::new(); 159 | result_map.insert(0, UrlLocation::Reset); 160 | result_map.insert(3, UrlLocation::Scheme); 161 | result_map.insert(8, UrlLocation::Scheme); 162 | result_map.insert(9, UrlLocation::Url(7, 0)); 163 | result_map.insert(21, UrlLocation::Url(19, 0)); 164 | result_map.insert(22, UrlLocation::Reset); 165 | result_map.insert(24, UrlLocation::Scheme); 166 | result_map.insert(27, UrlLocation::Reset); 167 | exact_url_match(input, result_map); 168 | } 169 | 170 | fn exact_url_match(input: &str, result_map: HashMap) { 171 | let mut locator = UrlLocator::new(); 172 | 173 | for (i, c) in input.chars().enumerate() { 174 | let result = locator.advance(c); 175 | 176 | if let Some(expected) = result_map.get(&i) { 177 | assert_eq!(&result, expected); 178 | } 179 | } 180 | } 181 | 182 | fn max_len(input: &str) -> Option { 183 | let mut locator = UrlLocator::new(); 184 | let mut url_len = None; 185 | 186 | for c in input.chars() { 187 | if let UrlLocation::Url(len, _end_offset) = locator.advance(c) { 188 | url_len = Some(len); 189 | } 190 | } 191 | 192 | url_len 193 | } 194 | 195 | fn position(input: &str) -> (usize, usize) { 196 | let mut locator = UrlLocator::new(); 197 | let mut url = None; 198 | 199 | for (i, c) in input.chars().enumerate() { 200 | if let UrlLocation::Url(len, end_offset) = locator.advance(c) { 201 | url = Some((i + 1 - end_offset as usize, len as usize)); 202 | } 203 | } 204 | 205 | url.map(|(end, len)| (end - len, input.len() - end)).unwrap() 206 | } 207 | 208 | #[cfg(all(test, feature = "nightly"))] 209 | mod bench { 210 | extern crate test; 211 | 212 | use crate::{UrlLocation, UrlLocator}; 213 | 214 | #[bench] 215 | fn library(b: &mut test::Bencher) { 216 | let mut input = String::new(); 217 | for i in 0..10_000 { 218 | if i % 1_000 == 0 { 219 | input.push_str("https://example.org"); 220 | } else { 221 | input.push_str(" test "); 222 | } 223 | } 224 | 225 | b.iter(|| { 226 | let mut locator = UrlLocator::new(); 227 | for c in input.chars() { 228 | if let UrlLocation::Url(len, end_offset) = locator.advance(c) { 229 | test::black_box((len, end_offset)); 230 | } 231 | } 232 | }); 233 | } 234 | 235 | #[bench] 236 | fn lower_bound(b: &mut test::Bencher) { 237 | let mut input = String::new(); 238 | for i in 0..10_000 { 239 | if i % 1_000 == 0 { 240 | input.push_str("https://example.org"); 241 | } else { 242 | input.push_str(" test "); 243 | } 244 | } 245 | 246 | b.iter(|| { 247 | for c in input.chars().rev() { 248 | test::black_box(c); 249 | } 250 | }); 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2020 Christian Duerr, The Alacritty Project Contributors 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------