├── .gitignore ├── .travis.yml ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── examples └── test.rs ├── src ├── attribute.rs ├── base.rs ├── lib.rs └── tag.rs └── tests ├── lib.rs └── parser.rs /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | *.o 3 | *.so 4 | *.rlib 5 | *.dll 6 | 7 | # Executables 8 | *.exe 9 | 10 | # Generated by Cargo 11 | /target/ 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | script: 3 | - cargo test 4 | sudo: false 5 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | [root] 2 | name = "htmlstream" 3 | version = "0.1.2" 4 | 5 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "htmlstream" 3 | version = "0.1.3" 4 | authors = ["Zongmin Lei "] 5 | readme = "README.md" 6 | 7 | repository = "https://leizongmin.github.io/htmlstream-rust/" 8 | homepage = "https://leizongmin.github.io/htmlstream-rust/" 9 | documentation = "https://leizongmin.github.io/htmlstream-rust/" 10 | 11 | description = "Lightweight HTML parser for rust" 12 | license = "MIT" 13 | 14 | [lib] 15 | name = "htmlstream" 16 | path = "src/lib.rs" 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Zongmin Lei 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without 
restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # htmlstream-rust 2 | Lightweight HTML parser 3 | 4 | [![Build Status](https://travis-ci.org/leizongmin/htmlstream-rust.svg?branch=master)](https://travis-ci.org/leizongmin/htmlstream-rust) 5 | [![](http://meritbadge.herokuapp.com/htmlstream)](https://crates.io/crates/htmlstream) 6 | [![](https://img.shields.io/crates/d/htmlstream.svg)](https://crates.io/crates/htmlstream) 7 | [![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE) 8 | 9 | ### Documents 10 | 11 | https://leizongmin.github.io/htmlstream-rust/ 12 | 13 | 14 | ### Dependencies 15 | 16 | Add the following to the `Cargo.toml` file: 17 | 18 | ```toml 19 | [dependencies.htmlstream] 20 | version = "*" 21 | ``` 22 | 23 | 24 | ### Examples 25 | 26 | ```rust 27 | extern crate htmlstream; 28 | 29 | let html = "this is a test: <a href=\"http://rust-lang.org\">The Rust Programing Language</a>"; 30 | for (pos, tag) in htmlstream::tag_iter(html) { 31
| println!("{:?} {:?}", pos, tag); 32 | for (pos, attr) in htmlstream::attr_iter(&tag.attributes) { 33 | println!(" {:?} {:?}", pos, attr); 34 | } 35 | } 36 | ``` 37 | 38 | Output: 39 | 40 | ``` 41 | Position { start: 0, end: 16 } HTMLTag { name: "", html: "this is a test: ", attributes: "", state: Text } 42 | Position { start: 16, end: 47 } HTMLTag { name: "a", html: "<a href=\"http://rust-lang.org\">", attributes: "href=\"http://rust-lang.org\"", state: Opening } 43 | Position { start: 0, end: 27 } HTMLTagAttribute { name: "href", value: "http://rust-lang.org" } 44 | Position { start: 47, end: 75 } HTMLTag { name: "", html: "The Rust Programing Language", attributes: "", state: Text } 45 | Position { start: 75, end: 79 } HTMLTag { name: "a", html: "</a>", attributes: "", state: Closing } 46 | ``` 47 | 48 | ## License 49 | 50 | ``` 51 | The MIT License (MIT) 52 | 53 | Copyright (c) 2015 Zongmin Lei 54 | 55 | Permission is hereby granted, free of charge, to any person obtaining a copy 56 | of this software and associated documentation files (the "Software"), to deal 57 | in the Software without restriction, including without limitation the rights 58 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 59 | copies of the Software, and to permit persons to whom the Software is 60 | furnished to do so, subject to the following conditions: 61 | 62 | The above copyright notice and this permission notice shall be included in all 63 | copies or substantial portions of the Software. 64 | 65 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 66 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 67 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 68 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 69 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 70 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 71 | SOFTWARE.
// htmlstream: a lightweight, streaming HTML tokenizer.
//
// This is the crate source laid out as a single file. Every `mod` below holds
// the contents of one file of the original tree (`src/base.rs`,
// `src/attribute.rs`, `src/tag.rs`) and the re-exports are the ones declared in
// `src/lib.rs`, so the public paths (`htmlstream::tag_iter`,
// `htmlstream::attr_iter`, `htmlstream::Position`, ...) are unchanged.
// The crate name and type are taken from the `[lib]` section of `Cargo.toml`.
// The runnable example from `examples/test.rs` is kept as the documentation
// example of `tag_iter`.

// ---- src/lib.rs ----

pub use self::attribute::{attr_iter, HTMLTagAttributeIterator};
pub use self::base::{HTMLTag, HTMLTagAttribute, HTMLTagState, Position};
pub use self::tag::{tag_iter, HTMLTagIterator};

// ---- src/base.rs ----

mod base {
    /// A half-open byte range `start..end` into the string that was parsed.
    ///
    /// Positions yielded by `tag_iter` index the HTML source; positions yielded
    /// by `attr_iter` index the attribute string handed to it.
    ///
    /// # Examples
    /// ```
    /// # use self::htmlstream::*;
    /// let pos = Position {
    ///     start: 0,
    ///     end: 10,
    /// };
    /// ```
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub struct Position {
        pub start: usize,
        pub end: usize,
    }

    /// The tag state
    ///
    /// + `Text`: not a HTML tag, e.g. `hello`
    /// + `Opening`: an opening tag, e.g. `<a>`
    /// + `Closing`: a closing tag, e.g. `</a>`
    /// + `SelfClosing`: a self-closing tag, e.g. `<br />`
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum HTMLTagState {
        Text,
        Opening,
        Closing,
        SelfClosing,
    }

    /// One item produced by `tag_iter`: either a tag or a run of text.
    ///
    /// For text items `name` and `attributes` are empty and `html` is the text.
    /// For tags `html` is the complete tag source and `attributes` is the raw,
    /// still unparsed attribute string (feed it to `attr_iter`).
    ///
    /// # Examples
    ///
    /// ```
    /// # use self::htmlstream::*;
    /// let tag = HTMLTag {
    ///     name: "a".to_string(),
    ///     html: "<a href=\"#\">".to_string(),
    ///     attributes: "href=\"#\"".to_string(),
    ///     state: HTMLTagState::Opening,
    /// };
    /// ```
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub struct HTMLTag {
        pub name: String,
        pub html: String,
        pub attributes: String,
        pub state: HTMLTagState,
    }

    /// The tag attribute
    ///
    /// `value` is empty for attributes written without a value.
    ///
    /// # Examples
    ///
    /// ```
    /// # use self::htmlstream::*;
    /// let attr = HTMLTagAttribute {
    ///     name: "href".to_string(),
    ///     value: "#".to_string(),
    /// };
    /// ```
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub struct HTMLTagAttribute {
        pub name: String,
        pub value: String,
    }
}

// ---- src/attribute.rs ----

mod attribute {
    use super::base::{HTMLTagAttribute, Position};

    /// Iterator over the attributes of a raw attribute string such as
    /// `href="#" disabled`. Created by [`attr_iter`].
    ///
    /// The scanner works on bytes; every slice boundary it uses sits next to an
    /// ASCII delimiter (blank, `=`, quote) or at either end of the input, so
    /// slicing `html` never splits a multi-byte character.
    #[derive(Debug)]
    pub struct HTMLTagAttributeIterator<'a> {
        /// The attribute source being scanned.
        pub html: &'a str,

        /// Inside a quoted value; `quote_char` is the quote that closes it.
        is_quote_start: bool,
        /// An attribute has been started and not yet emitted.
        is_attribute_start: bool,
        /// The `=` after the name has been seen; the value is being scanned.
        is_get_attribute_name: bool,

        quote_char: u8,
        /// Byte offset where the current attribute starts.
        last_index: usize,
        /// Byte offset of the next byte to read.
        current_index: usize,
        /// Byte offset of the first byte after the `=`.
        value_start_index: usize,

        html_bytes: &'a [u8],
        html_len: usize,
    }

    /// Attributes are separated by any byte `<= b' '` (space and control bytes).
    fn is_blank(c: u8) -> bool {
        c <= b' '
    }

    impl<'a> HTMLTagAttributeIterator<'a> {
        fn new(html: &'a str) -> HTMLTagAttributeIterator<'a> {
            HTMLTagAttributeIterator {
                html: html,
                is_quote_start: false,
                is_attribute_start: false,
                is_get_attribute_name: false,
                quote_char: 0,
                last_index: 0,
                current_index: 0,
                value_start_index: 0,
                html_bytes: html.as_bytes(),
                html_len: html.len(),
            }
        }

        /// Resets the per-attribute state after an item has been emitted.
        fn finished_item(&mut self) {
            self.is_attribute_start = false;
            self.is_quote_start = false;
            self.is_get_attribute_name = false;
            self.last_index = self.current_index;
        }

        /// Builds the item for the attribute that started at `last_index`.
        ///
        /// The name is `last_index..name_end`, the value is
        /// `value_start..value_end` and the reported position is
        /// `last_index..end`.
        fn take_item(
            &mut self,
            name_end: usize,
            value_start: usize,
            value_end: usize,
            end: usize,
        ) -> (Position, HTMLTagAttribute) {
            let item = (
                Position { start: self.last_index, end: end },
                HTMLTagAttribute {
                    name: self.html[self.last_index..name_end].to_string(),
                    value: self.html[value_start..value_end].to_string(),
                },
            );
            self.finished_item();
            item
        }
    }

    impl<'a> Iterator for HTMLTagAttributeIterator<'a> {
        type Item = (Position, HTMLTagAttribute);

        fn next(&mut self) -> Option<(Position, HTMLTagAttribute)> {
            while self.current_index < self.html_len {
                let index = self.current_index;
                let c = self.html_bytes[index];
                self.current_index += 1;

                // Between attributes: skip blanks, the first other byte starts a name.
                if !self.is_attribute_start {
                    if !is_blank(c) {
                        self.is_attribute_start = true;
                        self.is_get_attribute_name = false;
                        self.is_quote_start = false;
                        self.last_index = index;
                    }
                    continue;
                }

                // Inside the name: `=` switches to the value, a blank ends a
                // value-less attribute.
                if !self.is_get_attribute_name {
                    if c == b'=' {
                        self.value_start_index = self.current_index;
                        self.is_get_attribute_name = true;
                    } else if is_blank(c) {
                        return Some(self.take_item(index, index, index, index));
                    }
                    continue;
                }

                let value_start = self.value_start_index;

                // Inside a quoted value: only the matching quote ends it.
                if self.is_quote_start {
                    if c == self.quote_char {
                        let end = self.current_index;
                        return Some(self.take_item(value_start - 1, value_start + 1, index, end));
                    }
                    continue;
                }

                // A quote opens a quoted value only as the very first byte of the
                // value. A quote anywhere else is ordinary value content; treating
                // it as an opener would make the `value_start + 1` slice below
                // wrong (and possibly not a char boundary).
                if (c == b'\'' || c == b'"') && index == value_start {
                    self.is_quote_start = true;
                    self.quote_char = c;
                    continue;
                }

                // An unquoted value runs up to the next blank.
                if is_blank(c) {
                    return Some(self.take_item(value_start - 1, value_start, index, index));
                }
            }

            // End of input: emit the attribute that is still open, if any.
            // Keying this on `is_attribute_start` (instead of comparing indices)
            // keeps one-byte attributes and drops trailing whitespace.
            if !self.is_attribute_start {
                return None;
            }
            let end = self.html_len;
            if self.is_get_attribute_name {
                let value_start = self.value_start_index;
                // An unterminated quoted value keeps everything after the quote.
                let content_start = if self.is_quote_start { value_start + 1 } else { value_start };
                Some(self.take_item(value_start - 1, content_start, end, end))
            } else {
                Some(self.take_item(end, end, end, end))
            }
        }
    }

    /// Return a HTMLTagAttribute Iterator
    ///
    /// `html` is a raw attribute string, typically `HTMLTag::attributes`.
    pub fn attr_iter<'a>(html: &'a str) -> HTMLTagAttributeIterator<'a> {
        HTMLTagAttributeIterator::new(html)
    }
}

// ---- src/tag.rs ----

mod tag {
    use super::base::{HTMLTag, HTMLTagState, Position};

    /// Iterator over the tags and text runs of an HTML string.
    /// Created by [`tag_iter`].
    ///
    /// The scanner works on bytes; every slice boundary it uses sits next to an
    /// ASCII delimiter (`<`, `>`, `/`, blank) or at either end of the input, so
    /// slicing `html` never splits a multi-byte character.
    #[derive(Debug)]
    pub struct HTMLTagIterator<'a> {
        /// The HTML source being scanned.
        pub html: &'a str,

        /// A `<` has been seen and its `>` has not.
        is_tag_start: bool,
        /// Inside a quoted attribute value; `>` does not end the tag there.
        is_quote_start: bool,
        /// The tag name has been delimited.
        is_get_tag_name: bool,
        /// The tag started with `</`.
        is_closing_tag: bool,
        current_tag_name: &'a str,

        quote_char: u8,
        /// The byte preceding the one currently examined.
        last_char: u8,
        /// Byte offset where the pending item (text run or tag) starts.
        last_index: usize,
        /// Byte offset of the next byte to read.
        current_index: usize,
        /// Byte offset of the first byte after the tag name delimiter.
        attributes_start_index: usize,

        html_bytes: &'a [u8],
        html_len: usize,
    }

    /// Any byte `<= b' '` (space and control bytes) ends a tag name.
    fn is_blank(c: u8) -> bool {
        c <= b' '
    }

    impl<'a> HTMLTagIterator<'a> {
        fn new(html: &'a str) -> HTMLTagIterator<'a> {
            HTMLTagIterator {
                html: html,
                is_tag_start: false,
                is_quote_start: false,
                is_get_tag_name: false,
                is_closing_tag: false,
                current_tag_name: "",
                quote_char: 0,
                last_char: 0,
                last_index: 0,
                current_index: 0,
                attributes_start_index: 0,
                html_bytes: html.as_bytes(),
                html_len: html.len(),
            }
        }

        /// Builds a `Text` item covering `start..end`.
        fn text_item(&self, start: usize, end: usize) -> (Position, HTMLTag) {
            (
                Position { start: start, end: end },
                HTMLTag {
                    name: String::new(),
                    html: self.html[start..end].to_string(),
                    attributes: String::new(),
                    state: HTMLTagState::Text,
                },
            )
        }

        /// Builds the item for the tag whose `>` was just consumed and moves
        /// the scanner back to text mode.
        fn finish_tag(&mut self) -> (Position, HTMLTag) {
            let start = self.last_index;
            let end = self.current_index;

            let state = if self.is_closing_tag {
                HTMLTagState::Closing
            } else if self.last_char == b'/' {
                HTMLTagState::SelfClosing
            } else {
                HTMLTagState::Opening
            };

            // The attribute string stops before `>`, or before `/>` when the
            // tag is self-closing.
            let attributes_end = if state == HTMLTagState::SelfClosing { end - 2 } else { end - 1 };
            let attributes = if self.is_get_tag_name && self.attributes_start_index < end - 1 {
                &self.html[self.attributes_start_index..attributes_end]
            } else {
                ""
            };

            let item = (
                Position { start: start, end: end },
                HTMLTag {
                    name: self.current_tag_name.to_string(),
                    html: self.html[start..end].to_string(),
                    attributes: attributes.to_string(),
                    state: state,
                },
            );
            self.last_index = end;
            self.is_tag_start = false;
            item
        }
    }

    impl<'a> Iterator for HTMLTagIterator<'a> {
        type Item = (Position, HTMLTag);

        fn next(&mut self) -> Option<(Position, HTMLTag)> {
            while self.current_index < self.html_len {
                let index = self.current_index;
                let c = self.html_bytes[index];
                if index > 0 {
                    self.last_char = self.html_bytes[index - 1];
                }
                self.current_index += 1;

                // Text mode: a `<` starts a tag and flushes the pending text.
                if !self.is_tag_start {
                    if c == b'<' {
                        let text_start = self.last_index;
                        self.is_tag_start = true;
                        self.is_get_tag_name = false;
                        self.is_closing_tag = false;
                        self.is_quote_start = false;
                        self.last_index = index;
                        if text_start < index {
                            return Some(self.text_item(text_start, index));
                        }
                    }
                    continue;
                }

                // Tag name: `/` right after `<` marks a closing tag, otherwise the
                // name runs up to the first blank, `/`, `>` or `<`.
                if !self.is_get_tag_name {
                    if c == b'/' && index == self.last_index + 1 {
                        self.is_closing_tag = true;
                    } else if is_blank(c) || c == b'/' || c == b'>' || c == b'<' {
                        let name_start = self.last_index + if self.is_closing_tag { 2 } else { 1 };
                        self.current_tag_name = &self.html[name_start..index];
                        self.attributes_start_index = self.current_index;
                        self.is_get_tag_name = true;
                    }
                }

                // Inside a quoted attribute value only the matching quote matters.
                if self.is_quote_start {
                    if c == self.quote_char {
                        self.is_quote_start = false;
                    }
                    continue;
                }

                // A quote opens a quoted value only directly after `=`.
                if c == b'\'' || c == b'"' {
                    if self.last_char == b'=' {
                        self.is_quote_start = true;
                        self.quote_char = c;
                    }
                    continue;
                }

                if c == b'>' {
                    return Some(self.finish_tag());
                }
            }

            // End of input: whatever has not been emitted yet (trailing text or
            // an unterminated tag) is returned as one `Text` item. Comparing
            // against `html_len` keeps one-byte remainders.
            if self.last_index < self.html_len {
                let start = self.last_index;
                let end = self.html_len;
                self.last_index = end;
                return Some(self.text_item(start, end));
            }

            None
        }
    }

    /// Return a HTMLTag Iterator
    ///
    /// The iterator yields every tag and every run of text of `html`, in
    /// order, together with its byte position.
    ///
    /// # Examples
    ///
    /// ```rust
    /// extern crate htmlstream;
    ///
    /// fn main() {
    ///     let html = "this is a test: <a href=\"http://rust-lang.org\">The Rust Programing Language</a>";
    ///     for (pos, tag) in htmlstream::tag_iter(html) {
    ///         println!("{:?} {:?}", pos, tag);
    ///         for (pos, attr) in htmlstream::attr_iter(&tag.attributes) {
    ///             println!("    {:?} {:?}", pos, attr);
    ///         }
    ///     }
    /// }
    /// ```
    ///
    /// Output:
    ///
    /// ```text
    /// Position { start: 0, end: 16 } HTMLTag { name: "", html: "this is a test: ", attributes: "", state: Text }
    /// Position { start: 16, end: 47 } HTMLTag { name: "a", html: "<a href=\"http://rust-lang.org\">", attributes: "href=\"http://rust-lang.org\"", state: Opening }
    ///     Position { start: 0, end: 27 } HTMLTagAttribute { name: "href", value: "http://rust-lang.org" }
    /// Position { start: 47, end: 75 } HTMLTag { name: "", html: "The Rust Programing Language", attributes: "", state: Text }
    /// Position { start: 75, end: 79 } HTMLTag { name: "a", html: "</a>", attributes: "", state: Closing }
    /// ```
    pub fn tag_iter<'a>(html: &'a str) -> HTMLTagIterator<'a> {
        HTMLTagIterator::new(html)
    }
}
-------------------------------------------------------------------------------- 1 | extern crate htmlstream; 2 | 3 | #[cfg(test)] 4 | mod parser; 5 | -------------------------------------------------------------------------------- /tests/parser.rs: -------------------------------------------------------------------------------- 1 | extern crate htmlstream; 2 | use htmlstream::*; 3 | use htmlstream::HTMLTagState::*; 4 | 5 | #[test] 6 | fn test_parse_rest_text() { 7 | let html = "this is a test: The Rust Programing Language\r\n\r\n"; 8 | let mut list: Vec<(Position, HTMLTag)> = vec![]; 9 | for (pos, tag) in tag_iter(&html) { 10 | list.push((pos, tag)); 11 | } 12 | assert_eq!(list, [ 13 | (Position { start: 0, end: 16 }, HTMLTag { name: "".to_string(), html: "this is a test: ".to_string(), attributes: "".to_string(), state: Text }), 14 | (Position { start: 16, end: 56 }, HTMLTag { name: "a".to_string(), html: "".to_string(), attributes: "href=\"http://rust-lang.org\" disabled".to_string(), state: Opening }), 15 | (Position { start: 56, end: 84 }, HTMLTag { name: "".to_string(), html: "The Rust Programing Language".to_string(), attributes: "".to_string(), state: Text }), 16 | (Position { start: 84, end: 88 }, HTMLTag { name: "a".to_string(), html: "".to_string(), attributes: "".to_string(), state: Closing }), 17 | (Position { start: 88, end: 92 }, HTMLTag { name: "".to_string(), html: "\r\n\r\n".to_string(), attributes: "".to_string(), state: Text }) 18 | ]); 19 | } 20 | 21 | #[test] 22 | fn test_parse_tag() { 23 | let html = "this is a test: The Rust Programing Language"; 24 | let mut list: Vec<(Position, HTMLTag)> = vec![]; 25 | for (pos, tag) in tag_iter(&html) { 26 | list.push((pos, tag)); 27 | } 28 | assert_eq!(list, [ 29 | (Position { start: 0, end: 16 }, HTMLTag { name: "".to_string(), html: "this is a test: ".to_string(), attributes: "".to_string(), state: Text }), 30 | (Position { start: 16, end: 56 }, HTMLTag { name: "a".to_string(), html: "".to_string(), 
attributes: "href=\"http://rust-lang.org\" disabled".to_string(), state: Opening }), 31 | (Position { start: 56, end: 84 }, HTMLTag { name: "".to_string(), html: "The Rust Programing Language".to_string(), attributes: "".to_string(), state: Text }), 32 | (Position { start: 84, end: 88 }, HTMLTag { name: "a".to_string(), html: "".to_string(), attributes: "".to_string(), state: Closing }) 33 | ]); 34 | } 35 | 36 | #[test] 37 | fn test_parse_attributes() { 38 | let html = "this is a test: The Rust Programing Language"; 39 | let mut list: Vec<(Position, HTMLTagAttribute)> = vec![]; 40 | for (_, tag) in tag_iter(&html) { 41 | for (pos, attr) in attr_iter(&tag.attributes) { 42 | list.push((pos, attr)); 43 | } 44 | } 45 | assert_eq!(list, [ 46 | (Position { start: 0, end: 27 }, HTMLTagAttribute { name: "href".to_string(), value: "http://rust-lang.org".to_string() }), 47 | (Position { start: 28, end: 36 }, HTMLTagAttribute { name: "disabled".to_string(), value: "".to_string() }) 48 | ]); 49 | } 50 | 51 | #[test] 52 | fn test_parse_attributes_2() { 53 | let html = "a=123\"bbb bbb=\"456\"ccc='789' ddd 54 | fff=ggg hhhh"; 55 | let mut list: Vec<(Position, HTMLTagAttribute)> = vec![]; 56 | for (pos, attr) in attr_iter(html) { 57 | list.push((pos, attr)); 58 | } 59 | assert_eq!(list, [ 60 | (Position { start: 0, end: 9 }, HTMLTagAttribute { name: "a".to_string(), value: "123\"bbb".to_string() }), 61 | (Position { start: 10, end: 19 }, HTMLTagAttribute { name: "bbb".to_string(), value: "456".to_string() }), 62 | (Position { start: 19, end: 28 }, HTMLTagAttribute { name: "ccc".to_string(), value: "789".to_string() }), 63 | (Position { start: 29, end: 32 }, HTMLTagAttribute { name: "ddd".to_string(), value: "".to_string() }), 64 | (Position { start: 33, end: 40 }, HTMLTagAttribute { name: "fff".to_string(), value: "ggg".to_string() }), 65 | (Position { start: 41, end: 45 }, HTMLTagAttribute { name: "hhhh".to_string(), value: "".to_string() }) 66 | ]); 67 | } 68 | 69 | #[test] 70 | 
fn test_parse_html_1() { 71 | let html = "hello ok"; 72 | let mut list: Vec<(Position, HTMLTag)> = vec![]; 73 | for (pos, tag) in tag_iter(&html) { 74 | list.push((pos, tag)); 75 | } 76 | assert_eq!(list, [ 77 | (Position { start: 0, end: 52 }, HTMLTag { name: "a".to_string(), html: "".to_string(), attributes: "href=\"javascript:alert(/xss/)\" title=ok disabled".to_string(), state: Opening }), 78 | (Position { start: 52, end: 57 }, HTMLTag { name: "".to_string(), html: "hello".to_string(), attributes: "".to_string(), state: Text }), 79 | (Position { start: 57, end: 61 }, HTMLTag { name: "a".to_string(), html: "".to_string(), attributes: "".to_string(), state: Closing }), 80 | (Position { start: 61, end: 62 }, HTMLTag { name: "".to_string(), html: " ".to_string(), attributes: "".to_string(), state: Text }), 81 | (Position { start: 62, end: 65 }, HTMLTag { name: "b".to_string(), html: "".to_string(), attributes: "".to_string(), state: Opening }), 82 | (Position { start: 65, end: 67 }, HTMLTag { name: "".to_string(), html: "ok".to_string(), attributes: "".to_string(), state: Text }), 83 | (Position { start: 67, end: 71 }, HTMLTag { name: "b".to_string(), html: "".to_string(), attributes: "".to_string(), state: Closing }) 84 | ]); 85 | } 86 | --------------------------------------------------------------------------------