├── .gitignore ├── .travis.yml ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── src ├── document │ └── mod.rs ├── lib.rs └── selector.rs └── tests ├── document_test.rs ├── element_test.rs ├── fixtures └── sample.xml ├── lib.rs ├── querying_by_selectors_test.rs ├── selector_test.rs └── xml_document_test.rs /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | *.o 3 | *.so 4 | *.rlib 5 | *.dll 6 | 7 | # Executables 8 | *.exe 9 | 10 | # Generated by Cargo 11 | /target/ 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | 3 | rust: 4 | - stable 5 | - beta 6 | - nightly 7 | 8 | matrix: 9 | allow_failures: 10 | - rust: nightly 11 | - rust: beta 12 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "bitflags" 3 | version = "1.0.1" 4 | source = "registry+https://github.com/rust-lang/crates.io-index" 5 | 6 | [[package]] 7 | name = "rquery" 8 | version = "0.4.1" 9 | dependencies = [ 10 | "xml-rs 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", 11 | ] 12 | 13 | [[package]] 14 | name = "xml-rs" 15 | version = "0.7.0" 16 | source = "registry+https://github.com/rust-lang/crates.io-index" 17 | dependencies = [ 18 | "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", 19 | ] 20 | 21 | [metadata] 22 | "checksum bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3c30d3802dfb7281680d6285f2ccdaa8c2d8fee41f93805dba5c4cf50dc23cf" 23 | "checksum xml-rs 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3c1cb601d29fe2c2ac60a2b2e5e293994d87a1f6fa9687a31a15270f909be9c2" 24 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rquery" 3 | version = "0.4.1" 4 | authors = ["Bryan Yap "] 5 | description = "A simple implementation of a HTML/XML DOM tree which allows simple operations like querying by CSS selectors, makes dealing with XML files less painful." 6 | documentation = "https://yggie.github.io/rquery/rquery" 7 | homepage = "https://github.com/yggie/rquery" 8 | repository = "https://github.com/yggie/rquery" 9 | readme = "README.md" 10 | keywords = ["xml", "DOM", "jquery"] 11 | license = "MIT" 12 | 13 | [[test]] 14 | name = "rquery-tests" 15 | path = "tests/lib.rs" 16 | 17 | [dependencies] 18 | xml-rs = "0.7" 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Bryan Yap 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rquery 2 | 3 | [![Build Status](https://travis-ci.org/yggie/rquery.svg?branch=master)](https://travis-ci.org/yggie/rquery) 4 | [![docs](https://img.shields.io/badge/documentation-rust_beta-6495ed.svg?style=flat-square)](https://yggie.github.io/rquery/rquery) 5 | [![crates.io](https://img.shields.io/crates/v/rquery.svg)](https://crates.io/crates/rquery) 6 | [![license](https://img.shields.io/crates/l/rquery.svg)](/LICENSE) 7 | 8 | A simple implementation of an HTML/XML DOM tree which allows simple operations 9 | like querying by CSS selectors, making dealing with XML files less painful. 
10 | 11 | ## Example 12 | 13 | ```rust 14 | extern crate rquery; 15 | 16 | use rquery::Document; 17 | 18 | fn main() { 19 | let document = Document::new_from_xml_file("tests/fixtures/sample.xml").unwrap(); 20 | 21 | let title = document.select("title").unwrap(); 22 | assert_eq!(title.text(), "Sample Document"); 23 | assert_eq!(title.attr("ref").unwrap(), "main-title"); 24 | 25 | let item_count = document.select_all("item").unwrap().count(); 26 | assert_eq!(item_count, 2); 27 | 28 | let item_titles = document.select_all("item > title").unwrap() 29 | .map(|element| element.text().clone()) 30 | .collect::>() 31 | .join(", "); 32 | assert_eq!(item_titles, "Another Sample, Other Sample"); 33 | } 34 | ``` 35 | -------------------------------------------------------------------------------- /src/document/mod.rs: -------------------------------------------------------------------------------- 1 | use std::io::{ BufReader, Read }; 2 | use std::fs::File; 3 | use std::rc::Rc; 4 | use std::path::Path; 5 | use std::collections::HashMap; 6 | 7 | use xml::reader::{ EventReader, XmlEvent }; 8 | 9 | use super::{ Element, SelectError }; 10 | 11 | /// The various errors that can happen when creating a document. 12 | #[derive(Clone, Debug)] 13 | pub enum DocumentError { 14 | UnableToOpenFile(String), 15 | ParseError(String), 16 | } 17 | 18 | /// The DOM tree representation of the parsed document. 19 | #[derive(Clone, Debug)] 20 | pub struct Document { 21 | root: Element, 22 | } 23 | 24 | impl Document { 25 | /// Creates a new document from a byte stream. 26 | pub fn new_from_xml_stream(stream: R) -> Result { 27 | let event_reader = EventReader::new(stream); 28 | 29 | let mut elements: Vec = Vec::new(); 30 | let mut next_node_index = 1; 31 | 32 | for event in event_reader { 33 | match event { 34 | Ok(XmlEvent::StartElement { ref name, ref attributes, .. 
}) => {
                    // Attributes are keyed by their local name only (any
                    // namespace prefix is dropped); if two attributes share a
                    // local name the later one wins.
                    let attr_map: HashMap<String, String> = attributes.iter()
                        .map(|attribute| {
                            (attribute.name.local_name.clone(), attribute.value.clone())
                        })
                        .collect();

                    // The element stays on the stack of open elements until
                    // its matching end tag is seen.
                    elements.push(Element {
                        node_index: next_node_index,
                        children: None,
                        tag_name: name.local_name.clone(),
                        attr_map: attr_map,
                        text: String::new(),
                    });
                    next_node_index += 1;
                },

                // An end tag only closes the innermost open element, and only
                // when the names agree. With no open element the guard is
                // simply false (instead of panicking) and the event falls
                // through to the catch-all arm below.
                Ok(XmlEvent::EndElement { ref name, .. })
                    if elements.last().map_or(false, |open| open.tag_name() == name.local_name) =>
                {
                    let child_node = elements.pop()
                        .expect("the match guard guarantees an open element");

                    if let Some(parent) = elements.last_mut() {
                        // Attach the finished element to its parent, creating
                        // the child list on first use.
                        if let Some(ref mut children) = parent.children {
                            children.push(Rc::new(child_node));
                        } else {
                            parent.children = Some(vec![Rc::new(child_node)]);
                        }
                    } else {
                        // The outermost element was just closed: wrap it in
                        // the synthetic `[root]` node (index 0) and stop
                        // reading.
                        return Ok(Document {
                            root: Element {
                                node_index: 0,
                                tag_name: "[root]".to_string(),
                                children: Some(vec![Rc::new(child_node)]),
                                attr_map: HashMap::new(),
                                text: String::new(),
                            }
                        });
                    }
                },

                // Character data and whitespace are appended to the innermost
                // open element. Text that arrives while no element is open is
                // ignored rather than causing a panic.
                Ok(XmlEvent::Characters(string)) | Ok(XmlEvent::Whitespace(string)) => {
                    if let Some(element) = elements.last_mut() {
                        element.text.push_str(&string);
                    }
                },

                Err(error) => {
                    return Err(DocumentError::ParseError(error.to_string()));
                },

                // Every other event is irrelevant to the tree.
                Ok(_) => { },
            }
        }

        // The event stream ended without the root element being closed.
        // Report it as a parse failure instead of panicking inside the library.
        Err(DocumentError::ParseError(
            "document ended before the root element was closed".to_string()
        ))
    }

    /// Creates a new document from a string.
    ///
    /// The string's bytes are handed to `new_from_xml_stream`, so the same
    /// errors apply.
    pub fn new_from_xml_string(string: &str) -> Result<Document, DocumentError> {
        Document::new_from_xml_stream(string.as_bytes())
    }

    /// Creates a new document from a file.
101 | pub fn new_from_xml_file(filename: &str) -> Result { 102 | let path = Path::new(filename); 103 | 104 | if let Ok(file) = File::open(path) { 105 | let reader = BufReader::new(file); 106 | 107 | Document::new_from_xml_stream(reader) 108 | } else { 109 | Err(DocumentError::UnableToOpenFile(path.to_str().unwrap().to_string())) 110 | } 111 | } 112 | 113 | /// Returns the total number of elements in the document. 114 | pub fn number_of_elements(&self) -> usize { 115 | self.root.subtree_size() - 1 116 | } 117 | 118 | /// Searches the document for elements matching the given CSS selector. 119 | pub fn select_all<'a>(&'a self, selector: &str) -> Result + 'a>, SelectError> { 120 | self.root.select_all(selector) 121 | } 122 | 123 | /// Just like `select_all` but only returns the first match. 124 | pub fn select<'a>(&'a self, selector: &str) -> Result<&'a Element, SelectError> { 125 | self.root.select(selector) 126 | } 127 | } 128 | 129 | #[test] 130 | fn it_assigns_node_indices_in_monotonically_increasing_order() { 131 | let document = Document::new_from_xml_string(r#" 132 | 133 | 134 | This is some text 135 | 136 | Simple Sample 137 | Some unrecognisable scribbling 138 | 139 | 140 | 141 | 142 | Another Sample 143 | http://path.to.somewhere 144 | 145 | 146 | 147 | Other Sample 148 | http://some.other.path 149 | 150 | 151 | 152 | 153 |
154 |
155 | 156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 | "#).unwrap(); 168 | 169 | assert_eq!(document.root.node_index, 0); 170 | 171 | document.root.children_deep_iter().fold(0, |index, child| { 172 | assert!(index < child.node_index); 173 | child.node_index 174 | }); 175 | } 176 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This library offers the ability to represent XML documents as DOM trees, 2 | //! allowing querying with CSS selectors. 3 | //! 4 | //! ``` 5 | //! extern crate rquery; 6 | //! 7 | //! use rquery::Document; 8 | //! 9 | //! fn main() { 10 | //! let document = Document::new_from_xml_file("tests/fixtures/sample.xml").unwrap(); 11 | //! 12 | //! let title = document.select("title").unwrap(); 13 | //! assert_eq!(title.text(), "Sample Document"); 14 | //! assert_eq!(title.attr("ref").unwrap(), "main-title"); 15 | //! 16 | //! let item_count = document.select_all("item").unwrap().count(); 17 | //! assert_eq!(item_count, 2); 18 | //! 19 | //! let item_titles = document.select_all("item > title").unwrap() 20 | //! .map(|element| element.text().clone()) 21 | //! .collect::>() 22 | //! .join(", "); 23 | //! assert_eq!(item_titles, "Another Sample, Other Sample"); 24 | //! } 25 | //! ``` 26 | 27 | #![warn(missing_docs)] 28 | 29 | extern crate xml; 30 | 31 | mod selector; 32 | mod document; 33 | 34 | pub use self::document::{Document, DocumentError}; 35 | pub use self::selector::{ CompoundSelector, MatchType, Scope, Selector, UnexpectedTokenError }; 36 | 37 | use std::rc::Rc; 38 | use std::iter::{ empty, once }; 39 | use std::marker::PhantomData; 40 | use std::collections::HashMap; 41 | 42 | /// Represents a single element in the DOM tree. 
43 | #[derive(Clone, Debug)] 44 | pub struct Element { 45 | node_index: usize, 46 | tag_name: String, 47 | children: Option>>, 48 | attr_map: HashMap, 49 | text: String, 50 | } 51 | 52 | /// Errors which can be returned when performing a select operation. 53 | #[derive(Clone, Copy, Debug, PartialEq)] 54 | pub enum SelectError { 55 | /// Returned when the selector could not be parsed successfully. 56 | ParseError(UnexpectedTokenError), 57 | /// Returned when there were no matches for the selector. 58 | NoMatchError, 59 | } 60 | 61 | struct UniqueElements<'a, I: Iterator + 'a> { 62 | next_index: usize, 63 | inner_iter: I, 64 | phantom_data: PhantomData<&'a i32>, 65 | } 66 | 67 | impl<'a, I: Iterator> Iterator for UniqueElements<'a, I> { 68 | type Item = &'a Element; 69 | 70 | fn next(&mut self) -> Option { 71 | loop { 72 | match self.inner_iter.next() { 73 | Some(element) if element.node_index < self.next_index => { 74 | println!("SKIPPED"); 75 | // do nothing 76 | }, 77 | 78 | Some(element) => { 79 | self.next_index = element.node_index + 1; 80 | return Some(element); 81 | }, 82 | 83 | None => return None, 84 | } 85 | } 86 | } 87 | } 88 | 89 | impl Element { 90 | /// Searches the elements children for elements matching the given CSS 91 | /// selector. 
92 | pub fn select_all<'a>(&'a self, selector: &str) -> Result + 'a>, SelectError> { 93 | CompoundSelector::parse(selector) 94 | .map_err(|err| SelectError::ParseError(err)) 95 | .and_then(|compound_selectors| { 96 | let initial_iterator: Box> = Box::new(once(self)); 97 | 98 | let iterator = compound_selectors.into_iter() 99 | .fold(initial_iterator, |iter, compound_selector| { 100 | let scope = compound_selector.scope; 101 | 102 | let children_iter = iter 103 | .flat_map(move |child| { 104 | match scope { 105 | Scope::IndirectChild => child.children_deep_iter(), 106 | Scope::DirectChild => child.children_iter(), 107 | } 108 | }); 109 | 110 | let matching_children_iter = children_iter 111 | .filter_map(move |child| { 112 | if child.matches(&compound_selector) { 113 | Some(child) 114 | } else { 115 | None 116 | } 117 | }); 118 | 119 | let unique_children_iter = UniqueElements { 120 | next_index: 0, 121 | inner_iter: matching_children_iter, 122 | phantom_data: PhantomData, 123 | }; 124 | 125 | Box::new(unique_children_iter) 126 | }); 127 | 128 | return Ok(iterator); 129 | }) 130 | } 131 | 132 | /// Just like `select_all` but only returns the first match. 133 | pub fn select<'a>(&'a self, selector: &str) -> Result<&'a Element, SelectError> { 134 | self.select_all(selector).and_then(|mut iterator| { 135 | if let Some(element) = iterator.next() { 136 | Ok(element) 137 | } else { 138 | Err(SelectError::NoMatchError) 139 | } 140 | }) 141 | } 142 | 143 | /// Returns an iterator over the element’s direct children. 144 | pub fn children_iter<'a>(&'a self) -> Box + 'a> { 145 | if let Some(ref children) = self.children { 146 | Box::new(children.iter().map(|node| -> &'a Element { node })) 147 | } else { 148 | Box::new(empty::<&'a Element>()) 149 | } 150 | } 151 | 152 | /// Returns an iterator over all the element’s children, including indirect 153 | /// child elements. 
154 | pub fn children_deep_iter<'a>(&'a self) -> Box + 'a> { 155 | let iterator = self.children_iter() 156 | .flat_map(|child| once(child).chain(child.children_deep_iter())); 157 | 158 | Box::new(iterator) 159 | } 160 | 161 | /// Returns the size of the DOM subtree, including the current element. 162 | pub fn subtree_size(&self) -> usize { 163 | if let Some(ref children) = self.children { 164 | children.iter().fold(1, |subtotal, child| child.subtree_size() + subtotal) 165 | } else { 166 | 1 167 | } 168 | } 169 | 170 | /// Returns the name of the element’s tag. 171 | pub fn tag_name(&self) -> &str { 172 | &self.tag_name 173 | } 174 | 175 | /// Returns the value of the element attribute if found. 176 | pub fn attr(&self, attr_name: &str) -> Option<&String> { 177 | self.attr_map.get(attr_name) 178 | } 179 | 180 | /// Returns the text contained within the element. 181 | pub fn text(&self) -> &String { 182 | &self.text 183 | } 184 | 185 | /// Returns true if the element matches the given selector. 186 | pub fn matches(&self, compound_selector: &CompoundSelector) -> bool { 187 | compound_selector.parts.iter().all(|part| { 188 | match part { 189 | &Selector::TagName(ref name) => 190 | self.tag_name() == name, 191 | 192 | &Selector::Id(ref id) => 193 | self.attr("id") == Some(id), 194 | 195 | &Selector::Attribute(ref attr, MatchType::Equals, ref value) => 196 | self.attr(attr) == Some(value), 197 | } 198 | }) 199 | } 200 | 201 | /// Returns the node index for the element. 
202 | pub fn node_index(&self) -> usize { 203 | self.node_index 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /src/selector.rs: -------------------------------------------------------------------------------- 1 | use std::str::Chars; 2 | use std::iter::Peekable; 3 | 4 | /// An error which is returned when parsing a selector encounters an unexpected 5 | /// token 6 | #[derive(Clone, Copy, Debug, PartialEq)] 7 | pub struct UnexpectedTokenError(pub char); 8 | 9 | /// Represents a component of a parsed CSS selector is used to match a single 10 | /// element. 11 | #[derive(Clone, Debug)] 12 | pub struct CompoundSelector { 13 | /// The scope of the selector. 14 | pub scope: Scope, 15 | /// The individual parts that make up the compound selector. 16 | pub parts: Vec, 17 | } 18 | 19 | /// The scope of the `CompoundSelector`. 20 | #[derive(Clone, Copy, PartialEq, Debug)] 21 | pub enum Scope { 22 | /// Implies that the selector must be a direct descendent of the previous 23 | /// match (e.g. `body > header`). 24 | DirectChild, 25 | /// Implies that the selector is a descendent of the previous match (e.g., 26 | /// `body header`). 27 | IndirectChild, 28 | } 29 | 30 | /// The individual parts of the `CompoundSelector`. For example, the selector 31 | /// `input[type="radio"]` has two parts, the `TagName` and `Attribute` 32 | /// selectors. 33 | #[derive(Clone, Debug)] 34 | pub enum Selector { 35 | /// Represents an id selector (e.g. `#the-id`) 36 | Id(String), 37 | /// Represents a tag name selector (e.g. `input`) 38 | TagName(String), 39 | /// Represents an attribute selector (e.g. `[type="radio"]`) 40 | Attribute(String, MatchType, String), 41 | } 42 | 43 | /// The match type for an attribute selector. 44 | #[derive(Clone, Copy, Debug, PartialEq)] 45 | pub enum MatchType { 46 | /// Indicates that the match must be identical 47 | Equals, 48 | } 49 | 50 | macro_rules! 
expect_token {
    // Succeeds silently when `$token_option` is `Some($token)`; otherwise
    // returns early from the enclosing function with an
    // `UnexpectedTokenError` carrying the offending character, or `' '` when
    // the input has ended.
    ($token_option: expr, $token: expr) => {
        match $token_option {
            Some($token) => { },
            Some(token) => return Err(UnexpectedTokenError(token)),
            None => return Err(UnexpectedTokenError(' ')),
        }
    }
}

/// Returns true for ASCII letters, the characters a tag name may start with.
#[inline]
fn non_digit(c: char) -> bool {
    ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
}

/// Returns true for the characters allowed inside a name or value: ASCII
/// letters and digits, `-` and `_`.
#[inline]
fn allowed_character(c: char) -> bool {
    non_digit(c) || ('0' <= c && c <= '9') || c == '-' || c == '_'
}

/// Returns true for the characters that begin an id (`#`) or attribute (`[`)
/// selector.
#[inline]
fn valid_start_token(c: char) -> bool {
    c == '#' || c == '['
}

/// Extracts a name, stopping at a space, at the start of the next selector or
/// at the end of the input.
fn extract_valid_string(chars: &mut Peekable<Chars>) -> Result<String, UnexpectedTokenError> {
    extract_valid_string_until_token(chars, ' ')
}

/// Collects allowed characters until `stop_token` is reached.
///
/// The stop token itself is consumed. Collection also ends, without consuming
/// anything further, at a `#` or `[` or when the input runs out. Any other
/// character is reported as an `UnexpectedTokenError` and left unconsumed.
fn extract_valid_string_until_token(chars: &mut Peekable<Chars>, stop_token: char) -> Result<String, UnexpectedTokenError> {
    let mut string = String::new();

    while let Some(&c) = chars.peek() {
        if c == stop_token {
            chars.next();
            break;
        } else if allowed_character(c) {
            string.push(c);
            chars.next();
        } else if valid_start_token(c) {
            break;
        } else {
            return Err(UnexpectedTokenError(c));
        }
    }

    Ok(string)
}

impl Selector {
    /// Parses one whitespace-free selector such as `div#main[type=x]` into
    /// its individual parts.
    fn create_list(string: &str) -> Result<Vec<Selector>, UnexpectedTokenError> {
        let mut selectors = Vec::new();

        let mut chars = string.chars().peekable();
        while let Some(&c) = chars.peek() {
            selectors.push(Selector::next_selector(c, &mut chars)?);
        }

        Ok(selectors)
    }

    /// Picks the parser for the next part from its first character `c`, which
    /// has been peeked but not yet consumed from `chars`.
    fn next_selector(c: char, chars: &mut Peekable<Chars>) -> Result<Selector, UnexpectedTokenError> {
        if non_digit(c) {
            Selector::create_tag_name(chars)
        } else if c == '#' {
            Selector::create_id(chars)
        } else if c == '[' {
            Selector::create_attribute(chars)
        } else {
            Err(UnexpectedTokenError(c))
        }
    }

    /// Parses a tag name selector (e.g. `input`).
    fn create_tag_name(chars: &mut Peekable<Chars>) -> Result<Selector, UnexpectedTokenError> {
        extract_valid_string(chars).map(Selector::TagName)
    }

    /// Parses an id selector (e.g. `#the-id`), including the leading `#`.
    fn create_id(chars: &mut Peekable<Chars>) -> Result<Selector, UnexpectedTokenError> {
        expect_token!(chars.next(), '#');

        extract_valid_string(chars).map(Selector::Id)
    }

    /// Parses an attribute selector, either `[name=value]` or
    /// `[name="value"]`.
    fn create_attribute(chars: &mut Peekable<Chars>) -> Result<Selector, UnexpectedTokenError> {
        expect_token!(chars.next(), '[');

        let attribute = extract_valid_string_until_token(chars, '=')?;

        let value = if chars.peek() == Some(&'"') {
            // Quoted value: skip the opening quote, read up to the closing
            // quote, then require the closing bracket.
            chars.next();
            let quoted = extract_valid_string_until_token(chars, '"')?;
            expect_token!(chars.next(), ']');

            quoted
        } else {
            extract_valid_string_until_token(chars, ']')?
        };

        Ok(Selector::Attribute(attribute, MatchType::Equals, value))
    }
}

/// Pairs every whitespace-separated selector part with its scope: a part
/// preceded by `>` is a direct child, anything else an indirect child.
struct SelectorParts<I: Iterator<Item=String>> {
    inner_iter: I,
}

impl<I: Iterator<Item=String>> Iterator for SelectorParts<I> {
    type Item = (Scope, String);

    fn next(&mut self) -> Option<(Scope, String)> {
        let next_part = match self.inner_iter.next() {
            Some(part) => part,
            None => return None,
        };

        if next_part == ">" {
            // A combinator with nothing after it (e.g. `"a >"`) used to panic
            // here. Passing the dangling `>` on as the part makes the parser
            // reject it with `UnexpectedTokenError('>')` instead.
            let target = self.inner_iter.next().unwrap_or(next_part);

            Some((Scope::DirectChild, target))
        } else {
            Some((Scope::IndirectChild, next_part))
        }
    }
}

impl CompoundSelector {
    /// Parses the string and converts it to a list of `CompoundSelector`s.
188 | pub fn parse(selector: &str) -> Result, UnexpectedTokenError> { 189 | let normalized_selector = selector.split(">") 190 | .collect::>() 191 | .join(" > "); 192 | 193 | let selector_parts = SelectorParts { 194 | inner_iter: normalized_selector.split_whitespace().into_iter().map(|s| s.to_string()), 195 | }; 196 | 197 | selector_parts 198 | .fold(Ok(Vec::new()), |result_so_far, (scope, part)| { 199 | if let Ok(mut compound_selectors) = result_so_far { 200 | Selector::create_list(&part).map(|parts| { 201 | compound_selectors.push(CompoundSelector { 202 | scope: scope, 203 | parts: parts 204 | }); 205 | 206 | compound_selectors 207 | }) 208 | } else { 209 | result_so_far 210 | } 211 | }) 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /tests/document_test.rs: -------------------------------------------------------------------------------- 1 | use rquery::Document; 2 | 3 | fn new_document() -> Document { 4 | Document::new_from_xml_string(r#" 5 | 6 | 7 | 8 | 9 | 10 | 11 | "#).unwrap() 12 | } 13 | 14 | #[test] 15 | fn it_captures_the_correct_number_of_elements() { 16 | let document = new_document(); 17 | 18 | assert_eq!(document.number_of_elements(), 4); 19 | } 20 | -------------------------------------------------------------------------------- /tests/element_test.rs: -------------------------------------------------------------------------------- 1 | use rquery::Document; 2 | 3 | fn new_document() -> Document { 4 | Document::new_from_xml_string(r#" 5 | 6 |
7 | This is some text 8 |
9 | "#).unwrap() 10 | } 11 | 12 | 13 | #[test] 14 | fn it_knows_its_tag_name() { 15 | let document = new_document(); 16 | 17 | let element = document.select("main").unwrap(); 18 | assert_eq!(element.tag_name(), "main"); 19 | } 20 | 21 | #[test] 22 | fn it_knows_its_attributes() { 23 | let document = new_document(); 24 | 25 | let element = document.select("main").unwrap(); 26 | assert_eq!(element.attr("type").unwrap(), "simple"); 27 | } 28 | 29 | #[test] 30 | fn it_knows_its_inner_text_contents() { 31 | let document = new_document(); 32 | 33 | 34 | let element = document.select("main").unwrap(); 35 | assert_eq!(element.text().trim(), "This is some text"); 36 | } 37 | 38 | #[test] 39 | fn it_knows_its_node_indices() { 40 | let document = new_document(); 41 | 42 | let element = document.select("main").unwrap(); 43 | assert_eq!(element.node_index(), 1); 44 | } -------------------------------------------------------------------------------- /tests/fixtures/sample.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | This is some text 4 | 5 | Sample Document 6 | Some unrecognisable scribbling 7 | 8 | 9 | 10 | 11 | Another Sample 12 | http://path.to.somewhere 13 | 14 | 15 | 16 | Other Sample 17 | http://some.other.path 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /tests/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate rquery; 2 | 3 | #[cfg(test)] 4 | mod element_test; 5 | 6 | #[cfg(test)] 7 | mod selector_test; 8 | 9 | #[cfg(test)] 10 | mod xml_document_test; 11 | 12 | #[cfg(test)] 13 | mod document_test; 14 | 15 | #[cfg(test)] 16 | mod querying_by_selectors_test; 17 | -------------------------------------------------------------------------------- /tests/querying_by_selectors_test.rs: -------------------------------------------------------------------------------- 1 | use rquery::{ Document, Element, SelectError, 
UnexpectedTokenError }; 2 | 3 | pub fn new_document() -> Document { 4 | Document::new_from_xml_string(r#" 5 | 6 | 7 | This is some text 8 | 9 | Simple Sample 10 | Some unrecognisable scribbling 11 | 12 | 13 | 14 | 15 | Another Sample 16 | http://path.to.somewhere 17 | 18 | 19 | 20 | Other Sample 21 | http://some.other.path 22 | 23 | 24 | 25 | 26 |
27 |
28 | 29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 | "#).unwrap() 41 | } 42 | 43 | #[test] 44 | fn it_supports_the_tag_selector() { 45 | let document = new_document(); 46 | 47 | let elements: Vec<&Element> = document.select_all("note").unwrap().collect(); 48 | 49 | assert_eq!(elements.len(), 1); 50 | assert_eq!(elements[0].tag_name(), "note"); 51 | } 52 | 53 | #[test] 54 | fn it_supports_the_nested_tag_selector() { 55 | let document = new_document(); 56 | 57 | let elements: Vec<&Element> = document.select_all("related title").unwrap().collect(); 58 | 59 | assert_eq!(elements.len(), 2); 60 | 61 | let element_tag_names: Vec = elements.iter() 62 | .map(|el| el.tag_name().to_string()) 63 | .collect(); 64 | assert_eq!(element_tag_names, vec!("title", "title")); 65 | } 66 | 67 | #[test] 68 | fn it_supports_nesting_selectors() { 69 | let document = new_document(); 70 | 71 | let elements: Vec<&Element> = document.select_all("related").unwrap() 72 | .flat_map(|element| element.select_all("title").unwrap()) 73 | .collect(); 74 | 75 | assert_eq!(elements.len(), 2); 76 | 77 | let element_tag_names: Vec = elements.iter() 78 | .map(|el| el.tag_name().to_string()) 79 | .collect(); 80 | assert_eq!(element_tag_names, vec!("title", "title")); 81 | } 82 | 83 | #[test] 84 | fn it_supports_the_direct_child_tag_selector() { 85 | let document = new_document(); 86 | 87 | let elements: Vec<&Element> = document.select_all("sample > title").unwrap().collect(); 88 | 89 | assert_eq!(elements.len(), 1); 90 | 91 | let element = elements[0]; 92 | assert_eq!(element.tag_name(), "title"); 93 | } 94 | 95 | #[test] 96 | fn it_returns_a_no_match_error_when_the_selector_does_not_match_any_element() { 97 | let document = new_document(); 98 | 99 | let result = document.select("nonexistentelement"); 100 | 101 | if let Err(err) = result { 102 | assert_eq!(err, SelectError::NoMatchError); 103 | } else { 104 | panic!("The select did not result in an error!"); 105 | } 106 | } 107 | 108 | #[test] 109 | fn 
it_returns_a_parse_error_when_the_selector_is_invalid() { 110 | let document = new_document(); 111 | 112 | let result = document.select_all("?"); 113 | 114 | if let Err(err) = result { 115 | assert_eq!(err, SelectError::ParseError(UnexpectedTokenError('?'))); 116 | } else { 117 | panic!("The invalid selector did not result in an error!"); 118 | } 119 | } 120 | 121 | #[test] 122 | fn it_supports_the_attribute_selector() { 123 | let document = new_document(); 124 | 125 | let elements: Vec<&Element> = document.select_all(r#"[long="false"]"#).unwrap().collect(); 126 | 127 | assert_eq!(elements.len(), 1); 128 | 129 | let element = elements[0]; 130 | assert_eq!(element.text(), "Some unrecognisable scribbling"); 131 | } 132 | 133 | #[test] 134 | fn it_supports_the_id_selector() { 135 | let document = new_document(); 136 | 137 | let elements: Vec<&Element> = document.select_all("#id-1").unwrap().collect(); 138 | 139 | assert_eq!(elements.len(), 1); 140 | 141 | let element = elements[0]; 142 | assert_eq!(element.tag_name(), "item"); 143 | assert_eq!(element.attr("id"), Some(&"id-1".to_string())); 144 | } 145 | 146 | #[test] 147 | fn it_supports_the_compound_selectors() { 148 | let document = new_document(); 149 | 150 | let elements: Vec<&Element> = document.select_all("div[type=three]").unwrap().collect(); 151 | 152 | assert_eq!(elements.len(), 1); 153 | 154 | let element = elements[0]; 155 | assert_eq!(element.tag_name(), "div"); 156 | assert_eq!(element.attr("type"), Some(&"three".to_string())); 157 | } 158 | 159 | #[test] 160 | fn it_does_not_repeat_elements() { 161 | let document = new_document(); 162 | 163 | let unique_count = document.select_all("div").unwrap().count(); 164 | assert_eq!(unique_count, 8); 165 | 166 | let direct_nested_count = document.select_all("div > div").unwrap().count(); 167 | assert_eq!(direct_nested_count, 5); 168 | 169 | let nested_count = document.select_all("div div").unwrap().count(); 170 | assert_eq!(nested_count, 6); 171 | } 172 | 
-------------------------------------------------------------------------------- /tests/selector_test.rs: -------------------------------------------------------------------------------- 1 | use rquery::{ CompoundSelector, MatchType, Scope, Selector }; 2 | 3 | fn assert_as_single_tag(compound_selector: &CompoundSelector, tag_name: &str) { 4 | assert_eq!(compound_selector.parts.len(), 1); 5 | 6 | if let &Selector::TagName(ref string) = compound_selector.parts.last().unwrap() { 7 | assert_eq!(string, tag_name) 8 | } else { 9 | panic!(format!("Did not match tag name \"{}\"", tag_name)); 10 | } 11 | } 12 | 13 | #[test] 14 | fn it_can_parse_a_single_tag_selector() { 15 | let compound_selectors = CompoundSelector::parse("apples").unwrap(); 16 | 17 | assert_eq!(compound_selectors.len(), 1); 18 | 19 | assert_eq!(compound_selectors[0].scope, Scope::IndirectChild); 20 | assert_as_single_tag(&compound_selectors[0], "apples"); 21 | } 22 | 23 | #[test] 24 | fn it_can_parse_a_nested_tag_selectors() { 25 | let compound_selectors = CompoundSelector::parse("basket apple").unwrap(); 26 | 27 | assert_eq!(compound_selectors.len(), 2); 28 | 29 | assert_eq!(compound_selectors[0].scope, Scope::IndirectChild); 30 | assert_as_single_tag(&compound_selectors[0], "basket"); 31 | 32 | assert_eq!(compound_selectors[1].scope, Scope::IndirectChild); 33 | assert_as_single_tag(&compound_selectors[1], "apple"); 34 | } 35 | 36 | #[test] 37 | fn it_can_parse_a_direct_child_selector() { 38 | let compound_selectors = CompoundSelector::parse("basket > apple").unwrap(); 39 | 40 | assert_eq!(compound_selectors.len(), 2); 41 | 42 | assert_eq!(compound_selectors[0].scope, Scope::IndirectChild); 43 | assert_as_single_tag(&compound_selectors[0], "basket"); 44 | 45 | assert_eq!(compound_selectors[1].scope, Scope::DirectChild); 46 | assert_as_single_tag(&compound_selectors[1], "apple"); 47 | } 48 | 49 | #[test] 50 | fn it_can_parse_the_attribute_selector() { 51 | let compound_selectors = 
CompoundSelector::parse(r#"[attribute="true-value"]"#).unwrap(); 52 | 53 | assert_eq!(compound_selectors.len(), 1); 54 | assert_eq!(compound_selectors[0].parts.len(), 1); 55 | 56 | if let Selector::Attribute(ref attribute, match_type, ref value) = compound_selectors[0].parts[0] { 57 | assert_eq!(attribute, "attribute"); 58 | assert_eq!(match_type, MatchType::Equals); 59 | assert_eq!(value, "true-value"); 60 | } else { 61 | panic!("Could not parse the attribute selector"); 62 | } 63 | } 64 | 65 | #[test] 66 | fn it_can_parse_the_id_selector() { 67 | let compound_selectors = CompoundSelector::parse("#the-id").unwrap(); 68 | 69 | assert_eq!(compound_selectors.len(), 1); 70 | assert_eq!(compound_selectors[0].parts.len(), 1); 71 | 72 | if let Selector::Id(ref value) = compound_selectors[0].parts[0] { 73 | assert_eq!(value, "the-id"); 74 | } else { 75 | panic!("Could not parse the ID selector"); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /tests/xml_document_test.rs: -------------------------------------------------------------------------------- 1 | use rquery::Document; 2 | 3 | #[test] 4 | fn it_captures_the_correct_number_of_elements() { 5 | let result = Document::new_from_xml_string(r#" 6 | 7 | 8 | 9 | 10 | 11 | 12 | "#); 13 | 14 | assert!(result.is_ok()); 15 | } 16 | 17 | #[test] 18 | fn it_can_be_created_from_a_file() { 19 | let result = Document::new_from_xml_file("tests/fixtures/sample.xml"); 20 | 21 | assert!(result.is_ok()); 22 | } 23 | 24 | #[test] 25 | fn it_returns_an_error_for_non_existent_files() { 26 | let result = Document::new_from_xml_file("non-existent.why"); 27 | 28 | assert!(result.is_err()); 29 | } 30 | 31 | #[test] 32 | fn it_returns_an_error_for_invalid_xml_files() { 33 | let result = Document::new_from_xml_file("non-existent.why"); 34 | 35 | assert!(result.is_err()); 36 | } 37 | --------------------------------------------------------------------------------