├── .gitignore ├── .travis.yml ├── .travis └── docs.sh ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── scripts └── update_entities.pl ├── src ├── dom │ ├── css.rs │ ├── html.rs │ └── mod.rs ├── lib.rs └── util.rs └── tests └── dom.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: required 3 | 4 | language: rust 5 | matrix: 6 | fast_finish: true 7 | include: 8 | - rust: nightly 9 | - rust: stable 10 | env: BUILD_DOCS=1 11 | 12 | # Dependencies of kcov 13 | addons: 14 | apt: 15 | update: true 16 | packages: 17 | - libcurl4-openssl-dev 18 | - libelf-dev 19 | - libdw-dev 20 | - binutils-dev 21 | - libiberty-dev 22 | 23 | after_success: 24 | - | 25 | [ "$BUILD_DOCS" = "1" ] && 26 | LOCAL="~/.local" && export PATH=$LOCAL/bin:$PATH && 27 | wget https://github.com/SimonKagstrom/kcov/archive/master.tar.gz && 28 | tar xzf master.tar.gz && mkdir kcov-master/build && cd kcov-master/build && 29 | cmake -DCMAKE_INSTALL_PREFIX:PATH=$LOCAL .. && make && make install && cd ../.. 
&& 30 | cargo clean && 31 | RUSTFLAGS="-C link-dead-code" cargo test --no-run && 32 | for file in target/debug/*; do 33 | if [[ -f $file && -x $file ]]; then 34 | mkdir -p "target/cov/$(basename $file)"; 35 | kcov --exclude-pattern=/.cargo,/usr/lib --verify "target/cov/$(basename $file)" "$file"; 36 | fi; 37 | done && 38 | kcov --coveralls-id=$TRAVIS_JOB_ID --merge target/cov target/cov/* 39 | - | 40 | [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$BUILD_DOCS" = "1" ] && 41 | { [ "$TRAVIS_TAG" != "" ] || [ "$TRAVIS_BRANCH" == "master" ]; } && 42 | ./.travis/docs.sh 43 | 44 | env: 45 | global: 46 | - RUST_BACKTRACE=1 47 | 48 | notifications: 49 | email: 50 | on_success: never 51 | -------------------------------------------------------------------------------- /.travis/docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | 5 | shopt -s globstar 6 | 7 | cargo doc --no-deps 8 | 9 | git clone --depth 1 --branch gh-pages "https://${GH_TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git" deploy_docs > /dev/null 2>&1 10 | cd deploy_docs 11 | 12 | git config user.name "$GH_USER_NAME" 13 | git config user.email "$GH_USER_EMAIL" 14 | 15 | if [ "$TRAVIS_TAG" = "" ]; then 16 | rm -rf master 17 | mv ../target/doc ./master 18 | echo "" > ./master/index.html 19 | fi 20 | 21 | git add -A . 22 | git commit -m "rebuild pages at ${TRAVIS_COMMIT}" 23 | 24 | echo 25 | echo "Pushing docs..." 26 | git push --quiet origin gh-pages > /dev/null 2>&1 27 | echo 28 | echo "Docs published." 
29 | echo 30 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "victoria-dom" 3 | version = "0.1.2" # remember to update html_root_url 4 | authors = ["Aleksandr Orlenko "] 5 | license = "MIT/Apache-2.0" 6 | readme = "README.md" 7 | repository = "https://github.com/khvzak/victoria-dom" 8 | documentation = "https://docs.rs/victoria-dom" 9 | homepage = "https://github.com/khvzak/victoria-dom" 10 | description = """ 11 | Minimalistic HTML parser with CSS selectors 12 | """ 13 | categories = ["parsing"] 14 | keywords = ["html", "css", "parser"] 15 | 16 | [dependencies] 17 | regex = "1.0" 18 | lazy_static = "1.0" 19 | maplit = "1.0" 20 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Aleksandr Orlenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # victoria-dom 2 | Minimalistic HTML parser with CSS selectors 3 | 4 | [![crates.io](https://img.shields.io/crates/v/victoria-dom.svg)](https://crates.io/crates/victoria-dom) 5 | [![Build Status](https://travis-ci.org/khvzak/victoria-dom.svg?branch=master)](https://travis-ci.org/khvzak/victoria-dom) 6 | [![Coverage Status](https://coveralls.io/repos/github/khvzak/victoria-dom/badge.svg?branch=master)](https://coveralls.io/github/khvzak/victoria-dom?branch=master) 7 | [![Released API docs](https://docs.rs/victoria-dom/badge.svg)](https://docs.rs/victoria-dom) 8 | [![Master API docs](https://img.shields.io/badge/docs-master-green.svg)](https://khvzak.github.io/victoria-dom/) 9 | 10 | The project has been inspired by [Mojo::DOM](https://metacpan.org/pod/Mojo::DOM). 11 | 12 | ### Installing 13 | Add the following lines to your `Cargo.toml` file: 14 | 15 | ```toml 16 | [dependencies] 17 | victoria-dom = "0.1" 18 | ``` 19 | 20 | and this to your crate root: 21 | ```rust 22 | extern crate victoria_dom; 23 | ``` 24 | 25 | ### Examples 26 | ```rust 27 | extern crate victoria_dom; 28 | 29 | use victoria_dom::DOM; 30 | 31 | fn main() { 32 | let html = r#"
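<html><div id="main">Hello, <a href="http://rust-lang.org" alt="The Rust Programing Language">Rust</a></div></html>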
"#; 33 | let dom = DOM::new(html); 34 | 35 | assert_eq!(dom.at("html").unwrap().text_all(), "Hello, Rust"); 36 | assert_eq!(dom.at("div#main > a").unwrap().attr("alt").unwrap(), "The Rust Programing Language"); 37 | } 38 | ``` 39 | 40 | ### Documentation 41 | https://docs.rs/victoria-dom 42 | -------------------------------------------------------------------------------- /scripts/update_entities.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use Mojo::Base -strict; 4 | use Mojo::UserAgent; 5 | use Cwd qw(abs_path); 6 | use File::Basename qw(dirname); 7 | use File::Spec qw(); 8 | use Path::Tiny qw(path); 9 | 10 | my @data; 11 | 12 | # Extract named character references from HTML Living Standard 13 | my $res = Mojo::UserAgent->new->get('https://html.spec.whatwg.org')->result; 14 | for my $row ($res->dom('#named-character-references-table tbody > tr')->each) { 15 | my $entity = $row->at('td > code')->text; 16 | my $codepoints = $row->children('td')->[1]->text; 17 | 18 | if ($codepoints =~ /^\s*U\+(\S+)(?:\s+U\+(\S+))?/) { 19 | push @data, [$entity, defined($2) ? "\\u{$1}\\u{$2}" : "\\u{$1}"]; 20 | } 21 | } 22 | 23 | my $util_rs_file = File::Spec->catfile(dirname(abs_path($0)), '..', 'src', 'util.rs'); 24 | 25 | my $util_rs_data = path($util_rs_file)->slurp_utf8; 26 | 27 | my $entities = join(",\n", map { ' "'.$_->[0].'" => "'.$_->[1].'"' } @data); 28 | $util_rs_data =~ s/(?<= static ref ENTITIES: HashMap<&'static str, &'static str> = hashmap!\[\n).+(?= \];)/$entities,\n/s; 29 | 30 | path($util_rs_file)->spew_utf8($util_rs_data); 31 | -------------------------------------------------------------------------------- /src/dom/css.rs: -------------------------------------------------------------------------------- 1 | use std::rc::Rc; 2 | use std::{char, u32}; 3 | 4 | use regex::{self, Regex, Captures}; 5 | 6 | use dom::html::{TreeNode, NodeElem}; 7 | 8 | lazy_static! 
{ 9 | static ref ESCAPE_RE_STR: String = r"\\[^0-9a-fA-F]|\\[0-9a-fA-F]{1,6}".to_owned(); 10 | 11 | static ref ATTR_RE_STR: String = String::new() + 12 | r"\[" + 13 | r"((?:" + &*ESCAPE_RE_STR + r"|[\w-])+)" + // Key 14 | r"(?:" + 15 | r"(\W)?=" + // Operator 16 | r#"(?:"((?:\\"|[^"])*)"|'((?:\\'|[^'])*)'|([^\]]+?))"# + // Value 17 | r"(?:\s+(i))?" + // Case-sensitivity 18 | r")?" + 19 | r"\]"; 20 | } 21 | 22 | #[derive(Debug)] 23 | pub enum SelectorItem { 24 | Combinator { op: String }, 25 | Conditions { items: Vec<ConditionItem> }, 26 | } 27 | 28 | #[derive(Debug)] 29 | pub enum ConditionItem { 30 | Tag { name: Regex }, 31 | Attribute { name: Regex, value: Option<Regex> }, 32 | PseudoClass { 33 | class: String, 34 | group: Option<GroupOfSelectors>, 35 | equation: Option<(i32, i32)>, 36 | }, 37 | } 38 | 39 | pub type Selectors = Vec<Rc<SelectorItem>>; 40 | pub type GroupOfSelectors = Vec<Rc<Selectors>>; 41 | 42 | pub fn matches(tree: &Rc<TreeNode>, css: &str) -> bool { 43 | if css.is_empty() { return true; } 44 | match tree.elem { 45 | NodeElem::Tag { .. } => _match(&parse(css), tree, tree), 46 | _ => false 47 | } 48 | } 49 | 50 | pub fn select(tree: &Rc<TreeNode>, css: &str, limit: usize) -> Vec<Rc<TreeNode>> { 51 | let group = parse(css); 52 | 53 | let mut result = Vec::new(); 54 | 55 | let mut queue = tree.get_childs().unwrap(); 56 | while queue.len() > 0 { 57 | let current = queue.remove(0); 58 | if let NodeElem::Tag { ..
} = current.elem {} else { continue; } 59 | 60 | queue = { let mut x = current.get_childs().unwrap(); x.append(&mut queue); x }; 61 | if (group.is_empty() && css == "*") || _match(&group, &current, tree) { result.push(current); } 62 | 63 | if limit > 0 && result.len() == limit { break; } 64 | } 65 | 66 | result 67 | } 68 | 69 | pub fn select_one(tree: &Rc<TreeNode>, css: &str) -> Option<Rc<TreeNode>> { 70 | select(tree, css, 1).pop() 71 | } 72 | 73 | fn _match(group: &GroupOfSelectors, current: &Rc<TreeNode>, tree: &Rc<TreeNode>) -> bool { 74 | for _selectors in group { 75 | let selectors = Rc::new(_selectors.iter().rev().cloned().collect::<Selectors>()); 76 | if _combinator(&selectors, current, tree, 0) { return true; } 77 | } 78 | return false; 79 | } 80 | 81 | fn _combinator(selectors: &Rc<Selectors>, current: &Rc<TreeNode>, tree: &Rc<TreeNode>, mut idx: usize) -> bool { 82 | if idx >= selectors.len() { return false; } 83 | 84 | match *selectors[idx] { 85 | SelectorItem::Conditions { ref items } => { 86 | if !_match_selector_conditions(items, current) { return false; } 87 | 88 | idx = idx + 1; 89 | if idx >= selectors.len() { return true; } 90 | return _combinator(selectors, current, tree, idx); 91 | }, 92 | 93 | SelectorItem::Combinator { ref op } => { 94 | idx = idx + 1; 95 | 96 | // ">" (parent only) 97 | if op == ">" { 98 | if current.parent.is_none() { return false; } 99 | let parent = current.get_parent().unwrap(); 100 | 101 | // no suitable parent 102 | if let NodeElem::Root { ..
} = parent.elem { return false; } 103 | if parent.id == tree.id { return false; } 104 | 105 | return _combinator(selectors, &parent, tree, idx); 106 | } 107 | 108 | // "~" (preceding siblings) 109 | if op == "~" { 110 | for sibling in _siblings(current, None) { 111 | if sibling.id == current.id { return false; } 112 | if _combinator(selectors, &sibling, tree, idx) { return true; } 113 | } 114 | return false; 115 | } 116 | 117 | // "+" (immediately preceding siblings) 118 | if op == "+" { 119 | let mut found = false; 120 | for sibling in _siblings(current, None) { 121 | if sibling.id == current.id { return found; } 122 | found = _combinator(selectors, &sibling, tree, idx); 123 | } 124 | return false; 125 | } 126 | 127 | // " " (ancestor) 128 | let mut parent = current.get_parent(); 129 | while parent.is_some() { 130 | let current_next = parent.clone().unwrap(); 131 | 132 | if let NodeElem::Root { .. } = current_next.elem { return false; } 133 | if current_next.id == tree.id { return false; } 134 | 135 | if _combinator(selectors, &current_next, tree, idx) { return true; } 136 | 137 | parent = current_next.get_parent(); 138 | } 139 | return false; 140 | }, 141 | } 142 | } 143 | 144 | fn _match_selector_conditions(conditions: &Vec<ConditionItem>, current: &Rc<TreeNode>) -> bool { 145 | 'conditem: for ci in conditions { 146 | match ci { 147 | &ConditionItem::Tag { name: ref name_re } => { 148 | if !name_re.is_match(current.get_tag_name().unwrap()) { return false; } 149 | }, 150 | 151 | &ConditionItem::Attribute { name: ref name_re, value: ref value_re } => { 152 | let attrs = current.get_tag_attrs().unwrap(); 153 | let value_re = value_re.as_ref(); 154 | 155 | for (name, value) in attrs.iter() { 156 | let value = value.as_ref(); 157 | 158 | if name_re.is_match(name) && (value.is_none() || value_re.is_none() || value_re.unwrap().is_match(value.unwrap())) { 159 | continue 'conditem; // go to a next condition item 160 | } 161 | } 162 | return false; 163 | }, 164 | 165 | &ConditionItem::PseudoClass { ref
class, ref group, ref equation } => { 166 | // ":empty" 167 | if class == "empty" { 168 | let _is_empty = |x: &TreeNode| match x.elem { 169 | NodeElem::Text { ref elem_type, .. } => elem_type == "comment" || elem_type == "pi", 170 | _ => false, 171 | }; 172 | 173 | let _matched = current.get_childs().unwrap().iter().filter(|&x| !_is_empty(x)).count() == 0; 174 | if _matched { continue 'conditem; } 175 | } 176 | 177 | // ":root" 178 | else if class == "root" { 179 | let parent = current.get_parent(); 180 | let _matched = parent.is_some() && match parent.unwrap().elem { 181 | NodeElem::Root { .. } => true, 182 | _ => false 183 | }; 184 | if _matched { continue 'conditem; } 185 | } 186 | 187 | // ":not" 188 | else if class == "not" { 189 | let _matched = !_match(&group.clone().unwrap(), current, current); 190 | if _matched { continue 'conditem; } 191 | } 192 | 193 | // ":checked" 194 | else if class == "checked" { 195 | let _matched = match current.elem { 196 | NodeElem::Tag { ref attrs, .. 
} => attrs.contains_key("checked") || attrs.contains_key("selected"), 197 | _ => false 198 | }; 199 | if _matched { continue 'conditem; } 200 | } 201 | 202 | // ":nth-child", ":nth-last-child", ":nth-of-type" or ":nth-last-of-type" 203 | else if let Some(equation) = *equation { 204 | let mut siblings = if class.ends_with("of-type") { 205 | _siblings(current, Some(current.get_tag_name().unwrap())) 206 | } else { 207 | _siblings(current, None) 208 | }; 209 | 210 | if class.starts_with("nth-last") { siblings.reverse() } 211 | 212 | for i in 0..siblings.len() { 213 | let result = equation.0 * (i as i32) + equation.1; 214 | 215 | if result < 1 { continue; } 216 | if (result - 1) as usize >= siblings.len() { break; } 217 | 218 | if siblings[(result - 1) as usize].id == current.id { continue 'conditem; } 219 | } 220 | } 221 | 222 | // ":only-child" or ":only-of-type" 223 | else if class == "only-child" || class == "only-of-type" { 224 | let siblings = if class == "only-of-type" { 225 | _siblings(current, Some(current.get_tag_name().unwrap())) 226 | } else { 227 | _siblings(current, None) 228 | }; 229 | for sibling in siblings { 230 | if sibling.id != current.id { return false; } 231 | } 232 | 233 | continue 'conditem; 234 | } 235 | 236 | return false; 237 | }, 238 | } 239 | } 240 | 241 | return true; 242 | } 243 | 244 | fn _siblings(current: &Rc<TreeNode>, _name: Option<&str>) -> Vec<Rc<TreeNode>> { 245 | let parent = current.get_parent().unwrap(); 246 | let childs = parent.get_childs().unwrap(); 247 | 248 | childs.iter().filter(|&x| match x.elem { 249 | NodeElem::Tag { ref name, .. } => if _name.is_some() { name == _name.unwrap() } else { true }, 250 | _ => false 251 | }).cloned().collect() 252 | } 253 | 254 | fn _unescape(_val: &str) -> String { 255 | let mut val = _val.to_owned(); 256 | 257 | lazy_static!
{ 258 | static ref _RE: Regex = Regex::new(r"\\([0-9a-fA-F]{1,6})\s?").unwrap(); 259 | } 260 | 261 | // Remove escaped newlines 262 | val = val.replace("\\\n", ""); 263 | 264 | // Unescape Unicode characters 265 | val = _RE.replace_all(&val, |caps: &Captures| { 266 | let hex_char = caps.get(1).unwrap().as_str(); 267 | format!("{}", char::from_u32(u32::from_str_radix(hex_char, 16).unwrap()).unwrap()).to_owned() 268 | }).into_owned(); 269 | 270 | // Remove backslash 271 | val = val.replace(r"\", ""); 272 | 273 | val 274 | } 275 | 276 | fn _name_re(_val: &str) -> Regex { 277 | Regex::new(&(r"(?:^|:)".to_owned() + &regex::escape(&_unescape(_val)) + "$")).unwrap() 278 | } 279 | 280 | fn _value_re(op: &str, _val: Option<&str>, insensitive: bool) -> Option<Regex> { 281 | if _val.is_none() { return None }; 282 | let mut value = regex::escape(&_unescape(_val.unwrap())); 283 | 284 | if insensitive { 285 | value = "(?i)".to_owned() + &value.to_owned(); 286 | } 287 | 288 | Some(Regex::new(&( 289 | // "~=" (word) 290 | if op == "~" { 291 | r"(?:^|\s+)".to_owned() + &value + r"(?:\s+|$)" 292 | } 293 | 294 | // "*=" (contains) 295 | else if op == "*" { 296 | value 297 | } 298 | 299 | // "^=" (begins with) 300 | else if op == "^" { 301 | r"^".to_owned() + &value 302 | } 303 | 304 | // "$=" (ends with) 305 | else if op == "$" { 306 | value + r"$" 307 | } 308 | 309 | // Everything else 310 | else { 311 | r"^".to_owned() + &value + "$" 312 | } 313 | )).unwrap()) 314 | } 315 | 316 | pub fn parse(css: &str) -> GroupOfSelectors { 317 | let mut css = css.trim(); 318 | 319 | // Group separator re 320 | lazy_static!
{ 321 | static ref _SEPARATOR_RE: Regex = Regex::new(r"^(?s)\s*,\s*(.*)$").unwrap(); 322 | } 323 | 324 | let mut group: GroupOfSelectors = Vec::new(); 325 | loop { 326 | let (selectors, css_rest) = _parse_selectors(css); 327 | if !selectors.is_empty() { 328 | group.push(Rc::new(selectors)); 329 | css = css_rest; 330 | } else { 331 | break; 332 | } 333 | 334 | // Separator 335 | if let Some(caps) = _SEPARATOR_RE.captures(css) { 336 | css = caps.get(1).unwrap().as_str(); 337 | } else { 338 | break; 339 | } 340 | } 341 | 342 | group 343 | } 344 | 345 | fn _parse_selectors(css: &str) -> (Selectors, &str) { 346 | let mut css = css; 347 | 348 | // Selector combinator re 349 | lazy_static! { 350 | static ref _COMBINATOR_RE: Regex = Regex::new(r"^(?s)\s*([ >+~])\s*(.*)$").unwrap(); 351 | } 352 | 353 | let mut selectors: Selectors = Vec::new(); 354 | loop { 355 | let (conditions, css_rest) = _parse_selector_conditions(css); 356 | if !conditions.is_empty() { 357 | selectors.push(Rc::new(SelectorItem::Conditions { items: conditions })); 358 | css = css_rest; 359 | } else { 360 | break; 361 | } 362 | 363 | // Combinator 364 | if let Some(caps) = _COMBINATOR_RE.captures(css) { 365 | selectors.push(Rc::new(SelectorItem::Combinator { op: caps.get(1).unwrap().as_str().to_owned() })); 366 | css = caps.get(2).unwrap().as_str(); 367 | } else { 368 | break; 369 | } 370 | } 371 | 372 | return (selectors, css); 373 | } 374 | 375 | fn _parse_selector_conditions(css: &str) -> (Vec<ConditionItem>, &str) { 376 | let mut css = css; 377 | 378 | lazy_static!
{ 379 | static ref _CLASS_OR_ID_RE: Regex = Regex::new(&(r"^(?s)([.#])((?:".to_owned() + &*ESCAPE_RE_STR + r"\s|\\.|[^,.#:\[ >~+])+)" + r"(.*)$")).unwrap(); 380 | static ref _ATTRIBUTES_RE: Regex = Regex::new(&(r"^(?s)".to_owned() + &*ATTR_RE_STR + r"(.*)$")).unwrap(); 381 | static ref _PSEUDO_CLASS_RE: Regex = Regex::new(&(r"^(?s):([\w-]+)(?:\(((?:\([^)]+\)|[^)])+)\))?".to_owned() + r"(.*)$")).unwrap(); 382 | static ref _TAG_RE: Regex = Regex::new(&(r"^(?s)((?:".to_owned() + &*ESCAPE_RE_STR + r"\s|\\.|[^,.#:\[ >~+])+)" + r"(.*)$")).unwrap(); 383 | } 384 | 385 | let mut conditions: Vec<ConditionItem> = Vec::new(); 386 | loop { 387 | // Class or ID 388 | if let Some(caps) = _CLASS_OR_ID_RE.captures(css) { 389 | let prefix = caps.get(1).unwrap().as_str(); 390 | let (name, op) = if prefix == "." { ("class", "~") } else { ("id", "") }; 391 | let op_val = caps.get(2).map(|c| c.as_str()); 392 | conditions.push(ConditionItem::Attribute { name: _name_re(name), value: _value_re(op, op_val, false) }); 393 | css = caps.get(3).map(|c| c.as_str()).unwrap_or(""); 394 | } 395 | 396 | // Attributes 397 | else if let Some(caps) = _ATTRIBUTES_RE.captures(css) { 398 | let name = caps.get(1).unwrap().as_str(); 399 | let op = caps.get(2).map(|c| c.as_str()).unwrap_or(""); 400 | let op_val = caps.get(3).or(caps.get(4)).or(caps.get(5)).map(|c| c.as_str()); 401 | let op_insensitive = caps.get(6).is_some(); 402 | conditions.push(ConditionItem::Attribute { name: _name_re(name), value: _value_re(op, op_val, op_insensitive) }); 403 | css = caps.get(7).map(|c| c.as_str()).unwrap_or(""); 404 | } 405 | 406 | // Pseudo-class 407 | else if let Some(caps) = _PSEUDO_CLASS_RE.captures(css) { 408 | let name = caps.get(1).unwrap().as_str().to_owned().to_lowercase(); 409 | let args = caps.get(2).map(|c| c.as_str()); 410 | 411 | // ":not" (contains more selectors) 412 | if name == "not" { 413 | conditions.push(ConditionItem::PseudoClass { class: name, group: args.map(parse), equation: None }); 414 | } 415 | // ":nth-*"
(with An+B notation) 416 | else if name.starts_with("nth-") { 417 | conditions.push(ConditionItem::PseudoClass { class: name, group: None, equation: args.map(_equation) }); 418 | } 419 | // ":first-*" (rewrite to ":nth-*") 420 | else if name.starts_with("first-") { 421 | let name = "nth-".to_owned() + &name[6..]; 422 | conditions.push(ConditionItem::PseudoClass { class: name, group: None, equation: Some((0, 1)) }); 423 | } 424 | // ":last-*" (rewrite to ":nth-*") 425 | else if name.starts_with("last-") { 426 | let name = "nth-".to_owned() + &name; 427 | conditions.push(ConditionItem::PseudoClass { class: name, group: None, equation: Some((-1, 1)) }); 428 | } 429 | else { 430 | // No args 431 | conditions.push(ConditionItem::PseudoClass { class: name, group: None, equation: None }); 432 | } 433 | 434 | css = caps.get(3).map(|c| c.as_str()).unwrap_or(""); 435 | } 436 | 437 | // Tag 438 | else if let Some(caps) = _TAG_RE.captures(css) { 439 | let name = caps.get(1).unwrap().as_str(); 440 | if name != "*" { 441 | conditions.push(ConditionItem::Tag { name: _name_re(name) }); 442 | } 443 | css = caps.get(2).map(|c| c.as_str()).unwrap_or(""); 444 | } 445 | 446 | else { break; } 447 | } 448 | 449 | return (conditions, css); 450 | } 451 | 452 | fn _equation(equation_str: &str) -> (i32, i32) { 453 | lazy_static! 
{ 454 | static ref _RE1: Regex = Regex::new(r"^\s*((?:\+|-)?\d+)\s*$").unwrap(); 455 | static ref _RE2: Regex = Regex::new(r"^(?i)\s*((?:\+|-)?(?:\d+)?)?n\s*((?:\+|-)\s*\d+)?\s*$").unwrap(); 456 | } 457 | 458 | if equation_str.is_empty() { return (0, 0); } 459 | 460 | // "even" 461 | if equation_str.trim().to_lowercase() == "even" { return (2, 2); } 462 | 463 | // "odd" 464 | if equation_str.trim().to_lowercase() == "odd" { return (2, 1); } 465 | 466 | // "4", "+4" or "-4" 467 | if let Some(caps) = _RE1.captures(equation_str) { 468 | let num = caps.get(1).unwrap().as_str().parse::().unwrap(); 469 | return (0, num); 470 | } 471 | 472 | // "n", "4n", "+4n", "-4n", "n+1", "4n-1", "+4n-1" (and other variations) 473 | if let Some(caps) = _RE2.captures(equation_str) { 474 | let mut result = (0, 0); 475 | let num1 = caps.get(1).unwrap().as_str(); 476 | result.0 = if num1 == "-" { -1 } else if num1.is_empty() { 1 } else { num1.parse::().unwrap() }; 477 | if let Some(num2) = caps.get(2) { 478 | result.1 = num2.as_str().split_whitespace().collect::>().concat().parse::().unwrap(); 479 | } 480 | return result; 481 | } 482 | 483 | return (0, 0); 484 | } 485 | -------------------------------------------------------------------------------- /src/dom/html.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{HashSet, HashMap, BTreeMap}; 2 | use std::rc::{Rc, Weak}; 3 | use std::cell::RefCell; 4 | use std::sync::atomic::{AtomicUsize, Ordering}; 5 | 6 | use regex::{self, Regex}; 7 | 8 | use util::{xml_escape, html_unescape, html_attr_unescape}; 9 | 10 | lazy_static! 
{ 11 | static ref ATTR_RE_STR: String = String::new() + 12 | r"([^<>=\s/]+|/)" + // Key 13 | r"(?:" + 14 | r"\s*=\s*" + 15 | r"(?s:" + 16 | r#""(.*?)""# + // Quotation marks 17 | r"|" + 18 | r"'(.*?)'" + // Apostrophes 19 | r"|" + 20 | r"([^>\s]*)" + // Unquoted 21 | r")" + 22 | r")?\s*"; 23 | 24 | static ref TOKEN_RE_STR: String = String::new() + 25 | r"(?is)" + 26 | r"([^<]+)?" + // Text 27 | r"(?:" + 28 | r"<(?:" + 29 | r"!(?:" + 30 | r"DOCTYPE(\s+\w+.*?)" + // Doctype 31 | r"|" + 32 | r"--(.*?)--\s*" + // Comment 33 | r"|" + 34 | r"\[CDATA\[(.*?)\]\]" + // CDATA 35 | r")" + 36 | r"|" + 37 | r"\?(.*?)\?" + // Processing Instruction 38 | r"|" + 39 | r"\s*([^<>\s]+\s*(?:" + &*ATTR_RE_STR + r")*)" + // Tag 40 | r")>" + 41 | r"|" + 42 | r"(<)" + // Runaway "<" 43 | r")?" + 44 | r"(.*)$"; // Rest of html 45 | 46 | // HTML elements that only contain raw text 47 | static ref RAW: HashSet<&'static str> = hashset!["script", "style"]; 48 | 49 | // HTML elements that only contain raw text and entities 50 | static ref RCDATA: HashSet<&'static str> = hashset!["title", "textarea"]; 51 | 52 | static ref END: HashMap<&'static str, &'static str> = { 53 | // HTML elements with optional end tags 54 | let mut _end = hashmap!["body" => "head", "optgroup" => "optgroup", "option" => "option"]; 55 | 56 | // HTML elements that break paragraphs 57 | for x in vec![ 58 | "address", "article", "aside", "blockquote", "dir", "div", "dl", "fieldset", "footer", "form", 59 | "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "main", "menu", "nav", "ol", 60 | "p", "pre", "section", "table", "ul" 61 | ] { 62 | _end.insert(x, "p"); 63 | } 64 | 65 | _end 66 | }; 67 | 68 | // HTML elements with optional end tags and scoping rules 69 | static ref CLOSE: HashMap<&'static str, (HashSet<&'static str>, HashSet<&'static str>)> = { 70 | // HTML table elements with optional end tags 71 | let _table = hashset!["colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"]; 72 | 73 | let _close = hashmap![ 74 | 
"li" => (hashset!["li"], hashset!["ul", "ol"]), 75 | 76 | "colgroup" => (_table.clone(), hashset!["table"]), 77 | "tbody" => (_table.clone(), hashset!["table"]), 78 | "tfoot" => (_table.clone(), hashset!["table"]), 79 | "thead" => (_table.clone(), hashset!["table"]), 80 | 81 | "tr" => (hashset!["tr"], hashset!["table"]), 82 | "th" => (hashset!["th", "td"], hashset!["table"]), 83 | "td" => (hashset!["th", "td"], hashset!["table"]), 84 | 85 | "dd" => (hashset!["dd", "dt"], hashset!["dl"]), 86 | "dt" => (hashset!["dd", "dt"], hashset!["dl"]), 87 | 88 | "rp" => (hashset!["rp", "rt"], hashset!["ruby"]), 89 | "rt" => (hashset!["rp", "rt"], hashset!["ruby"]) 90 | ]; 91 | 92 | _close 93 | }; 94 | 95 | // HTML elements without end tags 96 | static ref EMPTY: HashSet<&'static str> = hashset![ 97 | "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", 98 | "menuitem", "meta", "param", "source", "track", "wbr" 99 | ]; 100 | 101 | // HTML elements categorized as phrasing content (and obsolete inline elements) 102 | static ref PHRASING: HashSet<&'static str> = hashset![ 103 | "a", "abbr", "area", "audio", "b", "bdi", "bdo", "br", "button", "canvas", "cite", "code", "data", 104 | "datalist", "del", "dfn", "em", "embed", "i", "iframe", "img", "input", "ins", "kbd", "keygen", 105 | "label", "link", "map", "mark", "math", "meta", "meter", "noscript", "object", "output", "picture", 106 | "progress", "q", "ruby", "s", "samp", "script", "select", "slot", "small", "span", "strong", "sub", "sup", 107 | "svg", "template", "textarea", "time", "u", "var", "video", "wbr", 108 | "acronym", "applet", "basefont", "big", "font", "strike", "tt" // Obsolete 109 | ]; 110 | 111 | // HTML elements that don't get their self-closing flag acknowledged 112 | static ref BLOCK: HashSet<&'static str> = hashset![ 113 | "a", "address", "applet", "article", "aside", "b", "big", "blockquote", "body", "button", 114 | "caption", "center", "code", "col", "colgroup", "dd", "details", 
"dialog", "dir", "div", 115 | "dl", "dt", "em", "fieldset", "figcaption", "figure", "font", "footer", "form", "frameset", 116 | "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "html", "i", "iframe", "li", 117 | "listing", "main", "marquee", "menu", "nav", "nobr", "noembed", "noframes", "noscript", 118 | "object", "ol", "optgroup", "option", "p", "plaintext", "pre", "rp", "rt", "s", "script", 119 | "section", "select", "small", "strike", "strong", "style", "summary", "table", "tbody", "td", 120 | "template", "textarea", "tfoot", "th", "thead", "title", "tr", "tt", "u", "ul", "xmp" 121 | ]; 122 | } 123 | 124 | static NODE_ID_NEXT: AtomicUsize = AtomicUsize::new(0); 125 | 126 | #[derive(Debug)] 127 | pub struct TreeNode { 128 | pub id: usize, 129 | pub parent: Option>, 130 | pub elem: NodeElem, 131 | } 132 | 133 | #[derive(Debug)] 134 | pub enum NodeElem { 135 | Root { 136 | childs: RefCell>>, 137 | }, 138 | 139 | Tag { 140 | name: String, 141 | attrs: BTreeMap>, 142 | childs: RefCell>>, 143 | }, 144 | 145 | Text { 146 | elem_type: String, 147 | content: String, 148 | }, 149 | } 150 | 151 | impl TreeNode { 152 | pub fn is_tag(&self) -> bool { 153 | match self.elem { 154 | NodeElem::Tag { .. } => true, 155 | _ => false, 156 | } 157 | } 158 | 159 | pub fn get_tag_name(&self) -> Option<&str> { 160 | match self.elem { 161 | NodeElem::Tag { ref name, .. } => Some(name), 162 | _ => None, 163 | } 164 | } 165 | 166 | pub fn get_tag_attrs<'a>(&'a self) -> Option<&'a BTreeMap>> { 167 | match self.elem { 168 | NodeElem::Tag { ref attrs, .. 
} => Some(attrs), 169 | _ => None, 170 | } 171 | } 172 | 173 | pub fn get_parent(&self) -> Option> { 174 | match self.parent { 175 | Some(ref x) => Some(x.upgrade().unwrap()), // strong reference should alive, force unwrap it 176 | _ => None, 177 | } 178 | } 179 | 180 | pub fn get_childs(&self) -> Option>> { 181 | match self.elem { 182 | NodeElem::Root { ref childs } => Some(childs.borrow().clone()), 183 | NodeElem::Tag { ref childs, .. } => Some(childs.borrow().clone()), 184 | _ => None, 185 | } 186 | } 187 | 188 | // pub fn dbg_string(&self) -> String { 189 | // let id = self.id; 190 | // match self.elem { 191 | // NodeElem::Root { .. } => format!("[{}] TreeNode:Root", id), 192 | // NodeElem::Tag { ref name, ref attrs, .. } => format!("[{}] TreeNode:Tag(name: {}, attrs: {:?})", id, name, attrs), 193 | // NodeElem::Text { ref elem_type, ref content } => format!("[{}] TreeNode:Text(type: {}, content: {})", id, elem_type, content), 194 | // } 195 | // } 196 | } 197 | 198 | fn _process_text_node(current: &Rc, elem_type: &str, content: &str) { 199 | let new_node = Rc::new( 200 | TreeNode { 201 | id: NODE_ID_NEXT.fetch_add(1, Ordering::Relaxed), 202 | parent: Some(Rc::downgrade(current)), 203 | elem: NodeElem::Text { elem_type: elem_type.to_owned(), content: content.to_owned() }, 204 | } 205 | ); 206 | 207 | match current.elem { 208 | NodeElem::Root { ref childs } => childs.borrow_mut().push(new_node), 209 | NodeElem::Tag { ref childs, .. } => childs.borrow_mut().push(new_node), 210 | NodeElem::Text { .. 
} => panic!("Cannot use `Text` node as parent"), 211 | }; 212 | } 213 | 214 | fn _process_start_tag(current: &Rc, start_tag: &str, attrs: BTreeMap>) -> Rc { 215 | let mut working_node = current.clone(); 216 | 217 | // Autoclose optional HTML elements 218 | if working_node.parent.is_some() { 219 | if let Some(end_tag) = END.get(start_tag) { 220 | working_node = _process_end_tag(&working_node, end_tag); 221 | } 222 | else if let Some(x) = CLOSE.get(start_tag) { 223 | let (ref allowed, ref scope) = *x; 224 | 225 | // Close allowed parent elements in scope 226 | let mut next = working_node.clone(); 227 | while next.parent.is_some() && !scope.contains(next.clone().get_tag_name().unwrap()) { 228 | let this = next.clone(); 229 | let this_tag_name = this.get_tag_name().unwrap(); 230 | 231 | if allowed.contains(this_tag_name) { 232 | working_node = _process_end_tag(&working_node, this_tag_name); 233 | } 234 | 235 | next = next.get_parent().unwrap(); 236 | } 237 | } 238 | } 239 | 240 | // New tag 241 | let new_node = Rc::new( 242 | TreeNode { 243 | id: NODE_ID_NEXT.fetch_add(1, Ordering::Relaxed), 244 | parent: Some(Rc::downgrade(&working_node)), 245 | elem: NodeElem::Tag { name: start_tag.to_owned(), attrs: attrs, childs: RefCell::new(Vec::new()) }, 246 | } 247 | ); 248 | 249 | match working_node.elem { 250 | NodeElem::Root { ref childs } => childs.borrow_mut().push(new_node.clone()), 251 | NodeElem::Tag { ref childs, .. } => childs.borrow_mut().push(new_node.clone()), 252 | NodeElem::Text { .. 
} => panic!("Cannot use `Text` node as parent"),
    }

    new_node
}

/// Handle an end tag seen while `current` is the innermost open element.
///
/// Walks from `current` towards the root looking for an open element named
/// `end_tag` and returns that element's parent (the new innermost open
/// element). If no such element is open, or a phrasing end tag would have to
/// cross a non-phrasing element, the end tag is ignored and `current` is
/// returned unchanged.
fn _process_end_tag(current: &Rc<TreeNode>, end_tag: &str) -> Rc<TreeNode> {
    // Search stack for start tag
    let mut node = current.clone();
    while let Some(parent) = node.get_parent() {
        if let Some(name) = node.get_tag_name() {
            // Right tag
            if name == end_tag {
                return parent;
            }

            // Phrasing content can only cross phrasing content
            if PHRASING.contains(end_tag) && !PHRASING.contains(name) {
                return current.clone();
            }
        }

        node = parent;
    }

    // Ignore useless end tag
    current.clone()
}

/// Parse an HTML fragment into a tree and return its root node.
///
/// The input is consumed token by token with `TOKEN_RE_STR`; text, tags,
/// doctypes, comments, CDATA sections and processing instructions each become
/// nodes below the element that is open at that point.
pub fn parse(html: &str) -> Rc<TreeNode> {
    lazy_static! {
        // Compiled once instead of on every call / for every start tag
        static ref _TOKEN_RE: Regex = Regex::new(&*TOKEN_RE_STR).unwrap();
        static ref _ATTR_RE: Regex = Regex::new(&*ATTR_RE_STR).unwrap();
        static ref _TAG_PLUS_ATTRS_RE: Regex = Regex::new(r"^([^\s/]+)([\s\S]*)").unwrap();
    }

    let mut html = html;

    let root = Rc::new(
        TreeNode {
            id: NODE_ID_NEXT.fetch_add(1, Ordering::Relaxed),
            parent: None,
            elem: NodeElem::Root { childs: RefCell::new(Vec::new()) },
        }
    );

    let mut current = root.clone();

    while let Some(caps) = _TOKEN_RE.captures(html) {
        let text = caps.get(1);
        let doctype = caps.get(2);
        let comment = caps.get(3);
        let cdata = caps.get(4);
        let pi = caps.get(5);
        let tag = caps.get(6);
        let runaway = caps.get(11); // groups 7-10 come from the embedded attribute pattern

        html = caps.get(12).map_or("", |c| c.as_str()); // html rest

        // Text (and runaway "<"); a runaway "<" is kept even without preceding text
        if text.is_some() || runaway.is_some() {
            let mut chunk = text.map_or_else(String::new, |t| t.as_str().to_owned());
            if runaway.is_some() {
                chunk.push('<');
            }
            _process_text_node(&current, "text", &html_unescape(&chunk));
        }

        // Tag
        if let Some(tag) = tag {
            // End: /tag
            if tag.as_str().starts_with("/") {
                let end_tag = tag.as_str().trim_start_matches('/').trim().to_lowercase();
                current = _process_end_tag(&current, &end_tag);
            }
            // Start: tag
            else {
                // Cannot fail: the token pattern guarantees a leading non-space
                // character, and a leading "/" was handled above
                let tag_caps = _TAG_PLUS_ATTRS_RE.captures(tag.as_str()).unwrap();
                let mut start_tag = tag_caps.get(1).unwrap().as_str().to_lowercase();
                let attrs_str = tag_caps.get(2).unwrap();

                // Attributes
                let mut attrs: BTreeMap<String, Option<String>> = BTreeMap::new();
                let mut is_closing = false;
                for attr_caps in _ATTR_RE.captures_iter(attrs_str.as_str()) {
                    let key = attr_caps.get(1).unwrap().as_str().to_lowercase();

                    // Empty tag
                    if key == "/" {
                        is_closing = true;
                        continue;
                    }

                    // Double-quoted, single-quoted or unquoted value (in that order)
                    let value = attr_caps.get(2)
                        .or_else(|| attr_caps.get(3))
                        .or_else(|| attr_caps.get(4));

                    attrs.insert(key, value.map(|v| html_attr_unescape(v.as_str())));
                }

                // "image" is an alias for "img"
                if start_tag == "image" { start_tag = "img".to_owned() }

                current = _process_start_tag(&current, &start_tag, attrs);

                // Element without end tag (self-closing)
                if EMPTY.contains(start_tag.as_str()) || (!BLOCK.contains(start_tag.as_str()) && is_closing) {
                    current = _process_end_tag(&current, &start_tag);
                }

                // Raw text elements
                if RAW.contains(start_tag.as_str()) || RCDATA.contains(start_tag.as_str()) {
                    // Anchored at the current position; "s" lets the content span
                    // lines and "i" accepts an upper-case end tag. The content may
                    // be empty so that "<script></script>" stops at its own end tag.
                    let raw_text_re = Regex::new(
                        &(r"^(?is)(.*?)<\s*/\s*".to_owned() + &regex::escape(&start_tag) + r"\s*>(.*)$")
                    ).unwrap();

                    if let Some(raw_text_caps) = raw_text_re.captures(html) {
                        let raw_text = raw_text_caps.get(1).map_or("", |m| m.as_str());
                        html = raw_text_caps.get(2).map_or("", |c| c.as_str());

                        if !raw_text.is_empty() {
                            if RCDATA.contains(start_tag.as_str()) {
                                _process_text_node(&current, "raw", &html_unescape(raw_text))
                            } else {
                                _process_text_node(&current, "raw", raw_text)
                            }
                        }

                        current = _process_end_tag(&current, &start_tag);
                    }
                }
            }
        }

        // DOCTYPE
        else if let Some(doctype) = doctype {
            _process_text_node(&current, "doctype", doctype.as_str());
        }

        // Comment
        else if let Some(comment) = comment {
            _process_text_node(&current, "comment", comment.as_str());
        }

        // CDATA
        else if let Some(cdata) = cdata {
            _process_text_node(&current, "cdata", cdata.as_str());
        }

        // Processing instruction
        else if let Some(pi) = pi {
            _process_text_node(&current, "pi", pi.as_str());
        }

        if html.is_empty() { break; }
    }

    root
}

/// Serialize `root` and everything below it back to markup.
///
/// Text is XML-escaped, raw text is emitted verbatim, and doctype, comment,
/// CDATA and processing-instruction nodes are wrapped in the delimiters that
/// the tokenizer stripped when it captured their content.
pub fn render(root: &Rc<TreeNode>) -> String {
    match root.elem {
        NodeElem::Text { ref elem_type, ref content } => match elem_type.as_str() {
            // Text (escaped)
            "text" => xml_escape(content),

            // Raw text
            "raw" => content.clone(),

            // DOCTYPE
            "doctype" => "<!DOCTYPE".to_owned() + content + ">",

            // Comment
            "comment" => "<!--".to_owned() + content + "-->",

            // CDATA
            "cdata" => "<![CDATA[".to_owned() + content + "]]>",

            // Processing instruction
            "pi" => "<?".to_owned() + content + "?>",

            _ => String::new(),
        },

        // Root
        NodeElem::Root { ref childs } => {
            childs.borrow().iter().map(render).collect::<String>()
        },

        NodeElem::Tag { ref name, ref attrs, ref childs } => {
            let mut result = "<".to_owned() + name;

            // Attributes
            for (key, value) in attrs.iter() {
                result.push(' ');
                result.push_str(key);
                if let Some(ref x) = *value {
                    result.push_str("=\"");
                    result.push_str(&xml_escape(x));
                    result.push('"');
                }
            }

            let childs = childs.borrow();

            // No children: elements without end tags stay open, everything else is closed explicitly
            if childs.is_empty() {
                return if EMPTY.contains(&name[..]) { result + ">" } else { result + "></" + name + ">" };
            }

            // Children
            result + ">" + &childs.iter().map(render).collect::<String>() + "</" + name + ">"
        },
    }
}

--------------------------------------------------------------------------------
/src/dom/mod.rs:
--------------------------------------------------------------------------------
mod css;
mod html;

use std::collections::BTreeMap;
use std::rc::Rc;

use regex::Regex;

use self::html::TreeNode;

/// The HTML `DOM` type
#[derive(Debug)]
pub struct DOM {
    root: Rc<TreeNode>, // To avoid destroying the root node ahead of time
    tree: Rc<TreeNode>,
}

impl DOM {
    /// Construct a new `DOM` object and parse HTML.
20 | /// 21 | /// ``` 22 | /// use victoria_dom::DOM; 23 | /// let dom = DOM::new("
Hello
"); 24 | /// ``` 25 | pub fn new(html: &str) -> DOM { 26 | let tree = html::parse(html); 27 | DOM { root: tree.clone(), tree: tree } 28 | } 29 | 30 | /// Find all ancestor elements of the current element matching the optional CSS selector 31 | /// and return a Vector of DOM objects of these elements. 32 | /// 33 | /// ``` 34 | /// use victoria_dom::DOM; 35 | /// let dom = DOM::new("
Hello
"); 36 | /// let ancestors: Vec<_> = dom.at("div").unwrap().ancestors(None).iter().map(|x| x.tag().unwrap().to_string()).collect(); 37 | /// assert_eq!(ancestors, ["body", "html"]); 38 | /// ``` 39 | pub fn ancestors(&self, selector: Option<&str>) -> Vec { 40 | let mut ancestors = Vec::new(); 41 | let mut node = self.tree.clone(); 42 | while let Some(parent) = node.get_parent() { 43 | if parent.is_tag() && (selector.is_none() || css::matches(&parent, selector.unwrap())) { 44 | ancestors.push(DOM { root: self.root.clone(), tree: parent.clone() }); 45 | } 46 | node = parent; 47 | } 48 | ancestors 49 | } 50 | 51 | /// Find first descendant element of the current element matching the CSS selector and return it as a DOM object, 52 | /// or `None` if none could be found. 53 | pub fn at(&self, selector: &str) -> Option { 54 | if let Some(node) = css::select_one(&self.tree, selector) { 55 | return Some(DOM { root: self.root.clone(), tree: node }) 56 | } 57 | None 58 | } 59 | 60 | /// The current element tag name. 61 | pub fn tag(&self) -> Option<&str> { 62 | self.tree.get_tag_name() 63 | } 64 | 65 | /// The current element attribute2value map. 66 | pub fn attrs(&self) -> BTreeMap> { 67 | self.tree.get_tag_attrs().map_or_else(|| BTreeMap::new(), |x| x.clone()) 68 | } 69 | 70 | /// The current element attribute value, or `None` if there are no attribute with the name or value. 71 | pub fn attr(&self, name: &str) -> Option<&str> { 72 | self.tree.get_tag_attrs().and_then(|x| x.get(name)).and_then(|x| x.as_ref()).map(|x| x.as_str()) 73 | } 74 | 75 | /// Find all child elements of the current element matching the CSS selector and return a Vector of DOM objects of these elements. 76 | /// 77 | /// ``` 78 | /// use victoria_dom::DOM; 79 | /// let dom = DOM::new("
A C
B
"); 80 | /// let childs: Vec<_> = dom.at("div").unwrap().childs(None).iter().map(|x| x.attr("id").unwrap().to_string()).collect(); 81 | /// assert_eq!(childs, ["a", "b"]); 82 | /// ``` 83 | pub fn childs(&self, selector: Option<&str>) -> Vec { 84 | self.tree.get_childs().unwrap_or(Vec::new()).into_iter().filter_map(|x| 85 | if x.is_tag() && (selector.is_none() || css::matches(&x, selector.unwrap())) { 86 | Some(DOM { root: self.root.clone(), tree: x }) 87 | } else { 88 | None 89 | } 90 | ).collect() 91 | } 92 | 93 | /// Find all descendant elements of the current element matching the CSS selector and return a Vector of DOM objects of these elements. 94 | /// 95 | /// ``` 96 | /// use victoria_dom::DOM; 97 | /// let dom = DOM::new("
C
B
"); 98 | /// let elems: Vec<_> = dom.find("div[id]").iter().map(|x| x.attr("id").unwrap().to_string()).collect(); 99 | /// assert_eq!(elems, ["a", "c", "b"]); 100 | /// ``` 101 | pub fn find(&self, selector: &str) -> Vec { 102 | css::select(&self.tree, selector, 0).into_iter().map(|x| DOM { root: self.root.clone(), tree: x }).collect() 103 | } 104 | 105 | /// Check if the current element matches the CSS selector. 106 | pub fn matches(&self, selector: &str) -> bool { 107 | css::matches(&self.tree, selector) 108 | } 109 | 110 | /// Find all sibling elements after the current element matching the CSS selector and return a Vector of DOM objects of these elements. 111 | /// 112 | /// ``` 113 | /// use victoria_dom::DOM; 114 | /// let dom = DOM::new("
C
B
"); 115 | /// let elems: Vec<_> = dom.at("div#a").unwrap().following(None).iter().map(|x| x.attr("id").unwrap().to_string()).collect(); 116 | /// assert_eq!(elems, ["b"]); 117 | /// ``` 118 | pub fn following(&self, selector: Option<&str>) -> Vec { 119 | self._siblings().into_iter().skip_while(|x| x.id != self.tree.id).skip(1) 120 | .filter(|x| selector.is_none() || css::matches(x, selector.unwrap())) 121 | .map(|x| DOM { root: self.root.clone(), tree: x }).collect() 122 | } 123 | 124 | /// Return a DOM object for next sibling element, or `None` if there are no more siblings. 125 | pub fn next(&self) -> Option { 126 | self._siblings().into_iter().skip_while(|x| x.id != self.tree.id).skip(1).next().map(|x| DOM { root: self.root.clone(), tree: x }) 127 | } 128 | 129 | /// Find all sibling elements before the current element matching the CSS selector and return a Vector of DOM objects of these elements. 130 | /// 131 | /// ``` 132 | /// use victoria_dom::DOM; 133 | /// let dom = DOM::new("
C
B
"); 134 | /// let elems: Vec<_> = dom.at("div#b").unwrap().preceding(None).iter().map(|x| x.attr("id").unwrap().to_string()).collect(); 135 | /// assert_eq!(elems, ["a"]); 136 | /// ``` 137 | pub fn preceding(&self, selector: Option<&str>) -> Vec { 138 | self._siblings().into_iter().take_while(|x| x.id != self.tree.id) 139 | .filter(|x| selector.is_none() || css::matches(x, selector.unwrap())) 140 | .map(|x| DOM { root: self.root.clone(), tree: x }).collect() 141 | } 142 | 143 | /// Return a DOM object for the previous sibling element, or `None` if there are no more siblings. 144 | pub fn prev(&self) -> Option { 145 | self._siblings().into_iter().take_while(|x| x.id != self.tree.id).last().map(|x| DOM { root: self.root.clone(), tree: x }) 146 | } 147 | 148 | fn _siblings(&self) -> Vec> { 149 | self.tree.get_parent() 150 | .and_then(|x| x.get_childs()) 151 | .map(|x| x.into_iter().filter(|v| v.is_tag()).collect::>()) 152 | .unwrap_or(Vec::new()) 153 | } 154 | 155 | /// Return a DOM object for the parent of the current element, or `None` if this element has no parent. 156 | pub fn parent(&self) -> Option { 157 | self.tree.get_parent().map(|x| DOM { root: self.root.clone(), tree: x }) 158 | } 159 | 160 | /// Render the current element and its content to HTML. 161 | pub fn to_string(&self) -> String { 162 | html::render(&self.tree) 163 | } 164 | 165 | /// Extract text content from the current element only (not including child elements) with smart whitespace trimming. 166 | /// 167 | /// ``` 168 | /// use victoria_dom::DOM; 169 | /// let dom = DOM::new("
foo\n

bar

baz\n
"); 170 | /// assert_eq!(dom.at("div").unwrap().text(), "foo baz"); 171 | /// ``` 172 | pub fn text(&self) -> String { 173 | self._text(false, true) // non-recursive trimmed 174 | } 175 | 176 | /// Extract text content from the current element only (not including child elements) without smart whitespace trimming. 177 | /// 178 | /// ``` 179 | /// use victoria_dom::DOM; 180 | /// let dom = DOM::new("
foo\n

bar

baz\n
"); 181 | /// assert_eq!(dom.at("div").unwrap().rtext(), "foo\nbaz\n"); 182 | /// ``` 183 | pub fn rtext(&self) -> String { 184 | self._text(false, false) // non-recursive raw 185 | } 186 | 187 | /// Extract text content from all descendant nodes of the current element with smart whitespace trimming. 188 | /// 189 | /// ``` 190 | /// use victoria_dom::DOM; 191 | /// let dom = DOM::new("
foo\n

bar

baz\n
"); 192 | /// assert_eq!(dom.at("div").unwrap().text_all(), "foo bar baz"); 193 | /// ``` 194 | pub fn text_all(&self) -> String { 195 | self._text(true, true) // recursive trimmed 196 | } 197 | 198 | /// Extract text content from all descendant nodes of the current element without smart whitespace trimming. 199 | /// 200 | /// ``` 201 | /// use victoria_dom::DOM; 202 | /// let dom = DOM::new("
foo\n

bar

baz\n
"); 203 | /// assert_eq!(dom.at("div").unwrap().rtext_all(), "foo\nbarbaz\n"); 204 | /// ``` 205 | pub fn rtext_all(&self) -> String { 206 | self._text(true, false) // recursive raw 207 | } 208 | 209 | fn _text(&self, recursive: bool, trim: bool) -> String { 210 | // Try to detect "pre" tag 211 | let mut under_pre_tag = false; 212 | if trim { 213 | let mut node = self.tree.clone(); 214 | loop { 215 | if let html::NodeElem::Tag { ref name, .. } = node.elem { 216 | if name == "pre" { 217 | under_pre_tag = true; 218 | break; 219 | } 220 | } 221 | if node.get_parent().is_some() { node = node.get_parent().unwrap(); } else { break; } 222 | } 223 | } 224 | 225 | match self.tree.get_childs() { 226 | Some(nodes) => _nodes_text(&nodes, recursive, trim && !under_pre_tag), 227 | _ => String::new(), 228 | } 229 | } 230 | 231 | /// Return content of the current element. 232 | /// 233 | /// ``` 234 | /// use victoria_dom::DOM; 235 | /// let dom = DOM::new("
Test
"); 236 | /// assert_eq!(dom.at("div").unwrap().content(), "Test"); 237 | /// ``` 238 | pub fn content(&self) -> String { 239 | self.tree.get_childs().unwrap().into_iter().map(|x| html::render(&x)).collect::>().join("") 240 | } 241 | } 242 | 243 | fn _nodes_text(nodes: &Vec>, recursive: bool, trim: bool) -> String { 244 | lazy_static! { 245 | static ref _RE1: Regex = Regex::new(r"\s+").unwrap(); 246 | static ref _RE2: Regex = Regex::new(r"\S\z").unwrap(); 247 | static ref _RE3: Regex = Regex::new(r"^[^.!?,;:\s]+").unwrap(); 248 | static ref _RE4: Regex = Regex::new(r"\S+").unwrap(); 249 | } 250 | 251 | let mut text = String::new(); 252 | for node in nodes { 253 | let mut chunk = match node.elem { 254 | html::NodeElem::Text { ref elem_type, ref content } => { 255 | match elem_type.as_ref() { 256 | "text" if trim => _RE1.replace_all(content.trim(), " ").into_owned(), 257 | "text" | "raw" | "cdata" => content.to_owned(), 258 | _ => String::new(), 259 | } 260 | }, 261 | html::NodeElem::Tag { ref name, ref childs, .. } if recursive => { 262 | _nodes_text(&childs.borrow(), true, trim && name != "pre") 263 | } 264 | _ => String::new(), 265 | }; 266 | 267 | // Add leading whitespace if punctuation allows it 268 | if trim && _RE2.is_match(&text) && _RE3.is_match(&chunk) { 269 | chunk = " ".to_owned() + &chunk 270 | } 271 | 272 | // Trim whitespace blocks 273 | if _RE4.is_match(&chunk) || !trim { 274 | text.push_str(&chunk); 275 | } 276 | } 277 | text 278 | } 279 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![doc(html_root_url = "https://docs.rs/victoria-dom/0.1.2")] 2 | #![deny(missing_docs)] 3 | #![deny(warnings)] 4 | #![deny(missing_debug_implementations)] 5 | 6 | //! Minimalistic HTML parser with CSS selectors 7 | //! 8 | //! The project has been inspired by [Mojo::DOM](https://metacpan.org/pod/Mojo::DOM). 9 | //! 10 | //! 
It will even try to interpret broken HTML, so you should not use it for validation. 11 | //! 12 | //! # Examples 13 | //! 14 | //! ``` 15 | //! extern crate victoria_dom; 16 | //! 17 | //! use victoria_dom::DOM; 18 | //! 19 | //! fn main() { 20 | //! let html = r#"
Hello, Rust
"#; 21 | //! let dom = DOM::new(html); 22 | //! 23 | //! assert_eq!(dom.at("html").unwrap().text_all(), "Hello, Rust"); 24 | //! assert_eq!(dom.at("div#main > a").unwrap().attr("alt").unwrap(), "The Rust Programing Language"); 25 | //! } 26 | //! ``` 27 | //! 28 | //! # Supported CSS selectors 29 | //! 30 | //! * `*` Any element. 31 | //! * `E` An element of type `E`. 32 | //! * `E[foo]` An `E` element with a `foo` attribute. 33 | //! * `E[foo="bar"]` An `E` element whose `foo` attribute value is exactly equal to `bar`. 34 | //! * `E[foo~="bar"]` An `E` element whose `foo` attribute value is a list of whitespace-separated values, one of which is exactly equal to `bar`. 35 | //! * `E[foo^="bar"]` An `E` element whose `foo` attribute value begins exactly with the string `bar`. 36 | //! * `E[foo$="bar"]` An `E` element whose `foo` attribute value ends exactly with the string `bar`. 37 | //! * `E[foo*="bar"]` An `E` element whose `foo` attribute value contains the substring `bar`. 38 | //! * `E:root` An `E` element, root of the document. 39 | //! * `E:nth-child(n)` An `E` element, the `n-th` child of its parent. 40 | //! * `E:nth-last-child(n)` An `E` element, the `n-th` child of its parent, counting from the last one. 41 | //! * `E:nth-of-type(n)` An `E` element, the `n-th` sibling of its type. 42 | //! * `E:nth-last-of-type(n)` An `E` element, the `n-th` sibling of its type, counting from the last one. 43 | //! * `E:first-child` An `E` element, first child of its parent. 44 | //! * `E:last-child` An `E` element, last child of its parent. 45 | //! * `E:first-of-type` An `E` element, first sibling of its type. 46 | //! * `E:last-of-type` An `E` element, last sibling of its type. 47 | //! * `E:only-child` An `E` element, only child of its parent. 48 | //! * `E:only-of-type` An `E` element, only sibling of its type. 49 | //! * `E:empty` An `E` element that has no children (including text nodes). 50 | //! 
* `E:checked` A user interface element `E` which is checked (for instance a radio-button or checkbox). 51 | //! * `E.warning` An `E` element whose class is `warning`. 52 | //! * `E#myid` An `E` element with ID equal to `myid`. 53 | //! * `E:not(s)` An `E` element that does not match simple selector `s`. 54 | //! * `E F` An `F` element descendant of an `E` element. 55 | //! * `E > F` An `F` element child of an `E` element. 56 | //! * `E + F` An `F` element immediately preceded by an `E` element. 57 | //! * `E ~ F` An `F` element preceded by an `E` element. 58 | //! * `E, F, G` Elements of type `E`, `F` and `G`. 59 | //! * `E[foo=bar][bar=baz]` An `E` element whose attributes match all following attribute selectors. 60 | 61 | #[macro_use] extern crate lazy_static; 62 | #[macro_use] extern crate maplit; 63 | extern crate regex; 64 | 65 | pub use dom::DOM; 66 | 67 | mod dom; 68 | mod util; 69 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::{char, u32}; 3 | use std::cmp; 4 | 5 | use regex::{Regex, Captures}; 6 | 7 | lazy_static! 
{ 8 | static ref ENTITY_RE: Regex = Regex::new(r#"&(?:\#((?:[0-9]{1,7}|x[0-9a-fA-F]{1,6}));|(\w+[;=]?))"#).unwrap(); 9 | } 10 | 11 | pub fn xml_escape(text: &str) -> String { 12 | let mut text = text.to_owned(); 13 | text = text.replace("&", "&"); 14 | text = text.replace("<", "<"); 15 | text = text.replace(">", ">"); 16 | text = text.replace("\"", """); 17 | text = text.replace("'", "'"); 18 | text 19 | } 20 | 21 | pub fn html_unescape(text: &str) -> String { 22 | _html_unescape(text, false) 23 | } 24 | 25 | pub fn html_attr_unescape(text: &str) -> String { 26 | _html_unescape(text, true) 27 | } 28 | 29 | fn _html_unescape(text: &str, is_attr: bool) -> String { 30 | ENTITY_RE.replace_all(text, |caps: &Captures| { 31 | caps.get(1) 32 | .map(|x| _decode_point(x.as_str())) 33 | .unwrap_or_else(|| _decode_name(caps.get(2).unwrap().as_str(), is_attr)) 34 | }).into_owned() 35 | } 36 | 37 | fn _decode_point(point: &str) -> String { 38 | // Code point 39 | (if point.starts_with("x") { u32::from_str_radix(&point[1..], 16) } else { u32::from_str_radix(point, 10) }) 40 | .ok() 41 | .and_then(char::from_u32) 42 | .map(|c| c.to_string()) 43 | .unwrap_or(point.to_owned()) 44 | } 45 | 46 | pub fn _decode_name(name: &str, is_attr: bool) -> String { 47 | lazy_static! { 48 | static ref _ALPHANUMEQ_RE: Regex = Regex::new("[A-Za-z0-9=]").unwrap(); 49 | }; 50 | 51 | // Named character reference 52 | for len in 0 .. cmp::max(1, name.len()) - 1 { 53 | let name_trunc = &name[0 .. (name.len() - len)]; 54 | let last = &name[(name.len() - len) .. cmp::min((name.len() - len) + 1, name.len())]; 55 | let rest = &name[(name.len() - len) .. name.len()]; 56 | 57 | if !is_attr || name_trunc.ends_with(';') || !_ALPHANUMEQ_RE.is_match(last) { 58 | if let Some(&val) = ENTITIES.get(&name_trunc) { 59 | return val.to_string() + rest; 60 | } 61 | } 62 | } 63 | 64 | "&".to_string() + name 65 | } 66 | 67 | lazy_static! 
{ 68 | static ref ENTITIES: HashMap<&'static str, &'static str> = hashmap![ 69 | "Aacute;" => "\u{000C1}", 70 | "Aacute" => "\u{000C1}", 71 | "aacute;" => "\u{000E1}", 72 | "aacute" => "\u{000E1}", 73 | "Abreve;" => "\u{00102}", 74 | "abreve;" => "\u{00103}", 75 | "ac;" => "\u{0223E}", 76 | "acd;" => "\u{0223F}", 77 | "acE;" => "\u{0223E}\u{00333}", 78 | "Acirc;" => "\u{000C2}", 79 | "Acirc" => "\u{000C2}", 80 | "acirc;" => "\u{000E2}", 81 | "acirc" => "\u{000E2}", 82 | "acute;" => "\u{000B4}", 83 | "acute" => "\u{000B4}", 84 | "Acy;" => "\u{00410}", 85 | "acy;" => "\u{00430}", 86 | "AElig;" => "\u{000C6}", 87 | "AElig" => "\u{000C6}", 88 | "aelig;" => "\u{000E6}", 89 | "aelig" => "\u{000E6}", 90 | "af;" => "\u{02061}", 91 | "Afr;" => "\u{1D504}", 92 | "afr;" => "\u{1D51E}", 93 | "Agrave;" => "\u{000C0}", 94 | "Agrave" => "\u{000C0}", 95 | "agrave;" => "\u{000E0}", 96 | "agrave" => "\u{000E0}", 97 | "alefsym;" => "\u{02135}", 98 | "aleph;" => "\u{02135}", 99 | "Alpha;" => "\u{00391}", 100 | "alpha;" => "\u{003B1}", 101 | "Amacr;" => "\u{00100}", 102 | "amacr;" => "\u{00101}", 103 | "amalg;" => "\u{02A3F}", 104 | "AMP;" => "\u{00026}", 105 | "AMP" => "\u{00026}", 106 | "amp;" => "\u{00026}", 107 | "amp" => "\u{00026}", 108 | "And;" => "\u{02A53}", 109 | "and;" => "\u{02227}", 110 | "andand;" => "\u{02A55}", 111 | "andd;" => "\u{02A5C}", 112 | "andslope;" => "\u{02A58}", 113 | "andv;" => "\u{02A5A}", 114 | "ang;" => "\u{02220}", 115 | "ange;" => "\u{029A4}", 116 | "angle;" => "\u{02220}", 117 | "angmsd;" => "\u{02221}", 118 | "angmsdaa;" => "\u{029A8}", 119 | "angmsdab;" => "\u{029A9}", 120 | "angmsdac;" => "\u{029AA}", 121 | "angmsdad;" => "\u{029AB}", 122 | "angmsdae;" => "\u{029AC}", 123 | "angmsdaf;" => "\u{029AD}", 124 | "angmsdag;" => "\u{029AE}", 125 | "angmsdah;" => "\u{029AF}", 126 | "angrt;" => "\u{0221F}", 127 | "angrtvb;" => "\u{022BE}", 128 | "angrtvbd;" => "\u{0299D}", 129 | "angsph;" => "\u{02222}", 130 | "angst;" => "\u{000C5}", 131 | "angzarr;" => 
"\u{0237C}", 132 | "Aogon;" => "\u{00104}", 133 | "aogon;" => "\u{00105}", 134 | "Aopf;" => "\u{1D538}", 135 | "aopf;" => "\u{1D552}", 136 | "ap;" => "\u{02248}", 137 | "apacir;" => "\u{02A6F}", 138 | "apE;" => "\u{02A70}", 139 | "ape;" => "\u{0224A}", 140 | "apid;" => "\u{0224B}", 141 | "apos;" => "\u{00027}", 142 | "ApplyFunction;" => "\u{02061}", 143 | "approx;" => "\u{02248}", 144 | "approxeq;" => "\u{0224A}", 145 | "Aring;" => "\u{000C5}", 146 | "Aring" => "\u{000C5}", 147 | "aring;" => "\u{000E5}", 148 | "aring" => "\u{000E5}", 149 | "Ascr;" => "\u{1D49C}", 150 | "ascr;" => "\u{1D4B6}", 151 | "Assign;" => "\u{02254}", 152 | "ast;" => "\u{0002A}", 153 | "asymp;" => "\u{02248}", 154 | "asympeq;" => "\u{0224D}", 155 | "Atilde;" => "\u{000C3}", 156 | "Atilde" => "\u{000C3}", 157 | "atilde;" => "\u{000E3}", 158 | "atilde" => "\u{000E3}", 159 | "Auml;" => "\u{000C4}", 160 | "Auml" => "\u{000C4}", 161 | "auml;" => "\u{000E4}", 162 | "auml" => "\u{000E4}", 163 | "awconint;" => "\u{02233}", 164 | "awint;" => "\u{02A11}", 165 | "backcong;" => "\u{0224C}", 166 | "backepsilon;" => "\u{003F6}", 167 | "backprime;" => "\u{02035}", 168 | "backsim;" => "\u{0223D}", 169 | "backsimeq;" => "\u{022CD}", 170 | "Backslash;" => "\u{02216}", 171 | "Barv;" => "\u{02AE7}", 172 | "barvee;" => "\u{022BD}", 173 | "Barwed;" => "\u{02306}", 174 | "barwed;" => "\u{02305}", 175 | "barwedge;" => "\u{02305}", 176 | "bbrk;" => "\u{023B5}", 177 | "bbrktbrk;" => "\u{023B6}", 178 | "bcong;" => "\u{0224C}", 179 | "Bcy;" => "\u{00411}", 180 | "bcy;" => "\u{00431}", 181 | "bdquo;" => "\u{0201E}", 182 | "becaus;" => "\u{02235}", 183 | "Because;" => "\u{02235}", 184 | "because;" => "\u{02235}", 185 | "bemptyv;" => "\u{029B0}", 186 | "bepsi;" => "\u{003F6}", 187 | "bernou;" => "\u{0212C}", 188 | "Bernoullis;" => "\u{0212C}", 189 | "Beta;" => "\u{00392}", 190 | "beta;" => "\u{003B2}", 191 | "beth;" => "\u{02136}", 192 | "between;" => "\u{0226C}", 193 | "Bfr;" => "\u{1D505}", 194 | "bfr;" => "\u{1D51F}", 
195 | "bigcap;" => "\u{022C2}", 196 | "bigcirc;" => "\u{025EF}", 197 | "bigcup;" => "\u{022C3}", 198 | "bigodot;" => "\u{02A00}", 199 | "bigoplus;" => "\u{02A01}", 200 | "bigotimes;" => "\u{02A02}", 201 | "bigsqcup;" => "\u{02A06}", 202 | "bigstar;" => "\u{02605}", 203 | "bigtriangledown;" => "\u{025BD}", 204 | "bigtriangleup;" => "\u{025B3}", 205 | "biguplus;" => "\u{02A04}", 206 | "bigvee;" => "\u{022C1}", 207 | "bigwedge;" => "\u{022C0}", 208 | "bkarow;" => "\u{0290D}", 209 | "blacklozenge;" => "\u{029EB}", 210 | "blacksquare;" => "\u{025AA}", 211 | "blacktriangle;" => "\u{025B4}", 212 | "blacktriangledown;" => "\u{025BE}", 213 | "blacktriangleleft;" => "\u{025C2}", 214 | "blacktriangleright;" => "\u{025B8}", 215 | "blank;" => "\u{02423}", 216 | "blk12;" => "\u{02592}", 217 | "blk14;" => "\u{02591}", 218 | "blk34;" => "\u{02593}", 219 | "block;" => "\u{02588}", 220 | "bne;" => "\u{0003D}\u{020E5}", 221 | "bnequiv;" => "\u{02261}\u{020E5}", 222 | "bNot;" => "\u{02AED}", 223 | "bnot;" => "\u{02310}", 224 | "Bopf;" => "\u{1D539}", 225 | "bopf;" => "\u{1D553}", 226 | "bot;" => "\u{022A5}", 227 | "bottom;" => "\u{022A5}", 228 | "bowtie;" => "\u{022C8}", 229 | "boxbox;" => "\u{029C9}", 230 | "boxDL;" => "\u{02557}", 231 | "boxDl;" => "\u{02556}", 232 | "boxdL;" => "\u{02555}", 233 | "boxdl;" => "\u{02510}", 234 | "boxDR;" => "\u{02554}", 235 | "boxDr;" => "\u{02553}", 236 | "boxdR;" => "\u{02552}", 237 | "boxdr;" => "\u{0250C}", 238 | "boxH;" => "\u{02550}", 239 | "boxh;" => "\u{02500}", 240 | "boxHD;" => "\u{02566}", 241 | "boxHd;" => "\u{02564}", 242 | "boxhD;" => "\u{02565}", 243 | "boxhd;" => "\u{0252C}", 244 | "boxHU;" => "\u{02569}", 245 | "boxHu;" => "\u{02567}", 246 | "boxhU;" => "\u{02568}", 247 | "boxhu;" => "\u{02534}", 248 | "boxminus;" => "\u{0229F}", 249 | "boxplus;" => "\u{0229E}", 250 | "boxtimes;" => "\u{022A0}", 251 | "boxUL;" => "\u{0255D}", 252 | "boxUl;" => "\u{0255C}", 253 | "boxuL;" => "\u{0255B}", 254 | "boxul;" => "\u{02518}", 255 | "boxUR;" 
=> "\u{0255A}", 256 | "boxUr;" => "\u{02559}", 257 | "boxuR;" => "\u{02558}", 258 | "boxur;" => "\u{02514}", 259 | "boxV;" => "\u{02551}", 260 | "boxv;" => "\u{02502}", 261 | "boxVH;" => "\u{0256C}", 262 | "boxVh;" => "\u{0256B}", 263 | "boxvH;" => "\u{0256A}", 264 | "boxvh;" => "\u{0253C}", 265 | "boxVL;" => "\u{02563}", 266 | "boxVl;" => "\u{02562}", 267 | "boxvL;" => "\u{02561}", 268 | "boxvl;" => "\u{02524}", 269 | "boxVR;" => "\u{02560}", 270 | "boxVr;" => "\u{0255F}", 271 | "boxvR;" => "\u{0255E}", 272 | "boxvr;" => "\u{0251C}", 273 | "bprime;" => "\u{02035}", 274 | "Breve;" => "\u{002D8}", 275 | "breve;" => "\u{002D8}", 276 | "brvbar;" => "\u{000A6}", 277 | "brvbar" => "\u{000A6}", 278 | "Bscr;" => "\u{0212C}", 279 | "bscr;" => "\u{1D4B7}", 280 | "bsemi;" => "\u{0204F}", 281 | "bsim;" => "\u{0223D}", 282 | "bsime;" => "\u{022CD}", 283 | "bsol;" => "\u{0005C}", 284 | "bsolb;" => "\u{029C5}", 285 | "bsolhsub;" => "\u{027C8}", 286 | "bull;" => "\u{02022}", 287 | "bullet;" => "\u{02022}", 288 | "bump;" => "\u{0224E}", 289 | "bumpE;" => "\u{02AAE}", 290 | "bumpe;" => "\u{0224F}", 291 | "Bumpeq;" => "\u{0224E}", 292 | "bumpeq;" => "\u{0224F}", 293 | "Cacute;" => "\u{00106}", 294 | "cacute;" => "\u{00107}", 295 | "Cap;" => "\u{022D2}", 296 | "cap;" => "\u{02229}", 297 | "capand;" => "\u{02A44}", 298 | "capbrcup;" => "\u{02A49}", 299 | "capcap;" => "\u{02A4B}", 300 | "capcup;" => "\u{02A47}", 301 | "capdot;" => "\u{02A40}", 302 | "CapitalDifferentialD;" => "\u{02145}", 303 | "caps;" => "\u{02229}\u{0FE00}", 304 | "caret;" => "\u{02041}", 305 | "caron;" => "\u{002C7}", 306 | "Cayleys;" => "\u{0212D}", 307 | "ccaps;" => "\u{02A4D}", 308 | "Ccaron;" => "\u{0010C}", 309 | "ccaron;" => "\u{0010D}", 310 | "Ccedil;" => "\u{000C7}", 311 | "Ccedil" => "\u{000C7}", 312 | "ccedil;" => "\u{000E7}", 313 | "ccedil" => "\u{000E7}", 314 | "Ccirc;" => "\u{00108}", 315 | "ccirc;" => "\u{00109}", 316 | "Cconint;" => "\u{02230}", 317 | "ccups;" => "\u{02A4C}", 318 | "ccupssm;" => 
"\u{02A50}", 319 | "Cdot;" => "\u{0010A}", 320 | "cdot;" => "\u{0010B}", 321 | "cedil;" => "\u{000B8}", 322 | "cedil" => "\u{000B8}", 323 | "Cedilla;" => "\u{000B8}", 324 | "cemptyv;" => "\u{029B2}", 325 | "cent;" => "\u{000A2}", 326 | "cent" => "\u{000A2}", 327 | "CenterDot;" => "\u{000B7}", 328 | "centerdot;" => "\u{000B7}", 329 | "Cfr;" => "\u{0212D}", 330 | "cfr;" => "\u{1D520}", 331 | "CHcy;" => "\u{00427}", 332 | "chcy;" => "\u{00447}", 333 | "check;" => "\u{02713}", 334 | "checkmark;" => "\u{02713}", 335 | "Chi;" => "\u{003A7}", 336 | "chi;" => "\u{003C7}", 337 | "cir;" => "\u{025CB}", 338 | "circ;" => "\u{002C6}", 339 | "circeq;" => "\u{02257}", 340 | "circlearrowleft;" => "\u{021BA}", 341 | "circlearrowright;" => "\u{021BB}", 342 | "circledast;" => "\u{0229B}", 343 | "circledcirc;" => "\u{0229A}", 344 | "circleddash;" => "\u{0229D}", 345 | "CircleDot;" => "\u{02299}", 346 | "circledR;" => "\u{000AE}", 347 | "circledS;" => "\u{024C8}", 348 | "CircleMinus;" => "\u{02296}", 349 | "CirclePlus;" => "\u{02295}", 350 | "CircleTimes;" => "\u{02297}", 351 | "cirE;" => "\u{029C3}", 352 | "cire;" => "\u{02257}", 353 | "cirfnint;" => "\u{02A10}", 354 | "cirmid;" => "\u{02AEF}", 355 | "cirscir;" => "\u{029C2}", 356 | "ClockwiseContourIntegral;" => "\u{02232}", 357 | "CloseCurlyDoubleQuote;" => "\u{0201D}", 358 | "CloseCurlyQuote;" => "\u{02019}", 359 | "clubs;" => "\u{02663}", 360 | "clubsuit;" => "\u{02663}", 361 | "Colon;" => "\u{02237}", 362 | "colon;" => "\u{0003A}", 363 | "Colone;" => "\u{02A74}", 364 | "colone;" => "\u{02254}", 365 | "coloneq;" => "\u{02254}", 366 | "comma;" => "\u{0002C}", 367 | "commat;" => "\u{00040}", 368 | "comp;" => "\u{02201}", 369 | "compfn;" => "\u{02218}", 370 | "complement;" => "\u{02201}", 371 | "complexes;" => "\u{02102}", 372 | "cong;" => "\u{02245}", 373 | "congdot;" => "\u{02A6D}", 374 | "Congruent;" => "\u{02261}", 375 | "Conint;" => "\u{0222F}", 376 | "conint;" => "\u{0222E}", 377 | "ContourIntegral;" => "\u{0222E}", 378 | 
"Copf;" => "\u{02102}", 379 | "copf;" => "\u{1D554}", 380 | "coprod;" => "\u{02210}", 381 | "Coproduct;" => "\u{02210}", 382 | "COPY;" => "\u{000A9}", 383 | "COPY" => "\u{000A9}", 384 | "copy;" => "\u{000A9}", 385 | "copy" => "\u{000A9}", 386 | "copysr;" => "\u{02117}", 387 | "CounterClockwiseContourIntegral;" => "\u{02233}", 388 | "crarr;" => "\u{021B5}", 389 | "Cross;" => "\u{02A2F}", 390 | "cross;" => "\u{02717}", 391 | "Cscr;" => "\u{1D49E}", 392 | "cscr;" => "\u{1D4B8}", 393 | "csub;" => "\u{02ACF}", 394 | "csube;" => "\u{02AD1}", 395 | "csup;" => "\u{02AD0}", 396 | "csupe;" => "\u{02AD2}", 397 | "ctdot;" => "\u{022EF}", 398 | "cudarrl;" => "\u{02938}", 399 | "cudarrr;" => "\u{02935}", 400 | "cuepr;" => "\u{022DE}", 401 | "cuesc;" => "\u{022DF}", 402 | "cularr;" => "\u{021B6}", 403 | "cularrp;" => "\u{0293D}", 404 | "Cup;" => "\u{022D3}", 405 | "cup;" => "\u{0222A}", 406 | "cupbrcap;" => "\u{02A48}", 407 | "CupCap;" => "\u{0224D}", 408 | "cupcap;" => "\u{02A46}", 409 | "cupcup;" => "\u{02A4A}", 410 | "cupdot;" => "\u{0228D}", 411 | "cupor;" => "\u{02A45}", 412 | "cups;" => "\u{0222A}\u{0FE00}", 413 | "curarr;" => "\u{021B7}", 414 | "curarrm;" => "\u{0293C}", 415 | "curlyeqprec;" => "\u{022DE}", 416 | "curlyeqsucc;" => "\u{022DF}", 417 | "curlyvee;" => "\u{022CE}", 418 | "curlywedge;" => "\u{022CF}", 419 | "curren;" => "\u{000A4}", 420 | "curren" => "\u{000A4}", 421 | "curvearrowleft;" => "\u{021B6}", 422 | "curvearrowright;" => "\u{021B7}", 423 | "cuvee;" => "\u{022CE}", 424 | "cuwed;" => "\u{022CF}", 425 | "cwconint;" => "\u{02232}", 426 | "cwint;" => "\u{02231}", 427 | "cylcty;" => "\u{0232D}", 428 | "Dagger;" => "\u{02021}", 429 | "dagger;" => "\u{02020}", 430 | "daleth;" => "\u{02138}", 431 | "Darr;" => "\u{021A1}", 432 | "dArr;" => "\u{021D3}", 433 | "darr;" => "\u{02193}", 434 | "dash;" => "\u{02010}", 435 | "Dashv;" => "\u{02AE4}", 436 | "dashv;" => "\u{022A3}", 437 | "dbkarow;" => "\u{0290F}", 438 | "dblac;" => "\u{002DD}", 439 | "Dcaron;" => 
"\u{0010E}", 440 | "dcaron;" => "\u{0010F}", 441 | "Dcy;" => "\u{00414}", 442 | "dcy;" => "\u{00434}", 443 | "DD;" => "\u{02145}", 444 | "dd;" => "\u{02146}", 445 | "ddagger;" => "\u{02021}", 446 | "ddarr;" => "\u{021CA}", 447 | "DDotrahd;" => "\u{02911}", 448 | "ddotseq;" => "\u{02A77}", 449 | "deg;" => "\u{000B0}", 450 | "deg" => "\u{000B0}", 451 | "Del;" => "\u{02207}", 452 | "Delta;" => "\u{00394}", 453 | "delta;" => "\u{003B4}", 454 | "demptyv;" => "\u{029B1}", 455 | "dfisht;" => "\u{0297F}", 456 | "Dfr;" => "\u{1D507}", 457 | "dfr;" => "\u{1D521}", 458 | "dHar;" => "\u{02965}", 459 | "dharl;" => "\u{021C3}", 460 | "dharr;" => "\u{021C2}", 461 | "DiacriticalAcute;" => "\u{000B4}", 462 | "DiacriticalDot;" => "\u{002D9}", 463 | "DiacriticalDoubleAcute;" => "\u{002DD}", 464 | "DiacriticalGrave;" => "\u{00060}", 465 | "DiacriticalTilde;" => "\u{002DC}", 466 | "diam;" => "\u{022C4}", 467 | "Diamond;" => "\u{022C4}", 468 | "diamond;" => "\u{022C4}", 469 | "diamondsuit;" => "\u{02666}", 470 | "diams;" => "\u{02666}", 471 | "die;" => "\u{000A8}", 472 | "DifferentialD;" => "\u{02146}", 473 | "digamma;" => "\u{003DD}", 474 | "disin;" => "\u{022F2}", 475 | "div;" => "\u{000F7}", 476 | "divide;" => "\u{000F7}", 477 | "divide" => "\u{000F7}", 478 | "divideontimes;" => "\u{022C7}", 479 | "divonx;" => "\u{022C7}", 480 | "DJcy;" => "\u{00402}", 481 | "djcy;" => "\u{00452}", 482 | "dlcorn;" => "\u{0231E}", 483 | "dlcrop;" => "\u{0230D}", 484 | "dollar;" => "\u{00024}", 485 | "Dopf;" => "\u{1D53B}", 486 | "dopf;" => "\u{1D555}", 487 | "Dot;" => "\u{000A8}", 488 | "dot;" => "\u{002D9}", 489 | "DotDot;" => "\u{020DC}", 490 | "doteq;" => "\u{02250}", 491 | "doteqdot;" => "\u{02251}", 492 | "DotEqual;" => "\u{02250}", 493 | "dotminus;" => "\u{02238}", 494 | "dotplus;" => "\u{02214}", 495 | "dotsquare;" => "\u{022A1}", 496 | "doublebarwedge;" => "\u{02306}", 497 | "DoubleContourIntegral;" => "\u{0222F}", 498 | "DoubleDot;" => "\u{000A8}", 499 | "DoubleDownArrow;" => "\u{021D3}", 500 
| "DoubleLeftArrow;" => "\u{021D0}", 501 | "DoubleLeftRightArrow;" => "\u{021D4}", 502 | "DoubleLeftTee;" => "\u{02AE4}", 503 | "DoubleLongLeftArrow;" => "\u{027F8}", 504 | "DoubleLongLeftRightArrow;" => "\u{027FA}", 505 | "DoubleLongRightArrow;" => "\u{027F9}", 506 | "DoubleRightArrow;" => "\u{021D2}", 507 | "DoubleRightTee;" => "\u{022A8}", 508 | "DoubleUpArrow;" => "\u{021D1}", 509 | "DoubleUpDownArrow;" => "\u{021D5}", 510 | "DoubleVerticalBar;" => "\u{02225}", 511 | "DownArrow;" => "\u{02193}", 512 | "Downarrow;" => "\u{021D3}", 513 | "downarrow;" => "\u{02193}", 514 | "DownArrowBar;" => "\u{02913}", 515 | "DownArrowUpArrow;" => "\u{021F5}", 516 | "DownBreve;" => "\u{00311}", 517 | "downdownarrows;" => "\u{021CA}", 518 | "downharpoonleft;" => "\u{021C3}", 519 | "downharpoonright;" => "\u{021C2}", 520 | "DownLeftRightVector;" => "\u{02950}", 521 | "DownLeftTeeVector;" => "\u{0295E}", 522 | "DownLeftVector;" => "\u{021BD}", 523 | "DownLeftVectorBar;" => "\u{02956}", 524 | "DownRightTeeVector;" => "\u{0295F}", 525 | "DownRightVector;" => "\u{021C1}", 526 | "DownRightVectorBar;" => "\u{02957}", 527 | "DownTee;" => "\u{022A4}", 528 | "DownTeeArrow;" => "\u{021A7}", 529 | "drbkarow;" => "\u{02910}", 530 | "drcorn;" => "\u{0231F}", 531 | "drcrop;" => "\u{0230C}", 532 | "Dscr;" => "\u{1D49F}", 533 | "dscr;" => "\u{1D4B9}", 534 | "DScy;" => "\u{00405}", 535 | "dscy;" => "\u{00455}", 536 | "dsol;" => "\u{029F6}", 537 | "Dstrok;" => "\u{00110}", 538 | "dstrok;" => "\u{00111}", 539 | "dtdot;" => "\u{022F1}", 540 | "dtri;" => "\u{025BF}", 541 | "dtrif;" => "\u{025BE}", 542 | "duarr;" => "\u{021F5}", 543 | "duhar;" => "\u{0296F}", 544 | "dwangle;" => "\u{029A6}", 545 | "DZcy;" => "\u{0040F}", 546 | "dzcy;" => "\u{0045F}", 547 | "dzigrarr;" => "\u{027FF}", 548 | "Eacute;" => "\u{000C9}", 549 | "Eacute" => "\u{000C9}", 550 | "eacute;" => "\u{000E9}", 551 | "eacute" => "\u{000E9}", 552 | "easter;" => "\u{02A6E}", 553 | "Ecaron;" => "\u{0011A}", 554 | "ecaron;" => "\u{0011B}", 
555 | "ecir;" => "\u{02256}", 556 | "Ecirc;" => "\u{000CA}", 557 | "Ecirc" => "\u{000CA}", 558 | "ecirc;" => "\u{000EA}", 559 | "ecirc" => "\u{000EA}", 560 | "ecolon;" => "\u{02255}", 561 | "Ecy;" => "\u{0042D}", 562 | "ecy;" => "\u{0044D}", 563 | "eDDot;" => "\u{02A77}", 564 | "Edot;" => "\u{00116}", 565 | "eDot;" => "\u{02251}", 566 | "edot;" => "\u{00117}", 567 | "ee;" => "\u{02147}", 568 | "efDot;" => "\u{02252}", 569 | "Efr;" => "\u{1D508}", 570 | "efr;" => "\u{1D522}", 571 | "eg;" => "\u{02A9A}", 572 | "Egrave;" => "\u{000C8}", 573 | "Egrave" => "\u{000C8}", 574 | "egrave;" => "\u{000E8}", 575 | "egrave" => "\u{000E8}", 576 | "egs;" => "\u{02A96}", 577 | "egsdot;" => "\u{02A98}", 578 | "el;" => "\u{02A99}", 579 | "Element;" => "\u{02208}", 580 | "elinters;" => "\u{023E7}", 581 | "ell;" => "\u{02113}", 582 | "els;" => "\u{02A95}", 583 | "elsdot;" => "\u{02A97}", 584 | "Emacr;" => "\u{00112}", 585 | "emacr;" => "\u{00113}", 586 | "empty;" => "\u{02205}", 587 | "emptyset;" => "\u{02205}", 588 | "EmptySmallSquare;" => "\u{025FB}", 589 | "emptyv;" => "\u{02205}", 590 | "EmptyVerySmallSquare;" => "\u{025AB}", 591 | "emsp;" => "\u{02003}", 592 | "emsp13;" => "\u{02004}", 593 | "emsp14;" => "\u{02005}", 594 | "ENG;" => "\u{0014A}", 595 | "eng;" => "\u{0014B}", 596 | "ensp;" => "\u{02002}", 597 | "Eogon;" => "\u{00118}", 598 | "eogon;" => "\u{00119}", 599 | "Eopf;" => "\u{1D53C}", 600 | "eopf;" => "\u{1D556}", 601 | "epar;" => "\u{022D5}", 602 | "eparsl;" => "\u{029E3}", 603 | "eplus;" => "\u{02A71}", 604 | "epsi;" => "\u{003B5}", 605 | "Epsilon;" => "\u{00395}", 606 | "epsilon;" => "\u{003B5}", 607 | "epsiv;" => "\u{003F5}", 608 | "eqcirc;" => "\u{02256}", 609 | "eqcolon;" => "\u{02255}", 610 | "eqsim;" => "\u{02242}", 611 | "eqslantgtr;" => "\u{02A96}", 612 | "eqslantless;" => "\u{02A95}", 613 | "Equal;" => "\u{02A75}", 614 | "equals;" => "\u{0003D}", 615 | "EqualTilde;" => "\u{02242}", 616 | "equest;" => "\u{0225F}", 617 | "Equilibrium;" => "\u{021CC}", 618 | 
"equiv;" => "\u{02261}", 619 | "equivDD;" => "\u{02A78}", 620 | "eqvparsl;" => "\u{029E5}", 621 | "erarr;" => "\u{02971}", 622 | "erDot;" => "\u{02253}", 623 | "Escr;" => "\u{02130}", 624 | "escr;" => "\u{0212F}", 625 | "esdot;" => "\u{02250}", 626 | "Esim;" => "\u{02A73}", 627 | "esim;" => "\u{02242}", 628 | "Eta;" => "\u{00397}", 629 | "eta;" => "\u{003B7}", 630 | "ETH;" => "\u{000D0}", 631 | "ETH" => "\u{000D0}", 632 | "eth;" => "\u{000F0}", 633 | "eth" => "\u{000F0}", 634 | "Euml;" => "\u{000CB}", 635 | "Euml" => "\u{000CB}", 636 | "euml;" => "\u{000EB}", 637 | "euml" => "\u{000EB}", 638 | "euro;" => "\u{020AC}", 639 | "excl;" => "\u{00021}", 640 | "exist;" => "\u{02203}", 641 | "Exists;" => "\u{02203}", 642 | "expectation;" => "\u{02130}", 643 | "ExponentialE;" => "\u{02147}", 644 | "exponentiale;" => "\u{02147}", 645 | "fallingdotseq;" => "\u{02252}", 646 | "Fcy;" => "\u{00424}", 647 | "fcy;" => "\u{00444}", 648 | "female;" => "\u{02640}", 649 | "ffilig;" => "\u{0FB03}", 650 | "fflig;" => "\u{0FB00}", 651 | "ffllig;" => "\u{0FB04}", 652 | "Ffr;" => "\u{1D509}", 653 | "ffr;" => "\u{1D523}", 654 | "filig;" => "\u{0FB01}", 655 | "FilledSmallSquare;" => "\u{025FC}", 656 | "FilledVerySmallSquare;" => "\u{025AA}", 657 | "fjlig;" => "\u{00066}\u{0006A}", 658 | "flat;" => "\u{0266D}", 659 | "fllig;" => "\u{0FB02}", 660 | "fltns;" => "\u{025B1}", 661 | "fnof;" => "\u{00192}", 662 | "Fopf;" => "\u{1D53D}", 663 | "fopf;" => "\u{1D557}", 664 | "ForAll;" => "\u{02200}", 665 | "forall;" => "\u{02200}", 666 | "fork;" => "\u{022D4}", 667 | "forkv;" => "\u{02AD9}", 668 | "Fouriertrf;" => "\u{02131}", 669 | "fpartint;" => "\u{02A0D}", 670 | "frac12;" => "\u{000BD}", 671 | "frac12" => "\u{000BD}", 672 | "frac13;" => "\u{02153}", 673 | "frac14;" => "\u{000BC}", 674 | "frac14" => "\u{000BC}", 675 | "frac15;" => "\u{02155}", 676 | "frac16;" => "\u{02159}", 677 | "frac18;" => "\u{0215B}", 678 | "frac23;" => "\u{02154}", 679 | "frac25;" => "\u{02156}", 680 | "frac34;" => 
"\u{000BE}", 681 | "frac34" => "\u{000BE}", 682 | "frac35;" => "\u{02157}", 683 | "frac38;" => "\u{0215C}", 684 | "frac45;" => "\u{02158}", 685 | "frac56;" => "\u{0215A}", 686 | "frac58;" => "\u{0215D}", 687 | "frac78;" => "\u{0215E}", 688 | "frasl;" => "\u{02044}", 689 | "frown;" => "\u{02322}", 690 | "Fscr;" => "\u{02131}", 691 | "fscr;" => "\u{1D4BB}", 692 | "gacute;" => "\u{001F5}", 693 | "Gamma;" => "\u{00393}", 694 | "gamma;" => "\u{003B3}", 695 | "Gammad;" => "\u{003DC}", 696 | "gammad;" => "\u{003DD}", 697 | "gap;" => "\u{02A86}", 698 | "Gbreve;" => "\u{0011E}", 699 | "gbreve;" => "\u{0011F}", 700 | "Gcedil;" => "\u{00122}", 701 | "Gcirc;" => "\u{0011C}", 702 | "gcirc;" => "\u{0011D}", 703 | "Gcy;" => "\u{00413}", 704 | "gcy;" => "\u{00433}", 705 | "Gdot;" => "\u{00120}", 706 | "gdot;" => "\u{00121}", 707 | "gE;" => "\u{02267}", 708 | "ge;" => "\u{02265}", 709 | "gEl;" => "\u{02A8C}", 710 | "gel;" => "\u{022DB}", 711 | "geq;" => "\u{02265}", 712 | "geqq;" => "\u{02267}", 713 | "geqslant;" => "\u{02A7E}", 714 | "ges;" => "\u{02A7E}", 715 | "gescc;" => "\u{02AA9}", 716 | "gesdot;" => "\u{02A80}", 717 | "gesdoto;" => "\u{02A82}", 718 | "gesdotol;" => "\u{02A84}", 719 | "gesl;" => "\u{022DB}\u{0FE00}", 720 | "gesles;" => "\u{02A94}", 721 | "Gfr;" => "\u{1D50A}", 722 | "gfr;" => "\u{1D524}", 723 | "Gg;" => "\u{022D9}", 724 | "gg;" => "\u{0226B}", 725 | "ggg;" => "\u{022D9}", 726 | "gimel;" => "\u{02137}", 727 | "GJcy;" => "\u{00403}", 728 | "gjcy;" => "\u{00453}", 729 | "gl;" => "\u{02277}", 730 | "gla;" => "\u{02AA5}", 731 | "glE;" => "\u{02A92}", 732 | "glj;" => "\u{02AA4}", 733 | "gnap;" => "\u{02A8A}", 734 | "gnapprox;" => "\u{02A8A}", 735 | "gnE;" => "\u{02269}", 736 | "gne;" => "\u{02A88}", 737 | "gneq;" => "\u{02A88}", 738 | "gneqq;" => "\u{02269}", 739 | "gnsim;" => "\u{022E7}", 740 | "Gopf;" => "\u{1D53E}", 741 | "gopf;" => "\u{1D558}", 742 | "grave;" => "\u{00060}", 743 | "GreaterEqual;" => "\u{02265}", 744 | "GreaterEqualLess;" => "\u{022DB}", 745 | 
"GreaterFullEqual;" => "\u{02267}", 746 | "GreaterGreater;" => "\u{02AA2}", 747 | "GreaterLess;" => "\u{02277}", 748 | "GreaterSlantEqual;" => "\u{02A7E}", 749 | "GreaterTilde;" => "\u{02273}", 750 | "Gscr;" => "\u{1D4A2}", 751 | "gscr;" => "\u{0210A}", 752 | "gsim;" => "\u{02273}", 753 | "gsime;" => "\u{02A8E}", 754 | "gsiml;" => "\u{02A90}", 755 | "GT;" => "\u{0003E}", 756 | "GT" => "\u{0003E}", 757 | "Gt;" => "\u{0226B}", 758 | "gt;" => "\u{0003E}", 759 | "gt" => "\u{0003E}", 760 | "gtcc;" => "\u{02AA7}", 761 | "gtcir;" => "\u{02A7A}", 762 | "gtdot;" => "\u{022D7}", 763 | "gtlPar;" => "\u{02995}", 764 | "gtquest;" => "\u{02A7C}", 765 | "gtrapprox;" => "\u{02A86}", 766 | "gtrarr;" => "\u{02978}", 767 | "gtrdot;" => "\u{022D7}", 768 | "gtreqless;" => "\u{022DB}", 769 | "gtreqqless;" => "\u{02A8C}", 770 | "gtrless;" => "\u{02277}", 771 | "gtrsim;" => "\u{02273}", 772 | "gvertneqq;" => "\u{02269}\u{0FE00}", 773 | "gvnE;" => "\u{02269}\u{0FE00}", 774 | "Hacek;" => "\u{002C7}", 775 | "hairsp;" => "\u{0200A}", 776 | "half;" => "\u{000BD}", 777 | "hamilt;" => "\u{0210B}", 778 | "HARDcy;" => "\u{0042A}", 779 | "hardcy;" => "\u{0044A}", 780 | "hArr;" => "\u{021D4}", 781 | "harr;" => "\u{02194}", 782 | "harrcir;" => "\u{02948}", 783 | "harrw;" => "\u{021AD}", 784 | "Hat;" => "\u{0005E}", 785 | "hbar;" => "\u{0210F}", 786 | "Hcirc;" => "\u{00124}", 787 | "hcirc;" => "\u{00125}", 788 | "hearts;" => "\u{02665}", 789 | "heartsuit;" => "\u{02665}", 790 | "hellip;" => "\u{02026}", 791 | "hercon;" => "\u{022B9}", 792 | "Hfr;" => "\u{0210C}", 793 | "hfr;" => "\u{1D525}", 794 | "HilbertSpace;" => "\u{0210B}", 795 | "hksearow;" => "\u{02925}", 796 | "hkswarow;" => "\u{02926}", 797 | "hoarr;" => "\u{021FF}", 798 | "homtht;" => "\u{0223B}", 799 | "hookleftarrow;" => "\u{021A9}", 800 | "hookrightarrow;" => "\u{021AA}", 801 | "Hopf;" => "\u{0210D}", 802 | "hopf;" => "\u{1D559}", 803 | "horbar;" => "\u{02015}", 804 | "HorizontalLine;" => "\u{02500}", 805 | "Hscr;" => "\u{0210B}", 806 | 
"hscr;" => "\u{1D4BD}", 807 | "hslash;" => "\u{0210F}", 808 | "Hstrok;" => "\u{00126}", 809 | "hstrok;" => "\u{00127}", 810 | "HumpDownHump;" => "\u{0224E}", 811 | "HumpEqual;" => "\u{0224F}", 812 | "hybull;" => "\u{02043}", 813 | "hyphen;" => "\u{02010}", 814 | "Iacute;" => "\u{000CD}", 815 | "Iacute" => "\u{000CD}", 816 | "iacute;" => "\u{000ED}", 817 | "iacute" => "\u{000ED}", 818 | "ic;" => "\u{02063}", 819 | "Icirc;" => "\u{000CE}", 820 | "Icirc" => "\u{000CE}", 821 | "icirc;" => "\u{000EE}", 822 | "icirc" => "\u{000EE}", 823 | "Icy;" => "\u{00418}", 824 | "icy;" => "\u{00438}", 825 | "Idot;" => "\u{00130}", 826 | "IEcy;" => "\u{00415}", 827 | "iecy;" => "\u{00435}", 828 | "iexcl;" => "\u{000A1}", 829 | "iexcl" => "\u{000A1}", 830 | "iff;" => "\u{021D4}", 831 | "Ifr;" => "\u{02111}", 832 | "ifr;" => "\u{1D526}", 833 | "Igrave;" => "\u{000CC}", 834 | "Igrave" => "\u{000CC}", 835 | "igrave;" => "\u{000EC}", 836 | "igrave" => "\u{000EC}", 837 | "ii;" => "\u{02148}", 838 | "iiiint;" => "\u{02A0C}", 839 | "iiint;" => "\u{0222D}", 840 | "iinfin;" => "\u{029DC}", 841 | "iiota;" => "\u{02129}", 842 | "IJlig;" => "\u{00132}", 843 | "ijlig;" => "\u{00133}", 844 | "Im;" => "\u{02111}", 845 | "Imacr;" => "\u{0012A}", 846 | "imacr;" => "\u{0012B}", 847 | "image;" => "\u{02111}", 848 | "ImaginaryI;" => "\u{02148}", 849 | "imagline;" => "\u{02110}", 850 | "imagpart;" => "\u{02111}", 851 | "imath;" => "\u{00131}", 852 | "imof;" => "\u{022B7}", 853 | "imped;" => "\u{001B5}", 854 | "Implies;" => "\u{021D2}", 855 | "in;" => "\u{02208}", 856 | "incare;" => "\u{02105}", 857 | "infin;" => "\u{0221E}", 858 | "infintie;" => "\u{029DD}", 859 | "inodot;" => "\u{00131}", 860 | "Int;" => "\u{0222C}", 861 | "int;" => "\u{0222B}", 862 | "intcal;" => "\u{022BA}", 863 | "integers;" => "\u{02124}", 864 | "Integral;" => "\u{0222B}", 865 | "intercal;" => "\u{022BA}", 866 | "Intersection;" => "\u{022C2}", 867 | "intlarhk;" => "\u{02A17}", 868 | "intprod;" => "\u{02A3C}", 869 | "InvisibleComma;" 
=> "\u{02063}", 870 | "InvisibleTimes;" => "\u{02062}", 871 | "IOcy;" => "\u{00401}", 872 | "iocy;" => "\u{00451}", 873 | "Iogon;" => "\u{0012E}", 874 | "iogon;" => "\u{0012F}", 875 | "Iopf;" => "\u{1D540}", 876 | "iopf;" => "\u{1D55A}", 877 | "Iota;" => "\u{00399}", 878 | "iota;" => "\u{003B9}", 879 | "iprod;" => "\u{02A3C}", 880 | "iquest;" => "\u{000BF}", 881 | "iquest" => "\u{000BF}", 882 | "Iscr;" => "\u{02110}", 883 | "iscr;" => "\u{1D4BE}", 884 | "isin;" => "\u{02208}", 885 | "isindot;" => "\u{022F5}", 886 | "isinE;" => "\u{022F9}", 887 | "isins;" => "\u{022F4}", 888 | "isinsv;" => "\u{022F3}", 889 | "isinv;" => "\u{02208}", 890 | "it;" => "\u{02062}", 891 | "Itilde;" => "\u{00128}", 892 | "itilde;" => "\u{00129}", 893 | "Iukcy;" => "\u{00406}", 894 | "iukcy;" => "\u{00456}", 895 | "Iuml;" => "\u{000CF}", 896 | "Iuml" => "\u{000CF}", 897 | "iuml;" => "\u{000EF}", 898 | "iuml" => "\u{000EF}", 899 | "Jcirc;" => "\u{00134}", 900 | "jcirc;" => "\u{00135}", 901 | "Jcy;" => "\u{00419}", 902 | "jcy;" => "\u{00439}", 903 | "Jfr;" => "\u{1D50D}", 904 | "jfr;" => "\u{1D527}", 905 | "jmath;" => "\u{00237}", 906 | "Jopf;" => "\u{1D541}", 907 | "jopf;" => "\u{1D55B}", 908 | "Jscr;" => "\u{1D4A5}", 909 | "jscr;" => "\u{1D4BF}", 910 | "Jsercy;" => "\u{00408}", 911 | "jsercy;" => "\u{00458}", 912 | "Jukcy;" => "\u{00404}", 913 | "jukcy;" => "\u{00454}", 914 | "Kappa;" => "\u{0039A}", 915 | "kappa;" => "\u{003BA}", 916 | "kappav;" => "\u{003F0}", 917 | "Kcedil;" => "\u{00136}", 918 | "kcedil;" => "\u{00137}", 919 | "Kcy;" => "\u{0041A}", 920 | "kcy;" => "\u{0043A}", 921 | "Kfr;" => "\u{1D50E}", 922 | "kfr;" => "\u{1D528}", 923 | "kgreen;" => "\u{00138}", 924 | "KHcy;" => "\u{00425}", 925 | "khcy;" => "\u{00445}", 926 | "KJcy;" => "\u{0040C}", 927 | "kjcy;" => "\u{0045C}", 928 | "Kopf;" => "\u{1D542}", 929 | "kopf;" => "\u{1D55C}", 930 | "Kscr;" => "\u{1D4A6}", 931 | "kscr;" => "\u{1D4C0}", 932 | "lAarr;" => "\u{021DA}", 933 | "Lacute;" => "\u{00139}", 934 | "lacute;" => 
"\u{0013A}", 935 | "laemptyv;" => "\u{029B4}", 936 | "lagran;" => "\u{02112}", 937 | "Lambda;" => "\u{0039B}", 938 | "lambda;" => "\u{003BB}", 939 | "Lang;" => "\u{027EA}", 940 | "lang;" => "\u{027E8}", 941 | "langd;" => "\u{02991}", 942 | "langle;" => "\u{027E8}", 943 | "lap;" => "\u{02A85}", 944 | "Laplacetrf;" => "\u{02112}", 945 | "laquo;" => "\u{000AB}", 946 | "laquo" => "\u{000AB}", 947 | "Larr;" => "\u{0219E}", 948 | "lArr;" => "\u{021D0}", 949 | "larr;" => "\u{02190}", 950 | "larrb;" => "\u{021E4}", 951 | "larrbfs;" => "\u{0291F}", 952 | "larrfs;" => "\u{0291D}", 953 | "larrhk;" => "\u{021A9}", 954 | "larrlp;" => "\u{021AB}", 955 | "larrpl;" => "\u{02939}", 956 | "larrsim;" => "\u{02973}", 957 | "larrtl;" => "\u{021A2}", 958 | "lat;" => "\u{02AAB}", 959 | "lAtail;" => "\u{0291B}", 960 | "latail;" => "\u{02919}", 961 | "late;" => "\u{02AAD}", 962 | "lates;" => "\u{02AAD}\u{0FE00}", 963 | "lBarr;" => "\u{0290E}", 964 | "lbarr;" => "\u{0290C}", 965 | "lbbrk;" => "\u{02772}", 966 | "lbrace;" => "\u{0007B}", 967 | "lbrack;" => "\u{0005B}", 968 | "lbrke;" => "\u{0298B}", 969 | "lbrksld;" => "\u{0298F}", 970 | "lbrkslu;" => "\u{0298D}", 971 | "Lcaron;" => "\u{0013D}", 972 | "lcaron;" => "\u{0013E}", 973 | "Lcedil;" => "\u{0013B}", 974 | "lcedil;" => "\u{0013C}", 975 | "lceil;" => "\u{02308}", 976 | "lcub;" => "\u{0007B}", 977 | "Lcy;" => "\u{0041B}", 978 | "lcy;" => "\u{0043B}", 979 | "ldca;" => "\u{02936}", 980 | "ldquo;" => "\u{0201C}", 981 | "ldquor;" => "\u{0201E}", 982 | "ldrdhar;" => "\u{02967}", 983 | "ldrushar;" => "\u{0294B}", 984 | "ldsh;" => "\u{021B2}", 985 | "lE;" => "\u{02266}", 986 | "le;" => "\u{02264}", 987 | "LeftAngleBracket;" => "\u{027E8}", 988 | "LeftArrow;" => "\u{02190}", 989 | "Leftarrow;" => "\u{021D0}", 990 | "leftarrow;" => "\u{02190}", 991 | "LeftArrowBar;" => "\u{021E4}", 992 | "LeftArrowRightArrow;" => "\u{021C6}", 993 | "leftarrowtail;" => "\u{021A2}", 994 | "LeftCeiling;" => "\u{02308}", 995 | "LeftDoubleBracket;" => "\u{027E6}", 
996 | "LeftDownTeeVector;" => "\u{02961}", 997 | "LeftDownVector;" => "\u{021C3}", 998 | "LeftDownVectorBar;" => "\u{02959}", 999 | "LeftFloor;" => "\u{0230A}", 1000 | "leftharpoondown;" => "\u{021BD}", 1001 | "leftharpoonup;" => "\u{021BC}", 1002 | "leftleftarrows;" => "\u{021C7}", 1003 | "LeftRightArrow;" => "\u{02194}", 1004 | "Leftrightarrow;" => "\u{021D4}", 1005 | "leftrightarrow;" => "\u{02194}", 1006 | "leftrightarrows;" => "\u{021C6}", 1007 | "leftrightharpoons;" => "\u{021CB}", 1008 | "leftrightsquigarrow;" => "\u{021AD}", 1009 | "LeftRightVector;" => "\u{0294E}", 1010 | "LeftTee;" => "\u{022A3}", 1011 | "LeftTeeArrow;" => "\u{021A4}", 1012 | "LeftTeeVector;" => "\u{0295A}", 1013 | "leftthreetimes;" => "\u{022CB}", 1014 | "LeftTriangle;" => "\u{022B2}", 1015 | "LeftTriangleBar;" => "\u{029CF}", 1016 | "LeftTriangleEqual;" => "\u{022B4}", 1017 | "LeftUpDownVector;" => "\u{02951}", 1018 | "LeftUpTeeVector;" => "\u{02960}", 1019 | "LeftUpVector;" => "\u{021BF}", 1020 | "LeftUpVectorBar;" => "\u{02958}", 1021 | "LeftVector;" => "\u{021BC}", 1022 | "LeftVectorBar;" => "\u{02952}", 1023 | "lEg;" => "\u{02A8B}", 1024 | "leg;" => "\u{022DA}", 1025 | "leq;" => "\u{02264}", 1026 | "leqq;" => "\u{02266}", 1027 | "leqslant;" => "\u{02A7D}", 1028 | "les;" => "\u{02A7D}", 1029 | "lescc;" => "\u{02AA8}", 1030 | "lesdot;" => "\u{02A7F}", 1031 | "lesdoto;" => "\u{02A81}", 1032 | "lesdotor;" => "\u{02A83}", 1033 | "lesg;" => "\u{022DA}\u{0FE00}", 1034 | "lesges;" => "\u{02A93}", 1035 | "lessapprox;" => "\u{02A85}", 1036 | "lessdot;" => "\u{022D6}", 1037 | "lesseqgtr;" => "\u{022DA}", 1038 | "lesseqqgtr;" => "\u{02A8B}", 1039 | "LessEqualGreater;" => "\u{022DA}", 1040 | "LessFullEqual;" => "\u{02266}", 1041 | "LessGreater;" => "\u{02276}", 1042 | "lessgtr;" => "\u{02276}", 1043 | "LessLess;" => "\u{02AA1}", 1044 | "lesssim;" => "\u{02272}", 1045 | "LessSlantEqual;" => "\u{02A7D}", 1046 | "LessTilde;" => "\u{02272}", 1047 | "lfisht;" => "\u{0297C}", 1048 | "lfloor;" => 
"\u{0230A}", 1049 | "Lfr;" => "\u{1D50F}", 1050 | "lfr;" => "\u{1D529}", 1051 | "lg;" => "\u{02276}", 1052 | "lgE;" => "\u{02A91}", 1053 | "lHar;" => "\u{02962}", 1054 | "lhard;" => "\u{021BD}", 1055 | "lharu;" => "\u{021BC}", 1056 | "lharul;" => "\u{0296A}", 1057 | "lhblk;" => "\u{02584}", 1058 | "LJcy;" => "\u{00409}", 1059 | "ljcy;" => "\u{00459}", 1060 | "Ll;" => "\u{022D8}", 1061 | "ll;" => "\u{0226A}", 1062 | "llarr;" => "\u{021C7}", 1063 | "llcorner;" => "\u{0231E}", 1064 | "Lleftarrow;" => "\u{021DA}", 1065 | "llhard;" => "\u{0296B}", 1066 | "lltri;" => "\u{025FA}", 1067 | "Lmidot;" => "\u{0013F}", 1068 | "lmidot;" => "\u{00140}", 1069 | "lmoust;" => "\u{023B0}", 1070 | "lmoustache;" => "\u{023B0}", 1071 | "lnap;" => "\u{02A89}", 1072 | "lnapprox;" => "\u{02A89}", 1073 | "lnE;" => "\u{02268}", 1074 | "lne;" => "\u{02A87}", 1075 | "lneq;" => "\u{02A87}", 1076 | "lneqq;" => "\u{02268}", 1077 | "lnsim;" => "\u{022E6}", 1078 | "loang;" => "\u{027EC}", 1079 | "loarr;" => "\u{021FD}", 1080 | "lobrk;" => "\u{027E6}", 1081 | "LongLeftArrow;" => "\u{027F5}", 1082 | "Longleftarrow;" => "\u{027F8}", 1083 | "longleftarrow;" => "\u{027F5}", 1084 | "LongLeftRightArrow;" => "\u{027F7}", 1085 | "Longleftrightarrow;" => "\u{027FA}", 1086 | "longleftrightarrow;" => "\u{027F7}", 1087 | "longmapsto;" => "\u{027FC}", 1088 | "LongRightArrow;" => "\u{027F6}", 1089 | "Longrightarrow;" => "\u{027F9}", 1090 | "longrightarrow;" => "\u{027F6}", 1091 | "looparrowleft;" => "\u{021AB}", 1092 | "looparrowright;" => "\u{021AC}", 1093 | "lopar;" => "\u{02985}", 1094 | "Lopf;" => "\u{1D543}", 1095 | "lopf;" => "\u{1D55D}", 1096 | "loplus;" => "\u{02A2D}", 1097 | "lotimes;" => "\u{02A34}", 1098 | "lowast;" => "\u{02217}", 1099 | "lowbar;" => "\u{0005F}", 1100 | "LowerLeftArrow;" => "\u{02199}", 1101 | "LowerRightArrow;" => "\u{02198}", 1102 | "loz;" => "\u{025CA}", 1103 | "lozenge;" => "\u{025CA}", 1104 | "lozf;" => "\u{029EB}", 1105 | "lpar;" => "\u{00028}", 1106 | "lparlt;" => "\u{02993}", 
1107 | "lrarr;" => "\u{021C6}", 1108 | "lrcorner;" => "\u{0231F}", 1109 | "lrhar;" => "\u{021CB}", 1110 | "lrhard;" => "\u{0296D}", 1111 | "lrm;" => "\u{0200E}", 1112 | "lrtri;" => "\u{022BF}", 1113 | "lsaquo;" => "\u{02039}", 1114 | "Lscr;" => "\u{02112}", 1115 | "lscr;" => "\u{1D4C1}", 1116 | "Lsh;" => "\u{021B0}", 1117 | "lsh;" => "\u{021B0}", 1118 | "lsim;" => "\u{02272}", 1119 | "lsime;" => "\u{02A8D}", 1120 | "lsimg;" => "\u{02A8F}", 1121 | "lsqb;" => "\u{0005B}", 1122 | "lsquo;" => "\u{02018}", 1123 | "lsquor;" => "\u{0201A}", 1124 | "Lstrok;" => "\u{00141}", 1125 | "lstrok;" => "\u{00142}", 1126 | "LT;" => "\u{0003C}", 1127 | "LT" => "\u{0003C}", 1128 | "Lt;" => "\u{0226A}", 1129 | "lt;" => "\u{0003C}", 1130 | "lt" => "\u{0003C}", 1131 | "ltcc;" => "\u{02AA6}", 1132 | "ltcir;" => "\u{02A79}", 1133 | "ltdot;" => "\u{022D6}", 1134 | "lthree;" => "\u{022CB}", 1135 | "ltimes;" => "\u{022C9}", 1136 | "ltlarr;" => "\u{02976}", 1137 | "ltquest;" => "\u{02A7B}", 1138 | "ltri;" => "\u{025C3}", 1139 | "ltrie;" => "\u{022B4}", 1140 | "ltrif;" => "\u{025C2}", 1141 | "ltrPar;" => "\u{02996}", 1142 | "lurdshar;" => "\u{0294A}", 1143 | "luruhar;" => "\u{02966}", 1144 | "lvertneqq;" => "\u{02268}\u{0FE00}", 1145 | "lvnE;" => "\u{02268}\u{0FE00}", 1146 | "macr;" => "\u{000AF}", 1147 | "macr" => "\u{000AF}", 1148 | "male;" => "\u{02642}", 1149 | "malt;" => "\u{02720}", 1150 | "maltese;" => "\u{02720}", 1151 | "Map;" => "\u{02905}", 1152 | "map;" => "\u{021A6}", 1153 | "mapsto;" => "\u{021A6}", 1154 | "mapstodown;" => "\u{021A7}", 1155 | "mapstoleft;" => "\u{021A4}", 1156 | "mapstoup;" => "\u{021A5}", 1157 | "marker;" => "\u{025AE}", 1158 | "mcomma;" => "\u{02A29}", 1159 | "Mcy;" => "\u{0041C}", 1160 | "mcy;" => "\u{0043C}", 1161 | "mdash;" => "\u{02014}", 1162 | "mDDot;" => "\u{0223A}", 1163 | "measuredangle;" => "\u{02221}", 1164 | "MediumSpace;" => "\u{0205F}", 1165 | "Mellintrf;" => "\u{02133}", 1166 | "Mfr;" => "\u{1D510}", 1167 | "mfr;" => "\u{1D52A}", 1168 | "mho;" => 
"\u{02127}", 1169 | "micro;" => "\u{000B5}", 1170 | "micro" => "\u{000B5}", 1171 | "mid;" => "\u{02223}", 1172 | "midast;" => "\u{0002A}", 1173 | "midcir;" => "\u{02AF0}", 1174 | "middot;" => "\u{000B7}", 1175 | "middot" => "\u{000B7}", 1176 | "minus;" => "\u{02212}", 1177 | "minusb;" => "\u{0229F}", 1178 | "minusd;" => "\u{02238}", 1179 | "minusdu;" => "\u{02A2A}", 1180 | "MinusPlus;" => "\u{02213}", 1181 | "mlcp;" => "\u{02ADB}", 1182 | "mldr;" => "\u{02026}", 1183 | "mnplus;" => "\u{02213}", 1184 | "models;" => "\u{022A7}", 1185 | "Mopf;" => "\u{1D544}", 1186 | "mopf;" => "\u{1D55E}", 1187 | "mp;" => "\u{02213}", 1188 | "Mscr;" => "\u{02133}", 1189 | "mscr;" => "\u{1D4C2}", 1190 | "mstpos;" => "\u{0223E}", 1191 | "Mu;" => "\u{0039C}", 1192 | "mu;" => "\u{003BC}", 1193 | "multimap;" => "\u{022B8}", 1194 | "mumap;" => "\u{022B8}", 1195 | "nabla;" => "\u{02207}", 1196 | "Nacute;" => "\u{00143}", 1197 | "nacute;" => "\u{00144}", 1198 | "nang;" => "\u{02220}\u{020D2}", 1199 | "nap;" => "\u{02249}", 1200 | "napE;" => "\u{02A70}\u{00338}", 1201 | "napid;" => "\u{0224B}\u{00338}", 1202 | "napos;" => "\u{00149}", 1203 | "napprox;" => "\u{02249}", 1204 | "natur;" => "\u{0266E}", 1205 | "natural;" => "\u{0266E}", 1206 | "naturals;" => "\u{02115}", 1207 | "nbsp;" => "\u{000A0}", 1208 | "nbsp" => "\u{000A0}", 1209 | "nbump;" => "\u{0224E}\u{00338}", 1210 | "nbumpe;" => "\u{0224F}\u{00338}", 1211 | "ncap;" => "\u{02A43}", 1212 | "Ncaron;" => "\u{00147}", 1213 | "ncaron;" => "\u{00148}", 1214 | "Ncedil;" => "\u{00145}", 1215 | "ncedil;" => "\u{00146}", 1216 | "ncong;" => "\u{02247}", 1217 | "ncongdot;" => "\u{02A6D}\u{00338}", 1218 | "ncup;" => "\u{02A42}", 1219 | "Ncy;" => "\u{0041D}", 1220 | "ncy;" => "\u{0043D}", 1221 | "ndash;" => "\u{02013}", 1222 | "ne;" => "\u{02260}", 1223 | "nearhk;" => "\u{02924}", 1224 | "neArr;" => "\u{021D7}", 1225 | "nearr;" => "\u{02197}", 1226 | "nearrow;" => "\u{02197}", 1227 | "nedot;" => "\u{02250}\u{00338}", 1228 | "NegativeMediumSpace;" => 
"\u{0200B}", 1229 | "NegativeThickSpace;" => "\u{0200B}", 1230 | "NegativeThinSpace;" => "\u{0200B}", 1231 | "NegativeVeryThinSpace;" => "\u{0200B}", 1232 | "nequiv;" => "\u{02262}", 1233 | "nesear;" => "\u{02928}", 1234 | "nesim;" => "\u{02242}\u{00338}", 1235 | "NestedGreaterGreater;" => "\u{0226B}", 1236 | "NestedLessLess;" => "\u{0226A}", 1237 | "NewLine;" => "\u{0000A}", 1238 | "nexist;" => "\u{02204}", 1239 | "nexists;" => "\u{02204}", 1240 | "Nfr;" => "\u{1D511}", 1241 | "nfr;" => "\u{1D52B}", 1242 | "ngE;" => "\u{02267}\u{00338}", 1243 | "nge;" => "\u{02271}", 1244 | "ngeq;" => "\u{02271}", 1245 | "ngeqq;" => "\u{02267}\u{00338}", 1246 | "ngeqslant;" => "\u{02A7E}\u{00338}", 1247 | "nges;" => "\u{02A7E}\u{00338}", 1248 | "nGg;" => "\u{022D9}\u{00338}", 1249 | "ngsim;" => "\u{02275}", 1250 | "nGt;" => "\u{0226B}\u{020D2}", 1251 | "ngt;" => "\u{0226F}", 1252 | "ngtr;" => "\u{0226F}", 1253 | "nGtv;" => "\u{0226B}\u{00338}", 1254 | "nhArr;" => "\u{021CE}", 1255 | "nharr;" => "\u{021AE}", 1256 | "nhpar;" => "\u{02AF2}", 1257 | "ni;" => "\u{0220B}", 1258 | "nis;" => "\u{022FC}", 1259 | "nisd;" => "\u{022FA}", 1260 | "niv;" => "\u{0220B}", 1261 | "NJcy;" => "\u{0040A}", 1262 | "njcy;" => "\u{0045A}", 1263 | "nlArr;" => "\u{021CD}", 1264 | "nlarr;" => "\u{0219A}", 1265 | "nldr;" => "\u{02025}", 1266 | "nlE;" => "\u{02266}\u{00338}", 1267 | "nle;" => "\u{02270}", 1268 | "nLeftarrow;" => "\u{021CD}", 1269 | "nleftarrow;" => "\u{0219A}", 1270 | "nLeftrightarrow;" => "\u{021CE}", 1271 | "nleftrightarrow;" => "\u{021AE}", 1272 | "nleq;" => "\u{02270}", 1273 | "nleqq;" => "\u{02266}\u{00338}", 1274 | "nleqslant;" => "\u{02A7D}\u{00338}", 1275 | "nles;" => "\u{02A7D}\u{00338}", 1276 | "nless;" => "\u{0226E}", 1277 | "nLl;" => "\u{022D8}\u{00338}", 1278 | "nlsim;" => "\u{02274}", 1279 | "nLt;" => "\u{0226A}\u{020D2}", 1280 | "nlt;" => "\u{0226E}", 1281 | "nltri;" => "\u{022EA}", 1282 | "nltrie;" => "\u{022EC}", 1283 | "nLtv;" => "\u{0226A}\u{00338}", 1284 | "nmid;" => 
"\u{02224}", 1285 | "NoBreak;" => "\u{02060}", 1286 | "NonBreakingSpace;" => "\u{000A0}", 1287 | "Nopf;" => "\u{02115}", 1288 | "nopf;" => "\u{1D55F}", 1289 | "Not;" => "\u{02AEC}", 1290 | "not;" => "\u{000AC}", 1291 | "not" => "\u{000AC}", 1292 | "NotCongruent;" => "\u{02262}", 1293 | "NotCupCap;" => "\u{0226D}", 1294 | "NotDoubleVerticalBar;" => "\u{02226}", 1295 | "NotElement;" => "\u{02209}", 1296 | "NotEqual;" => "\u{02260}", 1297 | "NotEqualTilde;" => "\u{02242}\u{00338}", 1298 | "NotExists;" => "\u{02204}", 1299 | "NotGreater;" => "\u{0226F}", 1300 | "NotGreaterEqual;" => "\u{02271}", 1301 | "NotGreaterFullEqual;" => "\u{02267}\u{00338}", 1302 | "NotGreaterGreater;" => "\u{0226B}\u{00338}", 1303 | "NotGreaterLess;" => "\u{02279}", 1304 | "NotGreaterSlantEqual;" => "\u{02A7E}\u{00338}", 1305 | "NotGreaterTilde;" => "\u{02275}", 1306 | "NotHumpDownHump;" => "\u{0224E}\u{00338}", 1307 | "NotHumpEqual;" => "\u{0224F}\u{00338}", 1308 | "notin;" => "\u{02209}", 1309 | "notindot;" => "\u{022F5}\u{00338}", 1310 | "notinE;" => "\u{022F9}\u{00338}", 1311 | "notinva;" => "\u{02209}", 1312 | "notinvb;" => "\u{022F7}", 1313 | "notinvc;" => "\u{022F6}", 1314 | "NotLeftTriangle;" => "\u{022EA}", 1315 | "NotLeftTriangleBar;" => "\u{029CF}\u{00338}", 1316 | "NotLeftTriangleEqual;" => "\u{022EC}", 1317 | "NotLess;" => "\u{0226E}", 1318 | "NotLessEqual;" => "\u{02270}", 1319 | "NotLessGreater;" => "\u{02278}", 1320 | "NotLessLess;" => "\u{0226A}\u{00338}", 1321 | "NotLessSlantEqual;" => "\u{02A7D}\u{00338}", 1322 | "NotLessTilde;" => "\u{02274}", 1323 | "NotNestedGreaterGreater;" => "\u{02AA2}\u{00338}", 1324 | "NotNestedLessLess;" => "\u{02AA1}\u{00338}", 1325 | "notni;" => "\u{0220C}", 1326 | "notniva;" => "\u{0220C}", 1327 | "notnivb;" => "\u{022FE}", 1328 | "notnivc;" => "\u{022FD}", 1329 | "NotPrecedes;" => "\u{02280}", 1330 | "NotPrecedesEqual;" => "\u{02AAF}\u{00338}", 1331 | "NotPrecedesSlantEqual;" => "\u{022E0}", 1332 | "NotReverseElement;" => "\u{0220C}", 1333 | 
"NotRightTriangle;" => "\u{022EB}", 1334 | "NotRightTriangleBar;" => "\u{029D0}\u{00338}", 1335 | "NotRightTriangleEqual;" => "\u{022ED}", 1336 | "NotSquareSubset;" => "\u{0228F}\u{00338}", 1337 | "NotSquareSubsetEqual;" => "\u{022E2}", 1338 | "NotSquareSuperset;" => "\u{02290}\u{00338}", 1339 | "NotSquareSupersetEqual;" => "\u{022E3}", 1340 | "NotSubset;" => "\u{02282}\u{020D2}", 1341 | "NotSubsetEqual;" => "\u{02288}", 1342 | "NotSucceeds;" => "\u{02281}", 1343 | "NotSucceedsEqual;" => "\u{02AB0}\u{00338}", 1344 | "NotSucceedsSlantEqual;" => "\u{022E1}", 1345 | "NotSucceedsTilde;" => "\u{0227F}\u{00338}", 1346 | "NotSuperset;" => "\u{02283}\u{020D2}", 1347 | "NotSupersetEqual;" => "\u{02289}", 1348 | "NotTilde;" => "\u{02241}", 1349 | "NotTildeEqual;" => "\u{02244}", 1350 | "NotTildeFullEqual;" => "\u{02247}", 1351 | "NotTildeTilde;" => "\u{02249}", 1352 | "NotVerticalBar;" => "\u{02224}", 1353 | "npar;" => "\u{02226}", 1354 | "nparallel;" => "\u{02226}", 1355 | "nparsl;" => "\u{02AFD}\u{020E5}", 1356 | "npart;" => "\u{02202}\u{00338}", 1357 | "npolint;" => "\u{02A14}", 1358 | "npr;" => "\u{02280}", 1359 | "nprcue;" => "\u{022E0}", 1360 | "npre;" => "\u{02AAF}\u{00338}", 1361 | "nprec;" => "\u{02280}", 1362 | "npreceq;" => "\u{02AAF}\u{00338}", 1363 | "nrArr;" => "\u{021CF}", 1364 | "nrarr;" => "\u{0219B}", 1365 | "nrarrc;" => "\u{02933}\u{00338}", 1366 | "nrarrw;" => "\u{0219D}\u{00338}", 1367 | "nRightarrow;" => "\u{021CF}", 1368 | "nrightarrow;" => "\u{0219B}", 1369 | "nrtri;" => "\u{022EB}", 1370 | "nrtrie;" => "\u{022ED}", 1371 | "nsc;" => "\u{02281}", 1372 | "nsccue;" => "\u{022E1}", 1373 | "nsce;" => "\u{02AB0}\u{00338}", 1374 | "Nscr;" => "\u{1D4A9}", 1375 | "nscr;" => "\u{1D4C3}", 1376 | "nshortmid;" => "\u{02224}", 1377 | "nshortparallel;" => "\u{02226}", 1378 | "nsim;" => "\u{02241}", 1379 | "nsime;" => "\u{02244}", 1380 | "nsimeq;" => "\u{02244}", 1381 | "nsmid;" => "\u{02224}", 1382 | "nspar;" => "\u{02226}", 1383 | "nsqsube;" => "\u{022E2}", 1384 | 
"nsqsupe;" => "\u{022E3}", 1385 | "nsub;" => "\u{02284}", 1386 | "nsubE;" => "\u{02AC5}\u{00338}", 1387 | "nsube;" => "\u{02288}", 1388 | "nsubset;" => "\u{02282}\u{020D2}", 1389 | "nsubseteq;" => "\u{02288}", 1390 | "nsubseteqq;" => "\u{02AC5}\u{00338}", 1391 | "nsucc;" => "\u{02281}", 1392 | "nsucceq;" => "\u{02AB0}\u{00338}", 1393 | "nsup;" => "\u{02285}", 1394 | "nsupE;" => "\u{02AC6}\u{00338}", 1395 | "nsupe;" => "\u{02289}", 1396 | "nsupset;" => "\u{02283}\u{020D2}", 1397 | "nsupseteq;" => "\u{02289}", 1398 | "nsupseteqq;" => "\u{02AC6}\u{00338}", 1399 | "ntgl;" => "\u{02279}", 1400 | "Ntilde;" => "\u{000D1}", 1401 | "Ntilde" => "\u{000D1}", 1402 | "ntilde;" => "\u{000F1}", 1403 | "ntilde" => "\u{000F1}", 1404 | "ntlg;" => "\u{02278}", 1405 | "ntriangleleft;" => "\u{022EA}", 1406 | "ntrianglelefteq;" => "\u{022EC}", 1407 | "ntriangleright;" => "\u{022EB}", 1408 | "ntrianglerighteq;" => "\u{022ED}", 1409 | "Nu;" => "\u{0039D}", 1410 | "nu;" => "\u{003BD}", 1411 | "num;" => "\u{00023}", 1412 | "numero;" => "\u{02116}", 1413 | "numsp;" => "\u{02007}", 1414 | "nvap;" => "\u{0224D}\u{020D2}", 1415 | "nVDash;" => "\u{022AF}", 1416 | "nVdash;" => "\u{022AE}", 1417 | "nvDash;" => "\u{022AD}", 1418 | "nvdash;" => "\u{022AC}", 1419 | "nvge;" => "\u{02265}\u{020D2}", 1420 | "nvgt;" => "\u{0003E}\u{020D2}", 1421 | "nvHarr;" => "\u{02904}", 1422 | "nvinfin;" => "\u{029DE}", 1423 | "nvlArr;" => "\u{02902}", 1424 | "nvle;" => "\u{02264}\u{020D2}", 1425 | "nvlt;" => "\u{0003C}\u{020D2}", 1426 | "nvltrie;" => "\u{022B4}\u{020D2}", 1427 | "nvrArr;" => "\u{02903}", 1428 | "nvrtrie;" => "\u{022B5}\u{020D2}", 1429 | "nvsim;" => "\u{0223C}\u{020D2}", 1430 | "nwarhk;" => "\u{02923}", 1431 | "nwArr;" => "\u{021D6}", 1432 | "nwarr;" => "\u{02196}", 1433 | "nwarrow;" => "\u{02196}", 1434 | "nwnear;" => "\u{02927}", 1435 | "Oacute;" => "\u{000D3}", 1436 | "Oacute" => "\u{000D3}", 1437 | "oacute;" => "\u{000F3}", 1438 | "oacute" => "\u{000F3}", 1439 | "oast;" => "\u{0229B}", 1440 | 
"ocir;" => "\u{0229A}", 1441 | "Ocirc;" => "\u{000D4}", 1442 | "Ocirc" => "\u{000D4}", 1443 | "ocirc;" => "\u{000F4}", 1444 | "ocirc" => "\u{000F4}", 1445 | "Ocy;" => "\u{0041E}", 1446 | "ocy;" => "\u{0043E}", 1447 | "odash;" => "\u{0229D}", 1448 | "Odblac;" => "\u{00150}", 1449 | "odblac;" => "\u{00151}", 1450 | "odiv;" => "\u{02A38}", 1451 | "odot;" => "\u{02299}", 1452 | "odsold;" => "\u{029BC}", 1453 | "OElig;" => "\u{00152}", 1454 | "oelig;" => "\u{00153}", 1455 | "ofcir;" => "\u{029BF}", 1456 | "Ofr;" => "\u{1D512}", 1457 | "ofr;" => "\u{1D52C}", 1458 | "ogon;" => "\u{002DB}", 1459 | "Ograve;" => "\u{000D2}", 1460 | "Ograve" => "\u{000D2}", 1461 | "ograve;" => "\u{000F2}", 1462 | "ograve" => "\u{000F2}", 1463 | "ogt;" => "\u{029C1}", 1464 | "ohbar;" => "\u{029B5}", 1465 | "ohm;" => "\u{003A9}", 1466 | "oint;" => "\u{0222E}", 1467 | "olarr;" => "\u{021BA}", 1468 | "olcir;" => "\u{029BE}", 1469 | "olcross;" => "\u{029BB}", 1470 | "oline;" => "\u{0203E}", 1471 | "olt;" => "\u{029C0}", 1472 | "Omacr;" => "\u{0014C}", 1473 | "omacr;" => "\u{0014D}", 1474 | "Omega;" => "\u{003A9}", 1475 | "omega;" => "\u{003C9}", 1476 | "Omicron;" => "\u{0039F}", 1477 | "omicron;" => "\u{003BF}", 1478 | "omid;" => "\u{029B6}", 1479 | "ominus;" => "\u{02296}", 1480 | "Oopf;" => "\u{1D546}", 1481 | "oopf;" => "\u{1D560}", 1482 | "opar;" => "\u{029B7}", 1483 | "OpenCurlyDoubleQuote;" => "\u{0201C}", 1484 | "OpenCurlyQuote;" => "\u{02018}", 1485 | "operp;" => "\u{029B9}", 1486 | "oplus;" => "\u{02295}", 1487 | "Or;" => "\u{02A54}", 1488 | "or;" => "\u{02228}", 1489 | "orarr;" => "\u{021BB}", 1490 | "ord;" => "\u{02A5D}", 1491 | "order;" => "\u{02134}", 1492 | "orderof;" => "\u{02134}", 1493 | "ordf;" => "\u{000AA}", 1494 | "ordf" => "\u{000AA}", 1495 | "ordm;" => "\u{000BA}", 1496 | "ordm" => "\u{000BA}", 1497 | "origof;" => "\u{022B6}", 1498 | "oror;" => "\u{02A56}", 1499 | "orslope;" => "\u{02A57}", 1500 | "orv;" => "\u{02A5B}", 1501 | "oS;" => "\u{024C8}", 1502 | "Oscr;" => 
"\u{1D4AA}", 1503 | "oscr;" => "\u{02134}", 1504 | "Oslash;" => "\u{000D8}", 1505 | "Oslash" => "\u{000D8}", 1506 | "oslash;" => "\u{000F8}", 1507 | "oslash" => "\u{000F8}", 1508 | "osol;" => "\u{02298}", 1509 | "Otilde;" => "\u{000D5}", 1510 | "Otilde" => "\u{000D5}", 1511 | "otilde;" => "\u{000F5}", 1512 | "otilde" => "\u{000F5}", 1513 | "Otimes;" => "\u{02A37}", 1514 | "otimes;" => "\u{02297}", 1515 | "otimesas;" => "\u{02A36}", 1516 | "Ouml;" => "\u{000D6}", 1517 | "Ouml" => "\u{000D6}", 1518 | "ouml;" => "\u{000F6}", 1519 | "ouml" => "\u{000F6}", 1520 | "ovbar;" => "\u{0233D}", 1521 | "OverBar;" => "\u{0203E}", 1522 | "OverBrace;" => "\u{023DE}", 1523 | "OverBracket;" => "\u{023B4}", 1524 | "OverParenthesis;" => "\u{023DC}", 1525 | "par;" => "\u{02225}", 1526 | "para;" => "\u{000B6}", 1527 | "para" => "\u{000B6}", 1528 | "parallel;" => "\u{02225}", 1529 | "parsim;" => "\u{02AF3}", 1530 | "parsl;" => "\u{02AFD}", 1531 | "part;" => "\u{02202}", 1532 | "PartialD;" => "\u{02202}", 1533 | "Pcy;" => "\u{0041F}", 1534 | "pcy;" => "\u{0043F}", 1535 | "percnt;" => "\u{00025}", 1536 | "period;" => "\u{0002E}", 1537 | "permil;" => "\u{02030}", 1538 | "perp;" => "\u{022A5}", 1539 | "pertenk;" => "\u{02031}", 1540 | "Pfr;" => "\u{1D513}", 1541 | "pfr;" => "\u{1D52D}", 1542 | "Phi;" => "\u{003A6}", 1543 | "phi;" => "\u{003C6}", 1544 | "phiv;" => "\u{003D5}", 1545 | "phmmat;" => "\u{02133}", 1546 | "phone;" => "\u{0260E}", 1547 | "Pi;" => "\u{003A0}", 1548 | "pi;" => "\u{003C0}", 1549 | "pitchfork;" => "\u{022D4}", 1550 | "piv;" => "\u{003D6}", 1551 | "planck;" => "\u{0210F}", 1552 | "planckh;" => "\u{0210E}", 1553 | "plankv;" => "\u{0210F}", 1554 | "plus;" => "\u{0002B}", 1555 | "plusacir;" => "\u{02A23}", 1556 | "plusb;" => "\u{0229E}", 1557 | "pluscir;" => "\u{02A22}", 1558 | "plusdo;" => "\u{02214}", 1559 | "plusdu;" => "\u{02A25}", 1560 | "pluse;" => "\u{02A72}", 1561 | "PlusMinus;" => "\u{000B1}", 1562 | "plusmn;" => "\u{000B1}", 1563 | "plusmn" => "\u{000B1}", 1564 | 
"plussim;" => "\u{02A26}", 1565 | "plustwo;" => "\u{02A27}", 1566 | "pm;" => "\u{000B1}", 1567 | "Poincareplane;" => "\u{0210C}", 1568 | "pointint;" => "\u{02A15}", 1569 | "Popf;" => "\u{02119}", 1570 | "popf;" => "\u{1D561}", 1571 | "pound;" => "\u{000A3}", 1572 | "pound" => "\u{000A3}", 1573 | "Pr;" => "\u{02ABB}", 1574 | "pr;" => "\u{0227A}", 1575 | "prap;" => "\u{02AB7}", 1576 | "prcue;" => "\u{0227C}", 1577 | "prE;" => "\u{02AB3}", 1578 | "pre;" => "\u{02AAF}", 1579 | "prec;" => "\u{0227A}", 1580 | "precapprox;" => "\u{02AB7}", 1581 | "preccurlyeq;" => "\u{0227C}", 1582 | "Precedes;" => "\u{0227A}", 1583 | "PrecedesEqual;" => "\u{02AAF}", 1584 | "PrecedesSlantEqual;" => "\u{0227C}", 1585 | "PrecedesTilde;" => "\u{0227E}", 1586 | "preceq;" => "\u{02AAF}", 1587 | "precnapprox;" => "\u{02AB9}", 1588 | "precneqq;" => "\u{02AB5}", 1589 | "precnsim;" => "\u{022E8}", 1590 | "precsim;" => "\u{0227E}", 1591 | "Prime;" => "\u{02033}", 1592 | "prime;" => "\u{02032}", 1593 | "primes;" => "\u{02119}", 1594 | "prnap;" => "\u{02AB9}", 1595 | "prnE;" => "\u{02AB5}", 1596 | "prnsim;" => "\u{022E8}", 1597 | "prod;" => "\u{0220F}", 1598 | "Product;" => "\u{0220F}", 1599 | "profalar;" => "\u{0232E}", 1600 | "profline;" => "\u{02312}", 1601 | "profsurf;" => "\u{02313}", 1602 | "prop;" => "\u{0221D}", 1603 | "Proportion;" => "\u{02237}", 1604 | "Proportional;" => "\u{0221D}", 1605 | "propto;" => "\u{0221D}", 1606 | "prsim;" => "\u{0227E}", 1607 | "prurel;" => "\u{022B0}", 1608 | "Pscr;" => "\u{1D4AB}", 1609 | "pscr;" => "\u{1D4C5}", 1610 | "Psi;" => "\u{003A8}", 1611 | "psi;" => "\u{003C8}", 1612 | "puncsp;" => "\u{02008}", 1613 | "Qfr;" => "\u{1D514}", 1614 | "qfr;" => "\u{1D52E}", 1615 | "qint;" => "\u{02A0C}", 1616 | "Qopf;" => "\u{0211A}", 1617 | "qopf;" => "\u{1D562}", 1618 | "qprime;" => "\u{02057}", 1619 | "Qscr;" => "\u{1D4AC}", 1620 | "qscr;" => "\u{1D4C6}", 1621 | "quaternions;" => "\u{0210D}", 1622 | "quatint;" => "\u{02A16}", 1623 | "quest;" => "\u{0003F}", 1624 | 
"questeq;" => "\u{0225F}", 1625 | "QUOT;" => "\u{00022}", 1626 | "QUOT" => "\u{00022}", 1627 | "quot;" => "\u{00022}", 1628 | "quot" => "\u{00022}", 1629 | "rAarr;" => "\u{021DB}", 1630 | "race;" => "\u{0223D}\u{00331}", 1631 | "Racute;" => "\u{00154}", 1632 | "racute;" => "\u{00155}", 1633 | "radic;" => "\u{0221A}", 1634 | "raemptyv;" => "\u{029B3}", 1635 | "Rang;" => "\u{027EB}", 1636 | "rang;" => "\u{027E9}", 1637 | "rangd;" => "\u{02992}", 1638 | "range;" => "\u{029A5}", 1639 | "rangle;" => "\u{027E9}", 1640 | "raquo;" => "\u{000BB}", 1641 | "raquo" => "\u{000BB}", 1642 | "Rarr;" => "\u{021A0}", 1643 | "rArr;" => "\u{021D2}", 1644 | "rarr;" => "\u{02192}", 1645 | "rarrap;" => "\u{02975}", 1646 | "rarrb;" => "\u{021E5}", 1647 | "rarrbfs;" => "\u{02920}", 1648 | "rarrc;" => "\u{02933}", 1649 | "rarrfs;" => "\u{0291E}", 1650 | "rarrhk;" => "\u{021AA}", 1651 | "rarrlp;" => "\u{021AC}", 1652 | "rarrpl;" => "\u{02945}", 1653 | "rarrsim;" => "\u{02974}", 1654 | "Rarrtl;" => "\u{02916}", 1655 | "rarrtl;" => "\u{021A3}", 1656 | "rarrw;" => "\u{0219D}", 1657 | "rAtail;" => "\u{0291C}", 1658 | "ratail;" => "\u{0291A}", 1659 | "ratio;" => "\u{02236}", 1660 | "rationals;" => "\u{0211A}", 1661 | "RBarr;" => "\u{02910}", 1662 | "rBarr;" => "\u{0290F}", 1663 | "rbarr;" => "\u{0290D}", 1664 | "rbbrk;" => "\u{02773}", 1665 | "rbrace;" => "\u{0007D}", 1666 | "rbrack;" => "\u{0005D}", 1667 | "rbrke;" => "\u{0298C}", 1668 | "rbrksld;" => "\u{0298E}", 1669 | "rbrkslu;" => "\u{02990}", 1670 | "Rcaron;" => "\u{00158}", 1671 | "rcaron;" => "\u{00159}", 1672 | "Rcedil;" => "\u{00156}", 1673 | "rcedil;" => "\u{00157}", 1674 | "rceil;" => "\u{02309}", 1675 | "rcub;" => "\u{0007D}", 1676 | "Rcy;" => "\u{00420}", 1677 | "rcy;" => "\u{00440}", 1678 | "rdca;" => "\u{02937}", 1679 | "rdldhar;" => "\u{02969}", 1680 | "rdquo;" => "\u{0201D}", 1681 | "rdquor;" => "\u{0201D}", 1682 | "rdsh;" => "\u{021B3}", 1683 | "Re;" => "\u{0211C}", 1684 | "real;" => "\u{0211C}", 1685 | "realine;" => 
"\u{0211B}", 1686 | "realpart;" => "\u{0211C}", 1687 | "reals;" => "\u{0211D}", 1688 | "rect;" => "\u{025AD}", 1689 | "REG;" => "\u{000AE}", 1690 | "REG" => "\u{000AE}", 1691 | "reg;" => "\u{000AE}", 1692 | "reg" => "\u{000AE}", 1693 | "ReverseElement;" => "\u{0220B}", 1694 | "ReverseEquilibrium;" => "\u{021CB}", 1695 | "ReverseUpEquilibrium;" => "\u{0296F}", 1696 | "rfisht;" => "\u{0297D}", 1697 | "rfloor;" => "\u{0230B}", 1698 | "Rfr;" => "\u{0211C}", 1699 | "rfr;" => "\u{1D52F}", 1700 | "rHar;" => "\u{02964}", 1701 | "rhard;" => "\u{021C1}", 1702 | "rharu;" => "\u{021C0}", 1703 | "rharul;" => "\u{0296C}", 1704 | "Rho;" => "\u{003A1}", 1705 | "rho;" => "\u{003C1}", 1706 | "rhov;" => "\u{003F1}", 1707 | "RightAngleBracket;" => "\u{027E9}", 1708 | "RightArrow;" => "\u{02192}", 1709 | "Rightarrow;" => "\u{021D2}", 1710 | "rightarrow;" => "\u{02192}", 1711 | "RightArrowBar;" => "\u{021E5}", 1712 | "RightArrowLeftArrow;" => "\u{021C4}", 1713 | "rightarrowtail;" => "\u{021A3}", 1714 | "RightCeiling;" => "\u{02309}", 1715 | "RightDoubleBracket;" => "\u{027E7}", 1716 | "RightDownTeeVector;" => "\u{0295D}", 1717 | "RightDownVector;" => "\u{021C2}", 1718 | "RightDownVectorBar;" => "\u{02955}", 1719 | "RightFloor;" => "\u{0230B}", 1720 | "rightharpoondown;" => "\u{021C1}", 1721 | "rightharpoonup;" => "\u{021C0}", 1722 | "rightleftarrows;" => "\u{021C4}", 1723 | "rightleftharpoons;" => "\u{021CC}", 1724 | "rightrightarrows;" => "\u{021C9}", 1725 | "rightsquigarrow;" => "\u{0219D}", 1726 | "RightTee;" => "\u{022A2}", 1727 | "RightTeeArrow;" => "\u{021A6}", 1728 | "RightTeeVector;" => "\u{0295B}", 1729 | "rightthreetimes;" => "\u{022CC}", 1730 | "RightTriangle;" => "\u{022B3}", 1731 | "RightTriangleBar;" => "\u{029D0}", 1732 | "RightTriangleEqual;" => "\u{022B5}", 1733 | "RightUpDownVector;" => "\u{0294F}", 1734 | "RightUpTeeVector;" => "\u{0295C}", 1735 | "RightUpVector;" => "\u{021BE}", 1736 | "RightUpVectorBar;" => "\u{02954}", 1737 | "RightVector;" => "\u{021C0}", 1738 | 
"RightVectorBar;" => "\u{02953}", 1739 | "ring;" => "\u{002DA}", 1740 | "risingdotseq;" => "\u{02253}", 1741 | "rlarr;" => "\u{021C4}", 1742 | "rlhar;" => "\u{021CC}", 1743 | "rlm;" => "\u{0200F}", 1744 | "rmoust;" => "\u{023B1}", 1745 | "rmoustache;" => "\u{023B1}", 1746 | "rnmid;" => "\u{02AEE}", 1747 | "roang;" => "\u{027ED}", 1748 | "roarr;" => "\u{021FE}", 1749 | "robrk;" => "\u{027E7}", 1750 | "ropar;" => "\u{02986}", 1751 | "Ropf;" => "\u{0211D}", 1752 | "ropf;" => "\u{1D563}", 1753 | "roplus;" => "\u{02A2E}", 1754 | "rotimes;" => "\u{02A35}", 1755 | "RoundImplies;" => "\u{02970}", 1756 | "rpar;" => "\u{00029}", 1757 | "rpargt;" => "\u{02994}", 1758 | "rppolint;" => "\u{02A12}", 1759 | "rrarr;" => "\u{021C9}", 1760 | "Rrightarrow;" => "\u{021DB}", 1761 | "rsaquo;" => "\u{0203A}", 1762 | "Rscr;" => "\u{0211B}", 1763 | "rscr;" => "\u{1D4C7}", 1764 | "Rsh;" => "\u{021B1}", 1765 | "rsh;" => "\u{021B1}", 1766 | "rsqb;" => "\u{0005D}", 1767 | "rsquo;" => "\u{02019}", 1768 | "rsquor;" => "\u{02019}", 1769 | "rthree;" => "\u{022CC}", 1770 | "rtimes;" => "\u{022CA}", 1771 | "rtri;" => "\u{025B9}", 1772 | "rtrie;" => "\u{022B5}", 1773 | "rtrif;" => "\u{025B8}", 1774 | "rtriltri;" => "\u{029CE}", 1775 | "RuleDelayed;" => "\u{029F4}", 1776 | "ruluhar;" => "\u{02968}", 1777 | "rx;" => "\u{0211E}", 1778 | "Sacute;" => "\u{0015A}", 1779 | "sacute;" => "\u{0015B}", 1780 | "sbquo;" => "\u{0201A}", 1781 | "Sc;" => "\u{02ABC}", 1782 | "sc;" => "\u{0227B}", 1783 | "scap;" => "\u{02AB8}", 1784 | "Scaron;" => "\u{00160}", 1785 | "scaron;" => "\u{00161}", 1786 | "sccue;" => "\u{0227D}", 1787 | "scE;" => "\u{02AB4}", 1788 | "sce;" => "\u{02AB0}", 1789 | "Scedil;" => "\u{0015E}", 1790 | "scedil;" => "\u{0015F}", 1791 | "Scirc;" => "\u{0015C}", 1792 | "scirc;" => "\u{0015D}", 1793 | "scnap;" => "\u{02ABA}", 1794 | "scnE;" => "\u{02AB6}", 1795 | "scnsim;" => "\u{022E9}", 1796 | "scpolint;" => "\u{02A13}", 1797 | "scsim;" => "\u{0227F}", 1798 | "Scy;" => "\u{00421}", 1799 | "scy;" => 
"\u{00441}", 1800 | "sdot;" => "\u{022C5}", 1801 | "sdotb;" => "\u{022A1}", 1802 | "sdote;" => "\u{02A66}", 1803 | "searhk;" => "\u{02925}", 1804 | "seArr;" => "\u{021D8}", 1805 | "searr;" => "\u{02198}", 1806 | "searrow;" => "\u{02198}", 1807 | "sect;" => "\u{000A7}", 1808 | "sect" => "\u{000A7}", 1809 | "semi;" => "\u{0003B}", 1810 | "seswar;" => "\u{02929}", 1811 | "setminus;" => "\u{02216}", 1812 | "setmn;" => "\u{02216}", 1813 | "sext;" => "\u{02736}", 1814 | "Sfr;" => "\u{1D516}", 1815 | "sfr;" => "\u{1D530}", 1816 | "sfrown;" => "\u{02322}", 1817 | "sharp;" => "\u{0266F}", 1818 | "SHCHcy;" => "\u{00429}", 1819 | "shchcy;" => "\u{00449}", 1820 | "SHcy;" => "\u{00428}", 1821 | "shcy;" => "\u{00448}", 1822 | "ShortDownArrow;" => "\u{02193}", 1823 | "ShortLeftArrow;" => "\u{02190}", 1824 | "shortmid;" => "\u{02223}", 1825 | "shortparallel;" => "\u{02225}", 1826 | "ShortRightArrow;" => "\u{02192}", 1827 | "ShortUpArrow;" => "\u{02191}", 1828 | "shy;" => "\u{000AD}", 1829 | "shy" => "\u{000AD}", 1830 | "Sigma;" => "\u{003A3}", 1831 | "sigma;" => "\u{003C3}", 1832 | "sigmaf;" => "\u{003C2}", 1833 | "sigmav;" => "\u{003C2}", 1834 | "sim;" => "\u{0223C}", 1835 | "simdot;" => "\u{02A6A}", 1836 | "sime;" => "\u{02243}", 1837 | "simeq;" => "\u{02243}", 1838 | "simg;" => "\u{02A9E}", 1839 | "simgE;" => "\u{02AA0}", 1840 | "siml;" => "\u{02A9D}", 1841 | "simlE;" => "\u{02A9F}", 1842 | "simne;" => "\u{02246}", 1843 | "simplus;" => "\u{02A24}", 1844 | "simrarr;" => "\u{02972}", 1845 | "slarr;" => "\u{02190}", 1846 | "SmallCircle;" => "\u{02218}", 1847 | "smallsetminus;" => "\u{02216}", 1848 | "smashp;" => "\u{02A33}", 1849 | "smeparsl;" => "\u{029E4}", 1850 | "smid;" => "\u{02223}", 1851 | "smile;" => "\u{02323}", 1852 | "smt;" => "\u{02AAA}", 1853 | "smte;" => "\u{02AAC}", 1854 | "smtes;" => "\u{02AAC}\u{0FE00}", 1855 | "SOFTcy;" => "\u{0042C}", 1856 | "softcy;" => "\u{0044C}", 1857 | "sol;" => "\u{0002F}", 1858 | "solb;" => "\u{029C4}", 1859 | "solbar;" => "\u{0233F}", 
1860 | "Sopf;" => "\u{1D54A}", 1861 | "sopf;" => "\u{1D564}", 1862 | "spades;" => "\u{02660}", 1863 | "spadesuit;" => "\u{02660}", 1864 | "spar;" => "\u{02225}", 1865 | "sqcap;" => "\u{02293}", 1866 | "sqcaps;" => "\u{02293}\u{0FE00}", 1867 | "sqcup;" => "\u{02294}", 1868 | "sqcups;" => "\u{02294}\u{0FE00}", 1869 | "Sqrt;" => "\u{0221A}", 1870 | "sqsub;" => "\u{0228F}", 1871 | "sqsube;" => "\u{02291}", 1872 | "sqsubset;" => "\u{0228F}", 1873 | "sqsubseteq;" => "\u{02291}", 1874 | "sqsup;" => "\u{02290}", 1875 | "sqsupe;" => "\u{02292}", 1876 | "sqsupset;" => "\u{02290}", 1877 | "sqsupseteq;" => "\u{02292}", 1878 | "squ;" => "\u{025A1}", 1879 | "Square;" => "\u{025A1}", 1880 | "square;" => "\u{025A1}", 1881 | "SquareIntersection;" => "\u{02293}", 1882 | "SquareSubset;" => "\u{0228F}", 1883 | "SquareSubsetEqual;" => "\u{02291}", 1884 | "SquareSuperset;" => "\u{02290}", 1885 | "SquareSupersetEqual;" => "\u{02292}", 1886 | "SquareUnion;" => "\u{02294}", 1887 | "squarf;" => "\u{025AA}", 1888 | "squf;" => "\u{025AA}", 1889 | "srarr;" => "\u{02192}", 1890 | "Sscr;" => "\u{1D4AE}", 1891 | "sscr;" => "\u{1D4C8}", 1892 | "ssetmn;" => "\u{02216}", 1893 | "ssmile;" => "\u{02323}", 1894 | "sstarf;" => "\u{022C6}", 1895 | "Star;" => "\u{022C6}", 1896 | "star;" => "\u{02606}", 1897 | "starf;" => "\u{02605}", 1898 | "straightepsilon;" => "\u{003F5}", 1899 | "straightphi;" => "\u{003D5}", 1900 | "strns;" => "\u{000AF}", 1901 | "Sub;" => "\u{022D0}", 1902 | "sub;" => "\u{02282}", 1903 | "subdot;" => "\u{02ABD}", 1904 | "subE;" => "\u{02AC5}", 1905 | "sube;" => "\u{02286}", 1906 | "subedot;" => "\u{02AC3}", 1907 | "submult;" => "\u{02AC1}", 1908 | "subnE;" => "\u{02ACB}", 1909 | "subne;" => "\u{0228A}", 1910 | "subplus;" => "\u{02ABF}", 1911 | "subrarr;" => "\u{02979}", 1912 | "Subset;" => "\u{022D0}", 1913 | "subset;" => "\u{02282}", 1914 | "subseteq;" => "\u{02286}", 1915 | "subseteqq;" => "\u{02AC5}", 1916 | "SubsetEqual;" => "\u{02286}", 1917 | "subsetneq;" => "\u{0228A}", 1918 | 
"subsetneqq;" => "\u{02ACB}", 1919 | "subsim;" => "\u{02AC7}", 1920 | "subsub;" => "\u{02AD5}", 1921 | "subsup;" => "\u{02AD3}", 1922 | "succ;" => "\u{0227B}", 1923 | "succapprox;" => "\u{02AB8}", 1924 | "succcurlyeq;" => "\u{0227D}", 1925 | "Succeeds;" => "\u{0227B}", 1926 | "SucceedsEqual;" => "\u{02AB0}", 1927 | "SucceedsSlantEqual;" => "\u{0227D}", 1928 | "SucceedsTilde;" => "\u{0227F}", 1929 | "succeq;" => "\u{02AB0}", 1930 | "succnapprox;" => "\u{02ABA}", 1931 | "succneqq;" => "\u{02AB6}", 1932 | "succnsim;" => "\u{022E9}", 1933 | "succsim;" => "\u{0227F}", 1934 | "SuchThat;" => "\u{0220B}", 1935 | "Sum;" => "\u{02211}", 1936 | "sum;" => "\u{02211}", 1937 | "sung;" => "\u{0266A}", 1938 | "Sup;" => "\u{022D1}", 1939 | "sup;" => "\u{02283}", 1940 | "sup1;" => "\u{000B9}", 1941 | "sup1" => "\u{000B9}", 1942 | "sup2;" => "\u{000B2}", 1943 | "sup2" => "\u{000B2}", 1944 | "sup3;" => "\u{000B3}", 1945 | "sup3" => "\u{000B3}", 1946 | "supdot;" => "\u{02ABE}", 1947 | "supdsub;" => "\u{02AD8}", 1948 | "supE;" => "\u{02AC6}", 1949 | "supe;" => "\u{02287}", 1950 | "supedot;" => "\u{02AC4}", 1951 | "Superset;" => "\u{02283}", 1952 | "SupersetEqual;" => "\u{02287}", 1953 | "suphsol;" => "\u{027C9}", 1954 | "suphsub;" => "\u{02AD7}", 1955 | "suplarr;" => "\u{0297B}", 1956 | "supmult;" => "\u{02AC2}", 1957 | "supnE;" => "\u{02ACC}", 1958 | "supne;" => "\u{0228B}", 1959 | "supplus;" => "\u{02AC0}", 1960 | "Supset;" => "\u{022D1}", 1961 | "supset;" => "\u{02283}", 1962 | "supseteq;" => "\u{02287}", 1963 | "supseteqq;" => "\u{02AC6}", 1964 | "supsetneq;" => "\u{0228B}", 1965 | "supsetneqq;" => "\u{02ACC}", 1966 | "supsim;" => "\u{02AC8}", 1967 | "supsub;" => "\u{02AD4}", 1968 | "supsup;" => "\u{02AD6}", 1969 | "swarhk;" => "\u{02926}", 1970 | "swArr;" => "\u{021D9}", 1971 | "swarr;" => "\u{02199}", 1972 | "swarrow;" => "\u{02199}", 1973 | "swnwar;" => "\u{0292A}", 1974 | "szlig;" => "\u{000DF}", 1975 | "szlig" => "\u{000DF}", 1976 | "Tab;" => "\u{00009}", 1977 | "target;" => 
"\u{02316}", 1978 | "Tau;" => "\u{003A4}", 1979 | "tau;" => "\u{003C4}", 1980 | "tbrk;" => "\u{023B4}", 1981 | "Tcaron;" => "\u{00164}", 1982 | "tcaron;" => "\u{00165}", 1983 | "Tcedil;" => "\u{00162}", 1984 | "tcedil;" => "\u{00163}", 1985 | "Tcy;" => "\u{00422}", 1986 | "tcy;" => "\u{00442}", 1987 | "tdot;" => "\u{020DB}", 1988 | "telrec;" => "\u{02315}", 1989 | "Tfr;" => "\u{1D517}", 1990 | "tfr;" => "\u{1D531}", 1991 | "there4;" => "\u{02234}", 1992 | "Therefore;" => "\u{02234}", 1993 | "therefore;" => "\u{02234}", 1994 | "Theta;" => "\u{00398}", 1995 | "theta;" => "\u{003B8}", 1996 | "thetasym;" => "\u{003D1}", 1997 | "thetav;" => "\u{003D1}", 1998 | "thickapprox;" => "\u{02248}", 1999 | "thicksim;" => "\u{0223C}", 2000 | "ThickSpace;" => "\u{0205F}\u{0200A}", 2001 | "thinsp;" => "\u{02009}", 2002 | "ThinSpace;" => "\u{02009}", 2003 | "thkap;" => "\u{02248}", 2004 | "thksim;" => "\u{0223C}", 2005 | "THORN;" => "\u{000DE}", 2006 | "THORN" => "\u{000DE}", 2007 | "thorn;" => "\u{000FE}", 2008 | "thorn" => "\u{000FE}", 2009 | "Tilde;" => "\u{0223C}", 2010 | "tilde;" => "\u{002DC}", 2011 | "TildeEqual;" => "\u{02243}", 2012 | "TildeFullEqual;" => "\u{02245}", 2013 | "TildeTilde;" => "\u{02248}", 2014 | "times;" => "\u{000D7}", 2015 | "times" => "\u{000D7}", 2016 | "timesb;" => "\u{022A0}", 2017 | "timesbar;" => "\u{02A31}", 2018 | "timesd;" => "\u{02A30}", 2019 | "tint;" => "\u{0222D}", 2020 | "toea;" => "\u{02928}", 2021 | "top;" => "\u{022A4}", 2022 | "topbot;" => "\u{02336}", 2023 | "topcir;" => "\u{02AF1}", 2024 | "Topf;" => "\u{1D54B}", 2025 | "topf;" => "\u{1D565}", 2026 | "topfork;" => "\u{02ADA}", 2027 | "tosa;" => "\u{02929}", 2028 | "tprime;" => "\u{02034}", 2029 | "TRADE;" => "\u{02122}", 2030 | "trade;" => "\u{02122}", 2031 | "triangle;" => "\u{025B5}", 2032 | "triangledown;" => "\u{025BF}", 2033 | "triangleleft;" => "\u{025C3}", 2034 | "trianglelefteq;" => "\u{022B4}", 2035 | "triangleq;" => "\u{0225C}", 2036 | "triangleright;" => "\u{025B9}", 2037 | 
"trianglerighteq;" => "\u{022B5}", 2038 | "tridot;" => "\u{025EC}", 2039 | "trie;" => "\u{0225C}", 2040 | "triminus;" => "\u{02A3A}", 2041 | "TripleDot;" => "\u{020DB}", 2042 | "triplus;" => "\u{02A39}", 2043 | "trisb;" => "\u{029CD}", 2044 | "tritime;" => "\u{02A3B}", 2045 | "trpezium;" => "\u{023E2}", 2046 | "Tscr;" => "\u{1D4AF}", 2047 | "tscr;" => "\u{1D4C9}", 2048 | "TScy;" => "\u{00426}", 2049 | "tscy;" => "\u{00446}", 2050 | "TSHcy;" => "\u{0040B}", 2051 | "tshcy;" => "\u{0045B}", 2052 | "Tstrok;" => "\u{00166}", 2053 | "tstrok;" => "\u{00167}", 2054 | "twixt;" => "\u{0226C}", 2055 | "twoheadleftarrow;" => "\u{0219E}", 2056 | "twoheadrightarrow;" => "\u{021A0}", 2057 | "Uacute;" => "\u{000DA}", 2058 | "Uacute" => "\u{000DA}", 2059 | "uacute;" => "\u{000FA}", 2060 | "uacute" => "\u{000FA}", 2061 | "Uarr;" => "\u{0219F}", 2062 | "uArr;" => "\u{021D1}", 2063 | "uarr;" => "\u{02191}", 2064 | "Uarrocir;" => "\u{02949}", 2065 | "Ubrcy;" => "\u{0040E}", 2066 | "ubrcy;" => "\u{0045E}", 2067 | "Ubreve;" => "\u{0016C}", 2068 | "ubreve;" => "\u{0016D}", 2069 | "Ucirc;" => "\u{000DB}", 2070 | "Ucirc" => "\u{000DB}", 2071 | "ucirc;" => "\u{000FB}", 2072 | "ucirc" => "\u{000FB}", 2073 | "Ucy;" => "\u{00423}", 2074 | "ucy;" => "\u{00443}", 2075 | "udarr;" => "\u{021C5}", 2076 | "Udblac;" => "\u{00170}", 2077 | "udblac;" => "\u{00171}", 2078 | "udhar;" => "\u{0296E}", 2079 | "ufisht;" => "\u{0297E}", 2080 | "Ufr;" => "\u{1D518}", 2081 | "ufr;" => "\u{1D532}", 2082 | "Ugrave;" => "\u{000D9}", 2083 | "Ugrave" => "\u{000D9}", 2084 | "ugrave;" => "\u{000F9}", 2085 | "ugrave" => "\u{000F9}", 2086 | "uHar;" => "\u{02963}", 2087 | "uharl;" => "\u{021BF}", 2088 | "uharr;" => "\u{021BE}", 2089 | "uhblk;" => "\u{02580}", 2090 | "ulcorn;" => "\u{0231C}", 2091 | "ulcorner;" => "\u{0231C}", 2092 | "ulcrop;" => "\u{0230F}", 2093 | "ultri;" => "\u{025F8}", 2094 | "Umacr;" => "\u{0016A}", 2095 | "umacr;" => "\u{0016B}", 2096 | "uml;" => "\u{000A8}", 2097 | "uml" => "\u{000A8}", 2098 | 
"UnderBar;" => "\u{0005F}", 2099 | "UnderBrace;" => "\u{023DF}", 2100 | "UnderBracket;" => "\u{023B5}", 2101 | "UnderParenthesis;" => "\u{023DD}", 2102 | "Union;" => "\u{022C3}", 2103 | "UnionPlus;" => "\u{0228E}", 2104 | "Uogon;" => "\u{00172}", 2105 | "uogon;" => "\u{00173}", 2106 | "Uopf;" => "\u{1D54C}", 2107 | "uopf;" => "\u{1D566}", 2108 | "UpArrow;" => "\u{02191}", 2109 | "Uparrow;" => "\u{021D1}", 2110 | "uparrow;" => "\u{02191}", 2111 | "UpArrowBar;" => "\u{02912}", 2112 | "UpArrowDownArrow;" => "\u{021C5}", 2113 | "UpDownArrow;" => "\u{02195}", 2114 | "Updownarrow;" => "\u{021D5}", 2115 | "updownarrow;" => "\u{02195}", 2116 | "UpEquilibrium;" => "\u{0296E}", 2117 | "upharpoonleft;" => "\u{021BF}", 2118 | "upharpoonright;" => "\u{021BE}", 2119 | "uplus;" => "\u{0228E}", 2120 | "UpperLeftArrow;" => "\u{02196}", 2121 | "UpperRightArrow;" => "\u{02197}", 2122 | "Upsi;" => "\u{003D2}", 2123 | "upsi;" => "\u{003C5}", 2124 | "upsih;" => "\u{003D2}", 2125 | "Upsilon;" => "\u{003A5}", 2126 | "upsilon;" => "\u{003C5}", 2127 | "UpTee;" => "\u{022A5}", 2128 | "UpTeeArrow;" => "\u{021A5}", 2129 | "upuparrows;" => "\u{021C8}", 2130 | "urcorn;" => "\u{0231D}", 2131 | "urcorner;" => "\u{0231D}", 2132 | "urcrop;" => "\u{0230E}", 2133 | "Uring;" => "\u{0016E}", 2134 | "uring;" => "\u{0016F}", 2135 | "urtri;" => "\u{025F9}", 2136 | "Uscr;" => "\u{1D4B0}", 2137 | "uscr;" => "\u{1D4CA}", 2138 | "utdot;" => "\u{022F0}", 2139 | "Utilde;" => "\u{00168}", 2140 | "utilde;" => "\u{00169}", 2141 | "utri;" => "\u{025B5}", 2142 | "utrif;" => "\u{025B4}", 2143 | "uuarr;" => "\u{021C8}", 2144 | "Uuml;" => "\u{000DC}", 2145 | "Uuml" => "\u{000DC}", 2146 | "uuml;" => "\u{000FC}", 2147 | "uuml" => "\u{000FC}", 2148 | "uwangle;" => "\u{029A7}", 2149 | "vangrt;" => "\u{0299C}", 2150 | "varepsilon;" => "\u{003F5}", 2151 | "varkappa;" => "\u{003F0}", 2152 | "varnothing;" => "\u{02205}", 2153 | "varphi;" => "\u{003D5}", 2154 | "varpi;" => "\u{003D6}", 2155 | "varpropto;" => "\u{0221D}", 2156 | 
"vArr;" => "\u{021D5}", 2157 | "varr;" => "\u{02195}", 2158 | "varrho;" => "\u{003F1}", 2159 | "varsigma;" => "\u{003C2}", 2160 | "varsubsetneq;" => "\u{0228A}\u{0FE00}", 2161 | "varsubsetneqq;" => "\u{02ACB}\u{0FE00}", 2162 | "varsupsetneq;" => "\u{0228B}\u{0FE00}", 2163 | "varsupsetneqq;" => "\u{02ACC}\u{0FE00}", 2164 | "vartheta;" => "\u{003D1}", 2165 | "vartriangleleft;" => "\u{022B2}", 2166 | "vartriangleright;" => "\u{022B3}", 2167 | "Vbar;" => "\u{02AEB}", 2168 | "vBar;" => "\u{02AE8}", 2169 | "vBarv;" => "\u{02AE9}", 2170 | "Vcy;" => "\u{00412}", 2171 | "vcy;" => "\u{00432}", 2172 | "VDash;" => "\u{022AB}", 2173 | "Vdash;" => "\u{022A9}", 2174 | "vDash;" => "\u{022A8}", 2175 | "vdash;" => "\u{022A2}", 2176 | "Vdashl;" => "\u{02AE6}", 2177 | "Vee;" => "\u{022C1}", 2178 | "vee;" => "\u{02228}", 2179 | "veebar;" => "\u{022BB}", 2180 | "veeeq;" => "\u{0225A}", 2181 | "vellip;" => "\u{022EE}", 2182 | "Verbar;" => "\u{02016}", 2183 | "verbar;" => "\u{0007C}", 2184 | "Vert;" => "\u{02016}", 2185 | "vert;" => "\u{0007C}", 2186 | "VerticalBar;" => "\u{02223}", 2187 | "VerticalLine;" => "\u{0007C}", 2188 | "VerticalSeparator;" => "\u{02758}", 2189 | "VerticalTilde;" => "\u{02240}", 2190 | "VeryThinSpace;" => "\u{0200A}", 2191 | "Vfr;" => "\u{1D519}", 2192 | "vfr;" => "\u{1D533}", 2193 | "vltri;" => "\u{022B2}", 2194 | "vnsub;" => "\u{02282}\u{020D2}", 2195 | "vnsup;" => "\u{02283}\u{020D2}", 2196 | "Vopf;" => "\u{1D54D}", 2197 | "vopf;" => "\u{1D567}", 2198 | "vprop;" => "\u{0221D}", 2199 | "vrtri;" => "\u{022B3}", 2200 | "Vscr;" => "\u{1D4B1}", 2201 | "vscr;" => "\u{1D4CB}", 2202 | "vsubnE;" => "\u{02ACB}\u{0FE00}", 2203 | "vsubne;" => "\u{0228A}\u{0FE00}", 2204 | "vsupnE;" => "\u{02ACC}\u{0FE00}", 2205 | "vsupne;" => "\u{0228B}\u{0FE00}", 2206 | "Vvdash;" => "\u{022AA}", 2207 | "vzigzag;" => "\u{0299A}", 2208 | "Wcirc;" => "\u{00174}", 2209 | "wcirc;" => "\u{00175}", 2210 | "wedbar;" => "\u{02A5F}", 2211 | "Wedge;" => "\u{022C0}", 2212 | "wedge;" => "\u{02227}", 
2213 | "wedgeq;" => "\u{02259}", 2214 | "weierp;" => "\u{02118}", 2215 | "Wfr;" => "\u{1D51A}", 2216 | "wfr;" => "\u{1D534}", 2217 | "Wopf;" => "\u{1D54E}", 2218 | "wopf;" => "\u{1D568}", 2219 | "wp;" => "\u{02118}", 2220 | "wr;" => "\u{02240}", 2221 | "wreath;" => "\u{02240}", 2222 | "Wscr;" => "\u{1D4B2}", 2223 | "wscr;" => "\u{1D4CC}", 2224 | "xcap;" => "\u{022C2}", 2225 | "xcirc;" => "\u{025EF}", 2226 | "xcup;" => "\u{022C3}", 2227 | "xdtri;" => "\u{025BD}", 2228 | "Xfr;" => "\u{1D51B}", 2229 | "xfr;" => "\u{1D535}", 2230 | "xhArr;" => "\u{027FA}", 2231 | "xharr;" => "\u{027F7}", 2232 | "Xi;" => "\u{0039E}", 2233 | "xi;" => "\u{003BE}", 2234 | "xlArr;" => "\u{027F8}", 2235 | "xlarr;" => "\u{027F5}", 2236 | "xmap;" => "\u{027FC}", 2237 | "xnis;" => "\u{022FB}", 2238 | "xodot;" => "\u{02A00}", 2239 | "Xopf;" => "\u{1D54F}", 2240 | "xopf;" => "\u{1D569}", 2241 | "xoplus;" => "\u{02A01}", 2242 | "xotime;" => "\u{02A02}", 2243 | "xrArr;" => "\u{027F9}", 2244 | "xrarr;" => "\u{027F6}", 2245 | "Xscr;" => "\u{1D4B3}", 2246 | "xscr;" => "\u{1D4CD}", 2247 | "xsqcup;" => "\u{02A06}", 2248 | "xuplus;" => "\u{02A04}", 2249 | "xutri;" => "\u{025B3}", 2250 | "xvee;" => "\u{022C1}", 2251 | "xwedge;" => "\u{022C0}", 2252 | "Yacute;" => "\u{000DD}", 2253 | "Yacute" => "\u{000DD}", 2254 | "yacute;" => "\u{000FD}", 2255 | "yacute" => "\u{000FD}", 2256 | "YAcy;" => "\u{0042F}", 2257 | "yacy;" => "\u{0044F}", 2258 | "Ycirc;" => "\u{00176}", 2259 | "ycirc;" => "\u{00177}", 2260 | "Ycy;" => "\u{0042B}", 2261 | "ycy;" => "\u{0044B}", 2262 | "yen;" => "\u{000A5}", 2263 | "yen" => "\u{000A5}", 2264 | "Yfr;" => "\u{1D51C}", 2265 | "yfr;" => "\u{1D536}", 2266 | "YIcy;" => "\u{00407}", 2267 | "yicy;" => "\u{00457}", 2268 | "Yopf;" => "\u{1D550}", 2269 | "yopf;" => "\u{1D56A}", 2270 | "Yscr;" => "\u{1D4B4}", 2271 | "yscr;" => "\u{1D4CE}", 2272 | "YUcy;" => "\u{0042E}", 2273 | "yucy;" => "\u{0044E}", 2274 | "Yuml;" => "\u{00178}", 2275 | "yuml;" => "\u{000FF}", 2276 | "yuml" => "\u{000FF}", 
2277 | "Zacute;" => "\u{00179}", 2278 | "zacute;" => "\u{0017A}", 2279 | "Zcaron;" => "\u{0017D}", 2280 | "zcaron;" => "\u{0017E}", 2281 | "Zcy;" => "\u{00417}", 2282 | "zcy;" => "\u{00437}", 2283 | "Zdot;" => "\u{0017B}", 2284 | "zdot;" => "\u{0017C}", 2285 | "zeetrf;" => "\u{02128}", 2286 | "ZeroWidthSpace;" => "\u{0200B}", 2287 | "Zeta;" => "\u{00396}", 2288 | "zeta;" => "\u{003B6}", 2289 | "Zfr;" => "\u{02128}", 2290 | "zfr;" => "\u{1D537}", 2291 | "ZHcy;" => "\u{00416}", 2292 | "zhcy;" => "\u{00436}", 2293 | "zigrarr;" => "\u{021DD}", 2294 | "Zopf;" => "\u{02124}", 2295 | "zopf;" => "\u{1D56B}", 2296 | "Zscr;" => "\u{1D4B5}", 2297 | "zscr;" => "\u{1D4CF}", 2298 | "zwj;" => "\u{0200D}", 2299 | "zwnj;" => "\u{0200C}", 2300 | ]; 2301 | } 2302 | 2303 | #[cfg(test)] 2304 | mod tests { 2305 | use ::util::html_attr_unescape; 2306 | 2307 | #[test] 2308 | fn test_html_attr_unescape() { 2309 | assert_eq!(html_attr_unescape("/?foo<=bar"), "/?foo<=bar".to_string()); 2310 | assert_eq!(html_attr_unescape("/?f<oo=bar"), "/?f<oo=bar".to_string()); 2311 | assert_eq!(html_attr_unescape("/?f<-oo=bar"), "/?f<-oo=bar".to_string()); 2312 | assert_eq!(html_attr_unescape("/?foo=<"), "/?foo=<".to_string()); 2313 | assert_eq!(html_attr_unescape("/?f<oo=bar"), "/?f").childs(None); 17 | assert_eq!(dom[0].text(), ""); 18 | } 19 | 20 | #[test] 21 | fn basic1() { 22 | // Simple (basics) 23 | let dom = DOM::new(r#"
A
B
"#); 24 | assert_eq!(dom.at("#b").unwrap().text(), "B"); 25 | assert_eq!(dom.find("div[id]").iter().map(|x| x.text()).collect::>(), ["A", "B"]); 26 | assert_eq!(dom.at("#a").unwrap().attr("foo"), Some("0")); 27 | assert!(dom.at("#b").unwrap().attrs().contains_key("myattr")); 28 | assert_eq!(dom.find("[id]").iter().map(|x| x.attr("id").unwrap()).collect::>(), ["a", "b"]); 29 | assert_eq!(dom.to_string(), r#"
A
B
"#); 30 | } 31 | 32 | #[test] 33 | fn basic2() { 34 | // Select based on parent 35 | let dom = DOM::new(r#" 36 | 37 |
test1
38 |
test2
39 | 40 | "#); 41 | assert_eq!(dom.find("body > div").get(0).unwrap().text(), "test1"); // right text 42 | assert_eq!(dom.find("body > div").get(1).unwrap().text(), ""); // no content 43 | assert_eq!(dom.find("body > div").len(), 2); // right number of elements 44 | assert_eq!(dom.find("body > div > div").get(0).unwrap().text(), "test2"); // right text 45 | assert_eq!(dom.find("body > div > div").len(), 1); // right number of elements 46 | } 47 | 48 | #[test] 49 | fn basic3() { 50 | // Basic navigation 51 | let dom = DOM::new(r#" 52 | 53 | 54 | test 55 | easy 56 | 57 | 58 | works well 59 | 60 | 61 | 62 | < very broken 63 |
64 | more text 65 |
66 | "#); 67 | assert!(dom.tag().is_none()); // no tag 68 | assert!(!dom.attrs().contains_key("foo")); 69 | assert_eq!( 70 | dom.to_string(), 71 | r#" 72 | 73 | 74 | test 75 | easy 76 | 77 | 78 | works well 79 | 80 | 81 | 82 | < very broken 83 |
84 | more text 85 |
86 | "#); 87 | 88 | let simple = dom.at("foo simple.working[class^=\"wor\"]").unwrap(); 89 | assert_eq!(simple.parent().unwrap().text_all(), "test easy works well yada yada < very broken more text"); 90 | assert_eq!(simple.tag().unwrap(), "simple"); 91 | assert_eq!(simple.attr("class").unwrap(), "working"); 92 | assert_eq!(simple.text(), "easy"); 93 | assert_eq!(simple.parent().unwrap().tag().unwrap(), "foo"); 94 | assert_eq!(simple.parent().unwrap().attr("bar").unwrap(), "baeasy"); 97 | 98 | assert_eq!(dom.at("test#test").unwrap().tag().unwrap(), "test"); 99 | assert_eq!(dom.at("[class$=\"ing\"]").unwrap().tag().unwrap(), "simple"); 100 | assert_eq!(dom.at("[class$=ing]").unwrap().tag().unwrap(), "simple"); 101 | assert_eq!(dom.at("[class=\"working\"]").unwrap().tag().unwrap(), "simple"); 102 | assert_eq!(dom.at("[class=working][class]").unwrap().tag().unwrap(), "simple"); 103 | assert_eq!(dom.at("foo > simple").unwrap().next().unwrap().tag().unwrap(), "test"); 104 | assert_eq!(dom.at("foo > simple").unwrap().next().unwrap().next().unwrap().tag().unwrap(), "a"); 105 | assert_eq!(dom.at("foo > test").unwrap().prev().unwrap().tag().unwrap(), "simple"); 106 | assert!(dom.next().is_none()); 107 | assert!(dom.prev().is_none()); 108 | assert!(dom.at("foo > a").unwrap().next().is_none()); 109 | assert!(dom.at("foo > simple").unwrap().prev().is_none()); 110 | assert_eq!(dom.at("simple").unwrap().ancestors(None).iter().map(|x| x.tag().unwrap()).collect::>(), ["foo"]); 111 | } 112 | 113 | #[test] 114 | fn class_and_id() { 115 | // Class and ID 116 | let dom = DOM::new(r#"
a
"#); 117 | assert_eq!(dom.at("div#id.class").unwrap().text(), "a"); 118 | } 119 | 120 | #[test] 121 | fn deep_nesting() { 122 | // Deep nesting (parent combinator) 123 | let dom = DOM::new(r#" 124 | 125 | 126 | Foo 127 | 128 | 129 |
130 | 136 |
137 |
138 |

Bar

139 |
140 |
141 |
More stuff
142 |
143 | 144 | 145 | "#); 146 | 147 | let p = dom.find("body > #container > div p[id]"); 148 | assert_eq!(p.len(), 1); 149 | assert_eq!(p.get(0).unwrap().attr("id").unwrap(), "foo"); 150 | 151 | assert_eq!( 152 | dom.find("div").iter().map(|x| x.attr("id").unwrap()).collect::>(), 153 | ["container", "header", "logo", "buttons", "buttons", "content"] 154 | ); 155 | assert_eq!( 156 | dom.find("p").iter().map(|x| x.attr("id").unwrap()).collect::>(), 157 | ["foo", "bar"] 158 | ); 159 | assert_eq!( 160 | dom.at("p").unwrap().ancestors(None).iter().map(|x| x.tag().unwrap()).collect::>(), 161 | ["div", "div", "div", "body", "html"] 162 | ); 163 | assert_eq!(dom.at("html").unwrap().ancestors(None).len(), 0); 164 | assert_eq!(dom.ancestors(None).len(), 0); 165 | } 166 | 167 | #[test] 168 | fn script_tag() { 169 | let dom = DOM::new(r#""#); 170 | assert_eq!(dom.at("script").unwrap().text(), "alert('world');"); 171 | } 172 | 173 | #[test] 174 | fn html5_base() { 175 | // HTML5 (unquoted values) 176 | let dom = DOM::new(r#"
works
"#); 177 | assert_eq!(dom.at("#test").unwrap().text(), "works"); 178 | assert_eq!(dom.at("div").unwrap().text(), "works"); 179 | assert_eq!(dom.at("[foo=bar][foo=\"bar\"]").unwrap().text(), "works"); 180 | assert!(dom.at("[foo=\"ba\"]").is_none()); 181 | assert_eq!(dom.at("[foo=bar]").unwrap().text(), "works"); 182 | assert!(dom.at("[foo=ba]").is_none()); 183 | assert_eq!(dom.at(".tset").unwrap().text(), "works"); 184 | assert_eq!(dom.at("[bar=/baz/]").unwrap().text(), "works"); 185 | assert_eq!(dom.at("[baz=//]").unwrap().text(), "works"); 186 | } 187 | 188 | #[test] 189 | fn html1_mix() { 190 | // HTML1 (single quotes, uppercase tags and whitespace in attributes) 191 | let dom = DOM::new(r#"
works
"#); 192 | assert_eq!(dom.at("#test").unwrap().text(), "works"); 193 | assert_eq!(dom.at("div").unwrap().text(), "works"); 194 | assert_eq!(dom.at("[foo=\"bar\"]").unwrap().text(), "works"); 195 | assert!(dom.at("[foo=\"ba\"]").is_none()); 196 | assert_eq!(dom.at("[foo=bar]").unwrap().text(), "works"); 197 | assert!(dom.at("[foo=ba]").is_none()); 198 | assert_eq!(dom.at(".tset").unwrap().text(), "works"); 199 | } 200 | 201 | #[test] 202 | fn unicode_snowman() { 203 | // Already decoded Unicode snowman and quotes in selector 204 | let dom = DOM::new(r#"
"#); 205 | assert_eq!(dom.at(r#"[id="snow'm\"an"]"#).unwrap().text(), "☃"); 206 | assert_eq!(dom.at(r#"[id="snow'm\22 an"]"#).unwrap().text(), "☃"); 207 | assert_eq!(dom.at(r#"[id="snow\'m\000022an"]"#).unwrap().text(), "☃"); 208 | assert_eq!(dom.at("[id='snow\\'m\"an']").unwrap().text(), "☃"); 209 | assert_eq!(dom.at("[id='snow\\27m\"an']").unwrap().text(), "☃"); 210 | assert!(dom.at(r#"[id="snow'm\22an"]"#).is_none()); 211 | assert!(dom.at(r#"[id="snow'm\21 an"]"#).is_none()); 212 | assert!(dom.at(r#"[id="snow'm\000021an"]"#).is_none()); 213 | assert!(dom.at(r#"[id="snow'm\000021 an"]"#).is_none()); 214 | } 215 | 216 | #[test] 217 | fn unicode_selectors() { 218 | // Unicode and escaped selectors 219 | let html = r#"
Snowman
Heart
"#; 220 | let dom = DOM::new(html); 221 | 222 | assert_eq!(dom.at("#\\\n\\002603x").unwrap().text(), "Snowman"); 223 | assert_eq!(dom.at("#\\2603 x").unwrap().text(), "Snowman"); 224 | assert_eq!(dom.at("#\\\n\\2603 x").unwrap().text(), "Snowman"); 225 | assert_eq!(dom.at("[id=\"\\\n\\2603 x\"]").unwrap().text(), "Snowman"); 226 | assert_eq!(dom.at("[id=\"\\\n\\002603x\"]").unwrap().text(), "Snowman"); 227 | assert_eq!(dom.at("[id=\"\\\\2603 x\"]").unwrap().text(), "Snowman"); 228 | assert_eq!(dom.at("html #\\\n\\002603x").unwrap().text(), "Snowman"); 229 | assert_eq!(dom.at("html #\\2603 x").unwrap().text(), "Snowman"); 230 | assert_eq!(dom.at("html #\\\n\\2603 x").unwrap().text(), "Snowman"); 231 | assert_eq!(dom.at("html [id=\"\\\n\\2603 x\"]").unwrap().text(), "Snowman"); 232 | assert_eq!(dom.at("html [id=\"\\\n\\002603x\"]").unwrap().text(), "Snowman"); 233 | assert_eq!(dom.at("html [id=\"\\\\2603 x\"]").unwrap().text(), "Snowman"); 234 | assert_eq!(dom.at("#☃x").unwrap().text(), "Snowman"); 235 | assert_eq!(dom.at("html div#☃x").unwrap().text(), "Snowman"); 236 | assert_eq!(dom.at("[id^=\"☃\"]").unwrap().text(), "Snowman"); 237 | assert_eq!(dom.at("div[id^=\"☃\"]").unwrap().text(), "Snowman"); 238 | assert_eq!(dom.at("html div[id^=\"☃\"]").unwrap().text(), "Snowman"); 239 | assert_eq!(dom.at("html > div[id^=\"☃\"]").unwrap().text(), "Snowman"); 240 | assert_eq!(dom.at("[id^=☃]").unwrap().text(), "Snowman"); 241 | assert_eq!(dom.at("div[id^=☃]").unwrap().text(), "Snowman"); 242 | assert_eq!(dom.at("html div[id^=☃]").unwrap().text(), "Snowman"); 243 | assert_eq!(dom.at("html > div[id^=☃]").unwrap().text(), "Snowman"); 244 | assert_eq!(dom.at(".\\\n\\002665").unwrap().text(), "Heart"); 245 | assert_eq!(dom.at(".\\2665").unwrap().text(), "Heart"); 246 | assert_eq!(dom.at("html .\\\n\\002665").unwrap().text(), "Heart"); 247 | assert_eq!(dom.at("html .\\2665").unwrap().text(), "Heart"); 248 | assert_eq!(dom.at("html [class$=\"\\\n\\002665\"]").unwrap().text(), 
"Heart"); 249 | assert_eq!(dom.at("html [class$=\"\\2665\"]").unwrap().text(), "Heart"); 250 | assert_eq!(dom.at("[class$=\"\\\n\\002665\"]").unwrap().text(), "Heart"); 251 | assert_eq!(dom.at("[class$=\"\\2665\"]").unwrap().text(), "Heart"); 252 | assert_eq!(dom.at(".x").unwrap().text(), "Heart"); 253 | assert_eq!(dom.at("html .x").unwrap().text(), "Heart"); 254 | assert_eq!(dom.at(".♥").unwrap().text(), "Heart"); 255 | assert_eq!(dom.at("html .♥").unwrap().text(), "Heart"); 256 | assert_eq!(dom.at("div.♥").unwrap().text(), "Heart"); 257 | assert_eq!(dom.at("html div.♥").unwrap().text(), "Heart"); 258 | assert_eq!(dom.at("[class$=\"♥\"]").unwrap().text(), "Heart"); 259 | assert_eq!(dom.at("div[class$=\"♥\"]").unwrap().text(), "Heart"); 260 | assert_eq!(dom.at("html div[class$=\"♥\"]").unwrap().text(), "Heart"); 261 | assert_eq!(dom.at("html > div[class$=\"♥\"]").unwrap().text(), "Heart"); 262 | assert_eq!(dom.at("[class$=♥]").unwrap().text(), "Heart"); 263 | assert_eq!(dom.at("div[class$=♥]").unwrap().text(), "Heart"); 264 | assert_eq!(dom.at("html div[class$=♥]").unwrap().text(), "Heart"); 265 | assert_eq!(dom.at("html > div[class$=♥]").unwrap().text(), "Heart"); 266 | assert_eq!(dom.at("[class~=\"♥\"]").unwrap().text(), "Heart"); 267 | assert_eq!(dom.at("div[class~=\"♥\"]").unwrap().text(), "Heart"); 268 | assert_eq!(dom.at("html div[class~=\"♥\"]").unwrap().text(), "Heart"); 269 | assert_eq!(dom.at("html > div[class~=\"♥\"]").unwrap().text(), "Heart"); 270 | assert_eq!(dom.at("[class~=♥]").unwrap().text(), "Heart"); 271 | assert_eq!(dom.at("div[class~=♥]").unwrap().text(), "Heart"); 272 | assert_eq!(dom.at("html div[class~=♥]").unwrap().text(), "Heart"); 273 | assert_eq!(dom.at("html > div[class~=♥]").unwrap().text(), "Heart"); 274 | assert_eq!(dom.at("[class~=\"x\"]").unwrap().text(), "Heart"); 275 | assert_eq!(dom.at("div[class~=\"x\"]").unwrap().text(), "Heart"); 276 | assert_eq!(dom.at("html div[class~=\"x\"]").unwrap().text(), "Heart"); 277 | 
assert_eq!(dom.at("html > div[class~=\"x\"]").unwrap().text(), "Heart"); 278 | assert_eq!(dom.at("[class~=x]").unwrap().text(), "Heart"); 279 | assert_eq!(dom.at("div[class~=x]").unwrap().text(), "Heart"); 280 | assert_eq!(dom.at("html div[class~=x]").unwrap().text(), "Heart"); 281 | assert_eq!(dom.at("html > div[class~=x]").unwrap().text(), "Heart"); 282 | assert_eq!(dom.at("html").unwrap().to_string(), html); 283 | assert_eq!(dom.at("#☃x").unwrap().parent().unwrap().to_string(), html); 284 | assert_eq!(dom.to_string(), html); 285 | assert_eq!(dom.content(), html); 286 | 287 | let dom = DOM::new(r#"☃☃"#); 288 | assert_eq!(dom.at("title").unwrap().text(), "♥"); 289 | assert_eq!(dom.at("*").unwrap().text(), "♥"); 290 | assert_eq!(dom.at(".test").unwrap().text(), "♥"); 291 | } 292 | 293 | #[test] 294 | fn attrs_on_multiple_lines() { 295 | // Attributes on multiple lines 296 | let dom = DOM::new("
"); 297 | assert_eq!(dom.at("div.x").unwrap().attr("test").unwrap(), "23"); 298 | assert_eq!(dom.at("[foo=\"bar\"]").unwrap().attr("class").unwrap(), "x"); 299 | } 300 | 301 | #[test] 302 | fn markup_chars_in_attr_vals() { 303 | // Markup characters in attribute values 304 | let dom = DOM::new("
\" \n test='='>Test
"); 305 | assert_eq!(dom.at("div[id=\"\"]").unwrap().attrs().get("test").unwrap().clone(), Some("=".to_owned())); 306 | assert_eq!(dom.at("[id=\"\"]").unwrap().text(), "Test"); 307 | assert_eq!(dom.at("[id=\"><\"]").unwrap().attrs().get("id").unwrap().clone(), Some("><".to_owned())); 308 | } 309 | 310 | #[test] 311 | fn empty_attrs() { 312 | // Empty attributes 313 | let dom = DOM::new("
"); 314 | assert_eq!(dom.at("div").unwrap().attr("test").unwrap(), ""); 315 | assert_eq!(dom.at("div").unwrap().attr("test2").unwrap(), ""); 316 | assert_eq!(dom.at("[test]").unwrap().tag().unwrap(), "div"); 317 | assert_eq!(dom.at("[test=\"\"]").unwrap().tag().unwrap(), "div"); 318 | assert_eq!(dom.at("[test2]").unwrap().tag().unwrap(), "div"); 319 | assert_eq!(dom.at("[test2=\"\"]").unwrap().tag().unwrap(), "div"); 320 | assert!(dom.at("[test3]").is_none()); 321 | assert!(dom.at("[test3=\"\"]").is_none()); 322 | } 323 | 324 | #[test] 325 | fn multi_line_attr() { 326 | // Multi-line attribute 327 | let dom = DOM::new("
"); 328 | assert_eq!(dom.at("div").unwrap().attr("class").unwrap(), "line1\nline2"); 329 | assert_eq!(dom.at(".line1").unwrap().tag().unwrap(), "div"); 330 | assert_eq!(dom.at(".line2").unwrap().tag().unwrap(), "div"); 331 | assert!(dom.at(".line3").is_none()); 332 | } 333 | 334 | #[test] 335 | fn entities_in_attrs() { 336 | assert_eq!(DOM::new("").at("a").unwrap().attr("href").unwrap(), "/?foo<=bar"); 337 | assert_eq!(DOM::new("").at("a").unwrap().attr("href").unwrap(), "/?f<oo=bar"); 338 | assert_eq!(DOM::new("").at("a").unwrap().attr("href").unwrap(), "/?f<-oo=bar"); 339 | assert_eq!(DOM::new("").at("a").unwrap().attr("href").unwrap(), "/?foo=<"); 340 | assert_eq!(DOM::new("").at("a").unwrap().attr("href").unwrap(), "/?fcontent
"); 347 | assert!(dom.at("div").is_some()); 348 | assert_eq!(dom.at("div").unwrap().text(), "content"); 349 | } 350 | 351 | #[test] 352 | fn class_with_hyphen() { 353 | // Class with hyphen 354 | let dom = DOM::new(r#"
A
A1
"#); 355 | assert_eq!(dom.find(".a").iter().map(|x| x.text()).collect::>(), ["A"]); // found first element only 356 | assert_eq!(dom.find(".a-1").iter().map(|x| x.text()).collect::>(), ["A1"]); // found last element only 357 | } 358 | 359 | #[test] 360 | fn empty_tags() { 361 | // Empty tags 362 | let dom = DOM::new("



"); 363 | assert_eq!(dom.to_string(), "



"); 364 | } 365 | 366 | #[test] 367 | fn inner_html() { 368 | let dom = DOM::new("xxxxxxx"); 369 | assert_eq!(dom.at("a").unwrap().content(), "xxxxxxx"); 370 | assert_eq!(dom.content(), "xxxxxxx"); 371 | } 372 | 373 | #[test] 374 | fn multiple_selectors() { 375 | // Multiple selectors 376 | let dom = DOM::new("
A
B
C

D

"); 377 | assert_eq!(dom.find("p, div").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D"]); 378 | assert_eq!(dom.find("#a, #c").iter().map(|x| x.text()).collect::>(), ["A", "C"]); 379 | assert_eq!(dom.find("div#a, div#b").iter().map(|x| x.text()).collect::>(), ["A", "B"]); 380 | assert_eq!(dom.find("div[id=\"a\"], div[id=\"c\"]").iter().map(|x| x.text()).collect::>(), ["A", "C"]); 381 | 382 | let dom2 = DOM::new("
A
B
C
"); 383 | assert_eq!(dom2.find("#☃, #♥x").iter().map(|x| x.text()).collect::>(), ["A", "C"]); 384 | assert_eq!(dom2.find("div#☃, div#b").iter().map(|x| x.text()).collect::>(), ["A", "B"]); 385 | assert_eq!(dom2.find("div[id=\"☃\"], div[id=\"♥x\"]").iter().map(|x| x.text()).collect::>(), ["A", "C"]); 386 | } 387 | 388 | #[test] 389 | fn multiple_attributes() { 390 | // Multiple attributes 391 | let dom = DOM::new(r#" 392 |
A
393 |
B
394 |
C
395 |
D
396 | "#); 397 | 398 | assert_eq!(dom.find("div[foo=\"bar\"][bar=\"baz\"]").iter().map(|x| x.text()).collect::>(), ["A", "C"]); 399 | assert_eq!(dom.find("div[foo^=\"b\"][foo$=\"r\"]").iter().map(|x| x.text()).collect::>(), ["A", "B", "C"]); 400 | assert!(dom.at("[foo=\"bar\"]").unwrap().prev().is_none()); 401 | assert_eq!(dom.at("[foo=\"bar\"]").unwrap().next().unwrap().text(), "B"); 402 | assert_eq!(dom.at("[foo=\"bar\"]").unwrap().next().unwrap().prev().unwrap().text(), "A"); 403 | assert!(dom.at("[foo=\"bar\"]").unwrap().next().unwrap().next().unwrap().next().unwrap().next().is_none()); 404 | } 405 | 406 | #[test] 407 | fn pseudo_classes() { 408 | // Pseudo-classes 409 | let dom = DOM::new(r#" 410 |
411 | 412 | 413 | 423 | 424 | 425 |

test 123

426 |

427 |
428 | "#); 429 | assert_eq!(dom.find(":root").len(), 1); 430 | assert_eq!(dom.find(":root").get(0).unwrap().tag(), Some("form")); 431 | assert_eq!(dom.find("*:root").get(0).unwrap().tag(), Some("form")); 432 | assert_eq!(dom.find("form:root").get(0).unwrap().tag(), Some("form")); 433 | assert_eq!(dom.find(":checked").len(), 4); 434 | assert_eq!(dom.find(":checked").get(0).unwrap().attr("name").unwrap(), "groovy"); 435 | assert_eq!(dom.find("option:checked").get(0).unwrap().attr("value").unwrap(), "e"); 436 | assert_eq!(dom.find(":checked").get(1).unwrap().text(), "E"); 437 | assert_eq!(dom.find("*:checked").get(1).unwrap().text(), "E"); 438 | assert_eq!(dom.find(":checked").get(2).unwrap().text(), "H"); 439 | assert_eq!(dom.find(":checked").get(3).unwrap().attr("name").unwrap(), "I"); 440 | assert_eq!(dom.find("option[selected]").len(), 2); 441 | assert_eq!(dom.find("option[selected]").get(0).unwrap().attr("value").unwrap(), "e"); 442 | assert_eq!(dom.find("option[selected]").get(1).unwrap().text(), "H"); 443 | assert_eq!(dom.find(":checked[value=\"e\"]").get(0).unwrap().text(), "E"); 444 | assert_eq!(dom.find("*:checked[value=\"e\"]").get(0).unwrap().text(), "E"); 445 | assert_eq!(dom.find("option:checked[value=\"e\"]").get(0).unwrap().text(), "E"); 446 | assert_eq!(dom.at("optgroup option:checked[value=\"e\"]").unwrap().text(), "E"); 447 | assert_eq!(dom.at("select option:checked[value=\"e\"]").unwrap().text(), "E"); 448 | assert_eq!(dom.at("select :checked[value=\"e\"]").unwrap().text(), "E"); 449 | assert_eq!(dom.at("optgroup > :checked[value=\"e\"]").unwrap().text(), "E"); 450 | assert_eq!(dom.at("select *:checked[value=\"e\"]").unwrap().text(), "E"); 451 | assert_eq!(dom.at("optgroup > *:checked[value=\"e\"]").unwrap().text(), "E"); 452 | assert_eq!(dom.find(":checked[value=\"e\"]").len(), 1); 453 | assert_eq!(dom.find(":empty").get(0).unwrap().attr("name").unwrap(), "user"); 454 | assert_eq!(dom.find("input:empty").get(0).unwrap().attr("name").unwrap(), 
"user"); 455 | assert_eq!(dom.at(":empty[type^=\"ch\"]").unwrap().attr("name").unwrap(), "groovy"); 456 | assert_eq!(dom.at("p").unwrap().attr("id").unwrap(), "content"); 457 | assert_eq!(dom.at("p:empty").unwrap().attr("id").unwrap(), "no_content"); 458 | 459 | // More pseudo-classes 460 | let dom = DOM::new(" 461 |
    462 |
  • A
  • 463 |
  • B
  • 464 |
  • C
  • 465 |
  • D
  • 466 |
  • E
  • 467 |
  • F
  • 468 |
  • G
  • 469 |
  • H
  • 470 |
471 | "); 472 | assert_eq!(dom.find("li:nth-child(odd)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]); 473 | assert_eq!(dom.find("li:NTH-CHILD(ODD)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]); 474 | assert_eq!(dom.find("li:nth-last-child(odd)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]); 475 | assert_eq!(dom.find(":nth-child(odd)").get(0).unwrap().tag().unwrap(), "ul"); 476 | assert_eq!(dom.find(":nth-child(odd)").get(1).unwrap().text(), "A"); 477 | assert_eq!(dom.find(":nth-child(1)").get(0).unwrap().tag().unwrap(), "ul"); 478 | assert_eq!(dom.find(":nth-child(1)").get(1).unwrap().text(), "A"); 479 | assert_eq!(dom.find(":nth-last-child(odd)").get(0).unwrap().tag().unwrap(), "ul"); 480 | assert_eq!(dom.find(":nth-last-child(odd)").last().unwrap().text(), "H"); 481 | assert_eq!(dom.find(":nth-last-child(1)").get(0).unwrap().tag().unwrap(), "ul"); 482 | assert_eq!(dom.find(":nth-last-child(1)").get(1).unwrap().text(), "H"); 483 | assert_eq!(dom.find("li:nth-child(2n+1)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]); 484 | assert_eq!(dom.find("li:nth-child(2n + 1)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]); 485 | assert_eq!(dom.find("li:nth-last-child(2n+1)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]); 486 | assert_eq!(dom.find("li:nth-child(even)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]); 487 | assert_eq!(dom.find("li:NTH-CHILD(EVEN)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]); 488 | assert_eq!(dom.find("li:nth-last-child( even )").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]); 489 | assert_eq!(dom.find("li:nth-child(2n+2)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]); 490 | assert_eq!(dom.find("li:nTh-chILd(2N+2)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]); 491 | assert_eq!(dom.find("li:nth-child( 2n + 2 )").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]); 
492 | assert_eq!(dom.find("li:nth-last-child(2n+2)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]); 493 | assert_eq!(dom.find("li:nth-child(4n+1)").iter().map(|x| x.text()).collect::>(), ["A", "E"]); 494 | assert_eq!(dom.find("li:nth-last-child(4n+1)").iter().map(|x| x.text()).collect::>(), ["D", "H"]); 495 | assert_eq!(dom.find("li:nth-child(4n+4)").iter().map(|x| x.text()).collect::>(), ["D", "H"]); 496 | assert_eq!(dom.find("li:nth-last-child(4n+4)").iter().map(|x| x.text()).collect::>(), ["A", "E"]); 497 | assert_eq!(dom.find("li:nth-child(4n)").iter().map(|x| x.text()).collect::>(), ["D", "H"]); 498 | assert_eq!(dom.find("li:nth-child( 4n )").iter().map(|x| x.text()).collect::>(), ["D", "H"]); 499 | assert_eq!(dom.find("li:nth-last-child(4n)").iter().map(|x| x.text()).collect::>(), ["A", "E"]); 500 | assert_eq!(dom.find("li:nth-child(5n-2)").iter().map(|x| x.text()).collect::>(), ["C", "H"]); 501 | assert_eq!(dom.find("li:nth-child( 5n - 2 )").iter().map(|x| x.text()).collect::>(), ["C", "H"]); 502 | assert_eq!(dom.find("li:nth-last-child(5n-2)").iter().map(|x| x.text()).collect::>(), ["A", "F"]); 503 | assert_eq!(dom.find("li:nth-child(-n+3)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C"]); 504 | assert_eq!(dom.find("li:nth-child( -n + 3 )").iter().map(|x| x.text()).collect::>(), ["A", "B", "C"]); 505 | assert_eq!(dom.find("li:nth-last-child(-n+3)").iter().map(|x| x.text()).collect::>(), ["F", "G", "H"]); 506 | assert_eq!(dom.find("li:nth-child(-1n+3)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C"]); 507 | assert_eq!(dom.find("li:nth-last-child(-1n+3)").iter().map(|x| x.text()).collect::>(), ["F", "G", "H"]); 508 | assert_eq!(dom.find("li:nth-child(3n)").iter().map(|x| x.text()).collect::>(), ["C", "F"]); 509 | assert_eq!(dom.find("li:nth-last-child(3n)").iter().map(|x| x.text()).collect::>(), ["C", "F"]); 510 | assert_eq!(dom.find("li:NTH-LAST-CHILD(3N)").iter().map(|x| x.text()).collect::>(), ["C", "F"]); 511 | 
assert_eq!(dom.find("li:Nth-Last-Child(3N)").iter().map(|x| x.text()).collect::>(), ["C", "F"]); 512 | assert_eq!(dom.find("li:nth-child( 3 )").iter().map(|x| x.text()).collect::>(), ["C"]); 513 | assert_eq!(dom.find("li:nth-last-child( +3 )").iter().map(|x| x.text()).collect::>(), ["F"]); 514 | assert_eq!(dom.find("li:nth-child(1n+0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]); 515 | assert_eq!(dom.find("li:nth-child(1n-0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]); 516 | assert_eq!(dom.find("li:nth-child(n+0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]); 517 | assert_eq!(dom.find("li:nth-child(n)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]); 518 | assert_eq!(dom.find("li:nth-child(n+0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]); 519 | assert_eq!(dom.find("li:NTH-CHILD(N+0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]); 520 | assert_eq!(dom.find("li:Nth-Child(N+0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]); 521 | assert_eq!(dom.find("li:nth-child(n)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]); 522 | assert_eq!(dom.find("li:nth-child(0n+1)").iter().map(|x| x.text()).collect::>(), ["A"]); 523 | assert_eq!(dom.find("li:nth-child(0n+0)").len(), 0); 524 | assert_eq!(dom.find("li:nth-child(0)").len(), 0); 525 | assert_eq!(dom.find("li:nth-child()").len(), 0); 526 | assert_eq!(dom.find("li:nth-child(whatever)").len(), 0); 527 | assert_eq!(dom.find("li:whatever(whatever)").len(), 0); 528 | 529 | // Even more pseudo-classes 530 | let dom = DOM::new(r#" 531 |
    532 |
  • A
  • 533 |

    B

    534 |
  • C
  • 535 |

    D

    536 |
  • E
  • 537 |
  • F
  • 538 |

    G

    539 |
  • H
  • 540 |
  • I
  • 541 |
542 |
543 |
J
544 |
545 |
546 | Mojo! 547 |
K
548 | Mojolicious! 549 |
550 | "#); 551 | assert_eq!(dom.find("ul :nth-child(odd)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G", "I"]); 552 | assert_eq!(dom.find("li:nth-of-type(odd)").iter().map(|x| x.text()).collect::>(), ["A", "E", "H"]); 553 | assert_eq!(dom.find("li:nth-last-of-type( odd )").iter().map(|x| x.text()).collect::>(), ["C", "F", "I"]); 554 | assert_eq!(dom.find("p:nth-of-type(odd)").iter().map(|x| x.text()).collect::>(), ["B", "G"]); 555 | assert_eq!(dom.find("p:nth-last-of-type(odd)").iter().map(|x| x.text()).collect::>(), ["B", "G"]); 556 | assert_eq!(dom.find("ul :nth-child(1)").iter().map(|x| x.text()).collect::>(), ["A"]); 557 | assert_eq!(dom.find("ul :first-child").iter().map(|x| x.text()).collect::>(), ["A"]); 558 | assert_eq!(dom.find("p:nth-of-type(1)").iter().map(|x| x.text()).collect::>(), ["B"]); 559 | assert_eq!(dom.find("p:first-of-type").iter().map(|x| x.text()).collect::>(), ["B"]); 560 | assert_eq!(dom.find("li:nth-of-type(1)").iter().map(|x| x.text()).collect::>(), ["A"]); 561 | assert_eq!(dom.find("li:first-of-type").iter().map(|x| x.text()).collect::>(), ["A"]); 562 | assert_eq!(dom.find("ul :nth-last-child(-n+1)").iter().map(|x| x.text()).collect::>(), ["I"]); 563 | assert_eq!(dom.find("ul :last-child").iter().map(|x| x.text()).collect::>(), ["I"]); 564 | assert_eq!(dom.find("p:nth-last-of-type(-n+1)").iter().map(|x| x.text()).collect::>(), ["G"]); 565 | assert_eq!(dom.find("p:last-of-type").iter().map(|x| x.text()).collect::>(), ["G"]); 566 | assert_eq!(dom.find("li:nth-last-of-type(-n+1)").iter().map(|x| x.text()).collect::>(), ["I"]); 567 | assert_eq!(dom.find("li:last-of-type").iter().map(|x| x.text()).collect::>(), ["I"]); 568 | assert_eq!(dom.find("ul :nth-child(-n+3):not(li)").iter().map(|x| x.text()).collect::>(), ["B"]); 569 | assert_eq!(dom.find("ul :nth-child(-n+3):NOT(li)").iter().map(|x| x.text()).collect::>(), ["B"]); 570 | assert_eq!(dom.find("ul :nth-child(-n+3):not(:first-child)").iter().map(|x| x.text()).collect::>(), 
["B", "C"]); 571 | assert_eq!(dom.find("ul :nth-child(-n+3):not(.♥)").iter().map(|x| x.text()).collect::>(), ["A", "B"]); 572 | assert_eq!(dom.find("ul :nth-child(-n+3):not([class$=\"♥\"])").iter().map(|x| x.text()).collect::>(), ["A", "B"]); 573 | assert_eq!(dom.find("ul :nth-child(-n+3):not(li[class$=\"♥\"])").iter().map(|x| x.text()).collect::>(), ["A", "B"]); 574 | assert_eq!(dom.find("ul :nth-child(-n+3):not([class$=\"♥\"][class^=\"test\"])").iter().map(|x| x.text()).collect::>(), ["A", "B"]); 575 | assert_eq!(dom.find("ul :nth-child(-n+3):not(*[class$=\"♥\"])").iter().map(|x| x.text()).collect::>(), ["A", "B"]); 576 | assert_eq!(dom.find("ul :nth-child(-n+3):not(:nth-child(-n+2))").iter().map(|x| x.text()).collect::>(), ["C"]); 577 | assert_eq!(dom.find("ul :nth-child(-n+3):not(:nth-child(1)):not(:nth-child(2))").iter().map(|x| x.text()).collect::>(), ["C"]); 578 | assert_eq!(dom.find(":only-child").iter().map(|x| x.text()).collect::>(), ["J"]); 579 | assert_eq!(dom.find("div :only-of-type").iter().map(|x| x.text()).collect::>(), ["J", "K"]); 580 | assert_eq!(dom.find("div:only-child").iter().map(|x| x.text()).collect::>(), ["J"]); 581 | assert_eq!(dom.find("div div:only-of-type").iter().map(|x| x.text()).collect::>(), ["J", "K"]); 582 | } 583 | --------------------------------------------------------------------------------