├── .gitignore
├── .travis.yml
├── .travis
└── docs.sh
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── scripts
└── update_entities.pl
├── src
├── dom
│ ├── css.rs
│ ├── html.rs
│ └── mod.rs
├── lib.rs
└── util.rs
└── tests
└── dom.rs
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /Cargo.lock
3 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: trusty
2 | sudo: required
3 |
4 | language: rust
5 | matrix:
6 | fast_finish: true
7 | include:
8 | - rust: nightly
9 | - rust: stable
10 | env: BUILD_DOCS=1
11 |
12 | # Dependencies of kcov
13 | addons:
14 | apt:
15 | update: true
16 | packages:
17 | - libcurl4-openssl-dev
18 | - libelf-dev
19 | - libdw-dev
20 | - binutils-dev
21 | - libiberty-dev
22 |
23 | after_success:
24 | - |
25 | [ "$BUILD_DOCS" = "1" ] &&
26 | LOCAL="~/.local" && export PATH=$LOCAL/bin:$PATH &&
27 | wget https://github.com/SimonKagstrom/kcov/archive/master.tar.gz &&
28 | tar xzf master.tar.gz && mkdir kcov-master/build && cd kcov-master/build &&
29 | cmake -DCMAKE_INSTALL_PREFIX:PATH=$LOCAL .. && make && make install && cd ../.. &&
30 | cargo clean &&
31 | RUSTFLAGS="-C link-dead-code" cargo test --no-run &&
32 | for file in target/debug/*; do
33 | if [[ -f $file && -x $file ]]; then
34 | mkdir -p "target/cov/$(basename $file)";
35 | kcov --exclude-pattern=/.cargo,/usr/lib --verify "target/cov/$(basename $file)" "$file";
36 | fi;
37 | done &&
38 | kcov --coveralls-id=$TRAVIS_JOB_ID --merge target/cov target/cov/*
39 | - |
40 | [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$BUILD_DOCS" = "1" ] &&
41 | { [ "$TRAVIS_TAG" != "" ] || [ "$TRAVIS_BRANCH" == "master" ]; } &&
42 | ./.travis/docs.sh
43 |
44 | env:
45 | global:
46 | - RUST_BACKTRACE=1
47 |
48 | notifications:
49 | email:
50 | on_success: never
51 |
--------------------------------------------------------------------------------
/.travis/docs.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Builds the crate documentation with `cargo doc` and pushes it to the
# `gh-pages` branch of the repository.
#
# Environment read by this script: GH_TOKEN, GH_USER_NAME, GH_USER_EMAIL,
# TRAVIS_REPO_SLUG, TRAVIS_TAG, TRAVIS_COMMIT.

set -o errexit

shopt -s globstar

cargo doc --no-deps

# Output is discarded; presumably so the token embedded in the URL cannot
# end up in the build log.
git clone --depth 1 --branch gh-pages "https://${GH_TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git" deploy_docs > /dev/null 2>&1
cd deploy_docs

git config user.name "$GH_USER_NAME"
git config user.email "$GH_USER_EMAIL"

# Only untagged builds refresh the "master" copy of the docs.
if [ "$TRAVIS_TAG" = "" ]; then
    rm -rf master
    mv ../target/doc ./master
    # NOTE(review): this writes an empty index page; a redirect to the crate
    # docs may have been intended here -- confirm.
    echo "" > ./master/index.html
fi

git add -A .

# `git commit` exits non-zero when nothing is staged, which would abort the
# script under errexit (e.g. on tagged builds, where nothing is refreshed).
if git diff --cached --quiet; then
    echo
    echo "No documentation changes to publish."
    echo
    exit 0
fi

git commit -m "rebuild pages at ${TRAVIS_COMMIT}"

echo
echo "Pushing docs..."
git push --quiet origin gh-pages > /dev/null 2>&1
echo
echo "Docs published."
echo
30 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "victoria-dom"
3 | version = "0.1.2" # remember to update html_root_url
4 | authors = ["Aleksandr Orlenko "]
5 | license = "MIT/Apache-2.0"
6 | readme = "README.md"
7 | repository = "https://github.com/khvzak/victoria-dom"
8 | documentation = "https://docs.rs/victoria-dom"
9 | homepage = "https://github.com/khvzak/victoria-dom"
10 | description = """
11 | Minimalistic HTML parser with CSS selectors
12 | """
13 | categories = ["parsing"]
14 | keywords = ["html", "css", "parser"]
15 |
16 | [dependencies]
17 | regex = "1.0"
18 | lazy_static = "1.0"
19 | maplit = "1.0"
20 |
--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Aleksandr Orlenko
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # victoria-dom
2 | Minimalistic HTML parser with CSS selectors
3 |
4 | [crates.io](https://crates.io/crates/victoria-dom)
5 | [Build Status](https://travis-ci.org/khvzak/victoria-dom)
6 | [Coverage Status](https://coveralls.io/github/khvzak/victoria-dom?branch=master)
7 | [docs.rs](https://docs.rs/victoria-dom)
8 | [Docs (master)](https://khvzak.github.io/victoria-dom/)
9 |
10 | The project has been inspired by [Mojo::DOM](https://metacpan.org/pod/Mojo::DOM).
11 |
12 | ### Installing
13 | Add the following lines to your `Cargo.toml` file:
14 |
15 | ```toml
16 | [dependencies]
17 | victoria-dom = "0.1"
18 | ```
19 |
20 | and this to your crate root:
21 | ```rust
22 | extern crate victoria_dom;
23 | ```
24 |
25 | ### Examples
26 | ```rust
27 | extern crate victoria_dom;
28 |
29 | use victoria_dom::DOM;
30 |
31 | fn main() {
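32 | let html = r#"<html><div id="main">Hello, <a href="http://rust-lang.org" alt="The Rust Programing Language">Rust</a></div></html>"#;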
33 | let dom = DOM::new(html);
34 |
35 | assert_eq!(dom.at("html").unwrap().text_all(), "Hello, Rust");
36 | assert_eq!(dom.at("div#main > a").unwrap().attr("alt").unwrap(), "The Rust Programing Language");
37 | }
38 | ```
39 |
40 | ### Documentation
41 | https://docs.rs/victoria-dom
42 |
--------------------------------------------------------------------------------
/scripts/update_entities.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl

# Regenerates the ENTITIES table in src/util.rs from the named character
# references table of the HTML Living Standard.

use Mojo::Base -strict;
use Mojo::UserAgent;
use Cwd qw(abs_path);
use File::Basename qw(dirname);
use File::Spec qw();
use Path::Tiny qw(path);

my @data;

# Extract named character references from HTML Living Standard
my $res = Mojo::UserAgent->new->get('https://html.spec.whatwg.org')->result;
die 'Failed to fetch the HTML Living Standard: ' . $res->code . ' ' . $res->message . "\n"
    unless $res->is_success;

for my $row ($res->dom('#named-character-references-table tbody > tr')->each) {
    my $entity = $row->at('td > code')->text;
    my $codepoints = $row->children('td')->[1]->text;

    # One or two "U+XXXX" code points per entity
    if ($codepoints =~ /^\s*U\+(\S+)(?:\s+U\+(\S+))?/) {
        push @data, [$entity, defined($2) ? "\\u{$1}\\u{$2}" : "\\u{$1}"];
    }
}

# Never replace the existing table with an empty one (e.g. after a change of
# the page layout).
die "No named character references found, src/util.rs left untouched\n" unless @data;

my $util_rs_file = File::Spec->catfile(dirname(abs_path($0)), '..', 'src', 'util.rs');

my $util_rs_data = path($util_rs_file)->slurp_utf8;

my $entities = join(",\n", map { ' "'.$_->[0].'" => "'.$_->[1].'"' } @data);

# NOTE(review): the whitespace inside this pattern has to match the layout of
# src/util.rs exactly -- confirm against that file.
my $replaced = $util_rs_data =~ s/(?<= static ref ENTITIES: HashMap<&'static str, &'static str> = hashmap!\[\n).+(?= \];)/$entities,\n/s;
die "ENTITIES table not found in $util_rs_file\n" unless $replaced;

path($util_rs_file)->spew_utf8($util_rs_data);
31 |
--------------------------------------------------------------------------------
/src/dom/css.rs:
--------------------------------------------------------------------------------
1 | use std::rc::Rc;
2 | use std::{char, u32};
3 |
4 | use regex::{self, Regex, Captures};
5 |
6 | use dom::html::{TreeNode, NodeElem};
7 |
8 | lazy_static! {
9 | static ref ESCAPE_RE_STR: String = r"\\[^0-9a-fA-F]|\\[0-9a-fA-F]{1,6}".to_owned();
10 |
11 | static ref ATTR_RE_STR: String = String::new() +
12 | r"\[" +
13 | r"((?:" + &*ESCAPE_RE_STR + r"|[\w-])+)" + // Key
14 | r"(?:" +
15 | r"(\W)?=" + // Operator
16 | r#"(?:"((?:\\"|[^"])*)"|'((?:\\'|[^'])*)'|([^\]]+?))"# + // Value
17 | r"(?:\s+(i))?" + // Case-sensitivity
18 | r")?" +
19 | r"\]";
20 | }
21 |
22 | #[derive(Debug)]
23 | pub enum SelectorItem {
24 | Combinator { op: String },
25 | Conditions { items: Vec },
26 | }
27 |
28 | #[derive(Debug)]
29 | pub enum ConditionItem {
30 | Tag { name: Regex },
31 | Attribute { name: Regex, value: Option },
32 | PseudoClass {
33 | class: String,
34 | group: Option,
35 | equation: Option<(i32, i32)>,
36 | },
37 | }
38 |
39 | pub type Selectors = Vec>;
40 | pub type GroupOfSelectors = Vec>;
41 |
42 | pub fn matches(tree: &Rc, css: &str) -> bool {
43 | if css.is_empty() { return true; }
44 | match tree.elem {
45 | NodeElem::Tag { .. } => _match(&parse(css), tree, tree),
46 | _ => false
47 | }
48 | }
49 |
50 | pub fn select(tree: &Rc, css: &str, limit: usize) -> Vec> {
51 | let group = parse(css);
52 |
53 | let mut result = Vec::new();
54 |
55 | let mut queue = tree.get_childs().unwrap();
56 | while queue.len() > 0 {
57 | let current = queue.remove(0);
58 | if let NodeElem::Tag { .. } = current.elem {} else { continue; }
59 |
60 | queue = { let mut x = current.get_childs().unwrap(); x.append(&mut queue); x };
61 | if (group.is_empty() && css == "*") || _match(&group, ¤t, tree) { result.push(current); }
62 |
63 | if limit > 0 && result.len() == limit { break; }
64 | }
65 |
66 | result
67 | }
68 |
69 | pub fn select_one(tree: &Rc, css: &str) -> Option> {
70 | select(tree, css, 1).pop()
71 | }
72 |
73 | fn _match(group: &GroupOfSelectors, current: &Rc, tree: &Rc) -> bool {
74 | for _selectors in group {
75 | let selectors = Rc::new(_selectors.iter().rev().cloned().collect::());
76 | if _combinator(&selectors, current, tree, 0) { return true; }
77 | }
78 | return false;
79 | }
80 |
81 | fn _combinator(selectors: &Rc, current: &Rc, tree: &Rc, mut idx: usize) -> bool {
82 | if idx >= selectors.len() { return false; }
83 |
84 | match *selectors[idx] {
85 | SelectorItem::Conditions { ref items } => {
86 | if !_match_selector_conditions(items, current) { return false; }
87 |
88 | idx = idx + 1;
89 | if idx >= selectors.len() { return true; }
90 | return _combinator(selectors, current, tree, idx);
91 | },
92 |
93 | SelectorItem::Combinator { ref op } => {
94 | idx = idx + 1;
95 |
96 | // ">" (parent only)
97 | if op == ">" {
98 | if current.parent.is_none() { return false; }
99 | let parent = current.get_parent().unwrap();
100 |
101 | // no suitable parent
102 | if let NodeElem::Root { .. } = parent.elem { return false; }
103 | if parent.id == tree.id { return false; }
104 |
105 | return _combinator(selectors, &parent, tree, idx);
106 | }
107 |
108 | // "~" (preceding siblings)
109 | if op == "~" {
110 | for sibling in _siblings(current, None) {
111 | if sibling.id == current.id { return false; }
112 | if _combinator(selectors, &sibling, tree, idx) { return true; }
113 | }
114 | return false;
115 | }
116 |
117 | // "+" (immediately preceding siblings)
118 | if op == "+" {
119 | let mut found = false;
120 | for sibling in _siblings(current, None) {
121 | if sibling.id == current.id { return found; }
122 | found = _combinator(selectors, &sibling, tree, idx);
123 | }
124 | return false;
125 | }
126 |
127 | // " " (ancestor)
128 | let mut parent = current.get_parent();
129 | while parent.is_some() {
130 | let current_next = parent.clone().unwrap();
131 |
132 | if let NodeElem::Root { .. } = current_next.elem { return false; }
133 | if current_next.id == tree.id { return false; }
134 |
135 | if _combinator(selectors, ¤t_next, tree, idx) { return true; }
136 |
137 | parent = current_next.get_parent();
138 | }
139 | return false;
140 | },
141 | }
142 | }
143 |
144 | fn _match_selector_conditions(conditions: &Vec, current: &Rc) -> bool {
145 | 'conditem: for ci in conditions {
146 | match ci {
147 | &ConditionItem::Tag { name: ref name_re } => {
148 | if !name_re.is_match(current.get_tag_name().unwrap()) { return false; }
149 | },
150 |
151 | &ConditionItem::Attribute { name: ref name_re, value: ref value_re } => {
152 | let attrs = current.get_tag_attrs().unwrap();
153 | let value_re = value_re.as_ref();
154 |
155 | for (name, value) in attrs.iter() {
156 | let value = value.as_ref();
157 |
158 | if name_re.is_match(name) && (value.is_none() || value_re.is_none() || value_re.unwrap().is_match(value.unwrap())) {
159 | continue 'conditem; // go to a next condition item
160 | }
161 | }
162 | return false;
163 | },
164 |
165 | &ConditionItem::PseudoClass { ref class, ref group, ref equation } => {
166 | // ":empty"
167 | if class == "empty" {
168 | let _is_empty = |x: &TreeNode| match x.elem {
169 | NodeElem::Text { ref elem_type, .. } => elem_type == "comment" || elem_type == "pi",
170 | _ => false,
171 | };
172 |
173 | let _matched = current.get_childs().unwrap().iter().filter(|&x| !_is_empty(x)).count() == 0;
174 | if _matched { continue 'conditem; }
175 | }
176 |
177 | // ":root"
178 | else if class == "root" {
179 | let parent = current.get_parent();
180 | let _matched = parent.is_some() && match parent.unwrap().elem {
181 | NodeElem::Root { .. } => true,
182 | _ => false
183 | };
184 | if _matched { continue 'conditem; }
185 | }
186 |
187 | // ":not"
188 | else if class == "not" {
189 | let _matched = !_match(&group.clone().unwrap(), current, current);
190 | if _matched { continue 'conditem; }
191 | }
192 |
193 | // ":checked"
194 | else if class == "checked" {
195 | let _matched = match current.elem {
196 | NodeElem::Tag { ref attrs, .. } => attrs.contains_key("checked") || attrs.contains_key("selected"),
197 | _ => false
198 | };
199 | if _matched { continue 'conditem; }
200 | }
201 |
202 | // ":nth-child", ":nth-last-child", ":nth-of-type" or ":nth-last-of-type"
203 | else if let Some(equation) = *equation {
204 | let mut siblings = if class.ends_with("of-type") {
205 | _siblings(current, Some(current.get_tag_name().unwrap()))
206 | } else {
207 | _siblings(current, None)
208 | };
209 |
210 | if class.starts_with("nth-last") { siblings.reverse() }
211 |
212 | for i in 0..siblings.len() {
213 | let result = equation.0 * (i as i32) + equation.1;
214 |
215 | if result < 1 { continue; }
216 | if (result - 1) as usize >= siblings.len() { break; }
217 |
218 | if siblings[(result - 1) as usize].id == current.id { continue 'conditem; }
219 | }
220 | }
221 |
222 | // ":only-child" or ":only-of-type"
223 | else if class == "only-child" || class == "only-of-type" {
224 | let siblings = if class == "only-of-type" {
225 | _siblings(current, Some(current.get_tag_name().unwrap()))
226 | } else {
227 | _siblings(current, None)
228 | };
229 | for sibling in siblings {
230 | if sibling.id != current.id { return false; }
231 | }
232 |
233 | continue 'conditem;
234 | }
235 |
236 | return false;
237 | },
238 | }
239 | }
240 |
241 | return true;
242 | }
243 |
244 | fn _siblings(current: &Rc, _name: Option<&str>) -> Vec> {
245 | let parent = current.get_parent().unwrap();
246 | let childs = parent.get_childs().unwrap();
247 |
248 | childs.iter().filter(|&x| match x.elem {
249 | NodeElem::Tag { ref name, .. } => if _name.is_some() { name == _name.unwrap() } else { true },
250 | _ => false
251 | }).cloned().collect()
252 | }
253 |
254 | fn _unescape(_val: &str) -> String {
255 | let mut val = _val.to_owned();
256 |
257 | lazy_static! {
258 | static ref _RE: Regex = Regex::new(r"\\([0-9a-fA-F]{1,6})\s?").unwrap();
259 | }
260 |
261 | // Remove escaped newlines
262 | val = val.replace("\\\n", "");
263 |
264 | // Unescape Unicode characters
265 | val = _RE.replace_all(&val, |caps: &Captures| {
266 | let hex_char = caps.get(1).unwrap().as_str();
267 | format!("{}", char::from_u32(u32::from_str_radix(hex_char, 16).unwrap()).unwrap()).to_owned()
268 | }).into_owned();
269 |
270 | // Remove backslash
271 | val = val.replace(r"\", "");
272 |
273 | val
274 | }
275 |
276 | fn _name_re(_val: &str) -> Regex {
277 | Regex::new(&(r"(?:^|:)".to_owned() + ®ex::escape(&_unescape(_val)) + "$")).unwrap()
278 | }
279 |
280 | fn _value_re(op: &str, _val: Option<&str>, insensitive: bool) -> Option {
281 | if _val.is_none() { return None };
282 | let mut value = regex::escape(&_unescape(_val.unwrap()));
283 |
284 | if insensitive {
285 | value = "(?i)".to_owned() + &value.to_owned();
286 | }
287 |
288 | Some(Regex::new(&(
289 | // "~=" (word)
290 | if op == "~" {
291 | r"(?:^|\s+)".to_owned() + &value + r"(?:\s+|$)"
292 | }
293 |
294 | // "*=" (contains)
295 | else if op == "*" {
296 | value
297 | }
298 |
299 | // "^=" (begins with)
300 | else if op == "^" {
301 | r"^".to_owned() + &value
302 | }
303 |
304 | // "$=" (ends with)
305 | else if op == "$" {
306 | value + r"$"
307 | }
308 |
309 | // Everything else
310 | else {
311 | r"^".to_owned() + &value + "$"
312 | }
313 | )).unwrap())
314 | }
315 |
316 | pub fn parse(css: &str) -> GroupOfSelectors {
317 | let mut css = css.trim();
318 |
319 | // Group separator re
320 | lazy_static! {
321 | static ref _SEPARATOR_RE: Regex = Regex::new(r"^(?s)\s*,\s*(.*)$").unwrap();
322 | }
323 |
324 | let mut group: GroupOfSelectors = Vec::new();
325 | loop {
326 | let (selectors, css_rest) = _parse_selectors(css);
327 | if !selectors.is_empty() {
328 | group.push(Rc::new(selectors));
329 | css = css_rest;
330 | } else {
331 | break;
332 | }
333 |
334 | // Separator
335 | if let Some(caps) = _SEPARATOR_RE.captures(css) {
336 | css = caps.get(1).unwrap().as_str();
337 | } else {
338 | break;
339 | }
340 | }
341 |
342 | group
343 | }
344 |
345 | fn _parse_selectors(css: &str) -> (Selectors, &str) {
346 | let mut css = css;
347 |
348 | // Selector combinator re
349 | lazy_static! {
350 | static ref _COMBINATOR_RE: Regex = Regex::new(r"^(?s)\s*([ >+~])\s*(.*)$").unwrap();
351 | }
352 |
353 | let mut selectors: Selectors = Vec::new();
354 | loop {
355 | let (conditions, css_rest) = _parse_selector_conditions(css);
356 | if !conditions.is_empty() {
357 | selectors.push(Rc::new(SelectorItem::Conditions { items: conditions }));
358 | css = css_rest;
359 | } else {
360 | break;
361 | }
362 |
363 | // Combinator
364 | if let Some(caps) = _COMBINATOR_RE.captures(css) {
365 | selectors.push(Rc::new(SelectorItem::Combinator { op: caps.get(1).unwrap().as_str().to_owned() }));
366 | css = caps.get(2).unwrap().as_str();
367 | } else {
368 | break;
369 | }
370 | }
371 |
372 | return (selectors, css);
373 | }
374 |
375 | fn _parse_selector_conditions(css: &str) -> (Vec, &str) {
376 | let mut css = css;
377 |
378 | lazy_static! {
379 | static ref _CLASS_OR_ID_RE: Regex = Regex::new(&(r"^(?s)([.#])((?:".to_owned() + &*ESCAPE_RE_STR + r"\s|\\.|[^,.#:\[ >~+])+)" + r"(.*)$")).unwrap();
380 | static ref _ATTRIBUTES_RE: Regex = Regex::new(&(r"^(?s)".to_owned() + &*ATTR_RE_STR + r"(.*)$")).unwrap();
381 | static ref _PSEUDO_CLASS_RE: Regex = Regex::new(&(r"^(?s):([\w-]+)(?:\(((?:\([^)]+\)|[^)])+)\))?".to_owned() + r"(.*)$")).unwrap();
382 | static ref _TAG_RE: Regex = Regex::new(&(r"^(?s)((?:".to_owned() + &*ESCAPE_RE_STR + r"\s|\\.|[^,.#:\[ >~+])+)" + r"(.*)$")).unwrap();
383 | }
384 |
385 | let mut conditions: Vec = Vec::new();
386 | loop {
387 | // Class or ID
388 | if let Some(caps) = _CLASS_OR_ID_RE.captures(css) {
389 | let prefix = caps.get(1).unwrap().as_str();
390 | let (name, op) = if prefix == "." { ("class", "~") } else { ("id", "") };
391 | let op_val = caps.get(2).map(|c| c.as_str());
392 | conditions.push(ConditionItem::Attribute { name: _name_re(name), value: _value_re(op, op_val, false) });
393 | css = caps.get(3).map(|c| c.as_str()).unwrap_or("");
394 | }
395 |
396 | // Attributes
397 | else if let Some(caps) = _ATTRIBUTES_RE.captures(css) {
398 | let name = caps.get(1).unwrap().as_str();
399 | let op = caps.get(2).map(|c| c.as_str()).unwrap_or("");
400 | let op_val = caps.get(3).or(caps.get(4)).or(caps.get(5)).map(|c| c.as_str());
401 | let op_insensitive = caps.get(6).is_some();
402 | conditions.push(ConditionItem::Attribute { name: _name_re(name), value: _value_re(op, op_val, op_insensitive) });
403 | css = caps.get(7).map(|c| c.as_str()).unwrap_or("");
404 | }
405 |
406 | // Pseudo-class
407 | else if let Some(caps) = _PSEUDO_CLASS_RE.captures(css) {
408 | let name = caps.get(1).unwrap().as_str().to_owned().to_lowercase();
409 | let args = caps.get(2).map(|c| c.as_str());
410 |
411 | // ":not" (contains more selectors)
412 | if name == "not" {
413 | conditions.push(ConditionItem::PseudoClass { class: name, group: args.map(parse), equation: None });
414 | }
415 | // ":nth-*" (with An+B notation)
416 | else if name.starts_with("nth-") {
417 | conditions.push(ConditionItem::PseudoClass { class: name, group: None, equation: args.map(_equation) });
418 | }
419 | // ":first-*" (rewrite to ":nth-*")
420 | else if name.starts_with("first-") {
421 | let name = "nth-".to_owned() + &name[6..];
422 | conditions.push(ConditionItem::PseudoClass { class: name, group: None, equation: Some((0, 1)) });
423 | }
424 | // ":last-*" (rewrite to ":nth-*")
425 | else if name.starts_with("last-") {
426 | let name = "nth-".to_owned() + &name;
427 | conditions.push(ConditionItem::PseudoClass { class: name, group: None, equation: Some((-1, 1)) });
428 | }
429 | else {
430 | // No args
431 | conditions.push(ConditionItem::PseudoClass { class: name, group: None, equation: None });
432 | }
433 |
434 | css = caps.get(3).map(|c| c.as_str()).unwrap_or("");
435 | }
436 |
437 | // Tag
438 | else if let Some(caps) = _TAG_RE.captures(css) {
439 | let name = caps.get(1).unwrap().as_str();
440 | if name != "*" {
441 | conditions.push(ConditionItem::Tag { name: _name_re(name) });
442 | }
443 | css = caps.get(2).map(|c| c.as_str()).unwrap_or("");
444 | }
445 |
446 | else { break; }
447 | }
448 |
449 | return (conditions, css);
450 | }
451 |
452 | fn _equation(equation_str: &str) -> (i32, i32) {
453 | lazy_static! {
454 | static ref _RE1: Regex = Regex::new(r"^\s*((?:\+|-)?\d+)\s*$").unwrap();
455 | static ref _RE2: Regex = Regex::new(r"^(?i)\s*((?:\+|-)?(?:\d+)?)?n\s*((?:\+|-)\s*\d+)?\s*$").unwrap();
456 | }
457 |
458 | if equation_str.is_empty() { return (0, 0); }
459 |
460 | // "even"
461 | if equation_str.trim().to_lowercase() == "even" { return (2, 2); }
462 |
463 | // "odd"
464 | if equation_str.trim().to_lowercase() == "odd" { return (2, 1); }
465 |
466 | // "4", "+4" or "-4"
467 | if let Some(caps) = _RE1.captures(equation_str) {
468 | let num = caps.get(1).unwrap().as_str().parse::().unwrap();
469 | return (0, num);
470 | }
471 |
472 | // "n", "4n", "+4n", "-4n", "n+1", "4n-1", "+4n-1" (and other variations)
473 | if let Some(caps) = _RE2.captures(equation_str) {
474 | let mut result = (0, 0);
475 | let num1 = caps.get(1).unwrap().as_str();
476 | result.0 = if num1 == "-" { -1 } else if num1.is_empty() { 1 } else { num1.parse::().unwrap() };
477 | if let Some(num2) = caps.get(2) {
478 | result.1 = num2.as_str().split_whitespace().collect::>().concat().parse::().unwrap();
479 | }
480 | return result;
481 | }
482 |
483 | return (0, 0);
484 | }
485 |
--------------------------------------------------------------------------------
/src/dom/html.rs:
--------------------------------------------------------------------------------
1 | use std::collections::{HashSet, HashMap, BTreeMap};
2 | use std::rc::{Rc, Weak};
3 | use std::cell::RefCell;
4 | use std::sync::atomic::{AtomicUsize, Ordering};
5 |
6 | use regex::{self, Regex};
7 |
8 | use util::{xml_escape, html_unescape, html_attr_unescape};
9 |
10 | lazy_static! {
11 | static ref ATTR_RE_STR: String = String::new() +
12 | r"([^<>=\s/]+|/)" + // Key
13 | r"(?:" +
14 | r"\s*=\s*" +
15 | r"(?s:" +
16 | r#""(.*?)""# + // Quotation marks
17 | r"|" +
18 | r"'(.*?)'" + // Apostrophes
19 | r"|" +
20 | r"([^>\s]*)" + // Unquoted
21 | r")" +
22 | r")?\s*";
23 |
24 | static ref TOKEN_RE_STR: String = String::new() +
25 | r"(?is)" +
26 | r"([^<]+)?" + // Text
27 | r"(?:" +
28 | r"<(?:" +
29 | r"!(?:" +
30 | r"DOCTYPE(\s+\w+.*?)" + // Doctype
31 | r"|" +
32 | r"--(.*?)--\s*" + // Comment
33 | r"|" +
34 | r"\[CDATA\[(.*?)\]\]" + // CDATA
35 | r")" +
36 | r"|" +
37 | r"\?(.*?)\?" + // Processing Instruction
38 | r"|" +
39 | r"\s*([^<>\s]+\s*(?:" + &*ATTR_RE_STR + r")*)" + // Tag
40 | r")>" +
41 | r"|" +
42 | r"(<)" + // Runaway "<"
43 | r")?" +
44 | r"(.*)$"; // Rest of html
45 |
46 | // HTML elements that only contain raw text
47 | static ref RAW: HashSet<&'static str> = hashset!["script", "style"];
48 |
49 | // HTML elements that only contain raw text and entities
50 | static ref RCDATA: HashSet<&'static str> = hashset!["title", "textarea"];
51 |
52 | static ref END: HashMap<&'static str, &'static str> = {
53 | // HTML elements with optional end tags
54 | let mut _end = hashmap!["body" => "head", "optgroup" => "optgroup", "option" => "option"];
55 |
56 | // HTML elements that break paragraphs
57 | for x in vec![
58 | "address", "article", "aside", "blockquote", "dir", "div", "dl", "fieldset", "footer", "form",
59 | "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "main", "menu", "nav", "ol",
60 | "p", "pre", "section", "table", "ul"
61 | ] {
62 | _end.insert(x, "p");
63 | }
64 |
65 | _end
66 | };
67 |
68 | // HTML elements with optional end tags and scoping rules
69 | static ref CLOSE: HashMap<&'static str, (HashSet<&'static str>, HashSet<&'static str>)> = {
70 | // HTML table elements with optional end tags
71 | let _table = hashset!["colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"];
72 |
73 | let _close = hashmap![
74 | "li" => (hashset!["li"], hashset!["ul", "ol"]),
75 |
76 | "colgroup" => (_table.clone(), hashset!["table"]),
77 | "tbody" => (_table.clone(), hashset!["table"]),
78 | "tfoot" => (_table.clone(), hashset!["table"]),
79 | "thead" => (_table.clone(), hashset!["table"]),
80 |
81 | "tr" => (hashset!["tr"], hashset!["table"]),
82 | "th" => (hashset!["th", "td"], hashset!["table"]),
83 | "td" => (hashset!["th", "td"], hashset!["table"]),
84 |
85 | "dd" => (hashset!["dd", "dt"], hashset!["dl"]),
86 | "dt" => (hashset!["dd", "dt"], hashset!["dl"]),
87 |
88 | "rp" => (hashset!["rp", "rt"], hashset!["ruby"]),
89 | "rt" => (hashset!["rp", "rt"], hashset!["ruby"])
90 | ];
91 |
92 | _close
93 | };
94 |
95 | // HTML elements without end tags
96 | static ref EMPTY: HashSet<&'static str> = hashset![
97 | "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link",
98 | "menuitem", "meta", "param", "source", "track", "wbr"
99 | ];
100 |
101 | // HTML elements categorized as phrasing content (and obsolete inline elements)
102 | static ref PHRASING: HashSet<&'static str> = hashset![
103 | "a", "abbr", "area", "audio", "b", "bdi", "bdo", "br", "button", "canvas", "cite", "code", "data",
104 | "datalist", "del", "dfn", "em", "embed", "i", "iframe", "img", "input", "ins", "kbd", "keygen",
105 | "label", "link", "map", "mark", "math", "meta", "meter", "noscript", "object", "output", "picture",
106 | "progress", "q", "ruby", "s", "samp", "script", "select", "slot", "small", "span", "strong", "sub", "sup",
107 | "svg", "template", "textarea", "time", "u", "var", "video", "wbr",
108 | "acronym", "applet", "basefont", "big", "font", "strike", "tt" // Obsolete
109 | ];
110 |
111 | // HTML elements that don't get their self-closing flag acknowledged
112 | static ref BLOCK: HashSet<&'static str> = hashset![
113 | "a", "address", "applet", "article", "aside", "b", "big", "blockquote", "body", "button",
114 | "caption", "center", "code", "col", "colgroup", "dd", "details", "dialog", "dir", "div",
115 | "dl", "dt", "em", "fieldset", "figcaption", "figure", "font", "footer", "form", "frameset",
116 | "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "html", "i", "iframe", "li",
117 | "listing", "main", "marquee", "menu", "nav", "nobr", "noembed", "noframes", "noscript",
118 | "object", "ol", "optgroup", "option", "p", "plaintext", "pre", "rp", "rt", "s", "script",
119 | "section", "select", "small", "strike", "strong", "style", "summary", "table", "tbody", "td",
120 | "template", "textarea", "tfoot", "th", "thead", "title", "tr", "tt", "u", "ul", "xmp"
121 | ];
122 | }
123 |
124 | static NODE_ID_NEXT: AtomicUsize = AtomicUsize::new(0);
125 |
126 | #[derive(Debug)]
127 | pub struct TreeNode {
128 | pub id: usize,
129 | pub parent: Option>,
130 | pub elem: NodeElem,
131 | }
132 |
133 | #[derive(Debug)]
134 | pub enum NodeElem {
135 | Root {
136 | childs: RefCell>>,
137 | },
138 |
139 | Tag {
140 | name: String,
141 | attrs: BTreeMap>,
142 | childs: RefCell>>,
143 | },
144 |
145 | Text {
146 | elem_type: String,
147 | content: String,
148 | },
149 | }
150 |
151 | impl TreeNode {
152 | pub fn is_tag(&self) -> bool {
153 | match self.elem {
154 | NodeElem::Tag { .. } => true,
155 | _ => false,
156 | }
157 | }
158 |
159 | pub fn get_tag_name(&self) -> Option<&str> {
160 | match self.elem {
161 | NodeElem::Tag { ref name, .. } => Some(name),
162 | _ => None,
163 | }
164 | }
165 |
166 | pub fn get_tag_attrs<'a>(&'a self) -> Option<&'a BTreeMap>> {
167 | match self.elem {
168 | NodeElem::Tag { ref attrs, .. } => Some(attrs),
169 | _ => None,
170 | }
171 | }
172 |
173 | pub fn get_parent(&self) -> Option> {
174 | match self.parent {
175 | Some(ref x) => Some(x.upgrade().unwrap()), // strong reference should alive, force unwrap it
176 | _ => None,
177 | }
178 | }
179 |
180 | pub fn get_childs(&self) -> Option>> {
181 | match self.elem {
182 | NodeElem::Root { ref childs } => Some(childs.borrow().clone()),
183 | NodeElem::Tag { ref childs, .. } => Some(childs.borrow().clone()),
184 | _ => None,
185 | }
186 | }
187 |
188 | // pub fn dbg_string(&self) -> String {
189 | // let id = self.id;
190 | // match self.elem {
191 | // NodeElem::Root { .. } => format!("[{}] TreeNode:Root", id),
192 | // NodeElem::Tag { ref name, ref attrs, .. } => format!("[{}] TreeNode:Tag(name: {}, attrs: {:?})", id, name, attrs),
193 | // NodeElem::Text { ref elem_type, ref content } => format!("[{}] TreeNode:Text(type: {}, content: {})", id, elem_type, content),
194 | // }
195 | // }
196 | }
197 |
198 | fn _process_text_node(current: &Rc, elem_type: &str, content: &str) {
199 | let new_node = Rc::new(
200 | TreeNode {
201 | id: NODE_ID_NEXT.fetch_add(1, Ordering::Relaxed),
202 | parent: Some(Rc::downgrade(current)),
203 | elem: NodeElem::Text { elem_type: elem_type.to_owned(), content: content.to_owned() },
204 | }
205 | );
206 |
207 | match current.elem {
208 | NodeElem::Root { ref childs } => childs.borrow_mut().push(new_node),
209 | NodeElem::Tag { ref childs, .. } => childs.borrow_mut().push(new_node),
210 | NodeElem::Text { .. } => panic!("Cannot use `Text` node as parent"),
211 | };
212 | }
213 |
214 | fn _process_start_tag(current: &Rc, start_tag: &str, attrs: BTreeMap>) -> Rc {
215 | let mut working_node = current.clone();
216 |
217 | // Autoclose optional HTML elements
218 | if working_node.parent.is_some() {
219 | if let Some(end_tag) = END.get(start_tag) {
220 | working_node = _process_end_tag(&working_node, end_tag);
221 | }
222 | else if let Some(x) = CLOSE.get(start_tag) {
223 | let (ref allowed, ref scope) = *x;
224 |
225 | // Close allowed parent elements in scope
226 | let mut next = working_node.clone();
227 | while next.parent.is_some() && !scope.contains(next.clone().get_tag_name().unwrap()) {
228 | let this = next.clone();
229 | let this_tag_name = this.get_tag_name().unwrap();
230 |
231 | if allowed.contains(this_tag_name) {
232 | working_node = _process_end_tag(&working_node, this_tag_name);
233 | }
234 |
235 | next = next.get_parent().unwrap();
236 | }
237 | }
238 | }
239 |
240 | // New tag
241 | let new_node = Rc::new(
242 | TreeNode {
243 | id: NODE_ID_NEXT.fetch_add(1, Ordering::Relaxed),
244 | parent: Some(Rc::downgrade(&working_node)),
245 | elem: NodeElem::Tag { name: start_tag.to_owned(), attrs: attrs, childs: RefCell::new(Vec::new()) },
246 | }
247 | );
248 |
249 | match working_node.elem {
250 | NodeElem::Root { ref childs } => childs.borrow_mut().push(new_node.clone()),
251 | NodeElem::Tag { ref childs, .. } => childs.borrow_mut().push(new_node.clone()),
252 | NodeElem::Text { .. } => panic!("Cannot use `Text` node as parent"),
253 | }
254 |
255 | new_node
256 | }
257 |
258 | fn _process_end_tag(current: &Rc, end_tag: &str) -> Rc {
259 | // Search stack for start tag
260 | let mut next = current.clone();
261 | while next.parent.is_some() {
262 | let this = next.clone();
263 | let this_tag_name = this.get_tag_name().unwrap();
264 |
265 | // Right tag
266 | if this_tag_name == end_tag {
267 | return next.get_parent().unwrap();
268 | }
269 |
270 | // Phrasing content can only cross phrasing content
271 | if PHRASING.contains(end_tag) && !PHRASING.contains(this_tag_name) {
272 | return current.clone();
273 | }
274 |
275 | next = next.get_parent().unwrap();
276 | }
277 |
278 | // Ignore useless end tag
279 | current.clone()
280 | }
281 |
282 | pub fn parse(html: &str) -> Rc {
283 | let mut html = html;
284 |
285 | let root = Rc::new(
286 | TreeNode {
287 | id: NODE_ID_NEXT.fetch_add(1, Ordering::Relaxed),
288 | parent: None,
289 | elem: NodeElem::Root { childs: RefCell::new(Vec::new()) },
290 | }
291 | );
292 |
293 | let mut current = root.clone();
294 |
295 | lazy_static! {
296 | static ref _TAG_PLUS_ATTRS_RE: Regex = Regex::new(r"^([^\s/]+)([\s\S]*)").unwrap();
297 | }
298 |
299 | let re = Regex::new(&*TOKEN_RE_STR).unwrap();
300 | while let Some(caps) = re.captures(html) {
301 | let text = caps.get(1);
302 | let doctype = caps.get(2);
303 | let comment = caps.get(3);
304 | let cdata = caps.get(4);
305 | let pi = caps.get(5);
306 | let tag = caps.get(6);
307 | let runaway = caps.get(11);
308 |
309 | html = caps.get(12).map(|c| c.as_str()).unwrap_or(""); // html rest
310 |
311 | // Text (and runaway "<")
312 | if let Some(text) = text {
313 | if runaway.is_some() {
314 | _process_text_node(¤t, "text", &html_unescape(&(text.as_str().to_owned() + "<")));
315 | } else {
316 | _process_text_node(¤t, "text", &html_unescape(text.as_str()));
317 | }
318 | }
319 |
320 | // Tag
321 | if let Some(tag) = tag {
322 | // End: /tag
323 | if tag.as_str().starts_with("/") {
324 | let end_tag = tag.as_str().trim_start_matches('/').trim().to_lowercase();
325 | current = _process_end_tag(¤t, &end_tag);
326 | }
327 | // Start: tag
328 | else {
329 | let caps = _TAG_PLUS_ATTRS_RE.captures(tag.as_str()).unwrap(); // panic is ok
330 | let mut start_tag = caps.get(1).unwrap().as_str().to_lowercase();
331 | let attrs_str = caps.get(2).unwrap();
332 |
333 | // Attributes
334 | let mut attrs: BTreeMap> = BTreeMap::new();
335 | let mut is_closing = false;
336 | for caps in Regex::new(&*ATTR_RE_STR).unwrap().captures_iter(attrs_str.as_str()) {
337 | let key = caps.get(1).unwrap().as_str().to_owned().to_lowercase();
338 | let value = if caps.get(2).is_some() { caps.get(2) } else if caps.get(3).is_some() { caps.get(3) } else { caps.get(4) };
339 |
340 | // Empty tag
341 | if key == "/" {
342 | is_closing = true;
343 | continue;
344 | }
345 |
346 | attrs.insert(key, match value {
347 | Some(ref x) => Some(html_attr_unescape(x.as_str())),
348 | _ => None,
349 | });
350 | }
351 |
352 | // "image" is an alias for "img"
353 | if start_tag == "image" { start_tag = "img".to_owned() }
354 |
355 | current = _process_start_tag(¤t, &start_tag, attrs);
356 |
357 | // Element without end tag (self-closing)
358 | if EMPTY.contains(start_tag.as_str()) || (!BLOCK.contains(start_tag.as_str()) && is_closing) {
359 | current = _process_end_tag(¤t, &start_tag);
360 | }
361 |
362 | // Raw text elements
363 | if RAW.contains(start_tag.as_str()) || RCDATA.contains(start_tag.as_str()) {
364 | let raw_text_re = Regex::new(&(r"(.+?)<\s*/\s*".to_owned() + ®ex::escape(&start_tag) + r"\s*>(.*)$")).unwrap();
365 | if let Some(raw_text_caps) = raw_text_re.captures(html) {
366 | let raw_text = raw_text_caps.get(1).unwrap();
367 | html = raw_text_caps.get(2).map(|c| c.as_str()).unwrap_or("");
368 |
369 | if RCDATA.contains(&start_tag.as_str()) {
370 | _process_text_node(¤t, "raw", &html_unescape(raw_text.as_str()))
371 | } else {
372 | _process_text_node(¤t, "raw", raw_text.as_str())
373 | }
374 |
375 | current = _process_end_tag(¤t, &start_tag);
376 | }
377 | }
378 | }
379 | }
380 |
381 | // DOCTYPE
382 | else if let Some(doctype) = doctype {
383 | _process_text_node(¤t, "doctype", doctype.as_str());
384 | }
385 |
386 | // Comment
387 | else if let Some(comment) = comment {
388 | _process_text_node(¤t, "comment", comment.as_str());
389 | }
390 |
391 | // CDATA
392 | else if let Some(cdata) = cdata {
393 | _process_text_node(¤t, "cdata", cdata.as_str());
394 | }
395 |
396 | // Processing instruction
397 | else if let Some(pi) = pi {
398 | _process_text_node(¤t, "pi", pi.as_str());
399 | }
400 |
401 | if html.is_empty() { break; }
402 | }
403 |
404 | root
405 | }
406 |
407 | pub fn render (root: &Rc) -> String {
408 | match root.elem {
409 | // Text (escaped)
410 | NodeElem::Text { ref elem_type, ref content } if elem_type == "text" => {
411 | return xml_escape(content)
412 | },
413 |
414 | // Raw text
415 | NodeElem::Text { ref elem_type, ref content } if elem_type == "raw" => {
416 | return content.clone()
417 | },
418 |
419 | // DOCTYPE
420 | NodeElem::Text { ref elem_type, ref content } if elem_type == "doctype" => {
421 | return ""
422 | },
423 |
424 | // Comment
425 | NodeElem::Text { ref elem_type, ref content } if elem_type == "comment" => {
426 | return ""
427 | },
428 |
429 | // CDATA
430 | NodeElem::Text { ref elem_type, ref content } if elem_type == "cdata" => {
431 | return ""
432 | },
433 |
434 | // Processing instruction
435 | NodeElem::Text { ref elem_type, ref content } if elem_type == "pi" => {
436 | return "".to_owned() + content + "?>"
437 | },
438 |
439 | // Root
440 | NodeElem::Root { ref childs } => {
441 | return childs.borrow().iter().map(|ref x| { render(x) }).collect::>().concat();
442 | },
443 |
444 | NodeElem::Tag { ref name, ref attrs, ref childs } => {
445 | let mut result = "<".to_owned() + name;
446 |
447 | // Attributes
448 | for (key, value) in attrs.iter() {
449 | match *value {
450 | Some(ref x) => { result = result + " " + key + "=\"" + &xml_escape(x) + "\"" },
451 | None => { result = result + " " + key },
452 | }
453 | }
454 |
455 | // No children
456 | if childs.borrow().is_empty() {
457 | return if EMPTY.contains(&name[..]) { result + ">" } else { result + ">" + name + ">" };
458 | }
459 |
460 | // Children
461 | return
462 | result + ">" +
463 | &childs.borrow().iter().map(|ref x| { render(x) }).collect::>().concat() +
464 | "" + name + ">";
465 | },
466 |
467 | _ => { return "".to_owned() },
468 | }
469 | }
470 |
--------------------------------------------------------------------------------
/src/dom/mod.rs:
--------------------------------------------------------------------------------
1 | mod css;
2 | mod html;
3 |
4 | use std::collections::BTreeMap;
5 | use std::rc::Rc;
6 |
7 | use regex::Regex;
8 |
9 | use self::html::TreeNode;
10 |
11 | /// The HTML `DOM` type
12 | #[derive(Debug)]
13 | pub struct DOM {
14 | root: Rc, // To avoid destroying the root node ahead of time
15 | tree: Rc,
16 | }
17 |
18 | impl DOM {
19 | /// Construct a new `DOM` object and parse HTML.
20 | ///
21 | /// ```
22 | /// use victoria_dom::DOM;
23 | /// let dom = DOM::new("Hello
");
24 | /// ```
25 | pub fn new(html: &str) -> DOM {
26 | let tree = html::parse(html);
27 | DOM { root: tree.clone(), tree: tree }
28 | }
29 |
30 | /// Find all ancestor elements of the current element matching the optional CSS selector
31 | /// and return a Vector of DOM objects of these elements.
32 | ///
33 | /// ```
34 | /// use victoria_dom::DOM;
35 | /// let dom = DOM::new("Hello
");
36 | /// let ancestors: Vec<_> = dom.at("div").unwrap().ancestors(None).iter().map(|x| x.tag().unwrap().to_string()).collect();
37 | /// assert_eq!(ancestors, ["body", "html"]);
38 | /// ```
39 | pub fn ancestors(&self, selector: Option<&str>) -> Vec {
40 | let mut ancestors = Vec::new();
41 | let mut node = self.tree.clone();
42 | while let Some(parent) = node.get_parent() {
43 | if parent.is_tag() && (selector.is_none() || css::matches(&parent, selector.unwrap())) {
44 | ancestors.push(DOM { root: self.root.clone(), tree: parent.clone() });
45 | }
46 | node = parent;
47 | }
48 | ancestors
49 | }
50 |
51 | /// Find first descendant element of the current element matching the CSS selector and return it as a DOM object,
52 | /// or `None` if none could be found.
53 | pub fn at(&self, selector: &str) -> Option {
54 | if let Some(node) = css::select_one(&self.tree, selector) {
55 | return Some(DOM { root: self.root.clone(), tree: node })
56 | }
57 | None
58 | }
59 |
60 | /// The current element tag name.
61 | pub fn tag(&self) -> Option<&str> {
62 | self.tree.get_tag_name()
63 | }
64 |
65 | /// The current element attribute2value map.
66 | pub fn attrs(&self) -> BTreeMap> {
67 | self.tree.get_tag_attrs().map_or_else(|| BTreeMap::new(), |x| x.clone())
68 | }
69 |
70 | /// The current element attribute value, or `None` if there are no attribute with the name or value.
71 | pub fn attr(&self, name: &str) -> Option<&str> {
72 | self.tree.get_tag_attrs().and_then(|x| x.get(name)).and_then(|x| x.as_ref()).map(|x| x.as_str())
73 | }
74 |
75 | /// Find all child elements of the current element matching the CSS selector and return a Vector of DOM objects of these elements.
76 | ///
77 | /// ```
78 | /// use victoria_dom::DOM;
79 | /// let dom = DOM::new("");
80 | /// let childs: Vec<_> = dom.at("div").unwrap().childs(None).iter().map(|x| x.attr("id").unwrap().to_string()).collect();
81 | /// assert_eq!(childs, ["a", "b"]);
82 | /// ```
83 | pub fn childs(&self, selector: Option<&str>) -> Vec {
84 | self.tree.get_childs().unwrap_or(Vec::new()).into_iter().filter_map(|x|
85 | if x.is_tag() && (selector.is_none() || css::matches(&x, selector.unwrap())) {
86 | Some(DOM { root: self.root.clone(), tree: x })
87 | } else {
88 | None
89 | }
90 | ).collect()
91 | }
92 |
93 | /// Find all descendant elements of the current element matching the CSS selector and return a Vector of DOM objects of these elements.
94 | ///
95 | /// ```
96 | /// use victoria_dom::DOM;
97 | /// let dom = DOM::new("");
98 | /// let elems: Vec<_> = dom.find("div[id]").iter().map(|x| x.attr("id").unwrap().to_string()).collect();
99 | /// assert_eq!(elems, ["a", "c", "b"]);
100 | /// ```
101 | pub fn find(&self, selector: &str) -> Vec {
102 | css::select(&self.tree, selector, 0).into_iter().map(|x| DOM { root: self.root.clone(), tree: x }).collect()
103 | }
104 |
105 | /// Check if the current element matches the CSS selector.
106 | pub fn matches(&self, selector: &str) -> bool {
107 | css::matches(&self.tree, selector)
108 | }
109 |
110 | /// Find all sibling elements after the current element matching the CSS selector and return a Vector of DOM objects of these elements.
111 | ///
112 | /// ```
113 | /// use victoria_dom::DOM;
114 | /// let dom = DOM::new("");
115 | /// let elems: Vec<_> = dom.at("div#a").unwrap().following(None).iter().map(|x| x.attr("id").unwrap().to_string()).collect();
116 | /// assert_eq!(elems, ["b"]);
117 | /// ```
118 | pub fn following(&self, selector: Option<&str>) -> Vec {
119 | self._siblings().into_iter().skip_while(|x| x.id != self.tree.id).skip(1)
120 | .filter(|x| selector.is_none() || css::matches(x, selector.unwrap()))
121 | .map(|x| DOM { root: self.root.clone(), tree: x }).collect()
122 | }
123 |
124 | /// Return a DOM object for next sibling element, or `None` if there are no more siblings.
125 | pub fn next(&self) -> Option {
126 | self._siblings().into_iter().skip_while(|x| x.id != self.tree.id).skip(1).next().map(|x| DOM { root: self.root.clone(), tree: x })
127 | }
128 |
129 | /// Find all sibling elements before the current element matching the CSS selector and return a Vector of DOM objects of these elements.
130 | ///
131 | /// ```
132 | /// use victoria_dom::DOM;
133 | /// let dom = DOM::new("");
134 | /// let elems: Vec<_> = dom.at("div#b").unwrap().preceding(None).iter().map(|x| x.attr("id").unwrap().to_string()).collect();
135 | /// assert_eq!(elems, ["a"]);
136 | /// ```
137 | pub fn preceding(&self, selector: Option<&str>) -> Vec {
138 | self._siblings().into_iter().take_while(|x| x.id != self.tree.id)
139 | .filter(|x| selector.is_none() || css::matches(x, selector.unwrap()))
140 | .map(|x| DOM { root: self.root.clone(), tree: x }).collect()
141 | }
142 |
143 | /// Return a DOM object for the previous sibling element, or `None` if there are no more siblings.
144 | pub fn prev(&self) -> Option {
145 | self._siblings().into_iter().take_while(|x| x.id != self.tree.id).last().map(|x| DOM { root: self.root.clone(), tree: x })
146 | }
147 |
148 | fn _siblings(&self) -> Vec> {
149 | self.tree.get_parent()
150 | .and_then(|x| x.get_childs())
151 | .map(|x| x.into_iter().filter(|v| v.is_tag()).collect::>())
152 | .unwrap_or(Vec::new())
153 | }
154 |
155 | /// Return a DOM object for the parent of the current element, or `None` if this element has no parent.
156 | pub fn parent(&self) -> Option {
157 | self.tree.get_parent().map(|x| DOM { root: self.root.clone(), tree: x })
158 | }
159 |
160 | /// Render the current element and its content to HTML.
161 | pub fn to_string(&self) -> String {
162 | html::render(&self.tree)
163 | }
164 |
165 | /// Extract text content from the current element only (not including child elements) with smart whitespace trimming.
166 | ///
167 | /// ```
168 | /// use victoria_dom::DOM;
169 | /// let dom = DOM::new("");
170 | /// assert_eq!(dom.at("div").unwrap().text(), "foo baz");
171 | /// ```
172 | pub fn text(&self) -> String {
173 | self._text(false, true) // non-recursive trimmed
174 | }
175 |
176 | /// Extract text content from the current element only (not including child elements) without smart whitespace trimming.
177 | ///
178 | /// ```
179 | /// use victoria_dom::DOM;
180 | /// let dom = DOM::new("");
181 | /// assert_eq!(dom.at("div").unwrap().rtext(), "foo\nbaz\n");
182 | /// ```
183 | pub fn rtext(&self) -> String {
184 | self._text(false, false) // non-recursive raw
185 | }
186 |
187 | /// Extract text content from all descendant nodes of the current element with smart whitespace trimming.
188 | ///
189 | /// ```
190 | /// use victoria_dom::DOM;
191 | /// let dom = DOM::new("");
192 | /// assert_eq!(dom.at("div").unwrap().text_all(), "foo bar baz");
193 | /// ```
194 | pub fn text_all(&self) -> String {
195 | self._text(true, true) // recursive trimmed
196 | }
197 |
198 | /// Extract text content from all descendant nodes of the current element without smart whitespace trimming.
199 | ///
200 | /// ```
201 | /// use victoria_dom::DOM;
202 | /// let dom = DOM::new("");
203 | /// assert_eq!(dom.at("div").unwrap().rtext_all(), "foo\nbarbaz\n");
204 | /// ```
205 | pub fn rtext_all(&self) -> String {
206 | self._text(true, false) // recursive raw
207 | }
208 |
209 | fn _text(&self, recursive: bool, trim: bool) -> String {
210 | // Try to detect "pre" tag
211 | let mut under_pre_tag = false;
212 | if trim {
213 | let mut node = self.tree.clone();
214 | loop {
215 | if let html::NodeElem::Tag { ref name, .. } = node.elem {
216 | if name == "pre" {
217 | under_pre_tag = true;
218 | break;
219 | }
220 | }
221 | if node.get_parent().is_some() { node = node.get_parent().unwrap(); } else { break; }
222 | }
223 | }
224 |
225 | match self.tree.get_childs() {
226 | Some(nodes) => _nodes_text(&nodes, recursive, trim && !under_pre_tag),
227 | _ => String::new(),
228 | }
229 | }
230 |
231 | /// Return content of the current element.
232 | ///
233 | /// ```
234 | /// use victoria_dom::DOM;
235 | /// let dom = DOM::new("Test
");
236 | /// assert_eq!(dom.at("div").unwrap().content(), "Test");
237 | /// ```
238 | pub fn content(&self) -> String {
239 | self.tree.get_childs().unwrap().into_iter().map(|x| html::render(&x)).collect::>().join("")
240 | }
241 | }
242 |
243 | fn _nodes_text(nodes: &Vec>, recursive: bool, trim: bool) -> String {
244 | lazy_static! {
245 | static ref _RE1: Regex = Regex::new(r"\s+").unwrap();
246 | static ref _RE2: Regex = Regex::new(r"\S\z").unwrap();
247 | static ref _RE3: Regex = Regex::new(r"^[^.!?,;:\s]+").unwrap();
248 | static ref _RE4: Regex = Regex::new(r"\S+").unwrap();
249 | }
250 |
251 | let mut text = String::new();
252 | for node in nodes {
253 | let mut chunk = match node.elem {
254 | html::NodeElem::Text { ref elem_type, ref content } => {
255 | match elem_type.as_ref() {
256 | "text" if trim => _RE1.replace_all(content.trim(), " ").into_owned(),
257 | "text" | "raw" | "cdata" => content.to_owned(),
258 | _ => String::new(),
259 | }
260 | },
261 | html::NodeElem::Tag { ref name, ref childs, .. } if recursive => {
262 | _nodes_text(&childs.borrow(), true, trim && name != "pre")
263 | }
264 | _ => String::new(),
265 | };
266 |
267 | // Add leading whitespace if punctuation allows it
268 | if trim && _RE2.is_match(&text) && _RE3.is_match(&chunk) {
269 | chunk = " ".to_owned() + &chunk
270 | }
271 |
272 | // Trim whitespace blocks
273 | if _RE4.is_match(&chunk) || !trim {
274 | text.push_str(&chunk);
275 | }
276 | }
277 | text
278 | }
279 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | #![doc(html_root_url = "https://docs.rs/victoria-dom/0.1.2")]
2 | #![deny(missing_docs)]
3 | #![deny(warnings)]
4 | #![deny(missing_debug_implementations)]
5 |
6 | //! Minimalistic HTML parser with CSS selectors
7 | //!
8 | //! The project has been inspired by [Mojo::DOM](https://metacpan.org/pod/Mojo::DOM).
9 | //!
10 | //! It will even try to interpret broken HTML, so you should not use it for validation.
11 | //!
12 | //! # Examples
13 | //!
14 | //! ```
15 | //! extern crate victoria_dom;
16 | //!
17 | //! use victoria_dom::DOM;
18 | //!
19 | //! fn main() {
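20 | //! let html = r#"<html><div id="main">Hello, <a href="https://www.rust-lang.org" alt="The Rust Programing Language">Rust</a></div></html>"#;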
21 | //! let dom = DOM::new(html);
22 | //!
23 | //! assert_eq!(dom.at("html").unwrap().text_all(), "Hello, Rust");
24 | //! assert_eq!(dom.at("div#main > a").unwrap().attr("alt").unwrap(), "The Rust Programing Language");
25 | //! }
26 | //! ```
27 | //!
28 | //! # Supported CSS selectors
29 | //!
30 | //! * `*` Any element.
31 | //! * `E` An element of type `E`.
32 | //! * `E[foo]` An `E` element with a `foo` attribute.
33 | //! * `E[foo="bar"]` An `E` element whose `foo` attribute value is exactly equal to `bar`.
34 | //! * `E[foo~="bar"]` An `E` element whose `foo` attribute value is a list of whitespace-separated values, one of which is exactly equal to `bar`.
35 | //! * `E[foo^="bar"]` An `E` element whose `foo` attribute value begins exactly with the string `bar`.
36 | //! * `E[foo$="bar"]` An `E` element whose `foo` attribute value ends exactly with the string `bar`.
37 | //! * `E[foo*="bar"]` An `E` element whose `foo` attribute value contains the substring `bar`.
38 | //! * `E:root` An `E` element, root of the document.
39 | //! * `E:nth-child(n)` An `E` element, the `n-th` child of its parent.
40 | //! * `E:nth-last-child(n)` An `E` element, the `n-th` child of its parent, counting from the last one.
41 | //! * `E:nth-of-type(n)` An `E` element, the `n-th` sibling of its type.
42 | //! * `E:nth-last-of-type(n)` An `E` element, the `n-th` sibling of its type, counting from the last one.
43 | //! * `E:first-child` An `E` element, first child of its parent.
44 | //! * `E:last-child` An `E` element, last child of its parent.
45 | //! * `E:first-of-type` An `E` element, first sibling of its type.
46 | //! * `E:last-of-type` An `E` element, last sibling of its type.
47 | //! * `E:only-child` An `E` element, only child of its parent.
48 | //! * `E:only-of-type` An `E` element, only sibling of its type.
49 | //! * `E:empty` An `E` element that has no children (including text nodes).
50 | //! * `E:checked` A user interface element `E` which is checked (for instance a radio-button or checkbox).
51 | //! * `E.warning` An `E` element whose class is `warning`.
52 | //! * `E#myid` An `E` element with ID equal to `myid`.
53 | //! * `E:not(s)` An `E` element that does not match simple selector `s`.
54 | //! * `E F` An `F` element descendant of an `E` element.
55 | //! * `E > F` An `F` element child of an `E` element.
56 | //! * `E + F` An `F` element immediately preceded by an `E` element.
57 | //! * `E ~ F` An `F` element preceded by an `E` element.
58 | //! * `E, F, G` Elements of type `E`, `F` and `G`.
59 | //! * `E[foo=bar][bar=baz]` An `E` element whose attributes match all following attribute selectors.
60 |
61 | #[macro_use] extern crate lazy_static;
62 | #[macro_use] extern crate maplit;
63 | extern crate regex;
64 |
65 | pub use dom::DOM;
66 |
67 | mod dom;
68 | mod util;
69 |
--------------------------------------------------------------------------------
/src/util.rs:
--------------------------------------------------------------------------------
1 | use std::collections::HashMap;
2 | use std::{char, u32};
3 | use std::cmp;
4 |
5 | use regex::{Regex, Captures};
6 |
7 | lazy_static! {
8 | static ref ENTITY_RE: Regex = Regex::new(r#"&(?:\#((?:[0-9]{1,7}|x[0-9a-fA-F]{1,6}));|(\w+[;=]?))"#).unwrap();
9 | }
10 |
/// Escapes the five characters that are unsafe in XML/HTML text and attribute
/// values: `&`, `<`, `>`, `"` and `'`.
///
/// The input is processed in a single pass, so an `&` that is already part of
/// an entity in the input is escaped again (`&amp;` becomes `&amp;amp;`).
pub fn xml_escape(text: &str) -> String {
    // The output is at least as long as the input.
    let mut escaped = String::with_capacity(text.len());

    for c in text.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            _ => escaped.push(c),
        }
    }

    escaped
}
20 |
21 | pub fn html_unescape(text: &str) -> String {
22 | _html_unescape(text, false)
23 | }
24 |
25 | pub fn html_attr_unescape(text: &str) -> String {
26 | _html_unescape(text, true)
27 | }
28 |
29 | fn _html_unescape(text: &str, is_attr: bool) -> String {
30 | ENTITY_RE.replace_all(text, |caps: &Captures| {
31 | caps.get(1)
32 | .map(|x| _decode_point(x.as_str()))
33 | .unwrap_or_else(|| _decode_name(caps.get(2).unwrap().as_str(), is_attr))
34 | }).into_owned()
35 | }
36 |
/// Decodes the body of a numeric character reference.
///
/// `point` is the text between `&#` and `;`: either decimal digits or `x`
/// followed by hexadecimal digits. Returns the referenced character as a
/// string. If the number cannot be parsed or is not a valid Unicode scalar
/// value (a surrogate, or a value above U+10FFFF), the original reference text
/// `&#<point>;` is returned so that no input is lost; this mirrors how an
/// unknown named reference is passed through unchanged.
fn _decode_point(point: &str) -> String {
    // Code point
    let parsed = if point.starts_with('x') {
        u32::from_str_radix(&point[1..], 16)
    } else {
        u32::from_str_radix(point, 10)
    };

    parsed
        .ok()
        .and_then(char::from_u32)
        .map(|c| c.to_string())
        // Rebuild the reference exactly as it appeared in the input.
        .unwrap_or_else(|| format!("&#{};", point))
}
45 |
46 | pub fn _decode_name(name: &str, is_attr: bool) -> String {
47 | lazy_static! {
48 | static ref _ALPHANUMEQ_RE: Regex = Regex::new("[A-Za-z0-9=]").unwrap();
49 | };
50 |
51 | // Named character reference
52 | for len in 0 .. cmp::max(1, name.len()) - 1 {
53 | let name_trunc = &name[0 .. (name.len() - len)];
54 | let last = &name[(name.len() - len) .. cmp::min((name.len() - len) + 1, name.len())];
55 | let rest = &name[(name.len() - len) .. name.len()];
56 |
57 | if !is_attr || name_trunc.ends_with(';') || !_ALPHANUMEQ_RE.is_match(last) {
58 | if let Some(&val) = ENTITIES.get(&name_trunc) {
59 | return val.to_string() + rest;
60 | }
61 | }
62 | }
63 |
64 | "&".to_string() + name
65 | }
66 |
67 | lazy_static! {
68 | static ref ENTITIES: HashMap<&'static str, &'static str> = hashmap![
69 | "Aacute;" => "\u{000C1}",
70 | "Aacute" => "\u{000C1}",
71 | "aacute;" => "\u{000E1}",
72 | "aacute" => "\u{000E1}",
73 | "Abreve;" => "\u{00102}",
74 | "abreve;" => "\u{00103}",
75 | "ac;" => "\u{0223E}",
76 | "acd;" => "\u{0223F}",
77 | "acE;" => "\u{0223E}\u{00333}",
78 | "Acirc;" => "\u{000C2}",
79 | "Acirc" => "\u{000C2}",
80 | "acirc;" => "\u{000E2}",
81 | "acirc" => "\u{000E2}",
82 | "acute;" => "\u{000B4}",
83 | "acute" => "\u{000B4}",
84 | "Acy;" => "\u{00410}",
85 | "acy;" => "\u{00430}",
86 | "AElig;" => "\u{000C6}",
87 | "AElig" => "\u{000C6}",
88 | "aelig;" => "\u{000E6}",
89 | "aelig" => "\u{000E6}",
90 | "af;" => "\u{02061}",
91 | "Afr;" => "\u{1D504}",
92 | "afr;" => "\u{1D51E}",
93 | "Agrave;" => "\u{000C0}",
94 | "Agrave" => "\u{000C0}",
95 | "agrave;" => "\u{000E0}",
96 | "agrave" => "\u{000E0}",
97 | "alefsym;" => "\u{02135}",
98 | "aleph;" => "\u{02135}",
99 | "Alpha;" => "\u{00391}",
100 | "alpha;" => "\u{003B1}",
101 | "Amacr;" => "\u{00100}",
102 | "amacr;" => "\u{00101}",
103 | "amalg;" => "\u{02A3F}",
104 | "AMP;" => "\u{00026}",
105 | "AMP" => "\u{00026}",
106 | "amp;" => "\u{00026}",
107 | "amp" => "\u{00026}",
108 | "And;" => "\u{02A53}",
109 | "and;" => "\u{02227}",
110 | "andand;" => "\u{02A55}",
111 | "andd;" => "\u{02A5C}",
112 | "andslope;" => "\u{02A58}",
113 | "andv;" => "\u{02A5A}",
114 | "ang;" => "\u{02220}",
115 | "ange;" => "\u{029A4}",
116 | "angle;" => "\u{02220}",
117 | "angmsd;" => "\u{02221}",
118 | "angmsdaa;" => "\u{029A8}",
119 | "angmsdab;" => "\u{029A9}",
120 | "angmsdac;" => "\u{029AA}",
121 | "angmsdad;" => "\u{029AB}",
122 | "angmsdae;" => "\u{029AC}",
123 | "angmsdaf;" => "\u{029AD}",
124 | "angmsdag;" => "\u{029AE}",
125 | "angmsdah;" => "\u{029AF}",
126 | "angrt;" => "\u{0221F}",
127 | "angrtvb;" => "\u{022BE}",
128 | "angrtvbd;" => "\u{0299D}",
129 | "angsph;" => "\u{02222}",
130 | "angst;" => "\u{000C5}",
131 | "angzarr;" => "\u{0237C}",
132 | "Aogon;" => "\u{00104}",
133 | "aogon;" => "\u{00105}",
134 | "Aopf;" => "\u{1D538}",
135 | "aopf;" => "\u{1D552}",
136 | "ap;" => "\u{02248}",
137 | "apacir;" => "\u{02A6F}",
138 | "apE;" => "\u{02A70}",
139 | "ape;" => "\u{0224A}",
140 | "apid;" => "\u{0224B}",
141 | "apos;" => "\u{00027}",
142 | "ApplyFunction;" => "\u{02061}",
143 | "approx;" => "\u{02248}",
144 | "approxeq;" => "\u{0224A}",
145 | "Aring;" => "\u{000C5}",
146 | "Aring" => "\u{000C5}",
147 | "aring;" => "\u{000E5}",
148 | "aring" => "\u{000E5}",
149 | "Ascr;" => "\u{1D49C}",
150 | "ascr;" => "\u{1D4B6}",
151 | "Assign;" => "\u{02254}",
152 | "ast;" => "\u{0002A}",
153 | "asymp;" => "\u{02248}",
154 | "asympeq;" => "\u{0224D}",
155 | "Atilde;" => "\u{000C3}",
156 | "Atilde" => "\u{000C3}",
157 | "atilde;" => "\u{000E3}",
158 | "atilde" => "\u{000E3}",
159 | "Auml;" => "\u{000C4}",
160 | "Auml" => "\u{000C4}",
161 | "auml;" => "\u{000E4}",
162 | "auml" => "\u{000E4}",
163 | "awconint;" => "\u{02233}",
164 | "awint;" => "\u{02A11}",
165 | "backcong;" => "\u{0224C}",
166 | "backepsilon;" => "\u{003F6}",
167 | "backprime;" => "\u{02035}",
168 | "backsim;" => "\u{0223D}",
169 | "backsimeq;" => "\u{022CD}",
170 | "Backslash;" => "\u{02216}",
171 | "Barv;" => "\u{02AE7}",
172 | "barvee;" => "\u{022BD}",
173 | "Barwed;" => "\u{02306}",
174 | "barwed;" => "\u{02305}",
175 | "barwedge;" => "\u{02305}",
176 | "bbrk;" => "\u{023B5}",
177 | "bbrktbrk;" => "\u{023B6}",
178 | "bcong;" => "\u{0224C}",
179 | "Bcy;" => "\u{00411}",
180 | "bcy;" => "\u{00431}",
181 | "bdquo;" => "\u{0201E}",
182 | "becaus;" => "\u{02235}",
183 | "Because;" => "\u{02235}",
184 | "because;" => "\u{02235}",
185 | "bemptyv;" => "\u{029B0}",
186 | "bepsi;" => "\u{003F6}",
187 | "bernou;" => "\u{0212C}",
188 | "Bernoullis;" => "\u{0212C}",
189 | "Beta;" => "\u{00392}",
190 | "beta;" => "\u{003B2}",
191 | "beth;" => "\u{02136}",
192 | "between;" => "\u{0226C}",
193 | "Bfr;" => "\u{1D505}",
194 | "bfr;" => "\u{1D51F}",
195 | "bigcap;" => "\u{022C2}",
196 | "bigcirc;" => "\u{025EF}",
197 | "bigcup;" => "\u{022C3}",
198 | "bigodot;" => "\u{02A00}",
199 | "bigoplus;" => "\u{02A01}",
200 | "bigotimes;" => "\u{02A02}",
201 | "bigsqcup;" => "\u{02A06}",
202 | "bigstar;" => "\u{02605}",
203 | "bigtriangledown;" => "\u{025BD}",
204 | "bigtriangleup;" => "\u{025B3}",
205 | "biguplus;" => "\u{02A04}",
206 | "bigvee;" => "\u{022C1}",
207 | "bigwedge;" => "\u{022C0}",
208 | "bkarow;" => "\u{0290D}",
209 | "blacklozenge;" => "\u{029EB}",
210 | "blacksquare;" => "\u{025AA}",
211 | "blacktriangle;" => "\u{025B4}",
212 | "blacktriangledown;" => "\u{025BE}",
213 | "blacktriangleleft;" => "\u{025C2}",
214 | "blacktriangleright;" => "\u{025B8}",
215 | "blank;" => "\u{02423}",
216 | "blk12;" => "\u{02592}",
217 | "blk14;" => "\u{02591}",
218 | "blk34;" => "\u{02593}",
219 | "block;" => "\u{02588}",
220 | "bne;" => "\u{0003D}\u{020E5}",
221 | "bnequiv;" => "\u{02261}\u{020E5}",
222 | "bNot;" => "\u{02AED}",
223 | "bnot;" => "\u{02310}",
224 | "Bopf;" => "\u{1D539}",
225 | "bopf;" => "\u{1D553}",
226 | "bot;" => "\u{022A5}",
227 | "bottom;" => "\u{022A5}",
228 | "bowtie;" => "\u{022C8}",
229 | "boxbox;" => "\u{029C9}",
230 | "boxDL;" => "\u{02557}",
231 | "boxDl;" => "\u{02556}",
232 | "boxdL;" => "\u{02555}",
233 | "boxdl;" => "\u{02510}",
234 | "boxDR;" => "\u{02554}",
235 | "boxDr;" => "\u{02553}",
236 | "boxdR;" => "\u{02552}",
237 | "boxdr;" => "\u{0250C}",
238 | "boxH;" => "\u{02550}",
239 | "boxh;" => "\u{02500}",
240 | "boxHD;" => "\u{02566}",
241 | "boxHd;" => "\u{02564}",
242 | "boxhD;" => "\u{02565}",
243 | "boxhd;" => "\u{0252C}",
244 | "boxHU;" => "\u{02569}",
245 | "boxHu;" => "\u{02567}",
246 | "boxhU;" => "\u{02568}",
247 | "boxhu;" => "\u{02534}",
248 | "boxminus;" => "\u{0229F}",
249 | "boxplus;" => "\u{0229E}",
250 | "boxtimes;" => "\u{022A0}",
251 | "boxUL;" => "\u{0255D}",
252 | "boxUl;" => "\u{0255C}",
253 | "boxuL;" => "\u{0255B}",
254 | "boxul;" => "\u{02518}",
255 | "boxUR;" => "\u{0255A}",
256 | "boxUr;" => "\u{02559}",
257 | "boxuR;" => "\u{02558}",
258 | "boxur;" => "\u{02514}",
259 | "boxV;" => "\u{02551}",
260 | "boxv;" => "\u{02502}",
261 | "boxVH;" => "\u{0256C}",
262 | "boxVh;" => "\u{0256B}",
263 | "boxvH;" => "\u{0256A}",
264 | "boxvh;" => "\u{0253C}",
265 | "boxVL;" => "\u{02563}",
266 | "boxVl;" => "\u{02562}",
267 | "boxvL;" => "\u{02561}",
268 | "boxvl;" => "\u{02524}",
269 | "boxVR;" => "\u{02560}",
270 | "boxVr;" => "\u{0255F}",
271 | "boxvR;" => "\u{0255E}",
272 | "boxvr;" => "\u{0251C}",
273 | "bprime;" => "\u{02035}",
274 | "Breve;" => "\u{002D8}",
275 | "breve;" => "\u{002D8}",
276 | "brvbar;" => "\u{000A6}",
277 | "brvbar" => "\u{000A6}",
278 | "Bscr;" => "\u{0212C}",
279 | "bscr;" => "\u{1D4B7}",
280 | "bsemi;" => "\u{0204F}",
281 | "bsim;" => "\u{0223D}",
282 | "bsime;" => "\u{022CD}",
283 | "bsol;" => "\u{0005C}",
284 | "bsolb;" => "\u{029C5}",
285 | "bsolhsub;" => "\u{027C8}",
286 | "bull;" => "\u{02022}",
287 | "bullet;" => "\u{02022}",
288 | "bump;" => "\u{0224E}",
289 | "bumpE;" => "\u{02AAE}",
290 | "bumpe;" => "\u{0224F}",
291 | "Bumpeq;" => "\u{0224E}",
292 | "bumpeq;" => "\u{0224F}",
293 | "Cacute;" => "\u{00106}",
294 | "cacute;" => "\u{00107}",
295 | "Cap;" => "\u{022D2}",
296 | "cap;" => "\u{02229}",
297 | "capand;" => "\u{02A44}",
298 | "capbrcup;" => "\u{02A49}",
299 | "capcap;" => "\u{02A4B}",
300 | "capcup;" => "\u{02A47}",
301 | "capdot;" => "\u{02A40}",
302 | "CapitalDifferentialD;" => "\u{02145}",
303 | "caps;" => "\u{02229}\u{0FE00}",
304 | "caret;" => "\u{02041}",
305 | "caron;" => "\u{002C7}",
306 | "Cayleys;" => "\u{0212D}",
307 | "ccaps;" => "\u{02A4D}",
308 | "Ccaron;" => "\u{0010C}",
309 | "ccaron;" => "\u{0010D}",
310 | "Ccedil;" => "\u{000C7}",
311 | "Ccedil" => "\u{000C7}",
312 | "ccedil;" => "\u{000E7}",
313 | "ccedil" => "\u{000E7}",
314 | "Ccirc;" => "\u{00108}",
315 | "ccirc;" => "\u{00109}",
316 | "Cconint;" => "\u{02230}",
317 | "ccups;" => "\u{02A4C}",
318 | "ccupssm;" => "\u{02A50}",
319 | "Cdot;" => "\u{0010A}",
320 | "cdot;" => "\u{0010B}",
321 | "cedil;" => "\u{000B8}",
322 | "cedil" => "\u{000B8}",
323 | "Cedilla;" => "\u{000B8}",
324 | "cemptyv;" => "\u{029B2}",
325 | "cent;" => "\u{000A2}",
326 | "cent" => "\u{000A2}",
327 | "CenterDot;" => "\u{000B7}",
328 | "centerdot;" => "\u{000B7}",
329 | "Cfr;" => "\u{0212D}",
330 | "cfr;" => "\u{1D520}",
331 | "CHcy;" => "\u{00427}",
332 | "chcy;" => "\u{00447}",
333 | "check;" => "\u{02713}",
334 | "checkmark;" => "\u{02713}",
335 | "Chi;" => "\u{003A7}",
336 | "chi;" => "\u{003C7}",
337 | "cir;" => "\u{025CB}",
338 | "circ;" => "\u{002C6}",
339 | "circeq;" => "\u{02257}",
340 | "circlearrowleft;" => "\u{021BA}",
341 | "circlearrowright;" => "\u{021BB}",
342 | "circledast;" => "\u{0229B}",
343 | "circledcirc;" => "\u{0229A}",
344 | "circleddash;" => "\u{0229D}",
345 | "CircleDot;" => "\u{02299}",
346 | "circledR;" => "\u{000AE}",
347 | "circledS;" => "\u{024C8}",
348 | "CircleMinus;" => "\u{02296}",
349 | "CirclePlus;" => "\u{02295}",
350 | "CircleTimes;" => "\u{02297}",
351 | "cirE;" => "\u{029C3}",
352 | "cire;" => "\u{02257}",
353 | "cirfnint;" => "\u{02A10}",
354 | "cirmid;" => "\u{02AEF}",
355 | "cirscir;" => "\u{029C2}",
356 | "ClockwiseContourIntegral;" => "\u{02232}",
357 | "CloseCurlyDoubleQuote;" => "\u{0201D}",
358 | "CloseCurlyQuote;" => "\u{02019}",
359 | "clubs;" => "\u{02663}",
360 | "clubsuit;" => "\u{02663}",
361 | "Colon;" => "\u{02237}",
362 | "colon;" => "\u{0003A}",
363 | "Colone;" => "\u{02A74}",
364 | "colone;" => "\u{02254}",
365 | "coloneq;" => "\u{02254}",
366 | "comma;" => "\u{0002C}",
367 | "commat;" => "\u{00040}",
368 | "comp;" => "\u{02201}",
369 | "compfn;" => "\u{02218}",
370 | "complement;" => "\u{02201}",
371 | "complexes;" => "\u{02102}",
372 | "cong;" => "\u{02245}",
373 | "congdot;" => "\u{02A6D}",
374 | "Congruent;" => "\u{02261}",
375 | "Conint;" => "\u{0222F}",
376 | "conint;" => "\u{0222E}",
377 | "ContourIntegral;" => "\u{0222E}",
378 | "Copf;" => "\u{02102}",
379 | "copf;" => "\u{1D554}",
380 | "coprod;" => "\u{02210}",
381 | "Coproduct;" => "\u{02210}",
382 | "COPY;" => "\u{000A9}",
383 | "COPY" => "\u{000A9}",
384 | "copy;" => "\u{000A9}",
385 | "copy" => "\u{000A9}",
386 | "copysr;" => "\u{02117}",
387 | "CounterClockwiseContourIntegral;" => "\u{02233}",
388 | "crarr;" => "\u{021B5}",
389 | "Cross;" => "\u{02A2F}",
390 | "cross;" => "\u{02717}",
391 | "Cscr;" => "\u{1D49E}",
392 | "cscr;" => "\u{1D4B8}",
393 | "csub;" => "\u{02ACF}",
394 | "csube;" => "\u{02AD1}",
395 | "csup;" => "\u{02AD0}",
396 | "csupe;" => "\u{02AD2}",
397 | "ctdot;" => "\u{022EF}",
398 | "cudarrl;" => "\u{02938}",
399 | "cudarrr;" => "\u{02935}",
400 | "cuepr;" => "\u{022DE}",
401 | "cuesc;" => "\u{022DF}",
402 | "cularr;" => "\u{021B6}",
403 | "cularrp;" => "\u{0293D}",
404 | "Cup;" => "\u{022D3}",
405 | "cup;" => "\u{0222A}",
406 | "cupbrcap;" => "\u{02A48}",
407 | "CupCap;" => "\u{0224D}",
408 | "cupcap;" => "\u{02A46}",
409 | "cupcup;" => "\u{02A4A}",
410 | "cupdot;" => "\u{0228D}",
411 | "cupor;" => "\u{02A45}",
412 | "cups;" => "\u{0222A}\u{0FE00}",
413 | "curarr;" => "\u{021B7}",
414 | "curarrm;" => "\u{0293C}",
415 | "curlyeqprec;" => "\u{022DE}",
416 | "curlyeqsucc;" => "\u{022DF}",
417 | "curlyvee;" => "\u{022CE}",
418 | "curlywedge;" => "\u{022CF}",
419 | "curren;" => "\u{000A4}",
420 | "curren" => "\u{000A4}",
421 | "curvearrowleft;" => "\u{021B6}",
422 | "curvearrowright;" => "\u{021B7}",
423 | "cuvee;" => "\u{022CE}",
424 | "cuwed;" => "\u{022CF}",
425 | "cwconint;" => "\u{02232}",
426 | "cwint;" => "\u{02231}",
427 | "cylcty;" => "\u{0232D}",
428 | "Dagger;" => "\u{02021}",
429 | "dagger;" => "\u{02020}",
430 | "daleth;" => "\u{02138}",
431 | "Darr;" => "\u{021A1}",
432 | "dArr;" => "\u{021D3}",
433 | "darr;" => "\u{02193}",
434 | "dash;" => "\u{02010}",
435 | "Dashv;" => "\u{02AE4}",
436 | "dashv;" => "\u{022A3}",
437 | "dbkarow;" => "\u{0290F}",
438 | "dblac;" => "\u{002DD}",
439 | "Dcaron;" => "\u{0010E}",
440 | "dcaron;" => "\u{0010F}",
441 | "Dcy;" => "\u{00414}",
442 | "dcy;" => "\u{00434}",
443 | "DD;" => "\u{02145}",
444 | "dd;" => "\u{02146}",
445 | "ddagger;" => "\u{02021}",
446 | "ddarr;" => "\u{021CA}",
447 | "DDotrahd;" => "\u{02911}",
448 | "ddotseq;" => "\u{02A77}",
449 | "deg;" => "\u{000B0}",
450 | "deg" => "\u{000B0}",
451 | "Del;" => "\u{02207}",
452 | "Delta;" => "\u{00394}",
453 | "delta;" => "\u{003B4}",
454 | "demptyv;" => "\u{029B1}",
455 | "dfisht;" => "\u{0297F}",
456 | "Dfr;" => "\u{1D507}",
457 | "dfr;" => "\u{1D521}",
458 | "dHar;" => "\u{02965}",
459 | "dharl;" => "\u{021C3}",
460 | "dharr;" => "\u{021C2}",
461 | "DiacriticalAcute;" => "\u{000B4}",
462 | "DiacriticalDot;" => "\u{002D9}",
463 | "DiacriticalDoubleAcute;" => "\u{002DD}",
464 | "DiacriticalGrave;" => "\u{00060}",
465 | "DiacriticalTilde;" => "\u{002DC}",
466 | "diam;" => "\u{022C4}",
467 | "Diamond;" => "\u{022C4}",
468 | "diamond;" => "\u{022C4}",
469 | "diamondsuit;" => "\u{02666}",
470 | "diams;" => "\u{02666}",
471 | "die;" => "\u{000A8}",
472 | "DifferentialD;" => "\u{02146}",
473 | "digamma;" => "\u{003DD}",
474 | "disin;" => "\u{022F2}",
475 | "div;" => "\u{000F7}",
476 | "divide;" => "\u{000F7}",
477 | "divide" => "\u{000F7}",
478 | "divideontimes;" => "\u{022C7}",
479 | "divonx;" => "\u{022C7}",
480 | "DJcy;" => "\u{00402}",
481 | "djcy;" => "\u{00452}",
482 | "dlcorn;" => "\u{0231E}",
483 | "dlcrop;" => "\u{0230D}",
484 | "dollar;" => "\u{00024}",
485 | "Dopf;" => "\u{1D53B}",
486 | "dopf;" => "\u{1D555}",
487 | "Dot;" => "\u{000A8}",
488 | "dot;" => "\u{002D9}",
489 | "DotDot;" => "\u{020DC}",
490 | "doteq;" => "\u{02250}",
491 | "doteqdot;" => "\u{02251}",
492 | "DotEqual;" => "\u{02250}",
493 | "dotminus;" => "\u{02238}",
494 | "dotplus;" => "\u{02214}",
495 | "dotsquare;" => "\u{022A1}",
496 | "doublebarwedge;" => "\u{02306}",
497 | "DoubleContourIntegral;" => "\u{0222F}",
498 | "DoubleDot;" => "\u{000A8}",
499 | "DoubleDownArrow;" => "\u{021D3}",
500 | "DoubleLeftArrow;" => "\u{021D0}",
501 | "DoubleLeftRightArrow;" => "\u{021D4}",
502 | "DoubleLeftTee;" => "\u{02AE4}",
503 | "DoubleLongLeftArrow;" => "\u{027F8}",
504 | "DoubleLongLeftRightArrow;" => "\u{027FA}",
505 | "DoubleLongRightArrow;" => "\u{027F9}",
506 | "DoubleRightArrow;" => "\u{021D2}",
507 | "DoubleRightTee;" => "\u{022A8}",
508 | "DoubleUpArrow;" => "\u{021D1}",
509 | "DoubleUpDownArrow;" => "\u{021D5}",
510 | "DoubleVerticalBar;" => "\u{02225}",
511 | "DownArrow;" => "\u{02193}",
512 | "Downarrow;" => "\u{021D3}",
513 | "downarrow;" => "\u{02193}",
514 | "DownArrowBar;" => "\u{02913}",
515 | "DownArrowUpArrow;" => "\u{021F5}",
516 | "DownBreve;" => "\u{00311}",
517 | "downdownarrows;" => "\u{021CA}",
518 | "downharpoonleft;" => "\u{021C3}",
519 | "downharpoonright;" => "\u{021C2}",
520 | "DownLeftRightVector;" => "\u{02950}",
521 | "DownLeftTeeVector;" => "\u{0295E}",
522 | "DownLeftVector;" => "\u{021BD}",
523 | "DownLeftVectorBar;" => "\u{02956}",
524 | "DownRightTeeVector;" => "\u{0295F}",
525 | "DownRightVector;" => "\u{021C1}",
526 | "DownRightVectorBar;" => "\u{02957}",
527 | "DownTee;" => "\u{022A4}",
528 | "DownTeeArrow;" => "\u{021A7}",
529 | "drbkarow;" => "\u{02910}",
530 | "drcorn;" => "\u{0231F}",
531 | "drcrop;" => "\u{0230C}",
532 | "Dscr;" => "\u{1D49F}",
533 | "dscr;" => "\u{1D4B9}",
534 | "DScy;" => "\u{00405}",
535 | "dscy;" => "\u{00455}",
536 | "dsol;" => "\u{029F6}",
537 | "Dstrok;" => "\u{00110}",
538 | "dstrok;" => "\u{00111}",
539 | "dtdot;" => "\u{022F1}",
540 | "dtri;" => "\u{025BF}",
541 | "dtrif;" => "\u{025BE}",
542 | "duarr;" => "\u{021F5}",
543 | "duhar;" => "\u{0296F}",
544 | "dwangle;" => "\u{029A6}",
545 | "DZcy;" => "\u{0040F}",
546 | "dzcy;" => "\u{0045F}",
547 | "dzigrarr;" => "\u{027FF}",
548 | "Eacute;" => "\u{000C9}",
549 | "Eacute" => "\u{000C9}",
550 | "eacute;" => "\u{000E9}",
551 | "eacute" => "\u{000E9}",
552 | "easter;" => "\u{02A6E}",
553 | "Ecaron;" => "\u{0011A}",
554 | "ecaron;" => "\u{0011B}",
555 | "ecir;" => "\u{02256}",
556 | "Ecirc;" => "\u{000CA}",
557 | "Ecirc" => "\u{000CA}",
558 | "ecirc;" => "\u{000EA}",
559 | "ecirc" => "\u{000EA}",
560 | "ecolon;" => "\u{02255}",
561 | "Ecy;" => "\u{0042D}",
562 | "ecy;" => "\u{0044D}",
563 | "eDDot;" => "\u{02A77}",
564 | "Edot;" => "\u{00116}",
565 | "eDot;" => "\u{02251}",
566 | "edot;" => "\u{00117}",
567 | "ee;" => "\u{02147}",
568 | "efDot;" => "\u{02252}",
569 | "Efr;" => "\u{1D508}",
570 | "efr;" => "\u{1D522}",
571 | "eg;" => "\u{02A9A}",
572 | "Egrave;" => "\u{000C8}",
573 | "Egrave" => "\u{000C8}",
574 | "egrave;" => "\u{000E8}",
575 | "egrave" => "\u{000E8}",
576 | "egs;" => "\u{02A96}",
577 | "egsdot;" => "\u{02A98}",
578 | "el;" => "\u{02A99}",
579 | "Element;" => "\u{02208}",
580 | "elinters;" => "\u{023E7}",
581 | "ell;" => "\u{02113}",
582 | "els;" => "\u{02A95}",
583 | "elsdot;" => "\u{02A97}",
584 | "Emacr;" => "\u{00112}",
585 | "emacr;" => "\u{00113}",
586 | "empty;" => "\u{02205}",
587 | "emptyset;" => "\u{02205}",
588 | "EmptySmallSquare;" => "\u{025FB}",
589 | "emptyv;" => "\u{02205}",
590 | "EmptyVerySmallSquare;" => "\u{025AB}",
591 | "emsp;" => "\u{02003}",
592 | "emsp13;" => "\u{02004}",
593 | "emsp14;" => "\u{02005}",
594 | "ENG;" => "\u{0014A}",
595 | "eng;" => "\u{0014B}",
596 | "ensp;" => "\u{02002}",
597 | "Eogon;" => "\u{00118}",
598 | "eogon;" => "\u{00119}",
599 | "Eopf;" => "\u{1D53C}",
600 | "eopf;" => "\u{1D556}",
601 | "epar;" => "\u{022D5}",
602 | "eparsl;" => "\u{029E3}",
603 | "eplus;" => "\u{02A71}",
604 | "epsi;" => "\u{003B5}",
605 | "Epsilon;" => "\u{00395}",
606 | "epsilon;" => "\u{003B5}",
607 | "epsiv;" => "\u{003F5}",
608 | "eqcirc;" => "\u{02256}",
609 | "eqcolon;" => "\u{02255}",
610 | "eqsim;" => "\u{02242}",
611 | "eqslantgtr;" => "\u{02A96}",
612 | "eqslantless;" => "\u{02A95}",
613 | "Equal;" => "\u{02A75}",
614 | "equals;" => "\u{0003D}",
615 | "EqualTilde;" => "\u{02242}",
616 | "equest;" => "\u{0225F}",
617 | "Equilibrium;" => "\u{021CC}",
618 | "equiv;" => "\u{02261}",
619 | "equivDD;" => "\u{02A78}",
620 | "eqvparsl;" => "\u{029E5}",
621 | "erarr;" => "\u{02971}",
622 | "erDot;" => "\u{02253}",
623 | "Escr;" => "\u{02130}",
624 | "escr;" => "\u{0212F}",
625 | "esdot;" => "\u{02250}",
626 | "Esim;" => "\u{02A73}",
627 | "esim;" => "\u{02242}",
628 | "Eta;" => "\u{00397}",
629 | "eta;" => "\u{003B7}",
630 | "ETH;" => "\u{000D0}",
631 | "ETH" => "\u{000D0}",
632 | "eth;" => "\u{000F0}",
633 | "eth" => "\u{000F0}",
634 | "Euml;" => "\u{000CB}",
635 | "Euml" => "\u{000CB}",
636 | "euml;" => "\u{000EB}",
637 | "euml" => "\u{000EB}",
638 | "euro;" => "\u{020AC}",
639 | "excl;" => "\u{00021}",
640 | "exist;" => "\u{02203}",
641 | "Exists;" => "\u{02203}",
642 | "expectation;" => "\u{02130}",
643 | "ExponentialE;" => "\u{02147}",
644 | "exponentiale;" => "\u{02147}",
645 | "fallingdotseq;" => "\u{02252}",
646 | "Fcy;" => "\u{00424}",
647 | "fcy;" => "\u{00444}",
648 | "female;" => "\u{02640}",
649 | "ffilig;" => "\u{0FB03}",
650 | "fflig;" => "\u{0FB00}",
651 | "ffllig;" => "\u{0FB04}",
652 | "Ffr;" => "\u{1D509}",
653 | "ffr;" => "\u{1D523}",
654 | "filig;" => "\u{0FB01}",
655 | "FilledSmallSquare;" => "\u{025FC}",
656 | "FilledVerySmallSquare;" => "\u{025AA}",
657 | "fjlig;" => "\u{00066}\u{0006A}",
658 | "flat;" => "\u{0266D}",
659 | "fllig;" => "\u{0FB02}",
660 | "fltns;" => "\u{025B1}",
661 | "fnof;" => "\u{00192}",
662 | "Fopf;" => "\u{1D53D}",
663 | "fopf;" => "\u{1D557}",
664 | "ForAll;" => "\u{02200}",
665 | "forall;" => "\u{02200}",
666 | "fork;" => "\u{022D4}",
667 | "forkv;" => "\u{02AD9}",
668 | "Fouriertrf;" => "\u{02131}",
669 | "fpartint;" => "\u{02A0D}",
670 | "frac12;" => "\u{000BD}",
671 | "frac12" => "\u{000BD}",
672 | "frac13;" => "\u{02153}",
673 | "frac14;" => "\u{000BC}",
674 | "frac14" => "\u{000BC}",
675 | "frac15;" => "\u{02155}",
676 | "frac16;" => "\u{02159}",
677 | "frac18;" => "\u{0215B}",
678 | "frac23;" => "\u{02154}",
679 | "frac25;" => "\u{02156}",
680 | "frac34;" => "\u{000BE}",
681 | "frac34" => "\u{000BE}",
682 | "frac35;" => "\u{02157}",
683 | "frac38;" => "\u{0215C}",
684 | "frac45;" => "\u{02158}",
685 | "frac56;" => "\u{0215A}",
686 | "frac58;" => "\u{0215D}",
687 | "frac78;" => "\u{0215E}",
688 | "frasl;" => "\u{02044}",
689 | "frown;" => "\u{02322}",
690 | "Fscr;" => "\u{02131}",
691 | "fscr;" => "\u{1D4BB}",
692 | "gacute;" => "\u{001F5}",
693 | "Gamma;" => "\u{00393}",
694 | "gamma;" => "\u{003B3}",
695 | "Gammad;" => "\u{003DC}",
696 | "gammad;" => "\u{003DD}",
697 | "gap;" => "\u{02A86}",
698 | "Gbreve;" => "\u{0011E}",
699 | "gbreve;" => "\u{0011F}",
700 | "Gcedil;" => "\u{00122}",
701 | "Gcirc;" => "\u{0011C}",
702 | "gcirc;" => "\u{0011D}",
703 | "Gcy;" => "\u{00413}",
704 | "gcy;" => "\u{00433}",
705 | "Gdot;" => "\u{00120}",
706 | "gdot;" => "\u{00121}",
707 | "gE;" => "\u{02267}",
708 | "ge;" => "\u{02265}",
709 | "gEl;" => "\u{02A8C}",
710 | "gel;" => "\u{022DB}",
711 | "geq;" => "\u{02265}",
712 | "geqq;" => "\u{02267}",
713 | "geqslant;" => "\u{02A7E}",
714 | "ges;" => "\u{02A7E}",
715 | "gescc;" => "\u{02AA9}",
716 | "gesdot;" => "\u{02A80}",
717 | "gesdoto;" => "\u{02A82}",
718 | "gesdotol;" => "\u{02A84}",
719 | "gesl;" => "\u{022DB}\u{0FE00}",
720 | "gesles;" => "\u{02A94}",
721 | "Gfr;" => "\u{1D50A}",
722 | "gfr;" => "\u{1D524}",
723 | "Gg;" => "\u{022D9}",
724 | "gg;" => "\u{0226B}",
725 | "ggg;" => "\u{022D9}",
726 | "gimel;" => "\u{02137}",
727 | "GJcy;" => "\u{00403}",
728 | "gjcy;" => "\u{00453}",
729 | "gl;" => "\u{02277}",
730 | "gla;" => "\u{02AA5}",
731 | "glE;" => "\u{02A92}",
732 | "glj;" => "\u{02AA4}",
733 | "gnap;" => "\u{02A8A}",
734 | "gnapprox;" => "\u{02A8A}",
735 | "gnE;" => "\u{02269}",
736 | "gne;" => "\u{02A88}",
737 | "gneq;" => "\u{02A88}",
738 | "gneqq;" => "\u{02269}",
739 | "gnsim;" => "\u{022E7}",
740 | "Gopf;" => "\u{1D53E}",
741 | "gopf;" => "\u{1D558}",
742 | "grave;" => "\u{00060}",
743 | "GreaterEqual;" => "\u{02265}",
744 | "GreaterEqualLess;" => "\u{022DB}",
745 | "GreaterFullEqual;" => "\u{02267}",
746 | "GreaterGreater;" => "\u{02AA2}",
747 | "GreaterLess;" => "\u{02277}",
748 | "GreaterSlantEqual;" => "\u{02A7E}",
749 | "GreaterTilde;" => "\u{02273}",
750 | "Gscr;" => "\u{1D4A2}",
751 | "gscr;" => "\u{0210A}",
752 | "gsim;" => "\u{02273}",
753 | "gsime;" => "\u{02A8E}",
754 | "gsiml;" => "\u{02A90}",
755 | "GT;" => "\u{0003E}",
756 | "GT" => "\u{0003E}",
757 | "Gt;" => "\u{0226B}",
758 | "gt;" => "\u{0003E}",
759 | "gt" => "\u{0003E}",
760 | "gtcc;" => "\u{02AA7}",
761 | "gtcir;" => "\u{02A7A}",
762 | "gtdot;" => "\u{022D7}",
763 | "gtlPar;" => "\u{02995}",
764 | "gtquest;" => "\u{02A7C}",
765 | "gtrapprox;" => "\u{02A86}",
766 | "gtrarr;" => "\u{02978}",
767 | "gtrdot;" => "\u{022D7}",
768 | "gtreqless;" => "\u{022DB}",
769 | "gtreqqless;" => "\u{02A8C}",
770 | "gtrless;" => "\u{02277}",
771 | "gtrsim;" => "\u{02273}",
772 | "gvertneqq;" => "\u{02269}\u{0FE00}",
773 | "gvnE;" => "\u{02269}\u{0FE00}",
774 | "Hacek;" => "\u{002C7}",
775 | "hairsp;" => "\u{0200A}",
776 | "half;" => "\u{000BD}",
777 | "hamilt;" => "\u{0210B}",
778 | "HARDcy;" => "\u{0042A}",
779 | "hardcy;" => "\u{0044A}",
780 | "hArr;" => "\u{021D4}",
781 | "harr;" => "\u{02194}",
782 | "harrcir;" => "\u{02948}",
783 | "harrw;" => "\u{021AD}",
784 | "Hat;" => "\u{0005E}",
785 | "hbar;" => "\u{0210F}",
786 | "Hcirc;" => "\u{00124}",
787 | "hcirc;" => "\u{00125}",
788 | "hearts;" => "\u{02665}",
789 | "heartsuit;" => "\u{02665}",
790 | "hellip;" => "\u{02026}",
791 | "hercon;" => "\u{022B9}",
792 | "Hfr;" => "\u{0210C}",
793 | "hfr;" => "\u{1D525}",
794 | "HilbertSpace;" => "\u{0210B}",
795 | "hksearow;" => "\u{02925}",
796 | "hkswarow;" => "\u{02926}",
797 | "hoarr;" => "\u{021FF}",
798 | "homtht;" => "\u{0223B}",
799 | "hookleftarrow;" => "\u{021A9}",
800 | "hookrightarrow;" => "\u{021AA}",
801 | "Hopf;" => "\u{0210D}",
802 | "hopf;" => "\u{1D559}",
803 | "horbar;" => "\u{02015}",
804 | "HorizontalLine;" => "\u{02500}",
805 | "Hscr;" => "\u{0210B}",
806 | "hscr;" => "\u{1D4BD}",
807 | "hslash;" => "\u{0210F}",
808 | "Hstrok;" => "\u{00126}",
809 | "hstrok;" => "\u{00127}",
810 | "HumpDownHump;" => "\u{0224E}",
811 | "HumpEqual;" => "\u{0224F}",
812 | "hybull;" => "\u{02043}",
813 | "hyphen;" => "\u{02010}",
814 | "Iacute;" => "\u{000CD}",
815 | "Iacute" => "\u{000CD}",
816 | "iacute;" => "\u{000ED}",
817 | "iacute" => "\u{000ED}",
818 | "ic;" => "\u{02063}",
819 | "Icirc;" => "\u{000CE}",
820 | "Icirc" => "\u{000CE}",
821 | "icirc;" => "\u{000EE}",
822 | "icirc" => "\u{000EE}",
823 | "Icy;" => "\u{00418}",
824 | "icy;" => "\u{00438}",
825 | "Idot;" => "\u{00130}",
826 | "IEcy;" => "\u{00415}",
827 | "iecy;" => "\u{00435}",
828 | "iexcl;" => "\u{000A1}",
829 | "iexcl" => "\u{000A1}",
830 | "iff;" => "\u{021D4}",
831 | "Ifr;" => "\u{02111}",
832 | "ifr;" => "\u{1D526}",
833 | "Igrave;" => "\u{000CC}",
834 | "Igrave" => "\u{000CC}",
835 | "igrave;" => "\u{000EC}",
836 | "igrave" => "\u{000EC}",
837 | "ii;" => "\u{02148}",
838 | "iiiint;" => "\u{02A0C}",
839 | "iiint;" => "\u{0222D}",
840 | "iinfin;" => "\u{029DC}",
841 | "iiota;" => "\u{02129}",
842 | "IJlig;" => "\u{00132}",
843 | "ijlig;" => "\u{00133}",
844 | "Im;" => "\u{02111}",
845 | "Imacr;" => "\u{0012A}",
846 | "imacr;" => "\u{0012B}",
847 | "image;" => "\u{02111}",
848 | "ImaginaryI;" => "\u{02148}",
849 | "imagline;" => "\u{02110}",
850 | "imagpart;" => "\u{02111}",
851 | "imath;" => "\u{00131}",
852 | "imof;" => "\u{022B7}",
853 | "imped;" => "\u{001B5}",
854 | "Implies;" => "\u{021D2}",
855 | "in;" => "\u{02208}",
856 | "incare;" => "\u{02105}",
857 | "infin;" => "\u{0221E}",
858 | "infintie;" => "\u{029DD}",
859 | "inodot;" => "\u{00131}",
860 | "Int;" => "\u{0222C}",
861 | "int;" => "\u{0222B}",
862 | "intcal;" => "\u{022BA}",
863 | "integers;" => "\u{02124}",
864 | "Integral;" => "\u{0222B}",
865 | "intercal;" => "\u{022BA}",
866 | "Intersection;" => "\u{022C2}",
867 | "intlarhk;" => "\u{02A17}",
868 | "intprod;" => "\u{02A3C}",
869 | "InvisibleComma;" => "\u{02063}",
870 | "InvisibleTimes;" => "\u{02062}",
871 | "IOcy;" => "\u{00401}",
872 | "iocy;" => "\u{00451}",
873 | "Iogon;" => "\u{0012E}",
874 | "iogon;" => "\u{0012F}",
875 | "Iopf;" => "\u{1D540}",
876 | "iopf;" => "\u{1D55A}",
877 | "Iota;" => "\u{00399}",
878 | "iota;" => "\u{003B9}",
879 | "iprod;" => "\u{02A3C}",
880 | "iquest;" => "\u{000BF}",
881 | "iquest" => "\u{000BF}",
882 | "Iscr;" => "\u{02110}",
883 | "iscr;" => "\u{1D4BE}",
884 | "isin;" => "\u{02208}",
885 | "isindot;" => "\u{022F5}",
886 | "isinE;" => "\u{022F9}",
887 | "isins;" => "\u{022F4}",
888 | "isinsv;" => "\u{022F3}",
889 | "isinv;" => "\u{02208}",
890 | "it;" => "\u{02062}",
891 | "Itilde;" => "\u{00128}",
892 | "itilde;" => "\u{00129}",
893 | "Iukcy;" => "\u{00406}",
894 | "iukcy;" => "\u{00456}",
895 | "Iuml;" => "\u{000CF}",
896 | "Iuml" => "\u{000CF}",
897 | "iuml;" => "\u{000EF}",
898 | "iuml" => "\u{000EF}",
899 | "Jcirc;" => "\u{00134}",
900 | "jcirc;" => "\u{00135}",
901 | "Jcy;" => "\u{00419}",
902 | "jcy;" => "\u{00439}",
903 | "Jfr;" => "\u{1D50D}",
904 | "jfr;" => "\u{1D527}",
905 | "jmath;" => "\u{00237}",
906 | "Jopf;" => "\u{1D541}",
907 | "jopf;" => "\u{1D55B}",
908 | "Jscr;" => "\u{1D4A5}",
909 | "jscr;" => "\u{1D4BF}",
910 | "Jsercy;" => "\u{00408}",
911 | "jsercy;" => "\u{00458}",
912 | "Jukcy;" => "\u{00404}",
913 | "jukcy;" => "\u{00454}",
914 | "Kappa;" => "\u{0039A}",
915 | "kappa;" => "\u{003BA}",
916 | "kappav;" => "\u{003F0}",
917 | "Kcedil;" => "\u{00136}",
918 | "kcedil;" => "\u{00137}",
919 | "Kcy;" => "\u{0041A}",
920 | "kcy;" => "\u{0043A}",
921 | "Kfr;" => "\u{1D50E}",
922 | "kfr;" => "\u{1D528}",
923 | "kgreen;" => "\u{00138}",
924 | "KHcy;" => "\u{00425}",
925 | "khcy;" => "\u{00445}",
926 | "KJcy;" => "\u{0040C}",
927 | "kjcy;" => "\u{0045C}",
928 | "Kopf;" => "\u{1D542}",
929 | "kopf;" => "\u{1D55C}",
930 | "Kscr;" => "\u{1D4A6}",
931 | "kscr;" => "\u{1D4C0}",
932 | "lAarr;" => "\u{021DA}",
933 | "Lacute;" => "\u{00139}",
934 | "lacute;" => "\u{0013A}",
935 | "laemptyv;" => "\u{029B4}",
936 | "lagran;" => "\u{02112}",
937 | "Lambda;" => "\u{0039B}",
938 | "lambda;" => "\u{003BB}",
939 | "Lang;" => "\u{027EA}",
940 | "lang;" => "\u{027E8}",
941 | "langd;" => "\u{02991}",
942 | "langle;" => "\u{027E8}",
943 | "lap;" => "\u{02A85}",
944 | "Laplacetrf;" => "\u{02112}",
945 | "laquo;" => "\u{000AB}",
946 | "laquo" => "\u{000AB}",
947 | "Larr;" => "\u{0219E}",
948 | "lArr;" => "\u{021D0}",
949 | "larr;" => "\u{02190}",
950 | "larrb;" => "\u{021E4}",
951 | "larrbfs;" => "\u{0291F}",
952 | "larrfs;" => "\u{0291D}",
953 | "larrhk;" => "\u{021A9}",
954 | "larrlp;" => "\u{021AB}",
955 | "larrpl;" => "\u{02939}",
956 | "larrsim;" => "\u{02973}",
957 | "larrtl;" => "\u{021A2}",
958 | "lat;" => "\u{02AAB}",
959 | "lAtail;" => "\u{0291B}",
960 | "latail;" => "\u{02919}",
961 | "late;" => "\u{02AAD}",
962 | "lates;" => "\u{02AAD}\u{0FE00}",
963 | "lBarr;" => "\u{0290E}",
964 | "lbarr;" => "\u{0290C}",
965 | "lbbrk;" => "\u{02772}",
966 | "lbrace;" => "\u{0007B}",
967 | "lbrack;" => "\u{0005B}",
968 | "lbrke;" => "\u{0298B}",
969 | "lbrksld;" => "\u{0298F}",
970 | "lbrkslu;" => "\u{0298D}",
971 | "Lcaron;" => "\u{0013D}",
972 | "lcaron;" => "\u{0013E}",
973 | "Lcedil;" => "\u{0013B}",
974 | "lcedil;" => "\u{0013C}",
975 | "lceil;" => "\u{02308}",
976 | "lcub;" => "\u{0007B}",
977 | "Lcy;" => "\u{0041B}",
978 | "lcy;" => "\u{0043B}",
979 | "ldca;" => "\u{02936}",
980 | "ldquo;" => "\u{0201C}",
981 | "ldquor;" => "\u{0201E}",
982 | "ldrdhar;" => "\u{02967}",
983 | "ldrushar;" => "\u{0294B}",
984 | "ldsh;" => "\u{021B2}",
985 | "lE;" => "\u{02266}",
986 | "le;" => "\u{02264}",
987 | "LeftAngleBracket;" => "\u{027E8}",
988 | "LeftArrow;" => "\u{02190}",
989 | "Leftarrow;" => "\u{021D0}",
990 | "leftarrow;" => "\u{02190}",
991 | "LeftArrowBar;" => "\u{021E4}",
992 | "LeftArrowRightArrow;" => "\u{021C6}",
993 | "leftarrowtail;" => "\u{021A2}",
994 | "LeftCeiling;" => "\u{02308}",
995 | "LeftDoubleBracket;" => "\u{027E6}",
996 | "LeftDownTeeVector;" => "\u{02961}",
997 | "LeftDownVector;" => "\u{021C3}",
998 | "LeftDownVectorBar;" => "\u{02959}",
999 | "LeftFloor;" => "\u{0230A}",
1000 | "leftharpoondown;" => "\u{021BD}",
1001 | "leftharpoonup;" => "\u{021BC}",
1002 | "leftleftarrows;" => "\u{021C7}",
1003 | "LeftRightArrow;" => "\u{02194}",
1004 | "Leftrightarrow;" => "\u{021D4}",
1005 | "leftrightarrow;" => "\u{02194}",
1006 | "leftrightarrows;" => "\u{021C6}",
1007 | "leftrightharpoons;" => "\u{021CB}",
1008 | "leftrightsquigarrow;" => "\u{021AD}",
1009 | "LeftRightVector;" => "\u{0294E}",
1010 | "LeftTee;" => "\u{022A3}",
1011 | "LeftTeeArrow;" => "\u{021A4}",
1012 | "LeftTeeVector;" => "\u{0295A}",
1013 | "leftthreetimes;" => "\u{022CB}",
1014 | "LeftTriangle;" => "\u{022B2}",
1015 | "LeftTriangleBar;" => "\u{029CF}",
1016 | "LeftTriangleEqual;" => "\u{022B4}",
1017 | "LeftUpDownVector;" => "\u{02951}",
1018 | "LeftUpTeeVector;" => "\u{02960}",
1019 | "LeftUpVector;" => "\u{021BF}",
1020 | "LeftUpVectorBar;" => "\u{02958}",
1021 | "LeftVector;" => "\u{021BC}",
1022 | "LeftVectorBar;" => "\u{02952}",
1023 | "lEg;" => "\u{02A8B}",
1024 | "leg;" => "\u{022DA}",
1025 | "leq;" => "\u{02264}",
1026 | "leqq;" => "\u{02266}",
1027 | "leqslant;" => "\u{02A7D}",
1028 | "les;" => "\u{02A7D}",
1029 | "lescc;" => "\u{02AA8}",
1030 | "lesdot;" => "\u{02A7F}",
1031 | "lesdoto;" => "\u{02A81}",
1032 | "lesdotor;" => "\u{02A83}",
1033 | "lesg;" => "\u{022DA}\u{0FE00}",
1034 | "lesges;" => "\u{02A93}",
1035 | "lessapprox;" => "\u{02A85}",
1036 | "lessdot;" => "\u{022D6}",
1037 | "lesseqgtr;" => "\u{022DA}",
1038 | "lesseqqgtr;" => "\u{02A8B}",
1039 | "LessEqualGreater;" => "\u{022DA}",
1040 | "LessFullEqual;" => "\u{02266}",
1041 | "LessGreater;" => "\u{02276}",
1042 | "lessgtr;" => "\u{02276}",
1043 | "LessLess;" => "\u{02AA1}",
1044 | "lesssim;" => "\u{02272}",
1045 | "LessSlantEqual;" => "\u{02A7D}",
1046 | "LessTilde;" => "\u{02272}",
1047 | "lfisht;" => "\u{0297C}",
1048 | "lfloor;" => "\u{0230A}",
1049 | "Lfr;" => "\u{1D50F}",
1050 | "lfr;" => "\u{1D529}",
1051 | "lg;" => "\u{02276}",
1052 | "lgE;" => "\u{02A91}",
1053 | "lHar;" => "\u{02962}",
1054 | "lhard;" => "\u{021BD}",
1055 | "lharu;" => "\u{021BC}",
1056 | "lharul;" => "\u{0296A}",
1057 | "lhblk;" => "\u{02584}",
1058 | "LJcy;" => "\u{00409}",
1059 | "ljcy;" => "\u{00459}",
1060 | "Ll;" => "\u{022D8}",
1061 | "ll;" => "\u{0226A}",
1062 | "llarr;" => "\u{021C7}",
1063 | "llcorner;" => "\u{0231E}",
1064 | "Lleftarrow;" => "\u{021DA}",
1065 | "llhard;" => "\u{0296B}",
1066 | "lltri;" => "\u{025FA}",
1067 | "Lmidot;" => "\u{0013F}",
1068 | "lmidot;" => "\u{00140}",
1069 | "lmoust;" => "\u{023B0}",
1070 | "lmoustache;" => "\u{023B0}",
1071 | "lnap;" => "\u{02A89}",
1072 | "lnapprox;" => "\u{02A89}",
1073 | "lnE;" => "\u{02268}",
1074 | "lne;" => "\u{02A87}",
1075 | "lneq;" => "\u{02A87}",
1076 | "lneqq;" => "\u{02268}",
1077 | "lnsim;" => "\u{022E6}",
1078 | "loang;" => "\u{027EC}",
1079 | "loarr;" => "\u{021FD}",
1080 | "lobrk;" => "\u{027E6}",
1081 | "LongLeftArrow;" => "\u{027F5}",
1082 | "Longleftarrow;" => "\u{027F8}",
1083 | "longleftarrow;" => "\u{027F5}",
1084 | "LongLeftRightArrow;" => "\u{027F7}",
1085 | "Longleftrightarrow;" => "\u{027FA}",
1086 | "longleftrightarrow;" => "\u{027F7}",
1087 | "longmapsto;" => "\u{027FC}",
1088 | "LongRightArrow;" => "\u{027F6}",
1089 | "Longrightarrow;" => "\u{027F9}",
1090 | "longrightarrow;" => "\u{027F6}",
1091 | "looparrowleft;" => "\u{021AB}",
1092 | "looparrowright;" => "\u{021AC}",
1093 | "lopar;" => "\u{02985}",
1094 | "Lopf;" => "\u{1D543}",
1095 | "lopf;" => "\u{1D55D}",
1096 | "loplus;" => "\u{02A2D}",
1097 | "lotimes;" => "\u{02A34}",
1098 | "lowast;" => "\u{02217}",
1099 | "lowbar;" => "\u{0005F}",
1100 | "LowerLeftArrow;" => "\u{02199}",
1101 | "LowerRightArrow;" => "\u{02198}",
1102 | "loz;" => "\u{025CA}",
1103 | "lozenge;" => "\u{025CA}",
1104 | "lozf;" => "\u{029EB}",
1105 | "lpar;" => "\u{00028}",
1106 | "lparlt;" => "\u{02993}",
1107 | "lrarr;" => "\u{021C6}",
1108 | "lrcorner;" => "\u{0231F}",
1109 | "lrhar;" => "\u{021CB}",
1110 | "lrhard;" => "\u{0296D}",
1111 | "lrm;" => "\u{0200E}",
1112 | "lrtri;" => "\u{022BF}",
1113 | "lsaquo;" => "\u{02039}",
1114 | "Lscr;" => "\u{02112}",
1115 | "lscr;" => "\u{1D4C1}",
1116 | "Lsh;" => "\u{021B0}",
1117 | "lsh;" => "\u{021B0}",
1118 | "lsim;" => "\u{02272}",
1119 | "lsime;" => "\u{02A8D}",
1120 | "lsimg;" => "\u{02A8F}",
1121 | "lsqb;" => "\u{0005B}",
1122 | "lsquo;" => "\u{02018}",
1123 | "lsquor;" => "\u{0201A}",
1124 | "Lstrok;" => "\u{00141}",
1125 | "lstrok;" => "\u{00142}",
1126 | "LT;" => "\u{0003C}",
1127 | "LT" => "\u{0003C}",
1128 | "Lt;" => "\u{0226A}",
1129 | "lt;" => "\u{0003C}",
1130 | "lt" => "\u{0003C}",
1131 | "ltcc;" => "\u{02AA6}",
1132 | "ltcir;" => "\u{02A79}",
1133 | "ltdot;" => "\u{022D6}",
1134 | "lthree;" => "\u{022CB}",
1135 | "ltimes;" => "\u{022C9}",
1136 | "ltlarr;" => "\u{02976}",
1137 | "ltquest;" => "\u{02A7B}",
1138 | "ltri;" => "\u{025C3}",
1139 | "ltrie;" => "\u{022B4}",
1140 | "ltrif;" => "\u{025C2}",
1141 | "ltrPar;" => "\u{02996}",
1142 | "lurdshar;" => "\u{0294A}",
1143 | "luruhar;" => "\u{02966}",
1144 | "lvertneqq;" => "\u{02268}\u{0FE00}",
1145 | "lvnE;" => "\u{02268}\u{0FE00}",
1146 | "macr;" => "\u{000AF}",
1147 | "macr" => "\u{000AF}",
1148 | "male;" => "\u{02642}",
1149 | "malt;" => "\u{02720}",
1150 | "maltese;" => "\u{02720}",
1151 | "Map;" => "\u{02905}",
1152 | "map;" => "\u{021A6}",
1153 | "mapsto;" => "\u{021A6}",
1154 | "mapstodown;" => "\u{021A7}",
1155 | "mapstoleft;" => "\u{021A4}",
1156 | "mapstoup;" => "\u{021A5}",
1157 | "marker;" => "\u{025AE}",
1158 | "mcomma;" => "\u{02A29}",
1159 | "Mcy;" => "\u{0041C}",
1160 | "mcy;" => "\u{0043C}",
1161 | "mdash;" => "\u{02014}",
1162 | "mDDot;" => "\u{0223A}",
1163 | "measuredangle;" => "\u{02221}",
1164 | "MediumSpace;" => "\u{0205F}",
1165 | "Mellintrf;" => "\u{02133}",
1166 | "Mfr;" => "\u{1D510}",
1167 | "mfr;" => "\u{1D52A}",
1168 | "mho;" => "\u{02127}",
1169 | "micro;" => "\u{000B5}",
1170 | "micro" => "\u{000B5}",
1171 | "mid;" => "\u{02223}",
1172 | "midast;" => "\u{0002A}",
1173 | "midcir;" => "\u{02AF0}",
1174 | "middot;" => "\u{000B7}",
1175 | "middot" => "\u{000B7}",
1176 | "minus;" => "\u{02212}",
1177 | "minusb;" => "\u{0229F}",
1178 | "minusd;" => "\u{02238}",
1179 | "minusdu;" => "\u{02A2A}",
1180 | "MinusPlus;" => "\u{02213}",
1181 | "mlcp;" => "\u{02ADB}",
1182 | "mldr;" => "\u{02026}",
1183 | "mnplus;" => "\u{02213}",
1184 | "models;" => "\u{022A7}",
1185 | "Mopf;" => "\u{1D544}",
1186 | "mopf;" => "\u{1D55E}",
1187 | "mp;" => "\u{02213}",
1188 | "Mscr;" => "\u{02133}",
1189 | "mscr;" => "\u{1D4C2}",
1190 | "mstpos;" => "\u{0223E}",
1191 | "Mu;" => "\u{0039C}",
1192 | "mu;" => "\u{003BC}",
1193 | "multimap;" => "\u{022B8}",
1194 | "mumap;" => "\u{022B8}",
1195 | "nabla;" => "\u{02207}",
1196 | "Nacute;" => "\u{00143}",
1197 | "nacute;" => "\u{00144}",
1198 | "nang;" => "\u{02220}\u{020D2}",
1199 | "nap;" => "\u{02249}",
1200 | "napE;" => "\u{02A70}\u{00338}",
1201 | "napid;" => "\u{0224B}\u{00338}",
1202 | "napos;" => "\u{00149}",
1203 | "napprox;" => "\u{02249}",
1204 | "natur;" => "\u{0266E}",
1205 | "natural;" => "\u{0266E}",
1206 | "naturals;" => "\u{02115}",
1207 | "nbsp;" => "\u{000A0}",
1208 | "nbsp" => "\u{000A0}",
1209 | "nbump;" => "\u{0224E}\u{00338}",
1210 | "nbumpe;" => "\u{0224F}\u{00338}",
1211 | "ncap;" => "\u{02A43}",
1212 | "Ncaron;" => "\u{00147}",
1213 | "ncaron;" => "\u{00148}",
1214 | "Ncedil;" => "\u{00145}",
1215 | "ncedil;" => "\u{00146}",
1216 | "ncong;" => "\u{02247}",
1217 | "ncongdot;" => "\u{02A6D}\u{00338}",
1218 | "ncup;" => "\u{02A42}",
1219 | "Ncy;" => "\u{0041D}",
1220 | "ncy;" => "\u{0043D}",
1221 | "ndash;" => "\u{02013}",
1222 | "ne;" => "\u{02260}",
1223 | "nearhk;" => "\u{02924}",
1224 | "neArr;" => "\u{021D7}",
1225 | "nearr;" => "\u{02197}",
1226 | "nearrow;" => "\u{02197}",
1227 | "nedot;" => "\u{02250}\u{00338}",
1228 | "NegativeMediumSpace;" => "\u{0200B}",
1229 | "NegativeThickSpace;" => "\u{0200B}",
1230 | "NegativeThinSpace;" => "\u{0200B}",
1231 | "NegativeVeryThinSpace;" => "\u{0200B}",
1232 | "nequiv;" => "\u{02262}",
1233 | "nesear;" => "\u{02928}",
1234 | "nesim;" => "\u{02242}\u{00338}",
1235 | "NestedGreaterGreater;" => "\u{0226B}",
1236 | "NestedLessLess;" => "\u{0226A}",
1237 | "NewLine;" => "\u{0000A}",
1238 | "nexist;" => "\u{02204}",
1239 | "nexists;" => "\u{02204}",
1240 | "Nfr;" => "\u{1D511}",
1241 | "nfr;" => "\u{1D52B}",
1242 | "ngE;" => "\u{02267}\u{00338}",
1243 | "nge;" => "\u{02271}",
1244 | "ngeq;" => "\u{02271}",
1245 | "ngeqq;" => "\u{02267}\u{00338}",
1246 | "ngeqslant;" => "\u{02A7E}\u{00338}",
1247 | "nges;" => "\u{02A7E}\u{00338}",
1248 | "nGg;" => "\u{022D9}\u{00338}",
1249 | "ngsim;" => "\u{02275}",
1250 | "nGt;" => "\u{0226B}\u{020D2}",
1251 | "ngt;" => "\u{0226F}",
1252 | "ngtr;" => "\u{0226F}",
1253 | "nGtv;" => "\u{0226B}\u{00338}",
1254 | "nhArr;" => "\u{021CE}",
1255 | "nharr;" => "\u{021AE}",
1256 | "nhpar;" => "\u{02AF2}",
1257 | "ni;" => "\u{0220B}",
1258 | "nis;" => "\u{022FC}",
1259 | "nisd;" => "\u{022FA}",
1260 | "niv;" => "\u{0220B}",
1261 | "NJcy;" => "\u{0040A}",
1262 | "njcy;" => "\u{0045A}",
1263 | "nlArr;" => "\u{021CD}",
1264 | "nlarr;" => "\u{0219A}",
1265 | "nldr;" => "\u{02025}",
1266 | "nlE;" => "\u{02266}\u{00338}",
1267 | "nle;" => "\u{02270}",
1268 | "nLeftarrow;" => "\u{021CD}",
1269 | "nleftarrow;" => "\u{0219A}",
1270 | "nLeftrightarrow;" => "\u{021CE}",
1271 | "nleftrightarrow;" => "\u{021AE}",
1272 | "nleq;" => "\u{02270}",
1273 | "nleqq;" => "\u{02266}\u{00338}",
1274 | "nleqslant;" => "\u{02A7D}\u{00338}",
1275 | "nles;" => "\u{02A7D}\u{00338}",
1276 | "nless;" => "\u{0226E}",
1277 | "nLl;" => "\u{022D8}\u{00338}",
1278 | "nlsim;" => "\u{02274}",
1279 | "nLt;" => "\u{0226A}\u{020D2}",
1280 | "nlt;" => "\u{0226E}",
1281 | "nltri;" => "\u{022EA}",
1282 | "nltrie;" => "\u{022EC}",
1283 | "nLtv;" => "\u{0226A}\u{00338}",
1284 | "nmid;" => "\u{02224}",
1285 | "NoBreak;" => "\u{02060}",
1286 | "NonBreakingSpace;" => "\u{000A0}",
1287 | "Nopf;" => "\u{02115}",
1288 | "nopf;" => "\u{1D55F}",
1289 | "Not;" => "\u{02AEC}",
1290 | "not;" => "\u{000AC}",
1291 | "not" => "\u{000AC}",
1292 | "NotCongruent;" => "\u{02262}",
1293 | "NotCupCap;" => "\u{0226D}",
1294 | "NotDoubleVerticalBar;" => "\u{02226}",
1295 | "NotElement;" => "\u{02209}",
1296 | "NotEqual;" => "\u{02260}",
1297 | "NotEqualTilde;" => "\u{02242}\u{00338}",
1298 | "NotExists;" => "\u{02204}",
1299 | "NotGreater;" => "\u{0226F}",
1300 | "NotGreaterEqual;" => "\u{02271}",
1301 | "NotGreaterFullEqual;" => "\u{02267}\u{00338}",
1302 | "NotGreaterGreater;" => "\u{0226B}\u{00338}",
1303 | "NotGreaterLess;" => "\u{02279}",
1304 | "NotGreaterSlantEqual;" => "\u{02A7E}\u{00338}",
1305 | "NotGreaterTilde;" => "\u{02275}",
1306 | "NotHumpDownHump;" => "\u{0224E}\u{00338}",
1307 | "NotHumpEqual;" => "\u{0224F}\u{00338}",
1308 | "notin;" => "\u{02209}",
1309 | "notindot;" => "\u{022F5}\u{00338}",
1310 | "notinE;" => "\u{022F9}\u{00338}",
1311 | "notinva;" => "\u{02209}",
1312 | "notinvb;" => "\u{022F7}",
1313 | "notinvc;" => "\u{022F6}",
1314 | "NotLeftTriangle;" => "\u{022EA}",
1315 | "NotLeftTriangleBar;" => "\u{029CF}\u{00338}",
1316 | "NotLeftTriangleEqual;" => "\u{022EC}",
1317 | "NotLess;" => "\u{0226E}",
1318 | "NotLessEqual;" => "\u{02270}",
1319 | "NotLessGreater;" => "\u{02278}",
1320 | "NotLessLess;" => "\u{0226A}\u{00338}",
1321 | "NotLessSlantEqual;" => "\u{02A7D}\u{00338}",
1322 | "NotLessTilde;" => "\u{02274}",
1323 | "NotNestedGreaterGreater;" => "\u{02AA2}\u{00338}",
1324 | "NotNestedLessLess;" => "\u{02AA1}\u{00338}",
1325 | "notni;" => "\u{0220C}",
1326 | "notniva;" => "\u{0220C}",
1327 | "notnivb;" => "\u{022FE}",
1328 | "notnivc;" => "\u{022FD}",
1329 | "NotPrecedes;" => "\u{02280}",
1330 | "NotPrecedesEqual;" => "\u{02AAF}\u{00338}",
1331 | "NotPrecedesSlantEqual;" => "\u{022E0}",
1332 | "NotReverseElement;" => "\u{0220C}",
1333 | "NotRightTriangle;" => "\u{022EB}",
1334 | "NotRightTriangleBar;" => "\u{029D0}\u{00338}",
1335 | "NotRightTriangleEqual;" => "\u{022ED}",
1336 | "NotSquareSubset;" => "\u{0228F}\u{00338}",
1337 | "NotSquareSubsetEqual;" => "\u{022E2}",
1338 | "NotSquareSuperset;" => "\u{02290}\u{00338}",
1339 | "NotSquareSupersetEqual;" => "\u{022E3}",
1340 | "NotSubset;" => "\u{02282}\u{020D2}",
1341 | "NotSubsetEqual;" => "\u{02288}",
1342 | "NotSucceeds;" => "\u{02281}",
1343 | "NotSucceedsEqual;" => "\u{02AB0}\u{00338}",
1344 | "NotSucceedsSlantEqual;" => "\u{022E1}",
1345 | "NotSucceedsTilde;" => "\u{0227F}\u{00338}",
1346 | "NotSuperset;" => "\u{02283}\u{020D2}",
1347 | "NotSupersetEqual;" => "\u{02289}",
1348 | "NotTilde;" => "\u{02241}",
1349 | "NotTildeEqual;" => "\u{02244}",
1350 | "NotTildeFullEqual;" => "\u{02247}",
1351 | "NotTildeTilde;" => "\u{02249}",
1352 | "NotVerticalBar;" => "\u{02224}",
1353 | "npar;" => "\u{02226}",
1354 | "nparallel;" => "\u{02226}",
1355 | "nparsl;" => "\u{02AFD}\u{020E5}",
1356 | "npart;" => "\u{02202}\u{00338}",
1357 | "npolint;" => "\u{02A14}",
1358 | "npr;" => "\u{02280}",
1359 | "nprcue;" => "\u{022E0}",
1360 | "npre;" => "\u{02AAF}\u{00338}",
1361 | "nprec;" => "\u{02280}",
1362 | "npreceq;" => "\u{02AAF}\u{00338}",
1363 | "nrArr;" => "\u{021CF}",
1364 | "nrarr;" => "\u{0219B}",
1365 | "nrarrc;" => "\u{02933}\u{00338}",
1366 | "nrarrw;" => "\u{0219D}\u{00338}",
1367 | "nRightarrow;" => "\u{021CF}",
1368 | "nrightarrow;" => "\u{0219B}",
1369 | "nrtri;" => "\u{022EB}",
1370 | "nrtrie;" => "\u{022ED}",
1371 | "nsc;" => "\u{02281}",
1372 | "nsccue;" => "\u{022E1}",
1373 | "nsce;" => "\u{02AB0}\u{00338}",
1374 | "Nscr;" => "\u{1D4A9}",
1375 | "nscr;" => "\u{1D4C3}",
1376 | "nshortmid;" => "\u{02224}",
1377 | "nshortparallel;" => "\u{02226}",
1378 | "nsim;" => "\u{02241}",
1379 | "nsime;" => "\u{02244}",
1380 | "nsimeq;" => "\u{02244}",
1381 | "nsmid;" => "\u{02224}",
1382 | "nspar;" => "\u{02226}",
1383 | "nsqsube;" => "\u{022E2}",
1384 | "nsqsupe;" => "\u{022E3}",
1385 | "nsub;" => "\u{02284}",
1386 | "nsubE;" => "\u{02AC5}\u{00338}",
1387 | "nsube;" => "\u{02288}",
1388 | "nsubset;" => "\u{02282}\u{020D2}",
1389 | "nsubseteq;" => "\u{02288}",
1390 | "nsubseteqq;" => "\u{02AC5}\u{00338}",
1391 | "nsucc;" => "\u{02281}",
1392 | "nsucceq;" => "\u{02AB0}\u{00338}",
1393 | "nsup;" => "\u{02285}",
1394 | "nsupE;" => "\u{02AC6}\u{00338}",
1395 | "nsupe;" => "\u{02289}",
1396 | "nsupset;" => "\u{02283}\u{020D2}",
1397 | "nsupseteq;" => "\u{02289}",
1398 | "nsupseteqq;" => "\u{02AC6}\u{00338}",
1399 | "ntgl;" => "\u{02279}",
1400 | "Ntilde;" => "\u{000D1}",
1401 | "Ntilde" => "\u{000D1}",
1402 | "ntilde;" => "\u{000F1}",
1403 | "ntilde" => "\u{000F1}",
1404 | "ntlg;" => "\u{02278}",
1405 | "ntriangleleft;" => "\u{022EA}",
1406 | "ntrianglelefteq;" => "\u{022EC}",
1407 | "ntriangleright;" => "\u{022EB}",
1408 | "ntrianglerighteq;" => "\u{022ED}",
1409 | "Nu;" => "\u{0039D}",
1410 | "nu;" => "\u{003BD}",
1411 | "num;" => "\u{00023}",
1412 | "numero;" => "\u{02116}",
1413 | "numsp;" => "\u{02007}",
1414 | "nvap;" => "\u{0224D}\u{020D2}",
1415 | "nVDash;" => "\u{022AF}",
1416 | "nVdash;" => "\u{022AE}",
1417 | "nvDash;" => "\u{022AD}",
1418 | "nvdash;" => "\u{022AC}",
1419 | "nvge;" => "\u{02265}\u{020D2}",
1420 | "nvgt;" => "\u{0003E}\u{020D2}",
1421 | "nvHarr;" => "\u{02904}",
1422 | "nvinfin;" => "\u{029DE}",
1423 | "nvlArr;" => "\u{02902}",
1424 | "nvle;" => "\u{02264}\u{020D2}",
1425 | "nvlt;" => "\u{0003C}\u{020D2}",
1426 | "nvltrie;" => "\u{022B4}\u{020D2}",
1427 | "nvrArr;" => "\u{02903}",
1428 | "nvrtrie;" => "\u{022B5}\u{020D2}",
1429 | "nvsim;" => "\u{0223C}\u{020D2}",
1430 | "nwarhk;" => "\u{02923}",
1431 | "nwArr;" => "\u{021D6}",
1432 | "nwarr;" => "\u{02196}",
1433 | "nwarrow;" => "\u{02196}",
1434 | "nwnear;" => "\u{02927}",
1435 | "Oacute;" => "\u{000D3}",
1436 | "Oacute" => "\u{000D3}",
1437 | "oacute;" => "\u{000F3}",
1438 | "oacute" => "\u{000F3}",
1439 | "oast;" => "\u{0229B}",
1440 | "ocir;" => "\u{0229A}",
1441 | "Ocirc;" => "\u{000D4}",
1442 | "Ocirc" => "\u{000D4}",
1443 | "ocirc;" => "\u{000F4}",
1444 | "ocirc" => "\u{000F4}",
1445 | "Ocy;" => "\u{0041E}",
1446 | "ocy;" => "\u{0043E}",
1447 | "odash;" => "\u{0229D}",
1448 | "Odblac;" => "\u{00150}",
1449 | "odblac;" => "\u{00151}",
1450 | "odiv;" => "\u{02A38}",
1451 | "odot;" => "\u{02299}",
1452 | "odsold;" => "\u{029BC}",
1453 | "OElig;" => "\u{00152}",
1454 | "oelig;" => "\u{00153}",
1455 | "ofcir;" => "\u{029BF}",
1456 | "Ofr;" => "\u{1D512}",
1457 | "ofr;" => "\u{1D52C}",
1458 | "ogon;" => "\u{002DB}",
1459 | "Ograve;" => "\u{000D2}",
1460 | "Ograve" => "\u{000D2}",
1461 | "ograve;" => "\u{000F2}",
1462 | "ograve" => "\u{000F2}",
1463 | "ogt;" => "\u{029C1}",
1464 | "ohbar;" => "\u{029B5}",
1465 | "ohm;" => "\u{003A9}",
1466 | "oint;" => "\u{0222E}",
1467 | "olarr;" => "\u{021BA}",
1468 | "olcir;" => "\u{029BE}",
1469 | "olcross;" => "\u{029BB}",
1470 | "oline;" => "\u{0203E}",
1471 | "olt;" => "\u{029C0}",
1472 | "Omacr;" => "\u{0014C}",
1473 | "omacr;" => "\u{0014D}",
1474 | "Omega;" => "\u{003A9}",
1475 | "omega;" => "\u{003C9}",
1476 | "Omicron;" => "\u{0039F}",
1477 | "omicron;" => "\u{003BF}",
1478 | "omid;" => "\u{029B6}",
1479 | "ominus;" => "\u{02296}",
1480 | "Oopf;" => "\u{1D546}",
1481 | "oopf;" => "\u{1D560}",
1482 | "opar;" => "\u{029B7}",
1483 | "OpenCurlyDoubleQuote;" => "\u{0201C}",
1484 | "OpenCurlyQuote;" => "\u{02018}",
1485 | "operp;" => "\u{029B9}",
1486 | "oplus;" => "\u{02295}",
1487 | "Or;" => "\u{02A54}",
1488 | "or;" => "\u{02228}",
1489 | "orarr;" => "\u{021BB}",
1490 | "ord;" => "\u{02A5D}",
1491 | "order;" => "\u{02134}",
1492 | "orderof;" => "\u{02134}",
1493 | "ordf;" => "\u{000AA}",
1494 | "ordf" => "\u{000AA}",
1495 | "ordm;" => "\u{000BA}",
1496 | "ordm" => "\u{000BA}",
1497 | "origof;" => "\u{022B6}",
1498 | "oror;" => "\u{02A56}",
1499 | "orslope;" => "\u{02A57}",
1500 | "orv;" => "\u{02A5B}",
1501 | "oS;" => "\u{024C8}",
1502 | "Oscr;" => "\u{1D4AA}",
1503 | "oscr;" => "\u{02134}",
1504 | "Oslash;" => "\u{000D8}",
1505 | "Oslash" => "\u{000D8}",
1506 | "oslash;" => "\u{000F8}",
1507 | "oslash" => "\u{000F8}",
1508 | "osol;" => "\u{02298}",
1509 | "Otilde;" => "\u{000D5}",
1510 | "Otilde" => "\u{000D5}",
1511 | "otilde;" => "\u{000F5}",
1512 | "otilde" => "\u{000F5}",
1513 | "Otimes;" => "\u{02A37}",
1514 | "otimes;" => "\u{02297}",
1515 | "otimesas;" => "\u{02A36}",
1516 | "Ouml;" => "\u{000D6}",
1517 | "Ouml" => "\u{000D6}",
1518 | "ouml;" => "\u{000F6}",
1519 | "ouml" => "\u{000F6}",
1520 | "ovbar;" => "\u{0233D}",
1521 | "OverBar;" => "\u{0203E}",
1522 | "OverBrace;" => "\u{023DE}",
1523 | "OverBracket;" => "\u{023B4}",
1524 | "OverParenthesis;" => "\u{023DC}",
1525 | "par;" => "\u{02225}",
1526 | "para;" => "\u{000B6}",
1527 | "para" => "\u{000B6}",
1528 | "parallel;" => "\u{02225}",
1529 | "parsim;" => "\u{02AF3}",
1530 | "parsl;" => "\u{02AFD}",
1531 | "part;" => "\u{02202}",
1532 | "PartialD;" => "\u{02202}",
1533 | "Pcy;" => "\u{0041F}",
1534 | "pcy;" => "\u{0043F}",
1535 | "percnt;" => "\u{00025}",
1536 | "period;" => "\u{0002E}",
1537 | "permil;" => "\u{02030}",
1538 | "perp;" => "\u{022A5}",
1539 | "pertenk;" => "\u{02031}",
1540 | "Pfr;" => "\u{1D513}",
1541 | "pfr;" => "\u{1D52D}",
1542 | "Phi;" => "\u{003A6}",
1543 | "phi;" => "\u{003C6}",
1544 | "phiv;" => "\u{003D5}",
1545 | "phmmat;" => "\u{02133}",
1546 | "phone;" => "\u{0260E}",
1547 | "Pi;" => "\u{003A0}",
1548 | "pi;" => "\u{003C0}",
1549 | "pitchfork;" => "\u{022D4}",
1550 | "piv;" => "\u{003D6}",
1551 | "planck;" => "\u{0210F}",
1552 | "planckh;" => "\u{0210E}",
1553 | "plankv;" => "\u{0210F}",
1554 | "plus;" => "\u{0002B}",
1555 | "plusacir;" => "\u{02A23}",
1556 | "plusb;" => "\u{0229E}",
1557 | "pluscir;" => "\u{02A22}",
1558 | "plusdo;" => "\u{02214}",
1559 | "plusdu;" => "\u{02A25}",
1560 | "pluse;" => "\u{02A72}",
1561 | "PlusMinus;" => "\u{000B1}",
1562 | "plusmn;" => "\u{000B1}",
1563 | "plusmn" => "\u{000B1}",
1564 | "plussim;" => "\u{02A26}",
1565 | "plustwo;" => "\u{02A27}",
1566 | "pm;" => "\u{000B1}",
1567 | "Poincareplane;" => "\u{0210C}",
1568 | "pointint;" => "\u{02A15}",
1569 | "Popf;" => "\u{02119}",
1570 | "popf;" => "\u{1D561}",
1571 | "pound;" => "\u{000A3}",
1572 | "pound" => "\u{000A3}",
1573 | "Pr;" => "\u{02ABB}",
1574 | "pr;" => "\u{0227A}",
1575 | "prap;" => "\u{02AB7}",
1576 | "prcue;" => "\u{0227C}",
1577 | "prE;" => "\u{02AB3}",
1578 | "pre;" => "\u{02AAF}",
1579 | "prec;" => "\u{0227A}",
1580 | "precapprox;" => "\u{02AB7}",
1581 | "preccurlyeq;" => "\u{0227C}",
1582 | "Precedes;" => "\u{0227A}",
1583 | "PrecedesEqual;" => "\u{02AAF}",
1584 | "PrecedesSlantEqual;" => "\u{0227C}",
1585 | "PrecedesTilde;" => "\u{0227E}",
1586 | "preceq;" => "\u{02AAF}",
1587 | "precnapprox;" => "\u{02AB9}",
1588 | "precneqq;" => "\u{02AB5}",
1589 | "precnsim;" => "\u{022E8}",
1590 | "precsim;" => "\u{0227E}",
1591 | "Prime;" => "\u{02033}",
1592 | "prime;" => "\u{02032}",
1593 | "primes;" => "\u{02119}",
1594 | "prnap;" => "\u{02AB9}",
1595 | "prnE;" => "\u{02AB5}",
1596 | "prnsim;" => "\u{022E8}",
1597 | "prod;" => "\u{0220F}",
1598 | "Product;" => "\u{0220F}",
1599 | "profalar;" => "\u{0232E}",
1600 | "profline;" => "\u{02312}",
1601 | "profsurf;" => "\u{02313}",
1602 | "prop;" => "\u{0221D}",
1603 | "Proportion;" => "\u{02237}",
1604 | "Proportional;" => "\u{0221D}",
1605 | "propto;" => "\u{0221D}",
1606 | "prsim;" => "\u{0227E}",
1607 | "prurel;" => "\u{022B0}",
1608 | "Pscr;" => "\u{1D4AB}",
1609 | "pscr;" => "\u{1D4C5}",
1610 | "Psi;" => "\u{003A8}",
1611 | "psi;" => "\u{003C8}",
1612 | "puncsp;" => "\u{02008}",
1613 | "Qfr;" => "\u{1D514}",
1614 | "qfr;" => "\u{1D52E}",
1615 | "qint;" => "\u{02A0C}",
1616 | "Qopf;" => "\u{0211A}",
1617 | "qopf;" => "\u{1D562}",
1618 | "qprime;" => "\u{02057}",
1619 | "Qscr;" => "\u{1D4AC}",
1620 | "qscr;" => "\u{1D4C6}",
1621 | "quaternions;" => "\u{0210D}",
1622 | "quatint;" => "\u{02A16}",
1623 | "quest;" => "\u{0003F}",
1624 | "questeq;" => "\u{0225F}",
1625 | "QUOT;" => "\u{00022}",
1626 | "QUOT" => "\u{00022}",
1627 | "quot;" => "\u{00022}",
1628 | "quot" => "\u{00022}",
1629 | "rAarr;" => "\u{021DB}",
1630 | "race;" => "\u{0223D}\u{00331}",
1631 | "Racute;" => "\u{00154}",
1632 | "racute;" => "\u{00155}",
1633 | "radic;" => "\u{0221A}",
1634 | "raemptyv;" => "\u{029B3}",
1635 | "Rang;" => "\u{027EB}",
1636 | "rang;" => "\u{027E9}",
1637 | "rangd;" => "\u{02992}",
1638 | "range;" => "\u{029A5}",
1639 | "rangle;" => "\u{027E9}",
1640 | "raquo;" => "\u{000BB}",
1641 | "raquo" => "\u{000BB}",
1642 | "Rarr;" => "\u{021A0}",
1643 | "rArr;" => "\u{021D2}",
1644 | "rarr;" => "\u{02192}",
1645 | "rarrap;" => "\u{02975}",
1646 | "rarrb;" => "\u{021E5}",
1647 | "rarrbfs;" => "\u{02920}",
1648 | "rarrc;" => "\u{02933}",
1649 | "rarrfs;" => "\u{0291E}",
1650 | "rarrhk;" => "\u{021AA}",
1651 | "rarrlp;" => "\u{021AC}",
1652 | "rarrpl;" => "\u{02945}",
1653 | "rarrsim;" => "\u{02974}",
1654 | "Rarrtl;" => "\u{02916}",
1655 | "rarrtl;" => "\u{021A3}",
1656 | "rarrw;" => "\u{0219D}",
1657 | "rAtail;" => "\u{0291C}",
1658 | "ratail;" => "\u{0291A}",
1659 | "ratio;" => "\u{02236}",
1660 | "rationals;" => "\u{0211A}",
1661 | "RBarr;" => "\u{02910}",
1662 | "rBarr;" => "\u{0290F}",
1663 | "rbarr;" => "\u{0290D}",
1664 | "rbbrk;" => "\u{02773}",
1665 | "rbrace;" => "\u{0007D}",
1666 | "rbrack;" => "\u{0005D}",
1667 | "rbrke;" => "\u{0298C}",
1668 | "rbrksld;" => "\u{0298E}",
1669 | "rbrkslu;" => "\u{02990}",
1670 | "Rcaron;" => "\u{00158}",
1671 | "rcaron;" => "\u{00159}",
1672 | "Rcedil;" => "\u{00156}",
1673 | "rcedil;" => "\u{00157}",
1674 | "rceil;" => "\u{02309}",
1675 | "rcub;" => "\u{0007D}",
1676 | "Rcy;" => "\u{00420}",
1677 | "rcy;" => "\u{00440}",
1678 | "rdca;" => "\u{02937}",
1679 | "rdldhar;" => "\u{02969}",
1680 | "rdquo;" => "\u{0201D}",
1681 | "rdquor;" => "\u{0201D}",
1682 | "rdsh;" => "\u{021B3}",
1683 | "Re;" => "\u{0211C}",
1684 | "real;" => "\u{0211C}",
1685 | "realine;" => "\u{0211B}",
1686 | "realpart;" => "\u{0211C}",
1687 | "reals;" => "\u{0211D}",
1688 | "rect;" => "\u{025AD}",
1689 | "REG;" => "\u{000AE}",
1690 | "REG" => "\u{000AE}",
1691 | "reg;" => "\u{000AE}",
1692 | "reg" => "\u{000AE}",
1693 | "ReverseElement;" => "\u{0220B}",
1694 | "ReverseEquilibrium;" => "\u{021CB}",
1695 | "ReverseUpEquilibrium;" => "\u{0296F}",
1696 | "rfisht;" => "\u{0297D}",
1697 | "rfloor;" => "\u{0230B}",
1698 | "Rfr;" => "\u{0211C}",
1699 | "rfr;" => "\u{1D52F}",
1700 | "rHar;" => "\u{02964}",
1701 | "rhard;" => "\u{021C1}",
1702 | "rharu;" => "\u{021C0}",
1703 | "rharul;" => "\u{0296C}",
1704 | "Rho;" => "\u{003A1}",
1705 | "rho;" => "\u{003C1}",
1706 | "rhov;" => "\u{003F1}",
1707 | "RightAngleBracket;" => "\u{027E9}",
1708 | "RightArrow;" => "\u{02192}",
1709 | "Rightarrow;" => "\u{021D2}",
1710 | "rightarrow;" => "\u{02192}",
1711 | "RightArrowBar;" => "\u{021E5}",
1712 | "RightArrowLeftArrow;" => "\u{021C4}",
1713 | "rightarrowtail;" => "\u{021A3}",
1714 | "RightCeiling;" => "\u{02309}",
1715 | "RightDoubleBracket;" => "\u{027E7}",
1716 | "RightDownTeeVector;" => "\u{0295D}",
1717 | "RightDownVector;" => "\u{021C2}",
1718 | "RightDownVectorBar;" => "\u{02955}",
1719 | "RightFloor;" => "\u{0230B}",
1720 | "rightharpoondown;" => "\u{021C1}",
1721 | "rightharpoonup;" => "\u{021C0}",
1722 | "rightleftarrows;" => "\u{021C4}",
1723 | "rightleftharpoons;" => "\u{021CC}",
1724 | "rightrightarrows;" => "\u{021C9}",
1725 | "rightsquigarrow;" => "\u{0219D}",
1726 | "RightTee;" => "\u{022A2}",
1727 | "RightTeeArrow;" => "\u{021A6}",
1728 | "RightTeeVector;" => "\u{0295B}",
1729 | "rightthreetimes;" => "\u{022CC}",
1730 | "RightTriangle;" => "\u{022B3}",
1731 | "RightTriangleBar;" => "\u{029D0}",
1732 | "RightTriangleEqual;" => "\u{022B5}",
1733 | "RightUpDownVector;" => "\u{0294F}",
1734 | "RightUpTeeVector;" => "\u{0295C}",
1735 | "RightUpVector;" => "\u{021BE}",
1736 | "RightUpVectorBar;" => "\u{02954}",
1737 | "RightVector;" => "\u{021C0}",
1738 | "RightVectorBar;" => "\u{02953}",
1739 | "ring;" => "\u{002DA}",
1740 | "risingdotseq;" => "\u{02253}",
1741 | "rlarr;" => "\u{021C4}",
1742 | "rlhar;" => "\u{021CC}",
1743 | "rlm;" => "\u{0200F}",
1744 | "rmoust;" => "\u{023B1}",
1745 | "rmoustache;" => "\u{023B1}",
1746 | "rnmid;" => "\u{02AEE}",
1747 | "roang;" => "\u{027ED}",
1748 | "roarr;" => "\u{021FE}",
1749 | "robrk;" => "\u{027E7}",
1750 | "ropar;" => "\u{02986}",
1751 | "Ropf;" => "\u{0211D}",
1752 | "ropf;" => "\u{1D563}",
1753 | "roplus;" => "\u{02A2E}",
1754 | "rotimes;" => "\u{02A35}",
1755 | "RoundImplies;" => "\u{02970}",
1756 | "rpar;" => "\u{00029}",
1757 | "rpargt;" => "\u{02994}",
1758 | "rppolint;" => "\u{02A12}",
1759 | "rrarr;" => "\u{021C9}",
1760 | "Rrightarrow;" => "\u{021DB}",
1761 | "rsaquo;" => "\u{0203A}",
1762 | "Rscr;" => "\u{0211B}",
1763 | "rscr;" => "\u{1D4C7}",
1764 | "Rsh;" => "\u{021B1}",
1765 | "rsh;" => "\u{021B1}",
1766 | "rsqb;" => "\u{0005D}",
1767 | "rsquo;" => "\u{02019}",
1768 | "rsquor;" => "\u{02019}",
1769 | "rthree;" => "\u{022CC}",
1770 | "rtimes;" => "\u{022CA}",
1771 | "rtri;" => "\u{025B9}",
1772 | "rtrie;" => "\u{022B5}",
1773 | "rtrif;" => "\u{025B8}",
1774 | "rtriltri;" => "\u{029CE}",
1775 | "RuleDelayed;" => "\u{029F4}",
1776 | "ruluhar;" => "\u{02968}",
1777 | "rx;" => "\u{0211E}",
1778 | "Sacute;" => "\u{0015A}",
1779 | "sacute;" => "\u{0015B}",
1780 | "sbquo;" => "\u{0201A}",
1781 | "Sc;" => "\u{02ABC}",
1782 | "sc;" => "\u{0227B}",
1783 | "scap;" => "\u{02AB8}",
1784 | "Scaron;" => "\u{00160}",
1785 | "scaron;" => "\u{00161}",
1786 | "sccue;" => "\u{0227D}",
1787 | "scE;" => "\u{02AB4}",
1788 | "sce;" => "\u{02AB0}",
1789 | "Scedil;" => "\u{0015E}",
1790 | "scedil;" => "\u{0015F}",
1791 | "Scirc;" => "\u{0015C}",
1792 | "scirc;" => "\u{0015D}",
1793 | "scnap;" => "\u{02ABA}",
1794 | "scnE;" => "\u{02AB6}",
1795 | "scnsim;" => "\u{022E9}",
1796 | "scpolint;" => "\u{02A13}",
1797 | "scsim;" => "\u{0227F}",
1798 | "Scy;" => "\u{00421}",
1799 | "scy;" => "\u{00441}",
1800 | "sdot;" => "\u{022C5}",
1801 | "sdotb;" => "\u{022A1}",
1802 | "sdote;" => "\u{02A66}",
1803 | "searhk;" => "\u{02925}",
1804 | "seArr;" => "\u{021D8}",
1805 | "searr;" => "\u{02198}",
1806 | "searrow;" => "\u{02198}",
1807 | "sect;" => "\u{000A7}",
1808 | "sect" => "\u{000A7}",
1809 | "semi;" => "\u{0003B}",
1810 | "seswar;" => "\u{02929}",
1811 | "setminus;" => "\u{02216}",
1812 | "setmn;" => "\u{02216}",
1813 | "sext;" => "\u{02736}",
1814 | "Sfr;" => "\u{1D516}",
1815 | "sfr;" => "\u{1D530}",
1816 | "sfrown;" => "\u{02322}",
1817 | "sharp;" => "\u{0266F}",
1818 | "SHCHcy;" => "\u{00429}",
1819 | "shchcy;" => "\u{00449}",
1820 | "SHcy;" => "\u{00428}",
1821 | "shcy;" => "\u{00448}",
1822 | "ShortDownArrow;" => "\u{02193}",
1823 | "ShortLeftArrow;" => "\u{02190}",
1824 | "shortmid;" => "\u{02223}",
1825 | "shortparallel;" => "\u{02225}",
1826 | "ShortRightArrow;" => "\u{02192}",
1827 | "ShortUpArrow;" => "\u{02191}",
1828 | "shy;" => "\u{000AD}",
1829 | "shy" => "\u{000AD}",
1830 | "Sigma;" => "\u{003A3}",
1831 | "sigma;" => "\u{003C3}",
1832 | "sigmaf;" => "\u{003C2}",
1833 | "sigmav;" => "\u{003C2}",
1834 | "sim;" => "\u{0223C}",
1835 | "simdot;" => "\u{02A6A}",
1836 | "sime;" => "\u{02243}",
1837 | "simeq;" => "\u{02243}",
1838 | "simg;" => "\u{02A9E}",
1839 | "simgE;" => "\u{02AA0}",
1840 | "siml;" => "\u{02A9D}",
1841 | "simlE;" => "\u{02A9F}",
1842 | "simne;" => "\u{02246}",
1843 | "simplus;" => "\u{02A24}",
1844 | "simrarr;" => "\u{02972}",
1845 | "slarr;" => "\u{02190}",
1846 | "SmallCircle;" => "\u{02218}",
1847 | "smallsetminus;" => "\u{02216}",
1848 | "smashp;" => "\u{02A33}",
1849 | "smeparsl;" => "\u{029E4}",
1850 | "smid;" => "\u{02223}",
1851 | "smile;" => "\u{02323}",
1852 | "smt;" => "\u{02AAA}",
1853 | "smte;" => "\u{02AAC}",
1854 | "smtes;" => "\u{02AAC}\u{0FE00}",
1855 | "SOFTcy;" => "\u{0042C}",
1856 | "softcy;" => "\u{0044C}",
1857 | "sol;" => "\u{0002F}",
1858 | "solb;" => "\u{029C4}",
1859 | "solbar;" => "\u{0233F}",
1860 | "Sopf;" => "\u{1D54A}",
1861 | "sopf;" => "\u{1D564}",
1862 | "spades;" => "\u{02660}",
1863 | "spadesuit;" => "\u{02660}",
1864 | "spar;" => "\u{02225}",
1865 | "sqcap;" => "\u{02293}",
1866 | "sqcaps;" => "\u{02293}\u{0FE00}",
1867 | "sqcup;" => "\u{02294}",
1868 | "sqcups;" => "\u{02294}\u{0FE00}",
1869 | "Sqrt;" => "\u{0221A}",
1870 | "sqsub;" => "\u{0228F}",
1871 | "sqsube;" => "\u{02291}",
1872 | "sqsubset;" => "\u{0228F}",
1873 | "sqsubseteq;" => "\u{02291}",
1874 | "sqsup;" => "\u{02290}",
1875 | "sqsupe;" => "\u{02292}",
1876 | "sqsupset;" => "\u{02290}",
1877 | "sqsupseteq;" => "\u{02292}",
1878 | "squ;" => "\u{025A1}",
1879 | "Square;" => "\u{025A1}",
1880 | "square;" => "\u{025A1}",
1881 | "SquareIntersection;" => "\u{02293}",
1882 | "SquareSubset;" => "\u{0228F}",
1883 | "SquareSubsetEqual;" => "\u{02291}",
1884 | "SquareSuperset;" => "\u{02290}",
1885 | "SquareSupersetEqual;" => "\u{02292}",
1886 | "SquareUnion;" => "\u{02294}",
1887 | "squarf;" => "\u{025AA}",
1888 | "squf;" => "\u{025AA}",
1889 | "srarr;" => "\u{02192}",
1890 | "Sscr;" => "\u{1D4AE}",
1891 | "sscr;" => "\u{1D4C8}",
1892 | "ssetmn;" => "\u{02216}",
1893 | "ssmile;" => "\u{02323}",
1894 | "sstarf;" => "\u{022C6}",
1895 | "Star;" => "\u{022C6}",
1896 | "star;" => "\u{02606}",
1897 | "starf;" => "\u{02605}",
1898 | "straightepsilon;" => "\u{003F5}",
1899 | "straightphi;" => "\u{003D5}",
1900 | "strns;" => "\u{000AF}",
1901 | "Sub;" => "\u{022D0}",
1902 | "sub;" => "\u{02282}",
1903 | "subdot;" => "\u{02ABD}",
1904 | "subE;" => "\u{02AC5}",
1905 | "sube;" => "\u{02286}",
1906 | "subedot;" => "\u{02AC3}",
1907 | "submult;" => "\u{02AC1}",
1908 | "subnE;" => "\u{02ACB}",
1909 | "subne;" => "\u{0228A}",
1910 | "subplus;" => "\u{02ABF}",
1911 | "subrarr;" => "\u{02979}",
1912 | "Subset;" => "\u{022D0}",
1913 | "subset;" => "\u{02282}",
1914 | "subseteq;" => "\u{02286}",
1915 | "subseteqq;" => "\u{02AC5}",
1916 | "SubsetEqual;" => "\u{02286}",
1917 | "subsetneq;" => "\u{0228A}",
1918 | "subsetneqq;" => "\u{02ACB}",
1919 | "subsim;" => "\u{02AC7}",
1920 | "subsub;" => "\u{02AD5}",
1921 | "subsup;" => "\u{02AD3}",
1922 | "succ;" => "\u{0227B}",
1923 | "succapprox;" => "\u{02AB8}",
1924 | "succcurlyeq;" => "\u{0227D}",
1925 | "Succeeds;" => "\u{0227B}",
1926 | "SucceedsEqual;" => "\u{02AB0}",
1927 | "SucceedsSlantEqual;" => "\u{0227D}",
1928 | "SucceedsTilde;" => "\u{0227F}",
1929 | "succeq;" => "\u{02AB0}",
1930 | "succnapprox;" => "\u{02ABA}",
1931 | "succneqq;" => "\u{02AB6}",
1932 | "succnsim;" => "\u{022E9}",
1933 | "succsim;" => "\u{0227F}",
1934 | "SuchThat;" => "\u{0220B}",
1935 | "Sum;" => "\u{02211}",
1936 | "sum;" => "\u{02211}",
1937 | "sung;" => "\u{0266A}",
1938 | "Sup;" => "\u{022D1}",
1939 | "sup;" => "\u{02283}",
1940 | "sup1;" => "\u{000B9}",
1941 | "sup1" => "\u{000B9}",
1942 | "sup2;" => "\u{000B2}",
1943 | "sup2" => "\u{000B2}",
1944 | "sup3;" => "\u{000B3}",
1945 | "sup3" => "\u{000B3}",
1946 | "supdot;" => "\u{02ABE}",
1947 | "supdsub;" => "\u{02AD8}",
1948 | "supE;" => "\u{02AC6}",
1949 | "supe;" => "\u{02287}",
1950 | "supedot;" => "\u{02AC4}",
1951 | "Superset;" => "\u{02283}",
1952 | "SupersetEqual;" => "\u{02287}",
1953 | "suphsol;" => "\u{027C9}",
1954 | "suphsub;" => "\u{02AD7}",
1955 | "suplarr;" => "\u{0297B}",
1956 | "supmult;" => "\u{02AC2}",
1957 | "supnE;" => "\u{02ACC}",
1958 | "supne;" => "\u{0228B}",
1959 | "supplus;" => "\u{02AC0}",
1960 | "Supset;" => "\u{022D1}",
1961 | "supset;" => "\u{02283}",
1962 | "supseteq;" => "\u{02287}",
1963 | "supseteqq;" => "\u{02AC6}",
1964 | "supsetneq;" => "\u{0228B}",
1965 | "supsetneqq;" => "\u{02ACC}",
1966 | "supsim;" => "\u{02AC8}",
1967 | "supsub;" => "\u{02AD4}",
1968 | "supsup;" => "\u{02AD6}",
1969 | "swarhk;" => "\u{02926}",
1970 | "swArr;" => "\u{021D9}",
1971 | "swarr;" => "\u{02199}",
1972 | "swarrow;" => "\u{02199}",
1973 | "swnwar;" => "\u{0292A}",
1974 | "szlig;" => "\u{000DF}",
1975 | "szlig" => "\u{000DF}",
1976 | "Tab;" => "\u{00009}",
1977 | "target;" => "\u{02316}",
1978 | "Tau;" => "\u{003A4}",
1979 | "tau;" => "\u{003C4}",
1980 | "tbrk;" => "\u{023B4}",
1981 | "Tcaron;" => "\u{00164}",
1982 | "tcaron;" => "\u{00165}",
1983 | "Tcedil;" => "\u{00162}",
1984 | "tcedil;" => "\u{00163}",
1985 | "Tcy;" => "\u{00422}",
1986 | "tcy;" => "\u{00442}",
1987 | "tdot;" => "\u{020DB}",
1988 | "telrec;" => "\u{02315}",
1989 | "Tfr;" => "\u{1D517}",
1990 | "tfr;" => "\u{1D531}",
1991 | "there4;" => "\u{02234}",
1992 | "Therefore;" => "\u{02234}",
1993 | "therefore;" => "\u{02234}",
1994 | "Theta;" => "\u{00398}",
1995 | "theta;" => "\u{003B8}",
1996 | "thetasym;" => "\u{003D1}",
1997 | "thetav;" => "\u{003D1}",
1998 | "thickapprox;" => "\u{02248}",
1999 | "thicksim;" => "\u{0223C}",
2000 | "ThickSpace;" => "\u{0205F}\u{0200A}",
2001 | "thinsp;" => "\u{02009}",
2002 | "ThinSpace;" => "\u{02009}",
2003 | "thkap;" => "\u{02248}",
2004 | "thksim;" => "\u{0223C}",
2005 | "THORN;" => "\u{000DE}",
2006 | "THORN" => "\u{000DE}",
2007 | "thorn;" => "\u{000FE}",
2008 | "thorn" => "\u{000FE}",
2009 | "Tilde;" => "\u{0223C}",
2010 | "tilde;" => "\u{002DC}",
2011 | "TildeEqual;" => "\u{02243}",
2012 | "TildeFullEqual;" => "\u{02245}",
2013 | "TildeTilde;" => "\u{02248}",
2014 | "times;" => "\u{000D7}",
2015 | "times" => "\u{000D7}",
2016 | "timesb;" => "\u{022A0}",
2017 | "timesbar;" => "\u{02A31}",
2018 | "timesd;" => "\u{02A30}",
2019 | "tint;" => "\u{0222D}",
2020 | "toea;" => "\u{02928}",
2021 | "top;" => "\u{022A4}",
2022 | "topbot;" => "\u{02336}",
2023 | "topcir;" => "\u{02AF1}",
2024 | "Topf;" => "\u{1D54B}",
2025 | "topf;" => "\u{1D565}",
2026 | "topfork;" => "\u{02ADA}",
2027 | "tosa;" => "\u{02929}",
2028 | "tprime;" => "\u{02034}",
2029 | "TRADE;" => "\u{02122}",
2030 | "trade;" => "\u{02122}",
2031 | "triangle;" => "\u{025B5}",
2032 | "triangledown;" => "\u{025BF}",
2033 | "triangleleft;" => "\u{025C3}",
2034 | "trianglelefteq;" => "\u{022B4}",
2035 | "triangleq;" => "\u{0225C}",
2036 | "triangleright;" => "\u{025B9}",
2037 | "trianglerighteq;" => "\u{022B5}",
2038 | "tridot;" => "\u{025EC}",
2039 | "trie;" => "\u{0225C}",
2040 | "triminus;" => "\u{02A3A}",
2041 | "TripleDot;" => "\u{020DB}",
2042 | "triplus;" => "\u{02A39}",
2043 | "trisb;" => "\u{029CD}",
2044 | "tritime;" => "\u{02A3B}",
2045 | "trpezium;" => "\u{023E2}",
2046 | "Tscr;" => "\u{1D4AF}",
2047 | "tscr;" => "\u{1D4C9}",
2048 | "TScy;" => "\u{00426}",
2049 | "tscy;" => "\u{00446}",
2050 | "TSHcy;" => "\u{0040B}",
2051 | "tshcy;" => "\u{0045B}",
2052 | "Tstrok;" => "\u{00166}",
2053 | "tstrok;" => "\u{00167}",
2054 | "twixt;" => "\u{0226C}",
2055 | "twoheadleftarrow;" => "\u{0219E}",
2056 | "twoheadrightarrow;" => "\u{021A0}",
2057 | "Uacute;" => "\u{000DA}",
2058 | "Uacute" => "\u{000DA}",
2059 | "uacute;" => "\u{000FA}",
2060 | "uacute" => "\u{000FA}",
2061 | "Uarr;" => "\u{0219F}",
2062 | "uArr;" => "\u{021D1}",
2063 | "uarr;" => "\u{02191}",
2064 | "Uarrocir;" => "\u{02949}",
2065 | "Ubrcy;" => "\u{0040E}",
2066 | "ubrcy;" => "\u{0045E}",
2067 | "Ubreve;" => "\u{0016C}",
2068 | "ubreve;" => "\u{0016D}",
2069 | "Ucirc;" => "\u{000DB}",
2070 | "Ucirc" => "\u{000DB}",
2071 | "ucirc;" => "\u{000FB}",
2072 | "ucirc" => "\u{000FB}",
2073 | "Ucy;" => "\u{00423}",
2074 | "ucy;" => "\u{00443}",
2075 | "udarr;" => "\u{021C5}",
2076 | "Udblac;" => "\u{00170}",
2077 | "udblac;" => "\u{00171}",
2078 | "udhar;" => "\u{0296E}",
2079 | "ufisht;" => "\u{0297E}",
2080 | "Ufr;" => "\u{1D518}",
2081 | "ufr;" => "\u{1D532}",
2082 | "Ugrave;" => "\u{000D9}",
2083 | "Ugrave" => "\u{000D9}",
2084 | "ugrave;" => "\u{000F9}",
2085 | "ugrave" => "\u{000F9}",
2086 | "uHar;" => "\u{02963}",
2087 | "uharl;" => "\u{021BF}",
2088 | "uharr;" => "\u{021BE}",
2089 | "uhblk;" => "\u{02580}",
2090 | "ulcorn;" => "\u{0231C}",
2091 | "ulcorner;" => "\u{0231C}",
2092 | "ulcrop;" => "\u{0230F}",
2093 | "ultri;" => "\u{025F8}",
2094 | "Umacr;" => "\u{0016A}",
2095 | "umacr;" => "\u{0016B}",
2096 | "uml;" => "\u{000A8}",
2097 | "uml" => "\u{000A8}",
2098 | "UnderBar;" => "\u{0005F}",
2099 | "UnderBrace;" => "\u{023DF}",
2100 | "UnderBracket;" => "\u{023B5}",
2101 | "UnderParenthesis;" => "\u{023DD}",
2102 | "Union;" => "\u{022C3}",
2103 | "UnionPlus;" => "\u{0228E}",
2104 | "Uogon;" => "\u{00172}",
2105 | "uogon;" => "\u{00173}",
2106 | "Uopf;" => "\u{1D54C}",
2107 | "uopf;" => "\u{1D566}",
2108 | "UpArrow;" => "\u{02191}",
2109 | "Uparrow;" => "\u{021D1}",
2110 | "uparrow;" => "\u{02191}",
2111 | "UpArrowBar;" => "\u{02912}",
2112 | "UpArrowDownArrow;" => "\u{021C5}",
2113 | "UpDownArrow;" => "\u{02195}",
2114 | "Updownarrow;" => "\u{021D5}",
2115 | "updownarrow;" => "\u{02195}",
2116 | "UpEquilibrium;" => "\u{0296E}",
2117 | "upharpoonleft;" => "\u{021BF}",
2118 | "upharpoonright;" => "\u{021BE}",
2119 | "uplus;" => "\u{0228E}",
2120 | "UpperLeftArrow;" => "\u{02196}",
2121 | "UpperRightArrow;" => "\u{02197}",
2122 | "Upsi;" => "\u{003D2}",
2123 | "upsi;" => "\u{003C5}",
2124 | "upsih;" => "\u{003D2}",
2125 | "Upsilon;" => "\u{003A5}",
2126 | "upsilon;" => "\u{003C5}",
2127 | "UpTee;" => "\u{022A5}",
2128 | "UpTeeArrow;" => "\u{021A5}",
2129 | "upuparrows;" => "\u{021C8}",
2130 | "urcorn;" => "\u{0231D}",
2131 | "urcorner;" => "\u{0231D}",
2132 | "urcrop;" => "\u{0230E}",
2133 | "Uring;" => "\u{0016E}",
2134 | "uring;" => "\u{0016F}",
2135 | "urtri;" => "\u{025F9}",
2136 | "Uscr;" => "\u{1D4B0}",
2137 | "uscr;" => "\u{1D4CA}",
2138 | "utdot;" => "\u{022F0}",
2139 | "Utilde;" => "\u{00168}",
2140 | "utilde;" => "\u{00169}",
2141 | "utri;" => "\u{025B5}",
2142 | "utrif;" => "\u{025B4}",
2143 | "uuarr;" => "\u{021C8}",
2144 | "Uuml;" => "\u{000DC}",
2145 | "Uuml" => "\u{000DC}",
2146 | "uuml;" => "\u{000FC}",
2147 | "uuml" => "\u{000FC}",
2148 | "uwangle;" => "\u{029A7}",
2149 | "vangrt;" => "\u{0299C}",
2150 | "varepsilon;" => "\u{003F5}",
2151 | "varkappa;" => "\u{003F0}",
2152 | "varnothing;" => "\u{02205}",
2153 | "varphi;" => "\u{003D5}",
2154 | "varpi;" => "\u{003D6}",
2155 | "varpropto;" => "\u{0221D}",
2156 | "vArr;" => "\u{021D5}",
2157 | "varr;" => "\u{02195}",
2158 | "varrho;" => "\u{003F1}",
2159 | "varsigma;" => "\u{003C2}",
2160 | "varsubsetneq;" => "\u{0228A}\u{0FE00}",
2161 | "varsubsetneqq;" => "\u{02ACB}\u{0FE00}",
2162 | "varsupsetneq;" => "\u{0228B}\u{0FE00}",
2163 | "varsupsetneqq;" => "\u{02ACC}\u{0FE00}",
2164 | "vartheta;" => "\u{003D1}",
2165 | "vartriangleleft;" => "\u{022B2}",
2166 | "vartriangleright;" => "\u{022B3}",
2167 | "Vbar;" => "\u{02AEB}",
2168 | "vBar;" => "\u{02AE8}",
2169 | "vBarv;" => "\u{02AE9}",
2170 | "Vcy;" => "\u{00412}",
2171 | "vcy;" => "\u{00432}",
2172 | "VDash;" => "\u{022AB}",
2173 | "Vdash;" => "\u{022A9}",
2174 | "vDash;" => "\u{022A8}",
2175 | "vdash;" => "\u{022A2}",
2176 | "Vdashl;" => "\u{02AE6}",
2177 | "Vee;" => "\u{022C1}",
2178 | "vee;" => "\u{02228}",
2179 | "veebar;" => "\u{022BB}",
2180 | "veeeq;" => "\u{0225A}",
2181 | "vellip;" => "\u{022EE}",
2182 | "Verbar;" => "\u{02016}",
2183 | "verbar;" => "\u{0007C}",
2184 | "Vert;" => "\u{02016}",
2185 | "vert;" => "\u{0007C}",
2186 | "VerticalBar;" => "\u{02223}",
2187 | "VerticalLine;" => "\u{0007C}",
2188 | "VerticalSeparator;" => "\u{02758}",
2189 | "VerticalTilde;" => "\u{02240}",
2190 | "VeryThinSpace;" => "\u{0200A}",
2191 | "Vfr;" => "\u{1D519}",
2192 | "vfr;" => "\u{1D533}",
2193 | "vltri;" => "\u{022B2}",
2194 | "vnsub;" => "\u{02282}\u{020D2}",
2195 | "vnsup;" => "\u{02283}\u{020D2}",
2196 | "Vopf;" => "\u{1D54D}",
2197 | "vopf;" => "\u{1D567}",
2198 | "vprop;" => "\u{0221D}",
2199 | "vrtri;" => "\u{022B3}",
2200 | "Vscr;" => "\u{1D4B1}",
2201 | "vscr;" => "\u{1D4CB}",
2202 | "vsubnE;" => "\u{02ACB}\u{0FE00}",
2203 | "vsubne;" => "\u{0228A}\u{0FE00}",
2204 | "vsupnE;" => "\u{02ACC}\u{0FE00}",
2205 | "vsupne;" => "\u{0228B}\u{0FE00}",
2206 | "Vvdash;" => "\u{022AA}",
2207 | "vzigzag;" => "\u{0299A}",
2208 | "Wcirc;" => "\u{00174}",
2209 | "wcirc;" => "\u{00175}",
2210 | "wedbar;" => "\u{02A5F}",
2211 | "Wedge;" => "\u{022C0}",
2212 | "wedge;" => "\u{02227}",
2213 | "wedgeq;" => "\u{02259}",
2214 | "weierp;" => "\u{02118}",
2215 | "Wfr;" => "\u{1D51A}",
2216 | "wfr;" => "\u{1D534}",
2217 | "Wopf;" => "\u{1D54E}",
2218 | "wopf;" => "\u{1D568}",
2219 | "wp;" => "\u{02118}",
2220 | "wr;" => "\u{02240}",
2221 | "wreath;" => "\u{02240}",
2222 | "Wscr;" => "\u{1D4B2}",
2223 | "wscr;" => "\u{1D4CC}",
2224 | "xcap;" => "\u{022C2}",
2225 | "xcirc;" => "\u{025EF}",
2226 | "xcup;" => "\u{022C3}",
2227 | "xdtri;" => "\u{025BD}",
2228 | "Xfr;" => "\u{1D51B}",
2229 | "xfr;" => "\u{1D535}",
2230 | "xhArr;" => "\u{027FA}",
2231 | "xharr;" => "\u{027F7}",
2232 | "Xi;" => "\u{0039E}",
2233 | "xi;" => "\u{003BE}",
2234 | "xlArr;" => "\u{027F8}",
2235 | "xlarr;" => "\u{027F5}",
2236 | "xmap;" => "\u{027FC}",
2237 | "xnis;" => "\u{022FB}",
2238 | "xodot;" => "\u{02A00}",
2239 | "Xopf;" => "\u{1D54F}",
2240 | "xopf;" => "\u{1D569}",
2241 | "xoplus;" => "\u{02A01}",
2242 | "xotime;" => "\u{02A02}",
2243 | "xrArr;" => "\u{027F9}",
2244 | "xrarr;" => "\u{027F6}",
2245 | "Xscr;" => "\u{1D4B3}",
2246 | "xscr;" => "\u{1D4CD}",
2247 | "xsqcup;" => "\u{02A06}",
2248 | "xuplus;" => "\u{02A04}",
2249 | "xutri;" => "\u{025B3}",
2250 | "xvee;" => "\u{022C1}",
2251 | "xwedge;" => "\u{022C0}",
2252 | "Yacute;" => "\u{000DD}",
2253 | "Yacute" => "\u{000DD}",
2254 | "yacute;" => "\u{000FD}",
2255 | "yacute" => "\u{000FD}",
2256 | "YAcy;" => "\u{0042F}",
2257 | "yacy;" => "\u{0044F}",
2258 | "Ycirc;" => "\u{00176}",
2259 | "ycirc;" => "\u{00177}",
2260 | "Ycy;" => "\u{0042B}",
2261 | "ycy;" => "\u{0044B}",
2262 | "yen;" => "\u{000A5}",
2263 | "yen" => "\u{000A5}",
2264 | "Yfr;" => "\u{1D51C}",
2265 | "yfr;" => "\u{1D536}",
2266 | "YIcy;" => "\u{00407}",
2267 | "yicy;" => "\u{00457}",
2268 | "Yopf;" => "\u{1D550}",
2269 | "yopf;" => "\u{1D56A}",
2270 | "Yscr;" => "\u{1D4B4}",
2271 | "yscr;" => "\u{1D4CE}",
2272 | "YUcy;" => "\u{0042E}",
2273 | "yucy;" => "\u{0044E}",
2274 | "Yuml;" => "\u{00178}",
2275 | "yuml;" => "\u{000FF}",
2276 | "yuml" => "\u{000FF}",
2277 | "Zacute;" => "\u{00179}",
2278 | "zacute;" => "\u{0017A}",
2279 | "Zcaron;" => "\u{0017D}",
2280 | "zcaron;" => "\u{0017E}",
2281 | "Zcy;" => "\u{00417}",
2282 | "zcy;" => "\u{00437}",
2283 | "Zdot;" => "\u{0017B}",
2284 | "zdot;" => "\u{0017C}",
2285 | "zeetrf;" => "\u{02128}",
2286 | "ZeroWidthSpace;" => "\u{0200B}",
2287 | "Zeta;" => "\u{00396}",
2288 | "zeta;" => "\u{003B6}",
2289 | "Zfr;" => "\u{02128}",
2290 | "zfr;" => "\u{1D537}",
2291 | "ZHcy;" => "\u{00416}",
2292 | "zhcy;" => "\u{00436}",
2293 | "zigrarr;" => "\u{021DD}",
2294 | "Zopf;" => "\u{02124}",
2295 | "zopf;" => "\u{1D56B}",
2296 | "Zscr;" => "\u{1D4B5}",
2297 | "zscr;" => "\u{1D4CF}",
2298 | "zwj;" => "\u{0200D}",
2299 | "zwnj;" => "\u{0200C}",
2300 | ];
2301 | }
2302 |
2303 | #[cfg(test)]
2304 | mod tests { // Unit tests for `html_attr_unescape`.
2305 | use ::util::html_attr_unescape;
2306 |
2307 | #[test]
2308 | fn test_html_attr_unescape() { // Compares the output of `html_attr_unescape` with expected strings for inputs containing `<`.
2309 | assert_eq!(html_attr_unescape("/?foo<=bar"), "/?foo<=bar".to_string()); // NOTE(review): input and expected text are identical as listed; an entity spelling in the input may have been decoded during extraction - confirm against the repository.
2310 | assert_eq!(html_attr_unescape("/?f<oo=bar"), "/?f<oo=bar".to_string());
2311 | assert_eq!(html_attr_unescape("/?f<-oo=bar"), "/?f<-oo=bar".to_string());
2312 | assert_eq!(html_attr_unescape("/?foo=<"), "/?foo=<".to_string());
2313 | assert_eq!(html_attr_unescape("/?f<oo=bar"), "/?f").childs(None); // NOTE(review): this statement looks spliced with a later listing (the inner line numbers jump from 2313 to 17); the end of this function and of the module is not visible here.
17 | assert_eq!(dom[0].text(), "");
18 | }
19 |
20 | #[test]
21 | fn basic1() {
22 | // Simple (basics): id and attribute-presence selectors, text and attribute access, and serialization through to_string().
23 | let dom = DOM::new(r#""#); // NOTE(review): the fixture literal is empty in this listing although the assertions below expect elements; the markup was probably lost in extraction - confirm against the repository.
24 | assert_eq!(dom.at("#b").unwrap().text(), "B");
25 | assert_eq!(dom.find("div[id]").iter().map(|x| x.text()).collect::>(), ["A", "B"]); // NOTE(review): collect::>() has no type argument as listed (presumably collect::<Vec<_>>()) - confirm.
26 | assert_eq!(dom.at("#a").unwrap().attr("foo"), Some("0")); // attr() returns an Option, compared here with Some("0")
27 | assert!(dom.at("#b").unwrap().attrs().contains_key("myattr")); // attrs() is queried with contains_key for the key "myattr"
28 | assert_eq!(dom.find("[id]").iter().map(|x| x.attr("id").unwrap()).collect::>(), ["a", "b"]);
29 | assert_eq!(dom.to_string(), r#""#); // serialized output is compared with a raw string literal (also empty as listed)
30 | }
31 |
32 | #[test]
33 | fn basic2() {
34 | // Select based on parent: the child combinator (>) is applied one and two levels below body.
35 | let dom = DOM::new(r#"
36 |
37 | test1
38 |
39 |
40 | "#); // NOTE(review): the fixture lines above show only "test1" while a later assertion expects "test2"; the markup was probably lost in extraction - confirm against the repository.
41 | assert_eq!(dom.find("body > div").get(0).unwrap().text(), "test1"); // right text
42 | assert_eq!(dom.find("body > div").get(1).unwrap().text(), ""); // no content
43 | assert_eq!(dom.find("body > div").len(), 2); // right number of elements
44 | assert_eq!(dom.find("body > div > div").get(0).unwrap().text(), "test2"); // right text
45 | assert_eq!(dom.find("body > div > div").len(), 1); // right number of elements
46 | }
47 |
48 | #[test]
49 | fn basic3() {
50 | // Basic navigation: checks the value returned by DOM::new, its serialization, selector lookups, and parent/sibling/ancestor traversal.
51 | let dom = DOM::new(r#"
52 |
53 |
54 | test
55 | easy
56 |
57 |
58 | works well
59 |
60 |
61 |
62 | < very broken
63 |
64 | more text
65 |
66 | "#); // NOTE(review): only text fragments of the fixture are visible here; the tags the selectors below rely on were probably lost in extraction - confirm against the repository.
67 | assert!(dom.tag().is_none()); // no tag on the value returned by DOM::new
68 | assert!(!dom.attrs().contains_key("foo"));
69 | assert_eq!(
70 | dom.to_string(),
71 | r#"
72 |
73 |
74 | test
75 | easy
76 |
77 |
78 | works well
79 |
80 |
81 |
82 | < very broken
83 |
84 | more text
85 |
86 | "#);
87 |
88 | let simple = dom.at("foo simple.working[class^=\"wor\"]").unwrap(); // descendant combinator plus class and attribute-prefix conditions
89 | assert_eq!(simple.parent().unwrap().text_all(), "test easy works well yada yada < very broken more text");
90 | assert_eq!(simple.tag().unwrap(), "simple");
91 | assert_eq!(simple.attr("class").unwrap(), "working");
92 | assert_eq!(simple.text(), "easy");
93 | assert_eq!(simple.parent().unwrap().tag().unwrap(), "foo");
94 | assert_eq!(simple.parent().unwrap().attr("bar").unwrap(), "baeasy"); // NOTE(review): this line looks spliced (the inner line numbers jump from 94 to 97); the expected value is probably incomplete - confirm.
97 |
98 | assert_eq!(dom.at("test#test").unwrap().tag().unwrap(), "test");
99 | assert_eq!(dom.at("[class$=\"ing\"]").unwrap().tag().unwrap(), "simple");
100 | assert_eq!(dom.at("[class$=ing]").unwrap().tag().unwrap(), "simple");
101 | assert_eq!(dom.at("[class=\"working\"]").unwrap().tag().unwrap(), "simple");
102 | assert_eq!(dom.at("[class=working][class]").unwrap().tag().unwrap(), "simple");
103 | assert_eq!(dom.at("foo > simple").unwrap().next().unwrap().tag().unwrap(), "test");
104 | assert_eq!(dom.at("foo > simple").unwrap().next().unwrap().next().unwrap().tag().unwrap(), "a");
105 | assert_eq!(dom.at("foo > test").unwrap().prev().unwrap().tag().unwrap(), "simple");
106 | assert!(dom.next().is_none()); // next() on the DOM::new value is expected to be None
107 | assert!(dom.prev().is_none()); // prev() on the DOM::new value is expected to be None
108 | assert!(dom.at("foo > a").unwrap().next().is_none());
109 | assert!(dom.at("foo > simple").unwrap().prev().is_none());
110 | assert_eq!(dom.at("simple").unwrap().ancestors(None).iter().map(|x| x.tag().unwrap()).collect::>(), ["foo"]); // NOTE(review): collect::>() has no type argument as listed (presumably collect::<Vec<_>>()) - confirm.
111 | }
112 |
113 | #[test]
114 | fn class_and_id() {
115 | // Class and ID: one compound selector combines a tag name, an id and a class.
116 | let dom = DOM::new(r#"a
"#); // NOTE(review): only the text "a" of the fixture is visible; the surrounding markup was probably lost in extraction - confirm against the repository.
117 | assert_eq!(dom.at("div#id.class").unwrap().text(), "a");
118 | }
119 |
120 | #[test]
121 | fn deep_nesting() {
122 | // Deep nesting (parent combinator): mixes child (>) and descendant combinators, then lists matched ids and ancestor chains.
123 | let dom = DOM::new(r#"
124 |
125 |
126 | Foo
127 |
128 |
129 |
130 |
136 |
141 |
More stuff
142 |
143 |
144 |
145 | "#); // NOTE(review): the fixture is mostly blank here and its inner line numbers skip (130, 136, 141); the markup was probably lost in extraction - confirm against the repository.
146 |
147 | let p = dom.find("body > #container > div p[id]");
148 | assert_eq!(p.len(), 1);
149 | assert_eq!(p.get(0).unwrap().attr("id").unwrap(), "foo");
150 |
151 | assert_eq!(
152 | dom.find("div").iter().map(|x| x.attr("id").unwrap()).collect::>(),
153 | ["container", "header", "logo", "buttons", "buttons", "content"]
154 | );
155 | assert_eq!(
156 | dom.find("p").iter().map(|x| x.attr("id").unwrap()).collect::>(),
157 | ["foo", "bar"]
158 | );
159 | assert_eq!(
160 | dom.at("p").unwrap().ancestors(None).iter().map(|x| x.tag().unwrap()).collect::>(),
161 | ["div", "div", "div", "body", "html"]
162 | ); // NOTE(review): the collect::>() calls above have no type argument as listed (presumably collect::<Vec<_>>()) - confirm.
163 | assert_eq!(dom.at("html").unwrap().ancestors(None).len(), 0); // the html element is expected to have no ancestors
164 | assert_eq!(dom.ancestors(None).len(), 0); // the same is expected for the DOM::new value itself
165 | }
166 |
167 | #[test]
168 | fn script_tag() { // Checks that text() of the element matched by "script" equals the expected script source.
169 | let dom = DOM::new(r#""#); // NOTE(review): the fixture literal is empty in this listing; the markup was probably lost in extraction - confirm against the repository.
170 | assert_eq!(dom.at("script").unwrap().text(), "alert('world');");
171 | }
172 |
173 | #[test]
174 | fn html5_base() {
175 | // HTML5 (unquoted values): quoted and unquoted selector values are expected to match the same element, while the shorter value "ba" must not match.
176 | let dom = DOM::new(r#"works
"#); // NOTE(review): only the text "works" of the fixture is visible; the markup was probably lost in extraction - confirm against the repository.
177 | assert_eq!(dom.at("#test").unwrap().text(), "works");
178 | assert_eq!(dom.at("div").unwrap().text(), "works");
179 | assert_eq!(dom.at("[foo=bar][foo=\"bar\"]").unwrap().text(), "works");
180 | assert!(dom.at("[foo=\"ba\"]").is_none());
181 | assert_eq!(dom.at("[foo=bar]").unwrap().text(), "works");
182 | assert!(dom.at("[foo=ba]").is_none());
183 | assert_eq!(dom.at(".tset").unwrap().text(), "works");
184 | assert_eq!(dom.at("[bar=/baz/]").unwrap().text(), "works");
185 | assert_eq!(dom.at("[baz=//]").unwrap().text(), "works");
186 | }
187 |
188 | #[test]
189 | fn html1_mix() {
190 | // HTML1 (single quotes, uppercase tags and whitespace in attributes): the lowercase selectors below are expected to match regardless.
191 | let dom = DOM::new(r#"works
"#); // NOTE(review): only the text "works" of the fixture is visible; the markup was probably lost in extraction - confirm against the repository.
192 | assert_eq!(dom.at("#test").unwrap().text(), "works");
193 | assert_eq!(dom.at("div").unwrap().text(), "works");
194 | assert_eq!(dom.at("[foo=\"bar\"]").unwrap().text(), "works");
195 | assert!(dom.at("[foo=\"ba\"]").is_none());
196 | assert_eq!(dom.at("[foo=bar]").unwrap().text(), "works");
197 | assert!(dom.at("[foo=ba]").is_none());
198 | assert_eq!(dom.at(".tset").unwrap().text(), "works");
199 | }
200 |
201 | #[test]
202 | fn unicode_snowman() {
203 | // Already decoded Unicode snowman and quotes in selector: quote characters inside the id value are written literally, backslash-escaped, or as hex escapes.
204 | let dom = DOM::new(r#"☃
"#); // NOTE(review): only the snowman text of the fixture is visible; the markup was probably lost in extraction - confirm against the repository.
205 | assert_eq!(dom.at(r#"[id="snow'm\"an"]"#).unwrap().text(), "☃");
206 | assert_eq!(dom.at(r#"[id="snow'm\22 an"]"#).unwrap().text(), "☃"); // hex escape followed by a space is expected to match
207 | assert_eq!(dom.at(r#"[id="snow\'m\000022an"]"#).unwrap().text(), "☃"); // six-digit hex escape with no space is expected to match
208 | assert_eq!(dom.at("[id='snow\\'m\"an']").unwrap().text(), "☃");
209 | assert_eq!(dom.at("[id='snow\\27m\"an']").unwrap().text(), "☃");
210 | assert!(dom.at(r#"[id="snow'm\22an"]"#).is_none()); // the same two-digit escape without the space is expected not to match
211 | assert!(dom.at(r#"[id="snow'm\21 an"]"#).is_none()); // the remaining cases use code 21 instead of 22 and are expected not to match
212 | assert!(dom.at(r#"[id="snow'm\000021an"]"#).is_none());
213 | assert!(dom.at(r#"[id="snow'm\000021 an"]"#).is_none());
214 | }
215 |
216 | #[test]
217 | fn unicode_selectors() {
218 | // Unicode and escaped selectors: the same two elements are addressed with literal characters and with hex escapes, through id, class and attribute selectors.
219 | let html = r#"Snowman
Heart
"#; // NOTE(review): only the texts "Snowman" and "Heart" of the fixture are visible; the markup was probably lost in extraction - confirm against the repository.
220 | let dom = DOM::new(html);
221 |
222 | assert_eq!(dom.at("#\\\n\\002603x").unwrap().text(), "Snowman"); // the selector holds a backslash followed by a newline before the hex escape
223 | assert_eq!(dom.at("#\\2603 x").unwrap().text(), "Snowman");
224 | assert_eq!(dom.at("#\\\n\\2603 x").unwrap().text(), "Snowman");
225 | assert_eq!(dom.at("[id=\"\\\n\\2603 x\"]").unwrap().text(), "Snowman");
226 | assert_eq!(dom.at("[id=\"\\\n\\002603x\"]").unwrap().text(), "Snowman");
227 | assert_eq!(dom.at("[id=\"\\\\2603 x\"]").unwrap().text(), "Snowman");
228 | assert_eq!(dom.at("html #\\\n\\002603x").unwrap().text(), "Snowman");
229 | assert_eq!(dom.at("html #\\2603 x").unwrap().text(), "Snowman");
230 | assert_eq!(dom.at("html #\\\n\\2603 x").unwrap().text(), "Snowman");
231 | assert_eq!(dom.at("html [id=\"\\\n\\2603 x\"]").unwrap().text(), "Snowman");
232 | assert_eq!(dom.at("html [id=\"\\\n\\002603x\"]").unwrap().text(), "Snowman");
233 | assert_eq!(dom.at("html [id=\"\\\\2603 x\"]").unwrap().text(), "Snowman");
234 | assert_eq!(dom.at("#☃x").unwrap().text(), "Snowman");
235 | assert_eq!(dom.at("html div#☃x").unwrap().text(), "Snowman");
236 | assert_eq!(dom.at("[id^=\"☃\"]").unwrap().text(), "Snowman");
237 | assert_eq!(dom.at("div[id^=\"☃\"]").unwrap().text(), "Snowman");
238 | assert_eq!(dom.at("html div[id^=\"☃\"]").unwrap().text(), "Snowman");
239 | assert_eq!(dom.at("html > div[id^=\"☃\"]").unwrap().text(), "Snowman");
240 | assert_eq!(dom.at("[id^=☃]").unwrap().text(), "Snowman");
241 | assert_eq!(dom.at("div[id^=☃]").unwrap().text(), "Snowman");
242 | assert_eq!(dom.at("html div[id^=☃]").unwrap().text(), "Snowman");
243 | assert_eq!(dom.at("html > div[id^=☃]").unwrap().text(), "Snowman");
244 | assert_eq!(dom.at(".\\\n\\002665").unwrap().text(), "Heart");
245 | assert_eq!(dom.at(".\\2665").unwrap().text(), "Heart");
246 | assert_eq!(dom.at("html .\\\n\\002665").unwrap().text(), "Heart");
247 | assert_eq!(dom.at("html .\\2665").unwrap().text(), "Heart");
248 | assert_eq!(dom.at("html [class$=\"\\\n\\002665\"]").unwrap().text(), "Heart");
249 | assert_eq!(dom.at("html [class$=\"\\2665\"]").unwrap().text(), "Heart");
250 | assert_eq!(dom.at("[class$=\"\\\n\\002665\"]").unwrap().text(), "Heart");
251 | assert_eq!(dom.at("[class$=\"\\2665\"]").unwrap().text(), "Heart");
252 | assert_eq!(dom.at(".x").unwrap().text(), "Heart");
253 | assert_eq!(dom.at("html .x").unwrap().text(), "Heart");
254 | assert_eq!(dom.at(".♥").unwrap().text(), "Heart");
255 | assert_eq!(dom.at("html .♥").unwrap().text(), "Heart");
256 | assert_eq!(dom.at("div.♥").unwrap().text(), "Heart");
257 | assert_eq!(dom.at("html div.♥").unwrap().text(), "Heart");
258 | assert_eq!(dom.at("[class$=\"♥\"]").unwrap().text(), "Heart");
259 | assert_eq!(dom.at("div[class$=\"♥\"]").unwrap().text(), "Heart");
260 | assert_eq!(dom.at("html div[class$=\"♥\"]").unwrap().text(), "Heart");
261 | assert_eq!(dom.at("html > div[class$=\"♥\"]").unwrap().text(), "Heart");
262 | assert_eq!(dom.at("[class$=♥]").unwrap().text(), "Heart");
263 | assert_eq!(dom.at("div[class$=♥]").unwrap().text(), "Heart");
264 | assert_eq!(dom.at("html div[class$=♥]").unwrap().text(), "Heart");
265 | assert_eq!(dom.at("html > div[class$=♥]").unwrap().text(), "Heart");
266 | assert_eq!(dom.at("[class~=\"♥\"]").unwrap().text(), "Heart");
267 | assert_eq!(dom.at("div[class~=\"♥\"]").unwrap().text(), "Heart");
268 | assert_eq!(dom.at("html div[class~=\"♥\"]").unwrap().text(), "Heart");
269 | assert_eq!(dom.at("html > div[class~=\"♥\"]").unwrap().text(), "Heart");
270 | assert_eq!(dom.at("[class~=♥]").unwrap().text(), "Heart");
271 | assert_eq!(dom.at("div[class~=♥]").unwrap().text(), "Heart");
272 | assert_eq!(dom.at("html div[class~=♥]").unwrap().text(), "Heart");
273 | assert_eq!(dom.at("html > div[class~=♥]").unwrap().text(), "Heart");
274 | assert_eq!(dom.at("[class~=\"x\"]").unwrap().text(), "Heart");
275 | assert_eq!(dom.at("div[class~=\"x\"]").unwrap().text(), "Heart");
276 | assert_eq!(dom.at("html div[class~=\"x\"]").unwrap().text(), "Heart");
277 | assert_eq!(dom.at("html > div[class~=\"x\"]").unwrap().text(), "Heart");
278 | assert_eq!(dom.at("[class~=x]").unwrap().text(), "Heart");
279 | assert_eq!(dom.at("div[class~=x]").unwrap().text(), "Heart");
280 | assert_eq!(dom.at("html div[class~=x]").unwrap().text(), "Heart");
281 | assert_eq!(dom.at("html > div[class~=x]").unwrap().text(), "Heart");
282 | assert_eq!(dom.at("html").unwrap().to_string(), html); // the four checks below expect serialization and content() to reproduce the input string
283 | assert_eq!(dom.at("#☃x").unwrap().parent().unwrap().to_string(), html);
284 | assert_eq!(dom.to_string(), html);
285 | assert_eq!(dom.content(), html);
286 |
287 | let dom = DOM::new(r#"☃♥☃"#); // NOTE(review): no tags are visible in this literal although the selectors below name "title" and ".test"; the markup was probably lost in extraction - confirm.
288 | assert_eq!(dom.at("title").unwrap().text(), "♥");
289 | assert_eq!(dom.at("*").unwrap().text(), "♥");
290 | assert_eq!(dom.at(".test").unwrap().text(), "♥");
291 | }
292 |
293 | #[test]
294 | fn attrs_on_multiple_lines() {
295 | // Attributes on multiple lines: the attributes "test", "foo" and "class" are read back from one div.
296 | let dom = DOM::new(""); // NOTE(review): the fixture literal is empty in this listing; the markup was probably lost in extraction - confirm against the repository.
297 | assert_eq!(dom.at("div.x").unwrap().attr("test").unwrap(), "23");
298 | assert_eq!(dom.at("[foo=\"bar\"]").unwrap().attr("class").unwrap(), "x");
299 | }
300 |
301 | #[test]
302 | fn markup_chars_in_attr_vals() {
303 | // Markup characters in attribute values: values such as "=" and "><" are read back through attrs().
304 | let dom = DOM::new(""); // NOTE(review): the fixture literal is empty in this listing; the markup was probably lost in extraction - confirm against the repository.
305 | assert_eq!(dom.at("div[id=\"\"]").unwrap().attrs().get("test").unwrap().clone(), Some("=".to_owned())); // NOTE(review): the id value in this selector is empty as listed; it may have held tag-like text that was dropped - confirm.
306 | assert_eq!(dom.at("[id=\"\"]").unwrap().text(), "Test");
307 | assert_eq!(dom.at("[id=\"><\"]").unwrap().attrs().get("id").unwrap().clone(), Some("><".to_owned())); // attrs().get() yields an Option whose inner value is compared with Some(String)
308 | }
309 |
310 | #[test]
311 | fn empty_attrs() {
312 | // Empty attributes: "test" and "test2" read back as "" and match both presence and ="" selectors, while "test3" matches nothing.
313 | let dom = DOM::new(""); // NOTE(review): the fixture literal is empty in this listing; the markup was probably lost in extraction - confirm against the repository.
314 | assert_eq!(dom.at("div").unwrap().attr("test").unwrap(), "");
315 | assert_eq!(dom.at("div").unwrap().attr("test2").unwrap(), "");
316 | assert_eq!(dom.at("[test]").unwrap().tag().unwrap(), "div");
317 | assert_eq!(dom.at("[test=\"\"]").unwrap().tag().unwrap(), "div");
318 | assert_eq!(dom.at("[test2]").unwrap().tag().unwrap(), "div");
319 | assert_eq!(dom.at("[test2=\"\"]").unwrap().tag().unwrap(), "div");
320 | assert!(dom.at("[test3]").is_none());
321 | assert!(dom.at("[test3=\"\"]").is_none());
322 | }
323 |
324 | #[test]
325 | fn multi_line_attr() {
326 | // Multi-line attribute: the class value keeps its newline, and each line is expected to match as a separate class.
327 | let dom = DOM::new(""); // NOTE(review): the fixture literal is empty in this listing; the markup was probably lost in extraction - confirm against the repository.
328 | assert_eq!(dom.at("div").unwrap().attr("class").unwrap(), "line1\nline2");
329 | assert_eq!(dom.at(".line1").unwrap().tag().unwrap(), "div");
330 | assert_eq!(dom.at(".line2").unwrap().tag().unwrap(), "div");
331 | assert!(dom.at(".line3").is_none());
332 | }
333 |
334 | #[test]
335 | fn entities_in_attrs() { // Reads the href of an "a" element back through attr() and compares it with expected strings containing "<".
336 | assert_eq!(DOM::new("").at("a").unwrap().attr("href").unwrap(), "/?foo<=bar"); // NOTE(review): every fixture literal in this function is empty as listed; the markup was probably lost in extraction - confirm against the repository.
337 | assert_eq!(DOM::new("").at("a").unwrap().attr("href").unwrap(), "/?f<oo=bar");
338 | assert_eq!(DOM::new("").at("a").unwrap().attr("href").unwrap(), "/?f<-oo=bar");
339 | assert_eq!(DOM::new("").at("a").unwrap().attr("href").unwrap(), "/?foo=<");
340 | assert_eq!(DOM::new("").at("a").unwrap().attr("href").unwrap(), "/?fcontent"); // NOTE(review): this line looks spliced with a later test (the inner line numbers jump from 340 to 347, and `dom` below has no visible binding) - confirm.
347 | assert!(dom.at("div").is_some());
348 | assert_eq!(dom.at("div").unwrap().text(), "content");
349 | }
350 |
351 | #[test]
352 | fn class_with_hyphen() {
353 | // Class with hyphen: ".a" and ".a-1" are expected to select different elements.
354 | let dom = DOM::new(r#"A
A1
"#); // NOTE(review): only the texts "A" and "A1" of the fixture are visible; the markup was probably lost in extraction - confirm against the repository.
355 | assert_eq!(dom.find(".a").iter().map(|x| x.text()).collect::>(), ["A"]); // found first element only; NOTE(review): collect::>() has no type argument as listed (presumably collect::<Vec<_>>()) - confirm.
356 | assert_eq!(dom.find(".a-1").iter().map(|x| x.text()).collect::>(), ["A1"]); // found last element only
357 | }
358 |
359 | #[test]
360 | fn empty_tags() {
361 | // Empty tags: to_string() is compared with a literal that looks the same as the input in this listing.
362 | let dom = DOM::new("
"); // NOTE(review): the fixture shows only a line break here; the markup was probably lost in extraction - confirm against the repository.
363 | assert_eq!(dom.to_string(), "
");
364 | }
365 |
366 | #[test]
367 | fn inner_html() { // Checks content() on the element matched by "a" and on the value returned by DOM::new.
368 | let dom = DOM::new("xxxxxxx"); // NOTE(review): no tags are visible in this literal although "a" is selected below; the markup was probably lost in extraction - confirm against the repository.
369 | assert_eq!(dom.at("a").unwrap().content(), "xxxxxxx");
370 | assert_eq!(dom.content(), "xxxxxxx");
371 | }
372 |
373 | #[test]
374 | fn multiple_selectors() {
375 | // Multiple selectors: comma-separated selector lists passed to find(), using tag, id and attribute forms.
376 | let dom = DOM::new("A
B
C
D
"); // NOTE(review): only the texts A to D of the fixture are visible; the markup was probably lost in extraction - confirm against the repository.
377 | assert_eq!(dom.find("p, div").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D"]); // NOTE(review): collect::>() has no type argument as listed (presumably collect::<Vec<_>>()) - confirm.
378 | assert_eq!(dom.find("#a, #c").iter().map(|x| x.text()).collect::>(), ["A", "C"]);
379 | assert_eq!(dom.find("div#a, div#b").iter().map(|x| x.text()).collect::>(), ["A", "B"]);
380 | assert_eq!(dom.find("div[id=\"a\"], div[id=\"c\"]").iter().map(|x| x.text()).collect::>(), ["A", "C"]);
381 |
382 | let dom2 = DOM::new("A
B
C
"); // second fixture: the selectors below use non-ASCII ids (same caveat about lost markup)
383 | assert_eq!(dom2.find("#☃, #♥x").iter().map(|x| x.text()).collect::>(), ["A", "C"]);
384 | assert_eq!(dom2.find("div#☃, div#b").iter().map(|x| x.text()).collect::>(), ["A", "B"]);
385 | assert_eq!(dom2.find("div[id=\"☃\"], div[id=\"♥x\"]").iter().map(|x| x.text()).collect::>(), ["A", "C"]);
386 | }
387 |
388 | #[test]
389 | fn multiple_attributes() {
390 | // Multiple attributes: several attribute conditions are chained on one compound selector, then siblings are walked with next()/prev().
391 | let dom = DOM::new(r#"
392 | A
393 | B
394 | C
395 | D
396 | "#); // NOTE(review): only the texts A to D of the fixture are visible; the markup was probably lost in extraction - confirm against the repository.
397 |
398 | assert_eq!(dom.find("div[foo=\"bar\"][bar=\"baz\"]").iter().map(|x| x.text()).collect::>(), ["A", "C"]); // NOTE(review): collect::>() has no type argument as listed (presumably collect::<Vec<_>>()) - confirm.
399 | assert_eq!(dom.find("div[foo^=\"b\"][foo$=\"r\"]").iter().map(|x| x.text()).collect::>(), ["A", "B", "C"]);
400 | assert!(dom.at("[foo=\"bar\"]").unwrap().prev().is_none());
401 | assert_eq!(dom.at("[foo=\"bar\"]").unwrap().next().unwrap().text(), "B");
402 | assert_eq!(dom.at("[foo=\"bar\"]").unwrap().next().unwrap().prev().unwrap().text(), "A");
403 | assert!(dom.at("[foo=\"bar\"]").unwrap().next().unwrap().next().unwrap().next().unwrap().next().is_none()); // three next() steps succeed and the fourth is expected to return None
404 | }
405 |
406 | #[test]
407 | fn pseudo_classes() {
408 | // Pseudo-classes
409 | let dom = DOM::new(r#"
410 |
428 | "#);
429 | assert_eq!(dom.find(":root").len(), 1);
430 | assert_eq!(dom.find(":root").get(0).unwrap().tag(), Some("form"));
431 | assert_eq!(dom.find("*:root").get(0).unwrap().tag(), Some("form"));
432 | assert_eq!(dom.find("form:root").get(0).unwrap().tag(), Some("form"));
433 | assert_eq!(dom.find(":checked").len(), 4);
434 | assert_eq!(dom.find(":checked").get(0).unwrap().attr("name").unwrap(), "groovy");
435 | assert_eq!(dom.find("option:checked").get(0).unwrap().attr("value").unwrap(), "e");
436 | assert_eq!(dom.find(":checked").get(1).unwrap().text(), "E");
437 | assert_eq!(dom.find("*:checked").get(1).unwrap().text(), "E");
438 | assert_eq!(dom.find(":checked").get(2).unwrap().text(), "H");
439 | assert_eq!(dom.find(":checked").get(3).unwrap().attr("name").unwrap(), "I");
440 | assert_eq!(dom.find("option[selected]").len(), 2);
441 | assert_eq!(dom.find("option[selected]").get(0).unwrap().attr("value").unwrap(), "e");
442 | assert_eq!(dom.find("option[selected]").get(1).unwrap().text(), "H");
443 | assert_eq!(dom.find(":checked[value=\"e\"]").get(0).unwrap().text(), "E");
444 | assert_eq!(dom.find("*:checked[value=\"e\"]").get(0).unwrap().text(), "E");
445 | assert_eq!(dom.find("option:checked[value=\"e\"]").get(0).unwrap().text(), "E");
446 | assert_eq!(dom.at("optgroup option:checked[value=\"e\"]").unwrap().text(), "E");
447 | assert_eq!(dom.at("select option:checked[value=\"e\"]").unwrap().text(), "E");
448 | assert_eq!(dom.at("select :checked[value=\"e\"]").unwrap().text(), "E");
449 | assert_eq!(dom.at("optgroup > :checked[value=\"e\"]").unwrap().text(), "E");
450 | assert_eq!(dom.at("select *:checked[value=\"e\"]").unwrap().text(), "E");
451 | assert_eq!(dom.at("optgroup > *:checked[value=\"e\"]").unwrap().text(), "E");
452 | assert_eq!(dom.find(":checked[value=\"e\"]").len(), 1);
453 | assert_eq!(dom.find(":empty").get(0).unwrap().attr("name").unwrap(), "user");
454 | assert_eq!(dom.find("input:empty").get(0).unwrap().attr("name").unwrap(), "user");
455 | assert_eq!(dom.at(":empty[type^=\"ch\"]").unwrap().attr("name").unwrap(), "groovy");
456 | assert_eq!(dom.at("p").unwrap().attr("id").unwrap(), "content");
457 | assert_eq!(dom.at("p:empty").unwrap().attr("id").unwrap(), "no_content");
458 |
459 | // More pseudo-classes
460 | let dom = DOM::new("
461 |
462 | - A
463 | - B
464 | - C
465 | - D
466 | - E
467 | - F
468 | - G
469 | - H
470 |
471 | ");
472 | assert_eq!(dom.find("li:nth-child(odd)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]);
473 | assert_eq!(dom.find("li:NTH-CHILD(ODD)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]);
474 | assert_eq!(dom.find("li:nth-last-child(odd)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]);
475 | assert_eq!(dom.find(":nth-child(odd)").get(0).unwrap().tag().unwrap(), "ul");
476 | assert_eq!(dom.find(":nth-child(odd)").get(1).unwrap().text(), "A");
477 | assert_eq!(dom.find(":nth-child(1)").get(0).unwrap().tag().unwrap(), "ul");
478 | assert_eq!(dom.find(":nth-child(1)").get(1).unwrap().text(), "A");
479 | assert_eq!(dom.find(":nth-last-child(odd)").get(0).unwrap().tag().unwrap(), "ul");
480 | assert_eq!(dom.find(":nth-last-child(odd)").last().unwrap().text(), "H");
481 | assert_eq!(dom.find(":nth-last-child(1)").get(0).unwrap().tag().unwrap(), "ul");
482 | assert_eq!(dom.find(":nth-last-child(1)").get(1).unwrap().text(), "H");
483 | assert_eq!(dom.find("li:nth-child(2n+1)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]);
484 | assert_eq!(dom.find("li:nth-child(2n + 1)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]);
485 | assert_eq!(dom.find("li:nth-last-child(2n+1)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]);
486 | assert_eq!(dom.find("li:nth-child(even)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]);
487 | assert_eq!(dom.find("li:NTH-CHILD(EVEN)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]);
488 | assert_eq!(dom.find("li:nth-last-child( even )").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]);
489 | assert_eq!(dom.find("li:nth-child(2n+2)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]);
490 | assert_eq!(dom.find("li:nTh-chILd(2N+2)").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]);
491 | assert_eq!(dom.find("li:nth-child( 2n + 2 )").iter().map(|x| x.text()).collect::>(), ["B", "D", "F", "H"]);
492 | assert_eq!(dom.find("li:nth-last-child(2n+2)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G"]);
493 | assert_eq!(dom.find("li:nth-child(4n+1)").iter().map(|x| x.text()).collect::>(), ["A", "E"]);
494 | assert_eq!(dom.find("li:nth-last-child(4n+1)").iter().map(|x| x.text()).collect::>(), ["D", "H"]);
495 | assert_eq!(dom.find("li:nth-child(4n+4)").iter().map(|x| x.text()).collect::>(), ["D", "H"]);
496 | assert_eq!(dom.find("li:nth-last-child(4n+4)").iter().map(|x| x.text()).collect::>(), ["A", "E"]);
497 | assert_eq!(dom.find("li:nth-child(4n)").iter().map(|x| x.text()).collect::>(), ["D", "H"]);
498 | assert_eq!(dom.find("li:nth-child( 4n )").iter().map(|x| x.text()).collect::>(), ["D", "H"]);
499 | assert_eq!(dom.find("li:nth-last-child(4n)").iter().map(|x| x.text()).collect::>(), ["A", "E"]);
500 | assert_eq!(dom.find("li:nth-child(5n-2)").iter().map(|x| x.text()).collect::>(), ["C", "H"]);
501 | assert_eq!(dom.find("li:nth-child( 5n - 2 )").iter().map(|x| x.text()).collect::>(), ["C", "H"]);
502 | assert_eq!(dom.find("li:nth-last-child(5n-2)").iter().map(|x| x.text()).collect::>(), ["A", "F"]);
503 | assert_eq!(dom.find("li:nth-child(-n+3)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C"]);
504 | assert_eq!(dom.find("li:nth-child( -n + 3 )").iter().map(|x| x.text()).collect::>(), ["A", "B", "C"]);
505 | assert_eq!(dom.find("li:nth-last-child(-n+3)").iter().map(|x| x.text()).collect::>(), ["F", "G", "H"]);
506 | assert_eq!(dom.find("li:nth-child(-1n+3)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C"]);
507 | assert_eq!(dom.find("li:nth-last-child(-1n+3)").iter().map(|x| x.text()).collect::>(), ["F", "G", "H"]);
508 | assert_eq!(dom.find("li:nth-child(3n)").iter().map(|x| x.text()).collect::>(), ["C", "F"]);
509 | assert_eq!(dom.find("li:nth-last-child(3n)").iter().map(|x| x.text()).collect::>(), ["C", "F"]);
510 | assert_eq!(dom.find("li:NTH-LAST-CHILD(3N)").iter().map(|x| x.text()).collect::>(), ["C", "F"]);
511 | assert_eq!(dom.find("li:Nth-Last-Child(3N)").iter().map(|x| x.text()).collect::>(), ["C", "F"]);
512 | assert_eq!(dom.find("li:nth-child( 3 )").iter().map(|x| x.text()).collect::>(), ["C"]);
513 | assert_eq!(dom.find("li:nth-last-child( +3 )").iter().map(|x| x.text()).collect::>(), ["F"]);
514 | assert_eq!(dom.find("li:nth-child(1n+0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]);
515 | assert_eq!(dom.find("li:nth-child(1n-0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]);
516 | assert_eq!(dom.find("li:nth-child(n+0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]);
517 | assert_eq!(dom.find("li:nth-child(n)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]);
518 | assert_eq!(dom.find("li:nth-child(n+0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]);
519 | assert_eq!(dom.find("li:NTH-CHILD(N+0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]);
520 | assert_eq!(dom.find("li:Nth-Child(N+0)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]);
521 | assert_eq!(dom.find("li:nth-child(n)").iter().map(|x| x.text()).collect::>(), ["A", "B", "C", "D", "E", "F", "G"]);
522 | assert_eq!(dom.find("li:nth-child(0n+1)").iter().map(|x| x.text()).collect::>(), ["A"]);
523 | assert_eq!(dom.find("li:nth-child(0n+0)").len(), 0);
524 | assert_eq!(dom.find("li:nth-child(0)").len(), 0);
525 | assert_eq!(dom.find("li:nth-child()").len(), 0);
526 | assert_eq!(dom.find("li:nth-child(whatever)").len(), 0);
527 | assert_eq!(dom.find("li:whatever(whatever)").len(), 0);
528 |
529 | // Even more pseudo-classes
530 | let dom = DOM::new(r#"
531 |
532 | - A
533 | B
534 | - C
535 | D
536 | - E
537 | - F
538 | G
539 | - H
540 | - I
541 |
542 |
545 |
550 | "#);
551 | assert_eq!(dom.find("ul :nth-child(odd)").iter().map(|x| x.text()).collect::>(), ["A", "C", "E", "G", "I"]);
552 | assert_eq!(dom.find("li:nth-of-type(odd)").iter().map(|x| x.text()).collect::>(), ["A", "E", "H"]);
553 | assert_eq!(dom.find("li:nth-last-of-type( odd )").iter().map(|x| x.text()).collect::>(), ["C", "F", "I"]);
554 | assert_eq!(dom.find("p:nth-of-type(odd)").iter().map(|x| x.text()).collect::>(), ["B", "G"]);
555 | assert_eq!(dom.find("p:nth-last-of-type(odd)").iter().map(|x| x.text()).collect::>(), ["B", "G"]);
556 | assert_eq!(dom.find("ul :nth-child(1)").iter().map(|x| x.text()).collect::>(), ["A"]);
557 | assert_eq!(dom.find("ul :first-child").iter().map(|x| x.text()).collect::>(), ["A"]);
558 | assert_eq!(dom.find("p:nth-of-type(1)").iter().map(|x| x.text()).collect::>(), ["B"]);
559 | assert_eq!(dom.find("p:first-of-type").iter().map(|x| x.text()).collect::>(), ["B"]);
560 | assert_eq!(dom.find("li:nth-of-type(1)").iter().map(|x| x.text()).collect::>(), ["A"]);
561 | assert_eq!(dom.find("li:first-of-type").iter().map(|x| x.text()).collect::>(), ["A"]);
562 | assert_eq!(dom.find("ul :nth-last-child(-n+1)").iter().map(|x| x.text()).collect::>(), ["I"]);
563 | assert_eq!(dom.find("ul :last-child").iter().map(|x| x.text()).collect::>(), ["I"]);
564 | assert_eq!(dom.find("p:nth-last-of-type(-n+1)").iter().map(|x| x.text()).collect::>(), ["G"]);
565 | assert_eq!(dom.find("p:last-of-type").iter().map(|x| x.text()).collect::>(), ["G"]);
566 | assert_eq!(dom.find("li:nth-last-of-type(-n+1)").iter().map(|x| x.text()).collect::>(), ["I"]);
567 | assert_eq!(dom.find("li:last-of-type").iter().map(|x| x.text()).collect::>(), ["I"]);
568 | assert_eq!(dom.find("ul :nth-child(-n+3):not(li)").iter().map(|x| x.text()).collect::>(), ["B"]);
569 | assert_eq!(dom.find("ul :nth-child(-n+3):NOT(li)").iter().map(|x| x.text()).collect::>(), ["B"]);
570 | assert_eq!(dom.find("ul :nth-child(-n+3):not(:first-child)").iter().map(|x| x.text()).collect::>(), ["B", "C"]);
571 | assert_eq!(dom.find("ul :nth-child(-n+3):not(.♥)").iter().map(|x| x.text()).collect::>(), ["A", "B"]);
572 | assert_eq!(dom.find("ul :nth-child(-n+3):not([class$=\"♥\"])").iter().map(|x| x.text()).collect::>(), ["A", "B"]);
573 | assert_eq!(dom.find("ul :nth-child(-n+3):not(li[class$=\"♥\"])").iter().map(|x| x.text()).collect::>(), ["A", "B"]);
574 | assert_eq!(dom.find("ul :nth-child(-n+3):not([class$=\"♥\"][class^=\"test\"])").iter().map(|x| x.text()).collect::>(), ["A", "B"]);
575 | assert_eq!(dom.find("ul :nth-child(-n+3):not(*[class$=\"♥\"])").iter().map(|x| x.text()).collect::>(), ["A", "B"]);
576 | assert_eq!(dom.find("ul :nth-child(-n+3):not(:nth-child(-n+2))").iter().map(|x| x.text()).collect::>(), ["C"]);
577 | assert_eq!(dom.find("ul :nth-child(-n+3):not(:nth-child(1)):not(:nth-child(2))").iter().map(|x| x.text()).collect::>(), ["C"]);
578 | assert_eq!(dom.find(":only-child").iter().map(|x| x.text()).collect::>(), ["J"]);
579 | assert_eq!(dom.find("div :only-of-type").iter().map(|x| x.text()).collect::>(), ["J", "K"]);
580 | assert_eq!(dom.find("div:only-child").iter().map(|x| x.text()).collect::>(), ["J"]);
581 | assert_eq!(dom.find("div div:only-of-type").iter().map(|x| x.text()).collect::>(), ["J", "K"]);
582 | }
583 |
--------------------------------------------------------------------------------