├── .gitignore ├── LICENSE.md ├── Makefile.am ├── README-EN.md ├── ReadMe.md ├── autogen.sh ├── configure.ac ├── examples └── main.cpp ├── projectStruct-CN.svg ├── projectStruct-EN.svg └── src ├── Applicability ├── HTMLCSSRefAdaptor.cpp ├── HTMLCSSRefAdaptor.h ├── PseudoSelectorRefChiefJudge.cpp └── PseudoSelectorRefChiefJudge.hpp ├── CSSParser ├── CSSLex.cpp ├── CSSLex.hpp ├── CSSLexStatus.h ├── CSSParser.cpp ├── CSSParser.hpp ├── CSSParserStatus.h ├── Keyword │ ├── KeywordItem.cpp │ └── KeywordItem.hpp └── Selectors │ ├── AttributeSelector.cpp │ ├── AttributeSelector.hpp │ ├── ClassSelector.cpp │ ├── ClassSelector.hpp │ ├── CombineSelector.cpp │ ├── CombineSelector.hpp │ ├── IdSelector.cpp │ ├── IdSelector.hpp │ ├── PseudoSelector.cpp │ ├── PseudoSelector.hpp │ ├── Selector.cpp │ ├── Selector.hpp │ ├── SelectorGroup.cpp │ ├── SelectorGroup.hpp │ ├── SelectorSequence.cpp │ ├── SelectorSequence.hpp │ ├── SelectorsHeader.h │ ├── SignSelector.cpp │ ├── SignSelector.hpp │ ├── TypeSelector.cpp │ ├── TypeSelector.hpp │ ├── UniversalSelector.cpp │ └── UniversalSelector.hpp └── Vendor ├── Utils ├── ContainerUtil.cpp ├── ContainerUtil.hpp ├── StringUtil.cpp └── StringUtil.h └── gumbo ├── CHANGES.md ├── CONTRIBUTING.md ├── GumboInterface.cpp ├── GumboInterface.h ├── Thanks ├── UrlUtil.cpp ├── UrlUtil.h ├── attribute.c ├── attribute.h ├── char_ref.c ├── char_ref.h ├── char_ref.rl ├── error.c ├── error.h ├── gumbo.h ├── gumbo_edit.c ├── gumbo_edit.h ├── include ├── gumbo_windll.def └── strings.h ├── insertion_mode.h ├── parser.c ├── parser.h ├── string_buffer.c ├── string_buffer.h ├── string_piece.c ├── string_piece.h ├── tag.c ├── tag.in ├── tag_enum.h ├── tag_perf.h ├── tag_sizes.h ├── tag_strings.h ├── token_type.h ├── tokenizer.c ├── tokenizer.h ├── tokenizer_states.h ├── utf8.c ├── utf8.h ├── util.c ├── util.h ├── vector.c └── vector.h /.gitignore: -------------------------------------------------------------------------------- 1 | #build products 2 | *.o 3 | *.d 4 | *.a 
5 | 6 | # Editor swap files 7 | *.swp 8 | *.swo 9 | *.swn 10 | 11 | # Other build artifacts 12 | /Debug 13 | /visualc/Debug 14 | /visualc/Release 15 | /visualc/gumbo.sdf 16 | /visualc/gumbo.opensdf 17 | /build 18 | .log 19 | .sdf 20 | .opensdf 21 | .deps 22 | .dirstamp 23 | .libs 24 | Makefile 25 | Makefile.in 26 | aclocal.m4 27 | autom4te.cache 28 | compile 29 | config.guess 30 | config.log 31 | config.status 32 | config.sub 33 | configure 34 | depcomp 35 | gumbo.pc 36 | gumbo_test 37 | gumbo_test.log 38 | gumbo_test.trs 39 | install-sh 40 | libtool 41 | ltmain.sh 42 | m4/ 43 | missing 44 | test-driver 45 | test-suite.log 46 | ar-lib 47 | config.h.in 48 | config.h 49 | stamp-h1 50 | 51 | #bin 52 | example 53 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Beijing logicreation Information & Technology Co., Ltd 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | lib_LIBRARIES = libcssparser.a 2 | libcssparser_a_SOURCES = \ 3 | src/CSSParser/CSSParser.cpp\ 4 | src/CSSParser/CSSLex.cpp\ 5 | src/CSSParser/Keyword/KeywordItem.cpp\ 6 | src/CSSParser/Selectors/ClassSelector.cpp\ 7 | src/CSSParser/Selectors/PseudoSelector.cpp\ 8 | src/CSSParser/Selectors/SelectorGroup.cpp\ 9 | src/CSSParser/Selectors/SignSelector.cpp\ 10 | src/CSSParser/Selectors/CombineSelector.cpp\ 11 | src/CSSParser/Selectors/AttributeSelector.cpp\ 12 | src/CSSParser/Selectors/SelectorSequence.cpp\ 13 | src/CSSParser/Selectors/UniversalSelector.cpp\ 14 | src/CSSParser/Selectors/TypeSelector.cpp\ 15 | src/CSSParser/Selectors/Selector.cpp\ 16 | src/CSSParser/Selectors/IdSelector.cpp\ 17 | src/Applicability/HTMLCSSRefAdaptor.cpp\ 18 | src/Applicability/PseudoSelectorRefChiefJudge.cpp\ 19 | src/Vendor/gumbo/string_buffer.c\ 20 | src/Vendor/gumbo/util.c\ 21 | src/Vendor/gumbo/vector.c\ 22 | src/Vendor/gumbo/attribute.c\ 23 | src/Vendor/gumbo/UrlUtil.cpp\ 24 | src/Vendor/gumbo/char_ref.c\ 25 | src/Vendor/gumbo/utf8.c\ 26 | src/Vendor/gumbo/error.c\ 27 | src/Vendor/gumbo/tag.c\ 28 | src/Vendor/gumbo/GumboInterface.cpp\ 29 | src/Vendor/gumbo/parser.c\ 30 | src/Vendor/gumbo/tokenizer.c\ 31 | src/Vendor/gumbo/string_piece.c\ 32 | src/Vendor/gumbo/gumbo_edit.c\ 33 | src/Vendor/Utils/ContainerUtil.cpp\ 34 | src/Vendor/Utils/StringUtil.cpp 35 | 36 | bin_PROGRAMS = example 37 | libcssparser_a_CPPFLAGS = \ 38 | -Isrc/Applicability\ 39 | -Isrc/CSSParser\ 40 | -Isrc/CSSParser/Keyword\ 41 | -Isrc/CSSParser/Selectors\ 42 | -Isrc/Vendor/gumbo\ 43 | 
-Isrc/Vendor/Utils 44 | example_CPPFLAGS =\ 45 | -Isrc/Applicability\ 46 | -Isrc/CSSParser\ 47 | -Isrc/CSSParser/Keyword\ 48 | -Isrc/CSSParser/Selectors\ 49 | -Isrc/Vendor/gumbo\ 50 | -Isrc/Vendor/Utils 51 | example_SOURCES = examples/main.cpp 52 | example_DEPENDENCIES = libcssparser.a 53 | example_LDADD = libcssparser.a 54 | -------------------------------------------------------------------------------- /README-EN.md: -------------------------------------------------------------------------------- 1 | # DDCSSParser - a C++ CSS Parser 2 | 3 | [![LICENSE](https://img.shields.io/packagist/l/doctrine/orm.svg)](./LICENSE.md) 4 | 5 | [中文文档](./ReadMe.md) 6 | 7 | ------ 8 | 9 | DDCSSParser is a CSS parser implemented in C++. With this parser, users can obtain a set of CSS selector models and check whether a selector (a CSS rule) can be applied to an HTML DOM node. 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | ## Installation 18 | 19 | ##### Using Code 20 | 21 | * Clone the source code into your working directory: `git clone https://github.com/luojilab/CSSParser.git` 22 | * Copy the contents of the **src** directory into your project 23 | 24 | ##### Running Demo 25 | 26 | - `./autogen.sh` 27 | - `./configure` 28 | - `make` 29 | - `./example fullpath/to/htmlfile fullpath/to/cssfile` 30 | 31 | ## Features 32 | 33 | #### Supported CSS Selectors: 34 | 35 | - Simple Selector: 36 | - Type Selector 37 | - Universal Selector 38 | - Attribute Selector 39 | - Class Selector 40 | - ID Selector 41 | - Pseudo Class 42 | - Selector Sequence 43 | - Selector Group 44 | - Combinators: 45 | - Descendant combinator 46 | - Child combinators 47 | - Sibling combinators 48 | 49 | ## Goals to Achieve 50 | 51 | * Remove the dependency on [Gumbo](https://github.com/google/gumbo-parser) 52 | * Support modeling of CSS rules 53 | * Fix remaining potential bugs 54 | 55 | ## Basic Usage 56 | 57 | * Modeling CSS Selectors 58 | 59 | ```c++ 60 | // parse css file 61 | future::CSSParser* parser = new future::CSSParser; 62 | 
parser->parseByFile("/Path/to/CssFile"); 63 | std::set<future::Selector *> selectors = parser->getSelectors(); 64 | // parse css string 65 | parser->parseByString("body #id {}"); 66 | selectors = parser->getSelectors(); 67 | ``` 68 | 69 | * Check whether a CSS selector can be applied to an HTML DOM node 70 | 71 | ```c++ 72 | // selectors holds all selector models parsed from a CSS file 73 | future::Selector* selector = *selectors.begin(); 74 | // get a DOM node (root node in this example) 75 | GumboNode *root = gi.get_root_node(); 76 | // prepare the arguments: a one-element node array and its size 77 | future::HTMLCSSRefAdaptor::GumboArray nodesArray = &root; int potentialSize = 1; 78 | // check whether a CSS selector can be applied to an HTML DOM node 79 | future::HTMLCSSRefAdaptor::nodeAdaptToSelector(&nodesArray, selector, &potentialSize); 80 | ``` 81 | 82 | * Find all the HTML DOM nodes that match a specific CSS selector 83 | 84 | ```c++ 85 | // traverse the DOM tree 86 | void AllNodesApplyToSelector(GumboNode *root, future::Selector* selector, std::list<GumboNode *> &list) 87 | { 88 | if (root->type == GUMBO_NODE_ELEMENT) { 89 | GumboVector children = root->v.element.children; 90 | for (unsigned int i = 0; i < children.length; i++) { 91 | GumboNode*child = (GumboNode *)(children.data[i]); 92 | AllNodesApplyToSelector(child, selector, list); 93 | } 94 | future::HTMLCSSRefAdaptor::GumboArray nodesArray = &root; int potentialSize = 1; 95 | if (future::HTMLCSSRefAdaptor::nodeAdaptToSelector(&nodesArray, selector, &potentialSize)) { 96 | list.push_back(root); 97 | } 98 | } 99 | future::HTMLCSSRefAdaptor::CleanResource(); 100 | } 101 | // Find all the HTML DOM nodes that match a specific CSS selector 102 | std::list<GumboNode *> allNodes; 103 | future::Selector* s = *selectors.begin(); 104 | AllNodesApplyToSelector(gi.get_root_node(), s, allNodes); 105 | ``` 106 | 107 | ## Applicability 108 | 109 | The code has been tested on iOS/Android/Linux/Windows. 110 | 111 | To use it on Android, you must write a separate API layer for Java. 112 | 113 | ## Dependency 114 | 115 | * [Google Gumbo](https://github.com/google/gumbo-parser) 
-------------------------------------------------------------------------------- /ReadMe.md: -------------------------------------------------------------------------------- 1 | # DDCSSParser - C++的CSS解析器 2 | 3 | [![LISCENSE](https://img.shields.io/packagist/l/doctrine/orm.svg)](./LICENSE.md) 4 | 5 | [English document](./README-EN.md) 6 | 7 | ------ 8 | 9 | DDCSSParser是一个C++实现的css解析器。最终会生成一系列的selector的模型(Selector类),并且会依据一个Selector判断CSS是否适用于某一个HTML的DOM节点。 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | ## 安装 18 | 19 | ##### 使用代码 20 | 21 | * 将代码拷贝至本地`git clone https://github.com/luojilab/CSSParser.git` 22 | * 提取出来**Src**文件夹中的内容到工程中 23 | 24 | ##### 运行demo 25 | 26 | * `./autogen.sh` 27 | * `./configure` 28 | * `make` 29 | * `./example fullpath/to/htmlfile fullpath/to/cssfile` 30 | 31 | ## 特性 32 | 33 | #### 支持的CSS Selector: 34 | 35 | * Simple Selector: 36 | * Type Selector 37 | * Universal Selector 38 | * Attribute Selector 39 | * Class Selector 40 | * ID Selector 41 | * Pseudo Class 42 | * Selector Sequence 43 | * Selector Group 44 | * Combinators: 45 | * Descendant combinator 46 | * Child combinators 47 | * Sibling combinators 48 | 49 | #### 将要实现的目标: 50 | 51 | * 移除对于[Gumbo](https://github.com/google/gumbo-parser)的依赖 52 | * 增加对于CSS规则的建模 53 | * 消除可能存在的一些bug 54 | 55 | ## 基础用法 56 | 57 | * 建立CSS Selector的模型 58 | 59 | ```c++ 60 | // 解析css文件 61 | future::CSSParser* parser = new future::CSSParser; 62 | parser->parseByFile("/Path/to/CssFile"); 63 | std::listselectors = parser>getSelectors(); 64 | // 解析css字符串 65 | parser->parseByString("body #id {}"); 66 | std::listselectors = parser>getSelectors(); 67 | ``` 68 | 69 | * 查询CSS某一条规则是否应用到HTML的某一节点上 70 | 71 | ```c++ 72 | // selectors为解析出来的所有selector的选择器模型 73 | future::Selector* selector = *selectors.begin(); 74 | // 获取某一个节点(本例是根结点) 75 | GumboNode *root = gi.get_root_node(); 76 | // 初始化数据 77 | future::HTMLCSSRefAdaptor::GumboArray nodesArray = &root; 78 | // 查询selector这个css选择是否可以应用到root这个DOM节点中 79 | 
future::HTMLCSSRefAdaptor::nodeAdaptToSelector(&nodesArray, selector); 80 | ``` 81 | 82 | * 查询满足某一个Selector的所有DOM节点 83 | 84 | ```c++ 85 | // 遍历DOM树 86 | void AllNodesApplyToSelector(GumboNode *root, future::Selector* selector, std::list&list) 87 | { 88 | if (root->type == GUMBO_NODE_ELEMENT) { 89 | GumboVector children = root->v.element.children; 90 | for (unsigned int i = 0; i < children.length; i++) { 91 | GumboNode*child = (GumboNode *)(children.data[i]); 92 | AllNodesApplyToSelector(child, selector, list); 93 | } 94 | future::HTMLCSSRefAdaptor::GumboArray nodesArray = &root; 95 | if (future::HTMLCSSRefAdaptor::nodeAdaptToSelector(&nodesArray, selector)) { 96 | list.push_back(root); 97 | } 98 | } 99 | future::HTMLCSSRefAdaptor::CleanResource(); 100 | } 101 | // 查找所有满足选择器s的所有DOM节点 102 | std::list&allNodesRef = allNodes; 103 | future::Selector* s = *selectors.begin(); 104 | AllNodesApplyToSelector(gi.get_root_node(), s, allNodesRef); 105 | ``` 106 | 107 | ## 适用性 108 | 109 | 本代码经过测试已经在iOS/Android/Linux/windows上测试通过。 110 | 111 | *Android需要单独写接口对接Java* 112 | 113 | 114 | ## 依赖 115 | 116 | * [Google Gumbo](https://github.com/google/gumbo-parser) 117 | 118 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | if test -z "$LIBTOOLIZE" -a "`uname`" = "Darwin"; then 2 | if command -v "glibtoolize" >/dev/null; then 3 | LIBTOOLIZE=glibtoolize 4 | elif command -v "libtoolize" >/dev/null; then 5 | LIBTOOLIZE=libtoolize 6 | else 7 | echo "autogen.sh: line $LINENO: command glibtoolize or libtoolize not found" 8 | exit 1 9 | fi 10 | fi 11 | 12 | autoreconf --install 13 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT([cssparser], [1.0], [fuguoqiang@luojilab.com]) 2 | AM_INIT_AUTOMAKE([-Wall -Werror foreign 
subdir-objects]) 3 | AC_PROG_CC 4 | AC_PROG_CXX 5 | AC_PROG_RANLIB 6 | AM_PROG_AR 7 | AC_CONFIG_HEADERS([config.h]) 8 | AC_CONFIG_FILES([ 9 | Makefile 10 | ]) 11 | AC_OUTPUT 12 | -------------------------------------------------------------------------------- /examples/main.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // main.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/1. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #include 10 | #include "CSSParser.hpp" 11 | #include "KeywordItem.hpp" 12 | #include "gumbo.h" 13 | #include "GumboInterface.h" 14 | #include "HTMLCSSRefAdaptor.h" 15 | 16 | void Applicability(GumboNode *root, future::Selector* selector); 17 | void AllNodesApplyToSelector(GumboNode *root, future::Selector* selector, std::list&list); 18 | 19 | int main(int argc, const char * argv[]) { 20 | if (argc < 3) { 21 | printf("Please enter html path and css path\n"); 22 | return 0; 23 | } 24 | const char* htmlPath = argv[1]; 25 | const char* cssPath = argv[2]; 26 | if (!strlen(htmlPath)) { 27 | printf("HTML path is empty\n"); 28 | return 0; 29 | } 30 | if (!strlen(cssPath)) { 31 | printf("css path is empty\n"); 32 | return 0; 33 | } 34 | 35 | FILE *f = fopen(htmlPath, "r"); 36 | if (!f) { printf("HTML file open failed\n"); return 0;} 37 | fseek(f, 0, SEEK_END); 38 | size_t size = ftell(f); 39 | if (size == 0) { printf("HTML file is empty\n"); return 0; } 40 | fseek(f, 0, SEEK_SET); 41 | char* ptr = new char[size + 1]; 42 | memset(ptr, 0, size + 1); 43 | fread((void *)ptr, size, 1, f); 44 | fclose(f); 45 | 46 | future::GumboInterface gi(ptr, "2"); 47 | future::CSSParser* parser = new future::CSSParser; 48 | parser->parseByFile(cssPath); 49 | std::setselectors = parser->getSelectors(); 50 | 51 | printf("********** All Keywords **********\n"); 52 | for(future::KeywordItem* keyword : parser->getKeywords()) { 53 | printf("%s\n", keyword->getName().c_str()); 54 | } 55 | 
// Print all selectors 56 | printf("\n\n********** All Selectors **********\n"); 57 | for(future::Selector *s : selectors) { 58 | printf("%s\n", s->description().c_str()); 59 | } 60 | printf("\n"); 61 | // Demo1: 62 | // 检测html中的node,是否适用于一个css的selector 63 | // Check whether a css selector canapply to a html node 64 | for (future::Selector *s : selectors) { 65 | // Tranvers DOM tree 66 | Applicability(gi.get_root_node(), s); 67 | } 68 | // Demo2: 69 | // 找到所有适用于某一selector的DMO节点 70 | // checkout All Nodes in DOM that can apply to a specific selector 71 | std::listallNodes; 72 | std::list&allNodesRef = allNodes; 73 | future::Selector* s = *selectors.begin(); 74 | AllNodesApplyToSelector(gi.get_root_node(), s, allNodesRef); 75 | delete [] ptr; 76 | delete parser; 77 | return 0; 78 | } 79 | 80 | void Applicability(GumboNode *root, future::Selector* selector) 81 | { 82 | if (root->type == GUMBO_NODE_ELEMENT) { 83 | GumboVector children = root->v.element.children; 84 | for (unsigned int i = 0; i < children.length; i++) { 85 | GumboNode*child = (GumboNode *)(children.data[i]); 86 | Applicability(child, selector); 87 | } 88 | const char* name = HTMLTagNames[root->v.element.tag]; 89 | future::HTMLCSSRefAdaptor::GumboArray nodesArray = &root; 90 | int *temp = new int(1); 91 | printf("%s match selector %s line: %d\n", 92 | name, 93 | future::HTMLCSSRefAdaptor::nodeAdaptToSelector(&nodesArray, selector, temp) ? 
"YES" : "FALSE", 94 | root->v.element.start_pos.line); 95 | delete temp; 96 | 97 | } 98 | future::HTMLCSSRefAdaptor::CleanResource(); 99 | } 100 | 101 | void AllNodesApplyToSelector(GumboNode *root, future::Selector* selector, std::list&list) 102 | { 103 | if (root->type == GUMBO_NODE_ELEMENT) { 104 | GumboVector children = root->v.element.children; 105 | for (unsigned int i = 0; i < children.length; i++) { 106 | GumboNode*child = (GumboNode *)(children.data[i]); 107 | AllNodesApplyToSelector(child, selector, list); 108 | } 109 | future::HTMLCSSRefAdaptor::GumboArray nodesArray = &root; 110 | int *temp = new int(1); 111 | if (future::HTMLCSSRefAdaptor::nodeAdaptToSelector(&nodesArray, selector, temp)) { 112 | list.push_back(root); 113 | } 114 | } 115 | future::HTMLCSSRefAdaptor::CleanResource(); 116 | } 117 | -------------------------------------------------------------------------------- /projectStruct-EN.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | Produced by OmniGraffle 7.7.1 32 | 2018-12-07 13:56:24 +0000 33 | 34 | 35 | Canvas 1 36 | 37 | 38 | Layer 1 39 | 40 | 41 | 42 | 43 | CSS 44 | Data 45 | buffer 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | CSS 56 | Lexer 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | CSS 67 | Parser 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | CSS 78 | Model 79 | 80 | 81 | 82 | 83 | 84 | 85 | Gumb- 86 | o 87 | 88 | 89 | 90 | 91 | 92 | 93 | HTML 94 | Data 95 | buffer 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | HTML 106 | Model 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | HTMLC 120 | SSRefA 121 | daptor 122 | 123 | 124 | 125 | 126 | Tokens 127 | 128 | 129 | 130 | 131 | Models 132 | 133 | 134 | 135 | 136 | Models 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | AST 147 | RPN 148 | 149 | 
/**
 * Static entry points for testing whether a parsed CSS selector applies to
 * Gumbo DOM nodes. All operations are exposed as static member functions.
 */
class HTMLCSSRefAdaptor {
public:
    // Pointer to a node array; lets the callee replace the caller's array.
    typedef GumboNode*** GumboArrayPtr;
    // Array of node pointers.
    typedef GumboNode** GumboArray;
    HTMLCSSRefAdaptor();
    virtual ~HTMLCSSRefAdaptor();
    /**
     * Check whether a selector can apply to the node(s) in nodes
     * @param nodes the nodes to check
     * @param selector the selector
     * @param potentialSize the size of the nodes array
     * @return true when the selector applies
     * @note the nodes can be changed after the comparing.
     */
    static bool nodeAdaptToSelector(GumboArrayPtr nodes, Selector* selector, int *potentialSize);

    /**
     * Clean all resources after the compare
     */
    static void CleanResource();

private:
    // One matcher per selector kind; each takes the same node array and size
    // arguments as nodeAdaptToSelector.
    // NOTE(review): presumably nodeAdaptToSelector dispatches to these by
    // selector type — confirm in HTMLCSSRefAdaptor.cpp.
    static bool nodeAdaptToIDSelector(GumboArrayPtr node, IdSelector* selector, int *potentialSize);
    static bool nodeAdaptToClassSelector(GumboArrayPtr node, ClassSelector* selector, int *potentialSize);
    static bool nodeAdaptToTypeSelector(GumboArrayPtr node, TypeSelector* selector, int *potentialSize);
    static bool nodeAdaptToAttributeSelector(GumboArrayPtr node, AttributeSelector* selector, int *potentialSize);
    static bool nodeAdaptToPseudoSelector(GumboArrayPtr node, PseudoSelector* selector, int *potentialSize);
    static bool nodeAdaptToSequenceSelector(GumboArrayPtr node, SequenceSelector* selector, int *potentialSize);
    static bool nodeAdaptToCombineSelector(GumboArrayPtr node, CombineSelector* selector, int *potentialSize);
    static bool nodeAdaptToGroupSelector(GumboArrayPtr node, GroupSelector* selector, int *potentialSize);
    // NOTE(review): the std::list template argument is missing in this copy
    // of the header (looks like it was lost in extraction) — restore it from
    // the original file.
    static void updateNextNodes(const std::list&, GumboArrayPtr arrayPtr, int *size);
};
    /**
     * Decides whether a DOM node satisfies a pseudo selector.
     * Only nodeAdaptToPseudo is public; everything else is a private helper.
     */
    class PseudoSelectorRefChiefJudge {
    public:
        /**
         * @return whether the given node satisfies the given pseudo selector
         */
        static bool nodeAdaptToPseudo(GumboNode *, PseudoSelector*);
    private:
        // Callback invoked during sibling traversal; `stop` is passed by
        // reference so the callback can write to it.
        // NOTE(review): presumably setting `stop` ends the traversal — confirm
        // in the .cpp.
        typedef void(*traverseAction)(GumboNode* node, bool& stop, void* userData);
    private:
        // NOTE(review): the std::map template arguments are missing in this
        // copy of the header (looks like they were lost in extraction).
        static std::map& getDynamicPseudoClassMap();

        // Calls `ac` with `userData` for the siblings of `node`.
        // NOTE(review): name suggests only element siblings are visited — confirm.
        static void traverseElementNodeSiblings(GumboNode *node, traverseAction ac, void *userData);

        // NOTE(review): presumably the position of `node` among its siblings,
        // counted from the front (all siblings / same-type siblings) — the
        // index base is not visible here.
        static int indexOfSiblings(GumboNode* node);
        static int indexEqualTypeOfSiblings(GumboNode* node);

        // NOTE(review): presumably the same positions counted from the back.
        static int lastIndexOfSiblings(GumboNode* node);
        static int lastIndexEqualTypeOfSiblings(GumboNode* node);

        // NOTE(review): presumably tests `idx` against the selector's
        // parameter expression — confirm.
        static bool indexAdaptToPoly(PseudoSelector::Parameter* parameter, int idx);

        static bool formulaHaveIntergerSolution(PseudoSelector::Parameter* parameter, int idx);
    };
    /**
     * CSS lexer: turns a CSS buffer (set from a file name or a string) into
     * CSSToken objects handed out one at a time by GetToken().
     */
    class Lex {
    public:
        /** A single token: its type plus the associated text. */
        struct CSSToken {
            CSSTokenType type;
            std::string data;
            // NOTE(review): `type` is left uninitialized by this constructor;
            // callers must assign it before reading.
            CSSToken()
            {
            }
            ~CSSToken()
            {
            }
        };
    public:
        Lex();
        ~Lex();
        /**
         * @return the next token.
         * NOTE(review): ownership of the returned pointer is not visible
         * here; m_tokenCache suggests the lexer keeps track of it — confirm.
         */
        CSSToken* GetToken();
        void CleanResource();
        /** Use the file named `fileName` as the input. */
        void SetBufferSource(const std::string& fileName);
        /** Use `bufferString` itself as the input. */
        void SetBufferString(const std::string& bufferString);
    private:
        // Scanners for the individual token families.
        CSSToken* GetIdentToken();
        CSSToken* GetNumberToken();
        // NOTE(review): stringType is presumably the quote character that
        // opened the string — confirm.
        CSSToken* GetTextToken(char stringType);
        // Character classification helpers.
        bool isDigitalCharacter(char);
        bool isLetter(char);
        bool isHexCharacter(char);
        bool isWs(char);
        // NOTE(review): presumably copies m_buffer[start, end) into a string;
        // whether `end` is inclusive is not visible here.
        std::string createData(size_t start, size_t end);
    private:
        std::string m_fileName;
        // NOTE(review): raw pointer with a user-declared destructor and no
        // copy control declared; whether Lex owns this memory is not visible
        // here.
        const char* m_buffer;
        size_t m_bufferSize;
        size_t m_firstPos;
        size_t m_forwardPos;
        // NOTE(review): the std::set template argument is missing in this
        // copy of the header (looks like it was lost in extraction).
        std::set m_tokenCache;
    };
#ifndef CSSLexStatus_h
#define CSSLexStatus_h

// Lexer state identifiers.
// NOTE(review): judging by the name these are the states of the lexer's DFA.
// Enumerator order determines the numeric values; do not reorder.
enum CSSDFAStatus {
    Start,
    iDentStart,
    iDent,
    NMStart,
    NMChar,
    EscapeStartInNMStart,
    EscapeStartInNMChar,
    EscapeStartInHash,
    EscapeStartInATKeyword,
    HashStart,
    Hash,
    Ws,
    AtKeyWordStart,
    AtKeyWord,
    include,
    dot,
    end,
    blockStart,
    blockEnd,
    comma,
    plus,
    greater,
    tidle,
    dashMatch,
    prefixMatch,
    suffixMatch,
    subStringMatch,
    includes,
    star,
    colon,
    semicolon,
    leftSqureBracket,
    rightSqureBracket,
    equal,
    string1Start,
    string1End,
    string2Start,
    string2End,
    annotationStart,
    annotationEnd,
    function,
    numberStart,
    num,
    rightBracket,
    minus,
    LexError
};
// Token kinds stored in Lex::CSSToken::type.
// Enumerator order determines the numeric values; do not reorder.
// NOTE(review): these unscoped names live in the global namespace; `ERROR`
// in particular may clash with macros from platform headers — verify on
// every target platform.
enum CSSTokenType {
    INCLUDES,
    DASHMATCH,
    PREFIXMATCH,
    SUFFIXMATCH,
    SUBSTRINGMATCH,
    IDENT,
    STRING,
    FUNCTION,
    NUMBER,
    HASH,
    PLUS,
    GREATER,
    COMMA,
    TIDLE,
    ATKEYWORD,
    STAR,
    PERCENTAGE,
    DIMENSION,
    CDO,
    CDC,
    WS,
    DOT,
    ERROR,
    BLOCKSTART,
    BLOCKEND,
    COLON,
    LEFTSQUREBRACKET,
    RIGHTSQUREBRACKET,
    EQUAL,
    ANNOTATION,
    SYNTAXEND,
    RIGHTBRACKET,
    MINUS,
    END
};
// Single-character constants used by the lexer.
#define IDENT_START_SIGN '-'
#define UNDER_LINE_SIGN '_'
#define BACK_SPLASH '\\'
#define HASH_SIGN '#'
#define KEYWORD_SIGN '@'
#define BLOCK_START_SIGN '{'
#define BLOCK_END_SIGN '}'
#define EQUAL_SIGN '='
#define COMMA_SIGN ','
#define PLUS_SIGN '+'
#define TIDLE_SIGN '~'
#define GREATER_SIGN '>'
#define COLON_SIGN ':'
#define LEFT_SQURE_BRACKET '['
#define RIGHT_SQURE_BRACKET ']'


#endif /* CSSLexStatus_h */
    /**
     * CSS parser front end: parses a CSS file or string and exposes the
     * resulting selector and keyword models.
     *
     * NOTE(review): several container template arguments (std::set,
     * std::list, std::stack) are missing in this copy of the header; they
     * look lost in extraction and must be restored from the original file.
     */
    class CSSParser {
    public:
        /** Binary tree node: a selector plus left/right children. */
        struct ASTNode {
            Selector* head;
            ASTNode* left;
            ASTNode* right;
            // All three pointers start out NULL.
            ASTNode()
            {
                head = NULL;
                left = NULL;
                right = NULL;
            }
        };
    public:
        CSSParser();
        ~CSSParser();
        /**
         * Start parsing a css file
         */
        bool parseByFile(const std::string& cssFile);

        /**
         * Start parsing css string
         */
        bool parseByString (const std::string& cssString);

        /**
         * Get the selector models
         */
        std::set getSelectors();

        /**
         * Get the keyword models
         */
        std::list getKeywords();

        // NOTE(review): presumably releases the models produced by parsing —
        // confirm in CSSParser.cpp before relying on pointers afterwards.
        void cleanRes();

    private:
        // Callback types used by the tree traversal helpers below.
        typedef void(*treeTranverseAction)(ASTNode *);
        typedef CSSParser::ASTNode *(*treeTranverseWithUserDataAction)(std::stack* stack);
        friend CSSParser::ASTNode* TreeTranverseCreateExpressionAction(std::stack*);

        // Fills `target` with the given head/left/right values.
        static void initialASTNode(ASTNode *target, Selector* head, ASTNode* left, ASTNode* right);

        static void pushOperatedElement(std::stack&, Selector* head);

        // NOTE(review): presumably the shared implementation behind
        // parseByFile/parseByString, run after one of the prepare* calls.
        bool parse();

        void prepareByFile(const std::string& filePath);

        void prepareByString(const std::string& cssString);

        void clean();

        // Token classification helpers.
        bool startSelector(CSSTokenType);

        bool tokenHasInfo(CSSTokenType);

        bool topHaveSign(std::stack&);

        // Builds a selector model from a token.
        Selector* getSelector(Lex::CSSToken* token);

        PseudoSelector::Parameter* getFunctionParamenter();

        std::list createATS(std::stack&);

        void pushSign(std::stack&, SignSelector::SignType);

        void buildReversePolishNotation(std::stack& operatorStack, std::stack& operandStack);

        // Tree traversals.
        // NOTE(review): the prefixes presumably give the visiting order
        // (M = node, L = left, R = right) — confirm in CSSParser.cpp.
        void RMLtranverseAST(ASTNode *root, treeTranverseAction action);

        void LRMtranverseAST(ASTNode *root, treeTranverseAction action);

        void LMRtranverseAST(ASTNode *root, treeTranverseAction action);

        void MLRtranverseAST(ASTNode *root, treeTranverseWithUserDataAction action, void *userData);
    private:
        Lex* m_lexer;
        CSSParserStatus m_status;
        std::string m_hostCssFile;
        std::set m_selectors;
        std::list m_keywords;
        std::list m_signSelecors;
    };
7 | // 8 | 9 | #ifndef CSSParserStatus_h 10 | #define CSSParserStatus_h 11 | 12 | //enum CSSParserStatus { 13 | // START, 14 | // KEYWORD, 15 | // TYPESELECTOR, 16 | // UNIVERSIALSELECTOR, 17 | // IDSELECTOR, 18 | // ATTRIBUTSELECTOR, 19 | // CLASSSELECTOR, 20 | // SELECTORSEQUENCE, 21 | // PSEUDOSELECOT, 22 | // SELECTOR, 23 | // SELECTORGROUP, 24 | // RULESTART, 25 | // RULEEND, 26 | // PARSEERROR 27 | //}; 28 | 29 | enum CSSParserStatus { 30 | START, 31 | INSELECTOR, 32 | STARTBLOCK, 33 | INATKEYWORD 34 | }; 35 | extern const int HTMLTAGMAXSIZE; 36 | extern const char* HTMLTagNames[]; 37 | #endif /* CSSParserStatus_h */ 38 | -------------------------------------------------------------------------------- /src/CSSParser/Keyword/KeywordItem.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // KeywordItem.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/8. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #include "KeywordItem.hpp" 10 | 11 | namespace future { 12 | KeywordItem::KeywordItem(const std::string& name) 13 | { 14 | m_name = name; 15 | } 16 | 17 | KeywordItem::~KeywordItem() 18 | { 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/CSSParser/Keyword/KeywordItem.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // KeywordItem.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/8. 6 | // Copyright © 2018 1m0nster. All rights reserved. 
7 | // 8 | 9 | #ifndef KeywordItem_hpp 10 | #define KeywordItem_hpp 11 | 12 | #include 13 | #include 14 | 15 | namespace future { 16 | class KeywordItem { 17 | public: 18 | KeywordItem(const std::string& name); 19 | ~KeywordItem(); 20 | void setData(const std::string& data) 21 | { 22 | m_data = data; 23 | }; 24 | 25 | std::string& getData() 26 | { 27 | return m_data; 28 | } 29 | 30 | std::string getName() 31 | { 32 | return m_name; 33 | } 34 | private: 35 | std::string m_name; 36 | std::string m_data; 37 | }; 38 | } 39 | 40 | #endif /* KeywordItem_hpp */ 41 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/AttributeSelector.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // AttributeSelector.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #include "AttributeSelector.hpp" 10 | 11 | namespace future { 12 | bool AttributeSelector::isBaseSelector() 13 | { 14 | return true; 15 | } 16 | 17 | int AttributeSelector::weight() 18 | { 19 | return 10; 20 | } 21 | 22 | std::string AttributeSelector::description() 23 | { 24 | std::string relation = "no relation"; 25 | switch (m_filterRule) { 26 | case Prefix: { 27 | relation = "prefix"; 28 | break; 29 | } 30 | case Suffix: { 31 | relation = "suffix"; 32 | break; 33 | } 34 | case Include: { 35 | relation = "include"; 36 | break; 37 | } 38 | case Equal: { 39 | relation = "equal"; 40 | break; 41 | } 42 | case Substring: { 43 | relation = "substring"; 44 | break; 45 | } 46 | case DashMatch: { 47 | relation = "dashmatch"; 48 | break; 49 | } 50 | case NoRule: { 51 | relation = "no rule"; 52 | break; 53 | } 54 | default: 55 | break; 56 | } 57 | if (m_filterRule == Prefix) { 58 | relation = "prefix"; 59 | } 60 | return "AttributeSelector: " + m_key + " " + relation + " " + m_value; 61 | } 62 | } 63 | 
-------------------------------------------------------------------------------- /src/CSSParser/Selectors/AttributeSelector.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // AttributeSelector.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #ifndef AttributeSelector_hpp 10 | #define AttributeSelector_hpp 11 | 12 | #include 13 | #include 14 | #include "Selector.hpp" 15 | 16 | namespace future { 17 | class AttributeSelector: public Selector { 18 | public: 19 | enum attributeFilterRule { 20 | Prefix, 21 | Suffix, 22 | Include, 23 | Equal, 24 | Substring, 25 | DashMatch, 26 | NoRule 27 | }; 28 | public: 29 | AttributeSelector(const std::string& key, const std::string& value, attributeFilterRule rule) 30 | { 31 | m_key = key; 32 | m_value = value; 33 | m_filterRule = rule; 34 | m_selectorType = Selector::AttributeSelector; 35 | } 36 | 37 | inline std::string getKey() 38 | { 39 | return m_key; 40 | } 41 | 42 | inline std::string getValue() 43 | { 44 | return m_value; 45 | } 46 | 47 | inline attributeFilterRule getKVRule() 48 | { 49 | return m_filterRule; 50 | } 51 | 52 | bool isBaseSelector(); 53 | int weight(); 54 | virtual std::string description(); 55 | private: 56 | std::string m_key; 57 | std::string m_value; 58 | attributeFilterRule m_filterRule; 59 | }; 60 | } 61 | 62 | #endif /* AttributeSelector_hpp */ 63 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/ClassSelector.cpp: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // ClassSelector.cpp 4 | // DDCSSParser 5 | // 6 | // Created by 1m0nster on 2018/8/7. 7 | // Copyright © 2018 1m0nster. All rights reserved. 
8 | // 9 | 10 | #include "ClassSelector.hpp" 11 | 12 | namespace future { 13 | bool ClassSelector::isBaseSelector() 14 | { 15 | return true; 16 | } 17 | 18 | int ClassSelector::weight() 19 | { 20 | return 10; 21 | } 22 | 23 | std::string ClassSelector::description() 24 | { 25 | return "Class Selector (Class name is \"" + m_class + "\")"; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/ClassSelector.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // ClassSelector.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #ifndef ClassSelector_hpp 10 | #define ClassSelector_hpp 11 | 12 | #include 13 | #include 14 | #include "Selector.hpp" 15 | 16 | namespace future { 17 | class ClassSelector: public Selector { 18 | public: 19 | ClassSelector(const std::string& cls) 20 | { 21 | m_class = cls; 22 | m_selectorType = Selector::ClassSelector; 23 | }; 24 | inline std::string getClassIdentifier() 25 | { 26 | return m_class; 27 | } 28 | bool isBaseSelector(); 29 | int weight(); 30 | virtual std::string description(); 31 | private: 32 | std::string m_class; 33 | }; 34 | } 35 | 36 | #endif /* ClassSelector_hpp */ 37 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/CombineSelector.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // CombineSelector.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 
7 | // 8 | 9 | #include "CombineSelector.hpp" 10 | #include 11 | 12 | namespace future { 13 | 14 | CombineSelector::~CombineSelector() 15 | { 16 | CleanContainer(m_normalSiblingList); 17 | CleanContainer(m_instanceSiblingList); 18 | CleanContainer(m_normalInhericalList); 19 | CleanContainer(m_instanceSiblingList); 20 | } 21 | 22 | void CombineSelector::initialNormalSiblingList(future::Selector *head, future::Selector *sibling) 23 | { 24 | if (m_combineType != NoCombine) { 25 | assert(0); 26 | } 27 | if (!head || !sibling) { 28 | return; 29 | } 30 | CleanContainer(m_normalSiblingList); 31 | m_normalSiblingList.clear(); 32 | m_normalSiblingList.push_back(head); 33 | m_normalSiblingList.push_back(sibling); 34 | m_combineType = NormalSibling; 35 | } 36 | 37 | void CombineSelector::initialInstanceSiblingList(future::Selector *head, future::Selector *sibling) 38 | { 39 | if (m_combineType != NoCombine) { 40 | assert(0); 41 | } 42 | if (!head || !sibling) { 43 | return; 44 | } 45 | CleanContainer(m_instanceSiblingList); 46 | m_instanceSiblingList.clear(); 47 | m_instanceSiblingList.push_back(head); 48 | m_instanceSiblingList.push_back(sibling); 49 | m_combineType = InstanceSibling; 50 | } 51 | 52 | void CombineSelector::initialNormalInhericalList(future::Selector *root, future::Selector *child) 53 | { 54 | if (m_combineType != NoCombine) { 55 | assert(0); 56 | } 57 | if (!root || !child) { 58 | return; 59 | } 60 | CleanContainer(m_normalInhericalList); 61 | m_normalInhericalList.clear(); 62 | m_normalInhericalList.push_back(root); 63 | m_normalInhericalList.push_back(child); 64 | m_combineType = NormalInherical; 65 | } 66 | 67 | void CombineSelector::initialInstanceInhericalList(future::Selector *root, future::Selector *child) 68 | { 69 | if (m_combineType != NoCombine) { 70 | assert(0); 71 | } 72 | if (!root || !child) { 73 | return; 74 | } 75 | CleanContainer(m_instanceInhericalList); 76 | m_instanceInhericalList.clear(); 77 | m_instanceInhericalList.push_back(root); 78 
| m_instanceInhericalList.push_back(child); 79 | m_combineType = InstanceInherical; 80 | } 81 | 82 | bool CombineSelector::isBaseSelector() 83 | { 84 | return false; 85 | } 86 | 87 | int CombineSelector::weight() 88 | { 89 | int w = 0; 90 | std::list::iterator one; 91 | std::list::iterator other; 92 | do { 93 | if (m_normalInhericalList.size() == 2) { 94 | one = m_normalInhericalList.begin(); 95 | other = --m_normalInhericalList.end(); 96 | break; 97 | } 98 | if (m_instanceInhericalList.size() == 2) { 99 | one = m_instanceInhericalList.begin(); 100 | other = --m_instanceInhericalList.end(); 101 | break; 102 | } 103 | if (m_normalSiblingList.size() == 2) { 104 | one = m_normalSiblingList.begin(); 105 | other = --m_normalSiblingList.end(); 106 | break; 107 | } 108 | if (m_instanceSiblingList.size() == 2) { 109 | one = m_instanceSiblingList.begin(); 110 | other = --m_instanceSiblingList.end(); 111 | break; 112 | } 113 | } while (0); 114 | if (!*one || !*other) { 115 | return w; 116 | } 117 | w += (*one)->weight() + (*other)->weight(); 118 | return w; 119 | } 120 | 121 | Selector* CombineSelector::getBefore() 122 | { 123 | std::list::iterator before; 124 | do { 125 | if (m_normalInhericalList.size() == 2) { 126 | before = m_normalInhericalList.begin(); 127 | break; 128 | } 129 | if (m_instanceInhericalList.size() == 2) { 130 | before = m_instanceInhericalList.begin(); 131 | break; 132 | } 133 | if (m_normalSiblingList.size() == 2) { 134 | before = m_normalSiblingList.begin(); 135 | break; 136 | } 137 | if (m_instanceSiblingList.size() == 2) { 138 | before = m_instanceSiblingList.begin(); 139 | break; 140 | } 141 | } while (0); 142 | return *before; 143 | } 144 | 145 | Selector* CombineSelector::getAfter() 146 | { 147 | std::list::iterator after; 148 | do { 149 | if (m_normalInhericalList.size() == 2) { 150 | after = --m_normalInhericalList.end(); 151 | break; 152 | } 153 | if (m_instanceInhericalList.size() == 2) { 154 | after = --m_instanceInhericalList.end(); 155 | 
break; 156 | } 157 | if (m_normalSiblingList.size() == 2) { 158 | after = --m_normalSiblingList.end(); 159 | break; 160 | } 161 | if (m_instanceSiblingList.size() == 2) { 162 | after = --m_instanceSiblingList.end(); 163 | break; 164 | } 165 | } while (0); 166 | return *after; 167 | } 168 | 169 | CombineSelector::CombineType CombineSelector::getCombineType() 170 | { 171 | return m_combineType; 172 | } 173 | 174 | std::string CombineSelector::description() 175 | { 176 | std::string relation = "no relation"; 177 | std::string desc = "CombineSelector: {\n"; 178 | if (!m_instanceSiblingList.empty()) { 179 | relation = "instance sibling"; 180 | } else if (!m_normalSiblingList.empty()) { 181 | relation = "normal sibling"; 182 | } else if (!m_normalInhericalList.empty()) { 183 | relation = "normal inherical"; 184 | } else if (!m_instanceInhericalList.empty()) { 185 | relation = "instance inherical"; 186 | } 187 | desc += "(" + getBefore()->description() + ")" + relation + 188 | + "(" + getAfter()->description() + ")\n}"; 189 | return desc; 190 | } 191 | 192 | } // namespace future 193 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/CombineSelector.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // CombineSelector.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 
7 | // 8 | 9 | #ifndef CombineSelector_hpp 10 | #define CombineSelector_hpp 11 | 12 | #include 13 | #include 14 | #include 15 | #include "Selector.hpp" 16 | 17 | namespace future { 18 | class CombineSelector: public Selector { 19 | public: 20 | enum CombineType { 21 | InstanceSibling, 22 | NormalSibling, 23 | InstanceInherical, 24 | NormalInherical, 25 | NoCombine 26 | }; 27 | public: 28 | CombineSelector() 29 | { 30 | m_selectorType = Selector::CombineSelector; 31 | m_combineType = NoCombine; 32 | } 33 | ~CombineSelector(); 34 | void initialInstanceSiblingList(Selector* head, Selector* sibling); 35 | void initialNormalSiblingList(Selector* head, Selector* sibling); 36 | void initialInstanceInhericalList(Selector* root, Selector* child); 37 | void initialNormalInhericalList(Selector* root, Selector* child); 38 | bool isBaseSelector(); 39 | int weight(); 40 | Selector* getBefore(); 41 | Selector* getAfter(); 42 | CombineType getCombineType(); 43 | inline std::list getInstanceSiblingList() 44 | { 45 | return m_instanceSiblingList; 46 | } 47 | 48 | inline std::list getNormalSiblingList() 49 | { 50 | return m_normalSiblingList; 51 | } 52 | 53 | inline std::list getInstanceInhericalList() 54 | { 55 | return m_instanceInhericalList; 56 | } 57 | 58 | inline std::list getNormalInhericalList() 59 | { 60 | return m_normalInhericalList; 61 | } 62 | 63 | virtual std::string description(); 64 | 65 | private: 66 | std::list m_instanceSiblingList; 67 | std::list m_normalSiblingList; 68 | std::list m_instanceInhericalList; 69 | std::list m_normalInhericalList; 70 | CombineType m_combineType; 71 | }; 72 | } 73 | 74 | #endif /* CombineSelector_hpp */ 75 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/IdSelector.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // IdSelector.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. 
All rights reserved. 7 | // 8 | 9 | #include "IdSelector.hpp" 10 | 11 | namespace future { 12 | 13 | bool IdSelector::isBaseSelector() 14 | { 15 | return true; 16 | } 17 | 18 | int IdSelector::weight() 19 | { 20 | return 100; 21 | } 22 | 23 | std::string IdSelector::description() 24 | { 25 | return "IdSelector (id is " + m_id + ")\n"; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/IdSelector.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // IdSelector.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #ifndef IdSelector_hpp 10 | #define IdSelector_hpp 11 | 12 | #include 13 | #include 14 | #include "Selector.hpp" 15 | 16 | namespace future { 17 | class IdSelector: public Selector { 18 | public: 19 | IdSelector(const std::string& id) 20 | { 21 | m_id = id; 22 | m_selectorType = Selector::IDSelector; 23 | }; 24 | inline std::string getIdIdentifier() { 25 | return m_id; 26 | } 27 | bool isBaseSelector(); 28 | int weight(); 29 | virtual std::string description(); 30 | private: 31 | std::string m_id; 32 | }; 33 | } 34 | 35 | #endif /* IdSelector_hpp */ 36 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/PseudoSelector.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // PseudoSelector.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 
7 | // 8 | 9 | #include "PseudoSelector.hpp" 10 | 11 | namespace future { 12 | 13 | PseudoSelector::~PseudoSelector() 14 | { 15 | delete m_parameter; 16 | m_parameter = NULL; 17 | } 18 | 19 | bool PseudoSelector::isBaseSelector() 20 | { 21 | return true; 22 | } 23 | 24 | int PseudoSelector::weight() 25 | { 26 | return 10; 27 | } 28 | 29 | void PseudoSelector::setParameter(future::PseudoSelector::Parameter *p) 30 | { 31 | if (m_parameter == p) { 32 | return; 33 | } 34 | if (m_parameter) { 35 | delete m_parameter; 36 | } 37 | m_parameter = p; 38 | } 39 | 40 | PseudoSelector::Parameter* PseudoSelector::getParameter() 41 | { 42 | return m_parameter; 43 | } 44 | 45 | std::string PseudoSelector::getPseudoData() 46 | { 47 | return m_data; 48 | } 49 | 50 | std::string PseudoSelector::description() 51 | { 52 | std::string parament = "PseudoSelector: { pseudo name:" + m_data + "\n"; 53 | if (m_parameter) { 54 | parament += "parament: "; 55 | char cnumber[256] = {'\0'}; 56 | if (m_parameter->type == STRING) { 57 | parament += m_parameter->pString; 58 | } else if (m_parameter->type == NUMBER) { 59 | sprintf(cnumber, "%d", m_parameter->pNumber); 60 | parament += cnumber; 61 | } else if (m_parameter->type == POLYNOMIAL) { 62 | char coe[256] = {'\0'}; 63 | char con[256] = {'\0'}; 64 | std::string sign = m_parameter->polynomial.sign == 1 ? 
"+" : "-"; 65 | sprintf(coe, "%d", m_parameter->polynomial.coefficient); 66 | sprintf(con, "%d", m_parameter->polynomial.constant); 67 | parament += std::string(coe) + "n" + sign + con; 68 | } else if (m_parameter->type == IDENT) { 69 | parament += m_parameter->pString; 70 | } else if (m_parameter->type == NONE) { 71 | parament += "None"; 72 | } 73 | } 74 | parament += "}\n"; 75 | return parament; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/PseudoSelector.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // PseudoSelector.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #ifndef PseudoSelector_hpp 10 | #define PseudoSelector_hpp 11 | 12 | #include 13 | #include 14 | #include "Selector.hpp" 15 | 16 | namespace future { 17 | class PseudoSelector: public Selector { 18 | public: 19 | enum ParameterType { 20 | STRING, 21 | NUMBER, 22 | POLYNOMIAL, 23 | IDENT, 24 | NONE 25 | }; 26 | struct Parameter { 27 | struct polynomial { 28 | int coefficient; 29 | int constant; 30 | int sign; 31 | polynomial() 32 | { 33 | coefficient = 0; 34 | constant = 0; 35 | sign = 0; 36 | } 37 | } polynomial; 38 | std::string pString; 39 | int pNumber; 40 | ParameterType type; 41 | Parameter() 42 | { 43 | type = ParameterType::NONE; 44 | pNumber = 0; 45 | pString = ""; 46 | } 47 | }; 48 | public: 49 | PseudoSelector(const std::string& data) 50 | { 51 | m_selectorType = Selector::PseudoSelector; 52 | m_data = data; 53 | m_parameter = NULL; 54 | } 55 | 56 | virtual ~PseudoSelector(); 57 | 58 | bool isBaseSelector(); 59 | 60 | int weight(); 61 | 62 | void setParameter(Parameter *); 63 | 64 | std::string getPseudoData(); 65 | 66 | Parameter* getParameter(); 67 | 68 | virtual std::string description(); 69 | private: 70 | std::string m_data; 71 | Parameter* m_parameter; 72 | }; 73 | } 
74 | 75 | #endif /* PseudoSelector_hpp */ 76 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/Selector.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Selector.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #include "Selector.hpp" 10 | 11 | namespace future { 12 | Selector::Selector() 13 | { 14 | } 15 | 16 | Selector::~Selector() 17 | { 18 | } 19 | 20 | std::string Selector::description() 21 | { 22 | switch (m_selectorType) { 23 | case TypeSelector: 24 | return "TypeSelector"; 25 | case IDSelector: 26 | return "IDSelector"; 27 | case ClassSelector: 28 | return "ClassSelector"; 29 | case UniversalSelector: 30 | return "UniversalSelector"; 31 | case AttributeSelector: 32 | return "AttributeSelector"; 33 | case PseudoSelector: 34 | return "PseudoSelector"; 35 | case SimpleSelectorSequence: 36 | return "SimpleSelectorSequence"; 37 | case CombineSelector: 38 | return "CombineSelector"; 39 | case SelectorGroup: 40 | return "SelectorGroup"; 41 | case SignSelector: 42 | return "SignSelector"; 43 | 44 | default: 45 | return "UnknownSelector"; 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/Selector.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Selector.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 
7 | // 8 | 9 | #ifndef Selector_hpp 10 | #define Selector_hpp 11 | 12 | #include 13 | #include 14 | #include "ContainerUtil.hpp" 15 | 16 | namespace future { 17 | class Selector { 18 | friend class CombineSelector; 19 | public: 20 | enum SelectorType { 21 | TypeSelector, 22 | IDSelector, 23 | ClassSelector, 24 | UniversalSelector, 25 | AttributeSelector, 26 | PseudoSelector, 27 | SimpleSelectorSequence, 28 | CombineSelector, 29 | SelectorGroup, 30 | SignSelector 31 | }; 32 | public: 33 | Selector(); 34 | virtual ~Selector() = 0; 35 | inline std::string& getRuleData() { 36 | return m_ruleData; 37 | } 38 | 39 | void setRuleData(const std::string& data) { 40 | m_ruleData = data; 41 | } 42 | 43 | SelectorType getType() { 44 | return m_selectorType; 45 | } 46 | 47 | void setHostCSSFilePath(const std::string& path) { 48 | m_hostCSSFilePath = path; 49 | } 50 | 51 | std::string getHostCSSFilePath() { 52 | return m_hostCSSFilePath; 53 | } 54 | 55 | virtual bool isBaseSelector() = 0; 56 | virtual int weight() = 0; 57 | 58 | virtual std::string description(); 59 | protected: 60 | std::string m_hostCSSFilePath; 61 | std::string m_ruleData; 62 | SelectorType m_selectorType; 63 | }; 64 | } 65 | 66 | #endif /* Selector_hpp */ 67 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/SelectorGroup.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // SelectorGroup.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 
7 | // 8 | 9 | #include "SelectorGroup.hpp" 10 | 11 | namespace future { 12 | 13 | void GroupSelector::addSelector(future::Selector *s) 14 | { 15 | if (!s) { 16 | return ; 17 | } 18 | m_selectors.push_back(s); 19 | } 20 | 21 | GroupSelector::~GroupSelector() 22 | { 23 | CleanContainer(m_selectors); 24 | } 25 | 26 | bool GroupSelector::isBaseSelector() 27 | { 28 | return false; 29 | } 30 | 31 | int GroupSelector::weight() 32 | { 33 | std::list::iterator it = m_selectors.begin(); 34 | std::list::iterator end = m_selectors.end(); 35 | int w = 0; 36 | while(it != end) { 37 | w += (*it++)->weight(); 38 | } 39 | return w; 40 | } 41 | 42 | std::string GroupSelector::description() 43 | { 44 | std::string desc = "Group Selector:[\n"; 45 | for (Selector* s: m_selectors) { 46 | desc += s->description() + ",\n"; 47 | } 48 | desc += "]\n"; 49 | return desc; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/SelectorGroup.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // SelectorGroup.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 
7 | // 8 | 9 | #ifndef SelectorGroup_hpp 10 | #define SelectorGroup_hpp 11 | 12 | #include 13 | #include 14 | #include 15 | #include "Selector.hpp" 16 | 17 | namespace future { 18 | class GroupSelector: public Selector { 19 | public: 20 | GroupSelector() 21 | { 22 | m_selectorType = Selector::SelectorGroup; 23 | } 24 | ~GroupSelector(); 25 | void addSelector(Selector *); 26 | std::listgetAllSelectors() 27 | { 28 | return m_selectors; 29 | } 30 | bool isBaseSelector(); 31 | int weight(); 32 | virtual std::string description(); 33 | private: 34 | std::list m_selectors; 35 | }; 36 | } 37 | 38 | #endif /* SelectorGroup_hpp */ 39 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/SelectorSequence.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // SelectorSequence.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #include "SelectorSequence.hpp" 10 | 11 | namespace future { 12 | void SequenceSelector::appendSelector(Selector *s) 13 | { 14 | if (!s) { 15 | return; 16 | } 17 | m_selectors.push_back(s); 18 | } 19 | 20 | SequenceSelector::~SequenceSelector() 21 | { 22 | CleanContainer(m_selectors); 23 | } 24 | 25 | bool SequenceSelector::isBaseSelector() 26 | { 27 | return false; 28 | } 29 | 30 | int SequenceSelector::weight() 31 | { 32 | std::list::iterator it = m_selectors.begin(); 33 | std::list::iterator end = m_selectors.end(); 34 | int w = 0; 35 | while(it != end) { 36 | w += (*it++)->weight(); 37 | } 38 | return w; 39 | } 40 | 41 | std::string SequenceSelector::description() 42 | { 43 | std::string desc = "SequenceSelector:[\n"; 44 | auto it = m_selectors.begin(); 45 | auto end = m_selectors.end(); 46 | while (it != end) { 47 | desc += (*it)->description() + "\n"; 48 | ++it; 49 | } 50 | desc += "]\n"; 51 | return desc; 52 | } 53 | } 54 | 
-------------------------------------------------------------------------------- /src/CSSParser/Selectors/SelectorSequence.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // SelectorSequence.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #ifndef SelectorSequence_hpp 10 | #define SelectorSequence_hpp 11 | 12 | #include 13 | #include 14 | #include 15 | #include "Selector.hpp" 16 | 17 | namespace future { 18 | class SequenceSelector: public Selector { 19 | public: 20 | SequenceSelector() 21 | { 22 | m_selectorType = Selector::SimpleSelectorSequence; 23 | } 24 | ~SequenceSelector(); 25 | void appendSelector(Selector *); 26 | std::list getContrains() 27 | { 28 | return m_selectors; 29 | } 30 | bool isBaseSelector(); 31 | int weight(); 32 | virtual std::string description(); 33 | private: 34 | std::list m_selectors; 35 | }; 36 | } 37 | 38 | #endif /* SelectorSequence_hpp */ 39 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/SelectorsHeader.h: -------------------------------------------------------------------------------- 1 | // 2 | // SelectorsHeader.h 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 
7 | // 8 | 9 | #ifndef SelectorsHeader_h 10 | #define SelectorsHeader_h 11 | 12 | #include "Selector.hpp" 13 | #include "IdSelector.hpp" 14 | #include "ClassSelector.hpp" 15 | #include "AttributeSelector.hpp" 16 | #include "TypeSelector.hpp" 17 | #include "UniversalSelector.hpp" 18 | #include "SelectorSequence.hpp" 19 | #include "CombineSelector.hpp" 20 | #include "SelectorGroup.hpp" 21 | #include "SignSelector.hpp" 22 | #include "PseudoSelector.hpp" 23 | 24 | #endif /* SelectorsHeader_h */ 25 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/SignSelector.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // SignSelector.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #include "SignSelector.hpp" 10 | 11 | namespace future { 12 | bool SignSelector::operator>(future::SignSelector *other) 13 | { 14 | SignType otherType = other->getSignType(); 15 | return m_SignType == Concat && otherType != Concat; 16 | } 17 | 18 | bool SignSelector::isBaseSelector() 19 | { 20 | return true; 21 | } 22 | 23 | int SignSelector::weight() 24 | { 25 | return 0; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/SignSelector.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // SignSelector.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 
7 | // 8 | 9 | #ifndef SignSelector_hpp 10 | #define SignSelector_hpp 11 | 12 | #include 13 | #include "Selector.hpp" 14 | 15 | namespace future { 16 | class SignSelector: public Selector { 17 | public: 18 | enum SignType { 19 | NormalInherit, 20 | Plus, 21 | Greater, 22 | Tidle, 23 | Concat, 24 | Comma, 25 | }; 26 | SignSelector(SignType type) 27 | { 28 | m_SignType = type; 29 | m_selectorType = Selector::SignSelector; 30 | } 31 | inline SignType getSignType() 32 | { 33 | return m_SignType; 34 | } 35 | bool operator >(SignSelector *); 36 | bool isBaseSelector(); 37 | int weight(); 38 | private: 39 | SignType m_SignType; 40 | }; 41 | } 42 | 43 | #endif /* SignSelector_hpp */ 44 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/TypeSelector.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // TypeSelector.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #include "TypeSelector.hpp" 10 | 11 | namespace future { 12 | 13 | bool TypeSelector::isBaseSelector() 14 | { 15 | return true; 16 | } 17 | 18 | int TypeSelector::weight() 19 | { 20 | return 1; 21 | } 22 | 23 | std::string TypeSelector::description() 24 | { 25 | return "TypeSelector: (type name is " + m_typeName + ")\n"; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/TypeSelector.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // TypeSelector.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 
7 | // 8 | 9 | #ifndef TypeSelector_hpp 10 | #define TypeSelector_hpp 11 | 12 | #include 13 | #include 14 | #include "Selector.hpp" 15 | 16 | namespace future { 17 | class TypeSelector: public Selector { 18 | public: 19 | TypeSelector(const std::string& typeName) 20 | { 21 | m_typeName = typeName; 22 | m_selectorType = Selector::TypeSelector; 23 | } 24 | 25 | inline std::string getTagName() 26 | { 27 | return m_typeName; 28 | } 29 | 30 | bool isBaseSelector(); 31 | int weight(); 32 | virtual std::string description(); 33 | private: 34 | std::string m_typeName; 35 | }; 36 | } 37 | 38 | #endif /* TypeSelector_hpp */ 39 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/UniversalSelector.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // UniversalSelector.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 7 | // 8 | 9 | #include "UniversalSelector.hpp" 10 | 11 | namespace future { 12 | 13 | bool UniversalSelector::isBaseSelector() 14 | { 15 | return true; 16 | } 17 | 18 | int UniversalSelector::weight() 19 | { 20 | return 0; 21 | } 22 | 23 | std::string UniversalSelector::description() 24 | { 25 | return "UniversalSelector"; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/CSSParser/Selectors/UniversalSelector.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // UniversalSelector.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/7. 6 | // Copyright © 2018 1m0nster. All rights reserved. 
7 | // 8 | 9 | #ifndef UniversalSelector_hpp 10 | #define UniversalSelector_hpp 11 | 12 | #include 13 | 14 | #include 15 | #include "Selector.hpp" 16 | 17 | namespace future { 18 | class UniversalSelector: public Selector { 19 | public: 20 | UniversalSelector() 21 | { 22 | m_selectorType = Selector::UniversalSelector; 23 | } 24 | bool isBaseSelector(); 25 | virtual std::string description(); 26 | int weight(); 27 | }; 28 | } 29 | 30 | #endif /* UniversalSelector_hpp */ 31 | -------------------------------------------------------------------------------- /src/Vendor/Utils/ContainerUtil.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // ContainerUtil.cpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/30. 6 | // Copyright © 2018年 1m0nster. All rights reserved. 7 | // 8 | 9 | #include "ContainerUtil.hpp" 10 | -------------------------------------------------------------------------------- /src/Vendor/Utils/ContainerUtil.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // ContainerUtil.hpp 3 | // DDCSSParser 4 | // 5 | // Created by 1m0nster on 2018/8/30. 6 | // Copyright © 2018年 1m0nster. All rights reserved. 
#include <algorithm>
#include <cctype>
#include <list>
#include <set>
#include <sstream>
#include <string>

#ifndef ContainerUtil_hpp
#define ContainerUtil_hpp

namespace future {
    /*
     * Deletes every pointer stored in `container` and then empties it.
     *
     * Container type must be an STL sequence of raw pointers (it needs
     * begin()/end()/clear() and a pointer value_type). The elements are first
     * copied into a std::set so that a pointer stored more than once is
     * deleted exactly once.
     */
    template <typename T>
    void CleanContainer(T& container)
    {
        std::set<typename T::value_type> uniquePointers(container.begin(), container.end());
        for (auto pointer : uniquePointers) {
            delete pointer;
        }
        container.clear();
    }
}

#endif /* ContainerUtil_hpp */

/*
 * StringUtil
 *
 *  Created on: 2017-11-11
 *      Author: bingjian
 */

#ifndef STRINGUTIL_H_
#define STRINGUTIL_H_

namespace future {

/* Collection of static std::string helpers; the class carries no state. */
class StringUtil {
public:
    StringUtil();
    virtual ~StringUtil();

    static std::string longlong2str(long long in);
    static std::string int2str(int in);
    static int str2int(const std::string &in);
    static bool startWith(std::string &str, std::string &strWith);
    static bool endWith(std::string &str, std::string &strWith);
    static std::string tolower(const std::string &str);
    static std::string toupper(const std::string &str);
    static bool contains(const std::string str1, const std::string str2);
    static std::string tostring(long long in);
    static void StringReplace(std::string &srcStr, const std::string &findStr, const std::string &replaceStr);
    static std::string &trim(std::string &s);
    static std::list<std::string> SeperatorBy(const std::string &, char c);
    static std::string DeleteCharacter(const std::string &, char);
};

}
#endif /* STRINGUTIL_H_ */

namespace future {

StringUtil::StringUtil() {
}

StringUtil::~StringUtil() {
}

/* Formats a long long as decimal text. */
std::string StringUtil::longlong2str(long long in) {
    std::stringstream stream;
    stream << in;
    return stream.str();
}

/* Formats an int as decimal text. */
std::string StringUtil::int2str(int in) {
    std::stringstream stream;
    stream << in;
    return stream.str();
}

/* Parses a leading integer from `in`; yields 0 when nothing can be parsed. */
int StringUtil::str2int(const std::string &in) {
    int out = 0;
    std::stringstream stream(in);
    stream >> out;
    return out;
}

/* True when `str` begins with `start`. */
bool StringUtil::startWith(std::string &str, std::string &start) {
    return str.compare(0, start.size(), start) == 0;
}

/* True when `str` ends with `end`. */
bool StringUtil::endWith(std::string &str, std::string &end) {
    // A suffix longer than the string can never match; without this guard
    // `str.size() - end.size()` wraps around and compare() throws
    // std::out_of_range.
    if (end.size() > str.size()) {
        return false;
    }
    return str.compare(str.size() - end.size(), end.size(), end) == 0;
}

/* Returns a lower-cased copy of `str` (per the current C locale). */
std::string StringUtil::tolower(const std::string &str) {
    std::string ret(str);
    // The <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    std::transform(ret.begin(), ret.end(), ret.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return ret;
}

/* Returns an upper-cased copy of `str` (per the current C locale). */
std::string StringUtil::toupper(const std::string &str) {
    std::string ret(str);
    std::transform(ret.begin(), ret.end(), ret.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
    return ret;
}

/* True when `str2` occurs anywhere inside `str1`. */
bool StringUtil::contains(const std::string str1, const std::string str2) {
    return str1.find(str2) != std::string::npos;
}

/* Formats a long long as decimal text (same result as longlong2str). */
std::string StringUtil::tostring(long long in) {
    std::stringstream ss;
    std::string ret;
    ss << in;
    ss >> ret;
    return ret;
}

/*
 * Replaces, in place, every occurrence of `findStr` in `srcStr` with
 * `replaceStr`. Text inserted by a replacement is not rescanned.
 */
void StringUtil::StringReplace(std::string &srcStr, const std::string &findStr,
        const std::string &replaceStr) {
    // An empty needle matches at every position, which previously made the
    // loop insert forever; treat it as "nothing to replace".
    if (findStr.empty()) {
        return;
    }
    std::size_t pos = 0;
    while ((pos = srcStr.find(findStr, pos)) != std::string::npos) {
        srcStr.replace(pos, findStr.length(), replaceStr);
        pos += replaceStr.length();
    }
}

/* Strips leading and trailing space characters (' ' only) in place. */
std::string &StringUtil::trim(std::string &s) {
    if (s.empty()) {
        return s;
    }
    s.erase(0, s.find_first_not_of(" "));
    s.erase(s.find_last_not_of(" ") + 1);
    return s;
}

/* Splits `s` on `seperator`, returning the pieces in order. */
std::list<std::string> StringUtil::SeperatorBy(const std::string &s, char seperator)
{
    std::list<std::string> container;
    std::istringstream stream(s);
    std::string out;
    while (std::getline(stream, out, seperator)) {
        container.push_back(out);
    }
    return container;
}

/* Returns a copy of `source` with every occurrence of `target` removed. */
std::string StringUtil::DeleteCharacter(const std::string &source, char target)
{
    std::string dest = source;
    // Erase-remove idiom: the two-iterator erase drops the whole tail left
    // behind by std::remove. The one-iterator form erased a single character
    // and was undefined behaviour when `target` did not occur at all.
    dest.erase(std::remove(dest.begin(), dest.end(), target), dest.end());
    return dest;
}

}
10 | * Don't leave semicolons behind when consuming entity references (rgrove) 11 | * Internally rename some functions in preparation for an amalgamation file 12 | * (jdeng) 13 | * Add proper cflags for gyp builds (skabbes) 14 | 15 | Gumbo 0.9.1 (2014-08-07) 16 | 17 | * First version listed on PyPi. 18 | * Autotools files excluded from GitHub and generated via autogen.sh. (endgame) 19 | * Numerous compiler warnings fixed. (bnoordhuis, craigbarnes) 20 | * Google security audit passed. 21 | * Gyp support (tfarina) 22 | * Naming convention for structs changed to avoid C reserved words. 23 | * Fix several integer and buffer overflows (Maxime2) 24 | * Some Visual Studio compiler support (bugparty) 25 | * Python3 compatibility for the ctypes bindings. 26 | 27 | Gumbo 0.9.0 (2013-08-13) 28 | 29 | * Initial release open-sourced by Google. 30 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing 2 | =========== 3 | Bug reports are very much welcome. Please use GitHub's issue-tracking feature, as it makes it easier to keep track of bugs and makes it possible for other project watchers to view the existing issues. 4 | 5 | Patches and pull requests are also welcome, but before accepting patches, I need you to sign the Google Contributor License Agreement: 6 | 7 | https://developers.google.com/open-source/cla/individual 8 | https://developers.google.com/open-source/cla/corporate 9 | 10 | (Electronic signatures are fine for individual contributors.) 11 | 12 | If you're unwilling to do this, it would be most helpful if you could file bug reports that include detailed prose about where in the code the error is and how to fix it, but leave out exact source code. 
#ifndef GUMBO_INTERFACE
#define GUMBO_INTERFACE

// NOTE(review): the header names of the bare #include lines in this file are
// missing in this copy of the source, and so are the template arguments of
// every std::list / std::map / std::set below. Restore both from the upstream
// header before compiling.
#include
#include
#include

#include "gumbo.h"
#include "gumbo_edit.h"

#include
#include
#include
#include

namespace future {

    // One well-formedness problem: a line/column position plus a message.
    struct GumboWellFormedError {
        int line;
        int column;
        std::string message;
    };

    // C++ wrapper around a Gumbo parse (see m_output). The declarations cover
    // parsing, serialising, node queries and in-place source updates; the
    // implementations live in GumboInterface.cpp, which is not visible here.
    class GumboInterface {
    public:

        GumboInterface(const std::string &source, const std::string &version);
        GumboInterface(const std::string &source, const std::string &version,
                const std::map &source_updates);
        ~GumboInterface();

        void parse();
        std::string repair();
        std::string getxhtml();
        std::string prettyprint(std::string indent_chars = " ");

        // returns the list of tags that match manifest properties
        std::list get_all_properties();

        // returns the "html" node
        GumboNode * get_root_node();

        // routines for working with gumbo paths
        GumboNode* get_node_from_path(std::list & apath);
        std::list get_path_to_node(GumboNode* node);

        // routines for working with qwebpaths
        GumboNode* get_node_from_qwebpath(std::string webpath);
        std::string get_qwebpath_to_node(GumboNode* node);

        // routines for updating while serializing (see SourceUpdates and AnchorUpdates)
        std::string perform_source_updates(
                const std::string & my_current_book_relpath);
        std::string perform_style_updates(
                const std::string & my_current_book_relpath);
        std::string perform_link_updates(const std::string & newlinks);
        std::string get_body_contents();
        std::string perform_body_updates(const std::string & new_body);

        // routines for working with nodes with specific attributes
        std::list get_all_nodes_with_attribute(
                const std::string & attname);
        std::list get_all_values_for_attribute(
                const std::string & attname);
        std::map get_attributes_of_node(GumboNode* node);

        // routines for working with nodes with specific tags
        std::list get_all_nodes_with_tag(GumboTag tag);
        std::list get_all_nodes_with_tags(
                const std::list & tags);

        // utility routines
        std::string get_tag_name(GumboNode *node);
        std::string get_local_text_of_node(GumboNode* node);
        std::string get_text_of_node(GumboNode* node);

        // routine to check if well-formed
        std::list error_check();

        // routines that work on a node and its children only
        std::list get_nodes_with_attribute(GumboNode* node,
                const char * att_name);

        std::list get_nodes_with_tags(GumboNode* node,
                const std::list & tags);

        std::list get_all_nodes(GumboNode* node);

    private:

        // Bit flags (each a distinct power of two) passed to serialize() and
        // serialize_contents() as `doupdates`.
        enum UpdateTypes {
            NoUpdates = 0,
            SourceUpdates = 1 << 0,
            LinkUpdates = 1 << 1,
            BodyUpdates = 1 << 2,
            StyleUpdates = 1 << 3
        };

        std::list get_properties(GumboNode* node);

        std::list get_values_for_attr(GumboNode* node,
                const char* attr_name);

        std::string serialize(GumboNode* node,
                enum UpdateTypes doupdates = NoUpdates);

        std::string serialize_contents(GumboNode* node,
                enum UpdateTypes doupdates = NoUpdates);

        std::string prettyprint(GumboNode* node, int lvl,
                const std::string indent_chars);

        std::string prettyprint_contents(GumboNode* node, int lvl,
                const std::string indent_chars);

        std::string build_doctype(GumboNode *node);

        std::string get_attribute_name(GumboAttribute * at);

        std::string build_attributes(GumboAttribute * at, bool no_entities,
                bool run_src_updates = false, bool run_style_updates = false);

        std::string update_attribute_value(const std::string &href);

        std::string update_style_urls(const std::string& source);

        std::string substitute_xml_entities_into_text(const std::string &text);

        std::string substitute_xml_entities_into_attributes(char quote,
                const std::string &text);

        bool in_set(std::set &s, std::string &key);

        void rtrim(std::string &s);

        void ltrim(std::string &s);

        void ltrimnewlines(std::string &s);

        void newlinetrim(std::string &s);

        void condense_whitespace(std::string &s);

        void replace_all(std::string &s, const char * s1, const char * s2);

        // Hopefully now unneeded
        // std::string fix_self_closing_tags(const std::string & source);

        std::string m_source;
        GumboOutput* m_output;
        std::string m_utf8src;
        // NOTE(review): reference member -- it must be bound by every
        // constructor, including the two-argument one that receives no map.
        // Confirm in GumboInterface.cpp what it is bound to there.
        const std::map & m_sourceupdates;
        std::string m_newcsslinks;
        std::string m_currentdir;
        std::string m_newbody;
        bool m_hasnbsp;
        std::string m_version;

    };
}
#endif
/*
 * UrlUtil
 *
 *  Created on: 2017-11-21
 *      Author: bingjian
 */

#ifndef URLUTIL_H_
#define URLUTIL_H_
#include <string>

namespace future {

/* Static helpers for percent-encoding and decoding of URL components. */
class UrlUtil {
public:
    UrlUtil();
    virtual ~UrlUtil();
    static std::string UrlEncode(const std::string& str);
    static std::string UrlDecode(const std::string& str);

private:
    static unsigned char ToHex(unsigned char x);
    static unsigned char FromHex(unsigned char x);

};

}

#endif /* URLUTIL_H_ */

#include <cassert>
#include <cctype>

namespace future {

UrlUtil::UrlUtil() {
    // Auto-generated constructor stub; nothing to initialise.
}

UrlUtil::~UrlUtil() {
    // Auto-generated destructor stub; nothing to release.
}

/* Maps a nibble (0..15) to its upper-case hexadecimal digit. */
unsigned char UrlUtil::ToHex(unsigned char x) {
    return x > 9 ? x + 55 : x + 48;
}

/*
 * Maps a hexadecimal digit to its value (0..15).
 * Only 0-9, A-F and a-f are digits; the previous ranges A-Z / a-z silently
 * produced values up to 35. A non-digit trips the assertion in debug builds
 * and yields 0 otherwise, instead of returning an uninitialised value.
 */
unsigned char UrlUtil::FromHex(unsigned char x) {
    if (x >= '0' && x <= '9') {
        return x - '0';
    }
    if (x >= 'A' && x <= 'F') {
        return x - 'A' + 10;
    }
    if (x >= 'a' && x <= 'f') {
        return x - 'a' + 10;
    }
    assert(0 && "FromHex: not a hexadecimal digit");
    return 0;
}

/*
 * Percent-encodes `str`: alphanumerics and "-_.~" are copied, a space becomes
 * '+', every other byte becomes "%XX" with upper-case hex digits.
 */
std::string UrlUtil::UrlEncode(const std::string& str) {
    std::string encoded;
    const size_t length = str.length();
    for (size_t i = 0; i < length; i++) {
        const unsigned char byte = (unsigned char) str[i];
        if (isalnum(byte) || byte == '-' || byte == '_' || byte == '.' || byte == '~') {
            encoded += str[i];
        } else if (byte == ' ') {
            encoded += "+";
        } else {
            encoded += '%';
            encoded += ToHex(byte >> 4);
            encoded += ToHex(byte % 16);
        }
    }
    return encoded;
}

/*
 * Reverses UrlEncode: '+' becomes a space and "%XX" becomes the byte 0xXX.
 * A '%' that is not followed by two hexadecimal digits (truncated or
 * malformed escape) is copied through unchanged; previously this was guarded
 * only by assert(), so release builds read past the end of the string.
 */
std::string UrlUtil::UrlDecode(const std::string& str) {
    std::string decoded;
    const size_t length = str.length();
    for (size_t i = 0; i < length; i++) {
        if (str[i] == '+') {
            decoded += ' ';
        } else if (str[i] == '%' && i + 2 < length
                && isxdigit((unsigned char) str[i + 1])
                && isxdigit((unsigned char) str[i + 2])) {
            const unsigned char high = FromHex((unsigned char) str[i + 1]);
            const unsigned char low = FromHex((unsigned char) str[i + 2]);
            decoded += (char) (high * 16 + low);
            i += 2;
        } else {
            decoded += str[i];
        }
    }
    return decoded;
}

}
14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | 17 | #include "attribute.h" 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "util.h" 25 | #include "vector.h" 26 | 27 | struct GumboInternalParser; 28 | 29 | GumboAttribute* gumbo_get_attribute( 30 | const GumboVector* attributes, const char* name) { 31 | unsigned int i = 0; 32 | for (i = 0; i < attributes->length; ++i) { 33 | GumboAttribute* attr = attributes->data[i]; 34 | if (!strcasecmp(attr->name, name)) { 35 | return attr; 36 | } 37 | } 38 | return NULL; 39 | } 40 | 41 | void gumbo_attribute_set_value(GumboAttribute *attr, const char *value) 42 | { 43 | gumbo_free((void *)attr->value); 44 | attr->value = gumbo_strdup(value); 45 | attr->original_value = kGumboEmptyString; 46 | attr->value_start = kGumboEmptySourcePosition; 47 | attr->value_end = kGumboEmptySourcePosition; 48 | } 49 | 50 | void gumbo_destroy_attribute(GumboAttribute* attribute) { 51 | gumbo_free((void*) attribute->name); 52 | gumbo_free((void*) attribute->value); 53 | gumbo_free((void*) attribute); 54 | } 55 | 56 | void gumbo_element_set_attribute( 57 | GumboElement *element, const char *name, const char *value) 58 | { 59 | GumboVector *attributes = &element->attributes; 60 | GumboAttribute *attr = gumbo_get_attribute(attributes, name); 61 | 62 | if (!attr) { 63 | attr = gumbo_malloc(sizeof(GumboAttribute)); 64 | attr->value = NULL; 65 | attr->attr_namespace = GUMBO_ATTR_NAMESPACE_NONE; 66 | 67 | attr->name = gumbo_strdup(name); 68 | attr->original_name = kGumboEmptyString; 69 | attr->name_start = kGumboEmptySourcePosition; 70 | attr->name_end = kGumboEmptySourcePosition; 71 | 72 | gumbo_vector_add(attr, attributes); 73 | } 74 | 75 | gumbo_attribute_set_value(attr, value); 76 | } 77 | 78 | void gumbo_element_remove_attribute_at(GumboElement *element, unsigned int pos) { 79 | GumboAttribute *attr = element->attributes.data[pos]; 80 | gumbo_vector_remove_at(pos, &element->attributes); 81 | 
gumbo_destroy_attribute(attr); 82 | } 83 | 84 | void gumbo_element_remove_attribute(GumboElement *element, GumboAttribute *attr) { 85 | int idx = gumbo_vector_index_of(&element->attributes, attr); 86 | if (idx >= 0) { 87 | gumbo_vector_remove_at(idx, &element->attributes); 88 | gumbo_destroy_attribute(attr); 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/attribute.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
//
// Author: jdtang@google.com (Jonathan Tang)

#ifndef GUMBO_ATTRIBUTE_H_
#define GUMBO_ATTRIBUTE_H_

#include "gumbo.h"

#ifdef __cplusplus
extern "C" {
#endif

struct GumboInternalParser;

// Replaces the attribute's value with a copy of `value` and resets its
// original_value / value_start / value_end fields to the empty constants.
void gumbo_attribute_set_value(GumboAttribute *attr, const char *value);

// Frees the attribute's name, its value and the attribute struct itself.
void gumbo_destroy_attribute(GumboAttribute* attribute);

// Sets attribute `name` on `element` to `value`; an attribute with that name
// is created and appended when none exists.
void gumbo_element_set_attribute(
    GumboElement *element, const char *name, const char *value);

// Removes the attribute at index `pos` from `element` and destroys it.
void gumbo_element_remove_attribute_at(GumboElement *element, unsigned int pos);

// Removes `attr` from `element` and destroys it, if it is found in the
// element's attribute vector.
void gumbo_element_remove_attribute(GumboElement *element, GumboAttribute *attr);

#ifdef __cplusplus
}
#endif

#endif  // GUMBO_ATTRIBUTE_H_
// Internal header for character reference handling; it is not meant to be
// exposed transitively by any public API header, which is why the functions
// aren't namespaced.

#ifndef GUMBO_CHAR_REF_H_
#define GUMBO_CHAR_REF_H_

// Needed for `bool` in the consume_char_ref() prototype below.
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

struct GumboInternalParser;
struct GumboInternalUtf8Iterator;

// Value that indicates no character was produced.
extern const int kGumboNoChar;

// Certain named character references generate two codepoints, not one, and so
// the consume_char_ref subroutine needs to return this instead of an int.  The
// first field will be kGumboNoChar if no character reference was found; the
// second field will be kGumboNoChar if that is the case or if the character
// reference returns only a single codepoint.
typedef struct {
  int first;
  int second;
} OneOrTwoCodepoints;

// Implements the "consume a character reference" section of the spec.
// This reads in characters from the input as necessary, and fills in a
// OneOrTwoCodepoints struct containing the characters read.  It may add parse
// errors to the GumboParser's errors vector, if the spec calls for it.  Pass a
// space for the "additional allowed char" when the spec says "with no
// additional allowed char".  Returns false on parse error, true otherwise.
bool consume_char_ref(
    struct GumboInternalParser* parser, struct GumboInternalUtf8Iterator* input,
    int additional_allowed_char, bool is_in_attribute,
    OneOrTwoCodepoints* output);

#ifdef __cplusplus
}
#endif

#endif  // GUMBO_CHAR_REF_H_
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | 17 | #include "error.h" 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "gumbo.h" 25 | #include "parser.h" 26 | #include "string_buffer.h" 27 | #include "util.h" 28 | #include "vector.h" 29 | 30 | // static const size_t kMessageBufferSize = 256; 31 | 32 | // Prints a formatted message to a StringBuffer. This automatically resizes the 33 | // StringBuffer as necessary to fit the message. Returns the number of bytes 34 | // written. 35 | static int print_message(GumboStringBuffer* output, const char* format, ...) { 36 | va_list args; 37 | int remaining_capacity = output->capacity - output->length; 38 | va_start(args, format); 39 | int bytes_written = vsnprintf(output->data + output->length, 40 | remaining_capacity, format, args); 41 | va_end(args); 42 | 43 | #ifdef _MSC_VER 44 | if (bytes_written == -1) { 45 | // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of 46 | // returning the number of bytes that would've been written had there been 47 | // enough. In this case, we'll double the buffer size and hope it fits when 48 | // we retry (letting it fail and returning 0 if it doesn't), since there's 49 | // no way to smartly resize the buffer. 50 | gumbo_string_buffer_reserve(output->capacity * 2, output); 51 | va_start(args, format); 52 | int result = vsnprintf(output->data + output->length, 53 | remaining_capacity, format, args); 54 | va_end(args); 55 | return result == -1 ? 
0 : result; 56 | } 57 | #else 58 | // -1 in standard C99 indicates an encoding error. Return 0 and do nothing. 59 | if (bytes_written == -1) { 60 | return 0; 61 | } 62 | #endif 63 | 64 | if (bytes_written >= remaining_capacity) { 65 | gumbo_string_buffer_reserve(output->capacity + bytes_written, output); 66 | remaining_capacity = output->capacity - output->length; 67 | va_start(args, format); 68 | bytes_written = vsnprintf(output->data + output->length, 69 | remaining_capacity, format, args); 70 | va_end(args); 71 | } 72 | output->length += bytes_written; 73 | return bytes_written; 74 | } 75 | 76 | static void print_tag_stack(const GumboParserError* error, GumboStringBuffer* output) { 77 | print_message(output, " Currently open tags: "); 78 | for (unsigned int i = 0; i < error->tag_stack.length; ++i) { 79 | if (i) { 80 | print_message(output, ", "); 81 | } 82 | GumboTag tag = (GumboTag) error->tag_stack.data[i]; 83 | print_message(output, gumbo_normalized_tagname(tag)); 84 | } 85 | gumbo_string_buffer_append_codepoint('.', output); 86 | } 87 | 88 | static void handle_parser_error( 89 | const GumboParserError* error, 90 | GumboStringBuffer* output) { 91 | if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL && 92 | error->input_type != GUMBO_TOKEN_DOCTYPE) { 93 | print_message(output, 94 | "The doctype must be the first token in the document"); 95 | return; 96 | } 97 | 98 | switch (error->input_type) { 99 | case GUMBO_TOKEN_DOCTYPE: 100 | print_message(output, "This is not a legal doctype"); 101 | return; 102 | case GUMBO_TOKEN_COMMENT: 103 | // Should never happen; comments are always legal. 104 | assert(0); 105 | // But just in case... 
106 | print_message(output, "Comments aren't legal here"); 107 | return; 108 | case GUMBO_TOKEN_CDATA: 109 | case GUMBO_TOKEN_WHITESPACE: 110 | case GUMBO_TOKEN_CHARACTER: 111 | print_message(output, "Character tokens aren't legal here"); 112 | return; 113 | case GUMBO_TOKEN_NULL: 114 | print_message(output, "Null bytes are not allowed in HTML5"); 115 | return; 116 | case GUMBO_TOKEN_EOF: 117 | if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) { 118 | print_message(output, "You must provide a doctype"); 119 | } else { 120 | print_message(output, "Premature end of file"); 121 | print_tag_stack(error, output); 122 | } 123 | return; 124 | case GUMBO_TOKEN_START_TAG: 125 | case GUMBO_TOKEN_END_TAG: 126 | print_message(output, "That tag isn't allowed here"); 127 | print_tag_stack(error, output); 128 | // TODO(jdtang): Give more specific messaging. 129 | return; 130 | } 131 | } 132 | 133 | // Finds the preceding newline in an original source buffer from a given byte 134 | // location. Returns a character pointer to the character after that, or a 135 | // pointer to the beginning of the string if this is the first line. 136 | static const char* find_last_newline( 137 | const char* original_text, const char* error_location) { 138 | assert(error_location >= original_text); 139 | const char* c = error_location; 140 | // if the error location itself is a newline then start searching for 141 | // the preceding newline one character earlier, (if possible) 142 | if ((*c == '\n') && (c != original_text)) --c; 143 | for (; c != original_text && *c != '\n'; --c) { 144 | // There may be an error at EOF, which would be a nul byte. 145 | assert(*c || c == error_location); 146 | } 147 | return c == original_text ? c : c + 1; 148 | } 149 | 150 | // Finds the next newline in the original source buffer from a given byte 151 | // location. Returns a character pointer to that newline, or a pointer to the 152 | // terminating null byte if this is the last line. 
153 | static const char* find_next_newline( 154 | const char* original_text, const char* error_location) { 155 | const char* c = error_location; 156 | for (; *c && *c != '\n'; ++c); 157 | return c; 158 | } 159 | 160 | GumboError* gumbo_add_error(GumboParser* parser) { 161 | int max_errors = parser->_options->max_errors; 162 | if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) { 163 | return NULL; 164 | } 165 | GumboError* error = gumbo_malloc(sizeof(GumboError)); 166 | gumbo_vector_add(error, &parser->_output->errors); 167 | return error; 168 | } 169 | 170 | void gumbo_error_to_string( 171 | const GumboError* error, GumboStringBuffer* output) { 172 | print_message(output, "@%d:%d: ", 173 | error->position.line, error->position.column); 174 | switch (error->type) { 175 | case GUMBO_ERR_UTF8_INVALID: 176 | print_message(output, "Invalid UTF8 character 0x%x", 177 | error->v.codepoint); 178 | break; 179 | case GUMBO_ERR_UTF8_TRUNCATED: 180 | print_message(output, 181 | "Input stream ends with a truncated UTF8 character 0x%x", 182 | error->v.codepoint); 183 | break; 184 | case GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS: 185 | print_message(output, 186 | "No digits after &# in numeric character reference"); 187 | break; 188 | case GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON: 189 | print_message(output, 190 | "The numeric character reference &#%d should be followed " 191 | "by a semicolon", error->v.codepoint); 192 | break; 193 | case GUMBO_ERR_NUMERIC_CHAR_REF_INVALID: 194 | print_message(output, 195 | "The numeric character reference &#%d; encodes an invalid " 196 | "unicode codepoint", error->v.codepoint); 197 | break; 198 | case GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON: 199 | // The textual data came from one of the literal strings in the table, and 200 | // so it'll be null-terminated. 
201 | print_message(output, 202 | "The named character reference &%.*s should be followed by a " 203 | "semicolon", (int) error->v.text.length, error->v.text.data); 204 | break; 205 | case GUMBO_ERR_NAMED_CHAR_REF_INVALID: 206 | print_message(output, 207 | "The named character reference &%.*s; is not a valid entity name", 208 | (int) error->v.text.length, error->v.text.data); 209 | break; 210 | case GUMBO_ERR_DUPLICATE_ATTR: 211 | print_message(output, 212 | "Attribute %s occurs multiple times, at positions %d and %d", 213 | error->v.duplicate_attr.name, 214 | error->v.duplicate_attr.original_index, 215 | error->v.duplicate_attr.new_index); 216 | break; 217 | case GUMBO_ERR_PARSER: 218 | case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG: 219 | handle_parser_error(&error->v.parser, output); 220 | break; 221 | default: 222 | print_message(output, 223 | "Tokenizer error with an unimplemented error message"); 224 | break; 225 | } 226 | gumbo_string_buffer_append_codepoint('.', output); 227 | } 228 | 229 | void gumbo_caret_diagnostic_to_string(const GumboError* error, 230 | const char* source_text, GumboStringBuffer* output) { 231 | gumbo_error_to_string(error, output); 232 | 233 | const char* line_start = 234 | find_last_newline(source_text, error->original_text); 235 | const char* line_end = 236 | find_next_newline(source_text, error->original_text); 237 | GumboStringPiece original_line; 238 | original_line.data = line_start; 239 | original_line.length = line_end - line_start; 240 | 241 | gumbo_string_buffer_append_codepoint('\n', output); 242 | gumbo_string_buffer_append_string(&original_line, output); 243 | gumbo_string_buffer_append_codepoint('\n', output); 244 | gumbo_string_buffer_reserve( 245 | output->length + error->position.column, output); 246 | int num_spaces = error->position.column - 1; 247 | memset(output->data + output->length, ' ', num_spaces); 248 | output->length += num_spaces; 249 | gumbo_string_buffer_append_codepoint('^', output); 250 | 
gumbo_string_buffer_append_codepoint('\n', output); 251 | } 252 | 253 | void gumbo_print_caret_diagnostic( 254 | const GumboError* error, const char* source_text) { 255 | GumboStringBuffer text; 256 | gumbo_string_buffer_init(&text); 257 | gumbo_caret_diagnostic_to_string(error, source_text, &text); 258 | printf("%.*s", (int) text.length, text.data); 259 | gumbo_string_buffer_destroy(&text); 260 | } 261 | 262 | void gumbo_error_destroy(GumboError* error) { 263 | if (error->type == GUMBO_ERR_PARSER || 264 | error->type == GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG) { 265 | gumbo_vector_destroy(&error->v.parser.tag_stack); 266 | } else if (error->type == GUMBO_ERR_DUPLICATE_ATTR) { 267 | gumbo_free((void*) error->v.duplicate_attr.name); 268 | } 269 | gumbo_free(error); 270 | } 271 | 272 | void gumbo_init_errors(GumboParser* parser) { 273 | gumbo_vector_init(5, &parser->_output->errors); 274 | } 275 | 276 | void gumbo_destroy_errors(GumboParser* parser) { 277 | for (unsigned int i = 0; i < parser->_output->errors.length; ++i) { 278 | gumbo_error_destroy(parser->_output->errors.data[i]); 279 | } 280 | gumbo_vector_destroy(&parser->_output->errors); 281 | } 282 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/error.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: jdtang@google.com (Jonathan Tang)
//
// Error types, enums, and handling functions.

#ifndef GUMBO_ERROR_H_
#define GUMBO_ERROR_H_
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
// NOTE(review): the header name is missing from the next line; presumably
// <stdint.h>, since uint64_t is used by GumboError -- confirm against upstream.
#include

#include "gumbo.h"
#include "insertion_mode.h"
#include "string_buffer.h"
#include "token_type.h"

#ifdef __cplusplus
extern "C" {
#endif

struct GumboInternalParser;

// Discriminator stored in GumboError.type. It selects which member of the
// GumboError.v union carries the error's extra data.
typedef enum {
  GUMBO_ERR_UTF8_INVALID,
  GUMBO_ERR_UTF8_TRUNCATED,
  GUMBO_ERR_UTF8_NULL,
  GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS,
  GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON,
  GUMBO_ERR_NUMERIC_CHAR_REF_INVALID,
  GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON,
  GUMBO_ERR_NAMED_CHAR_REF_INVALID,
  GUMBO_ERR_TAG_STARTS_WITH_QUESTION,
  GUMBO_ERR_TAG_EOF,
  GUMBO_ERR_TAG_INVALID,
  GUMBO_ERR_CLOSE_TAG_EMPTY,
  GUMBO_ERR_CLOSE_TAG_EOF,
  GUMBO_ERR_CLOSE_TAG_INVALID,
  GUMBO_ERR_SCRIPT_EOF,
  GUMBO_ERR_ATTR_NAME_EOF,
  GUMBO_ERR_ATTR_NAME_INVALID,
  GUMBO_ERR_ATTR_DOUBLE_QUOTE_EOF,
  GUMBO_ERR_ATTR_SINGLE_QUOTE_EOF,
  GUMBO_ERR_ATTR_UNQUOTED_EOF,
  GUMBO_ERR_ATTR_UNQUOTED_RIGHT_BRACKET,
  GUMBO_ERR_ATTR_UNQUOTED_EQUALS,
  GUMBO_ERR_ATTR_AFTER_EOF,
  GUMBO_ERR_ATTR_AFTER_INVALID,
  GUMBO_ERR_DUPLICATE_ATTR,
  GUMBO_ERR_SOLIDUS_EOF,
  GUMBO_ERR_SOLIDUS_INVALID,
  GUMBO_ERR_DASHES_OR_DOCTYPE,
  GUMBO_ERR_COMMENT_EOF,
  GUMBO_ERR_COMMENT_INVALID,
  GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH,
  GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH,
  GUMBO_ERR_COMMENT_SPACE_AFTER_DOUBLE_DASH,
  GUMBO_ERR_COMMENT_END_BANG_EOF,
  GUMBO_ERR_DOCTYPE_EOF,
  GUMBO_ERR_DOCTYPE_INVALID,
  GUMBO_ERR_DOCTYPE_SPACE,
  GUMBO_ERR_DOCTYPE_RIGHT_BRACKET,
  GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET,
  GUMBO_ERR_DOCTYPE_END,
  GUMBO_ERR_PARSER,
  GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG,
} GumboErrorType;

// Additional data for duplicated attributes.
typedef struct GumboInternalDuplicateAttrError {
  // The name of the attribute.  Owned by this struct.
  const char* name;

  // The (0-based) index within the attributes vector of the original
  // occurrence.
  unsigned int original_index;

  // The (0-based) index where the new occurrence would be.
  unsigned int new_index;
} GumboDuplicateAttrError;

// A simplified representation of the tokenizer state, designed to be more
// useful to clients of this library than the internal representation.  This
// condenses the actual states used in the tokenizer state machine into a few
// values that will be familiar to users of HTML.
typedef enum {
  GUMBO_ERR_TOKENIZER_DATA,
  GUMBO_ERR_TOKENIZER_CHAR_REF,
  GUMBO_ERR_TOKENIZER_RCDATA,
  GUMBO_ERR_TOKENIZER_RAWTEXT,
  GUMBO_ERR_TOKENIZER_PLAINTEXT,
  GUMBO_ERR_TOKENIZER_SCRIPT,
  GUMBO_ERR_TOKENIZER_TAG,
  GUMBO_ERR_TOKENIZER_SELF_CLOSING_TAG,
  GUMBO_ERR_TOKENIZER_ATTR_NAME,
  GUMBO_ERR_TOKENIZER_ATTR_VALUE,
  GUMBO_ERR_TOKENIZER_MARKUP_DECLARATION,
  GUMBO_ERR_TOKENIZER_COMMENT,
  GUMBO_ERR_TOKENIZER_DOCTYPE,
  GUMBO_ERR_TOKENIZER_CDATA,
} GumboTokenizerErrorState;

// Additional data for tokenizer errors.
// This records the current state and codepoint encountered - this is usually
// enough to reconstruct what went wrong and provide a friendly error message.
typedef struct GumboInternalTokenizerError {
  // The bad codepoint encountered.
  int codepoint;

  // The state that the tokenizer was in at the time.
  GumboTokenizerErrorState state;
} GumboTokenizerError;

// Additional data for parse errors.
typedef struct GumboInternalParserError {
  // The type of input token that resulted in this error.
  GumboTokenType input_type;

  // The HTML tag of the input token.  TAG_UNKNOWN if this was not a tag token.
  GumboTag input_tag;

  // The insertion mode that the parser was in at the time.
  GumboInsertionMode parser_state;

  // The tag stack at the point of the error.  Note that this is a GumboVector
  // of GumboTag's *stored by value* - cast the void* to a GumboTag directly to
  // get at the tag.
  GumboVector /* GumboTag */ tag_stack;
} GumboParserError;

// The overall error struct representing an error in decoding/tokenizing/parsing
// the HTML.  This contains an enumerated type flag, a source position, and then
// a union of fields containing data specific to the error.
typedef struct GumboInternalError {
  // The type of error.
  GumboErrorType type;

  // The position within the source file where the error occurred.
  GumboSourcePosition position;

  // A pointer to the byte within the original source file text where the error
  // occurred (note that this is not the same as position.offset, as that gives
  // character-based instead of byte-based offsets).
  const char* original_text;

  // Type-specific error information.
  union {
    // The code point we encountered, for:
    // * GUMBO_ERR_UTF8_INVALID
    // * GUMBO_ERR_UTF8_TRUNCATED
    // * GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON
    // * GUMBO_ERR_NUMERIC_CHAR_REF_INVALID
    uint64_t codepoint;

    // Tokenizer errors.
    GumboTokenizerError tokenizer;

    // Short textual data, for:
    // * GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON
    // * GUMBO_ERR_NAMED_CHAR_REF_INVALID
    GumboStringPiece text;

    // Duplicate attribute data, for GUMBO_ERR_DUPLICATE_ATTR.
    GumboDuplicateAttrError duplicate_attr;

    // Parser state, for GUMBO_ERR_PARSER and
    // GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG.
    struct GumboInternalParserError parser;
  } v;
} GumboError;

// Adds a new error to the parser's error list, and returns a pointer to it so
// that clients can fill out the rest of its fields.  May return NULL if we're
// already over the max_errors field specified in GumboOptions.
GumboError* gumbo_add_error(struct GumboInternalParser* parser);

// Initializes the errors vector in the parser.
void gumbo_init_errors(struct GumboInternalParser* errors);

// Frees all the errors in the 'errors_' field of the parser.
void gumbo_destroy_errors(struct GumboInternalParser* errors);

// Frees the memory used for a single GumboError.
void gumbo_error_destroy(GumboError* error);

// Prints an error to a string.  This fills an empty GumboStringBuffer with a
// freshly-allocated buffer containing the error message text.  The caller is
// responsible for deleting the buffer.  (Note that the buffer is allocated with
// the allocator specified in the GumboParser config and hence should be freed
// by gumbo_free().)
void gumbo_error_to_string(const GumboError* error, GumboStringBuffer* output);

// Prints a caret diagnostic to a string.  This fills an empty GumboStringBuffer
// with a freshly-allocated buffer containing the error message text.  The
// caller is responsible for deleting the buffer.  (Note that the buffer is
// allocated with the allocator specified in the GumboParser config and hence
// should be freed by gumbo_free().)
211 | void gumbo_caret_diagnostic_to_string(const GumboError* error, 212 | const char* source_text, GumboStringBuffer* output); 213 | 214 | // Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead 215 | // of writing to a string. 216 | void gumbo_print_caret_diagnostic( 217 | const GumboError* error, const char* source_text); 218 | 219 | #ifdef __cplusplus 220 | } 221 | #endif 222 | 223 | #endif // GUMBO_ERROR_H_ 224 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/gumbo_edit.c: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Kevin B. Hendricks, Stratford Ontario All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | // 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include "attribute.h" 23 | #include "vector.h" 24 | #include "gumbo.h" 25 | #include "utf8.h" 26 | #include "util.h" 27 | 28 | 29 | /* main interface routine for editing in gumbo */ 30 | /** 31 | * const GumboVector kGumboEmptyVector = { NULL, 0, 0 }; 32 | * const GumboSourcePosition kGumboEmptySourcePosition = { 0, 0, 0 }; 33 | 34 | * void gumbo_attribute_set_value(GumboAttribute *attr, const char *value); 35 | * void gumbo_destroy_attribute(GumboAttribute* attribute); 36 | * void gumbo_element_set_attribute(GumboElement *element, const char *name, const char *value); 37 | * void gumbo_element_remove_attribute_at(GumboElement *element, unsigned int pos); 38 | * void gumbo_element_remove_attribute(GumboElement *element, GumboAttribute *attr); 39 | 40 | * void gumbo_vector_init(size_t initial_capacity, GumboVector* vector); 41 | * void gumbo_vector_destroy(GumboVector* vector); 42 | * void gumbo_vector_add(void* element, GumboVector* vector); 43 | * void* gumbo_vector_pop(GumboVector* vector); 44 | * void gumbo_vector_insert_at(void* element, int index, GumboVector* vector); 45 | * void gumbo_vector_remove(const void* element, GumboVector* vector); 46 | * void* gumbo_vector_remove_at(int index, GumboVector* vector); 47 | * int gumbo_vector_index_of(GumboVector* vector, const void* element); 48 | * void gumbo_vector_splice(int where, int n_to_remove, void **data, int n_to_insert, GumboVector* vector); 49 | 50 | * GumboTag gumbo_tag_enum(const char* tagname); 51 | * GumboTag gumbo_tagn_enum(const char* tagname, int length); 52 | 53 | * extern GumboNode *gumbo_create_node(GumboNodeType type); 54 | * extern void gumbo_destroy_node(GumboNode *node); 55 | */ 56 | 57 | 58 | // used internally by gumbo_new_output init 59 | static GumboNode* gumbo_new_document_node(void) { 60 | GumboNode* document_node = gumbo_create_node(GUMBO_NODE_DOCUMENT); 61 | gumbo_vector_init(1, 
&document_node->v.document.children); 62 | GumboDocument* document = &document_node->v.document; 63 | document->has_doctype = false; 64 | document->name = NULL; 65 | document->public_identifier = NULL; 66 | document->system_identifier = NULL; 67 | return document_node; 68 | } 69 | 70 | 71 | // create and initialize a completely new tree output area 72 | GumboOutput* gumbo_new_output_init(void) { 73 | GumboOutput* output = gumbo_malloc(sizeof(GumboOutput)); 74 | output->root = NULL; 75 | output->document = gumbo_new_document_node(); 76 | gumbo_vector_init(0, &output->errors); 77 | return output; 78 | } 79 | 80 | 81 | // Creates an text node of specified type and returns it. 82 | // Types are GUMBO_NODE_TEXT, GUMBO_NODE_WHITESPACE, GUMBO_NODE_CDATA, and GUMBO_NODE_COMMENT 83 | // No entities are allowed (replace them with their utf-8 character equivalents) 84 | // Note: CDATA and COMMENTS text should NOT include their respective delimiters 85 | // ie. No <-- --> and not CDATA[[ and ]] 86 | GumboNode* gumbo_create_text_node(GumboNodeType type, const char * text) { 87 | assert(type != GUMBO_NODE_DOCUMENT); 88 | assert(type != GUMBO_NODE_TEMPLATE); 89 | assert(type != GUMBO_NODE_ELEMENT); 90 | GumboNode* textnode = gumbo_create_node(type); 91 | textnode->type = GUMBO_NODE_COMMENT; 92 | textnode->parse_flags = GUMBO_INSERTION_NORMAL; 93 | textnode->v.text.text = gumbo_strdup(text); 94 | return textnode; 95 | } 96 | 97 | 98 | // Creates an element node with the tag (enum) in the specified namespace and returns it. 
99 | // Since no original text exists, any created element tag must already exist in the tag_enum.h 100 | // This is why we have expanded the set of recognized tags to include all svg and mathml tags 101 | GumboNode* gumbo_create_element_node(GumboTag tag, GumboNamespaceEnum gns) { 102 | GumboNode* node = gumbo_create_node(GUMBO_NODE_ELEMENT); 103 | GumboElement* element = &node->v.element; 104 | gumbo_vector_init(1, &element->children); 105 | gumbo_vector_init(0, &element->attributes); 106 | element->tag = tag; 107 | element->tag_namespace = gns; 108 | element->original_tag = kGumboEmptyString; 109 | element->original_end_tag = kGumboEmptyString; 110 | element->start_pos = kGumboEmptySourcePosition; 111 | element->end_pos = kGumboEmptySourcePosition; 112 | return node; 113 | } 114 | 115 | 116 | // Creates an template node and returns it. 117 | GumboNode* gumbo_create_template_node() { 118 | GumboNode* node = gumbo_create_node(GUMBO_NODE_TEMPLATE); 119 | GumboElement* element = &node->v.element; 120 | gumbo_vector_init(1, &element->children); 121 | gumbo_vector_init(0, &element->attributes); 122 | element->tag = GUMBO_TAG_TEMPLATE; 123 | element->tag_namespace = GUMBO_NAMESPACE_HTML; 124 | element->original_tag = kGumboEmptyString; 125 | element->original_end_tag = kGumboEmptyString; 126 | element->start_pos = kGumboEmptySourcePosition; 127 | element->end_pos = kGumboEmptySourcePosition; 128 | return node; 129 | } 130 | 131 | 132 | // Appends a node to the end of its parent, setting the "parent" and 133 | // "index_within_parent" fields appropriately. 
134 | void gumbo_append_node(GumboNode* parent, GumboNode* node) { 135 | assert(node->parent == NULL); 136 | assert(node->index_within_parent == -1); 137 | GumboVector* children; 138 | if (parent->type == GUMBO_NODE_ELEMENT || parent->type == GUMBO_NODE_TEMPLATE) { 139 | children = &parent->v.element.children; 140 | } else { 141 | assert(parent->type == GUMBO_NODE_DOCUMENT); 142 | children = &parent->v.document.children; 143 | } 144 | node->parent = parent; 145 | node->index_within_parent = children->length; 146 | gumbo_vector_add((void*) node, children); 147 | assert(node->index_within_parent < children->length); 148 | } 149 | 150 | 151 | // Inserts a node at the specified index in the specified parent, 152 | // updating the "parent" and "index_within_parent" fields of it and all its siblings. 153 | // If the index is -1, this simply calls gumbo_append_node. 154 | void gumbo_insert_node(GumboNode* node, GumboNode* target_parent, int target_index) { 155 | assert(node->parent == NULL); 156 | assert(node->index_within_parent == -1); 157 | GumboNode* parent = target_parent; 158 | int index = target_index; 159 | if (index != -1) { 160 | GumboVector* children = NULL; 161 | if (parent->type == GUMBO_NODE_ELEMENT || 162 | parent->type == GUMBO_NODE_TEMPLATE) { 163 | children = &parent->v.element.children; 164 | } else if (parent->type == GUMBO_NODE_DOCUMENT) { 165 | children = &parent->v.document.children; 166 | } else { 167 | assert(0); 168 | } 169 | assert(index >= 0); 170 | assert(index < children->length); 171 | node->parent = parent; 172 | node->index_within_parent = index; 173 | gumbo_vector_insert_at((void*) node, index, children); 174 | assert(node->index_within_parent < children->length); 175 | for (unsigned int i = index + 1; i < children->length; ++i) { 176 | GumboNode* sibling = children->data[i]; 177 | sibling->index_within_parent = i; 178 | assert(sibling->index_within_parent < children->length); 179 | } 180 | } else { 181 | gumbo_append_node(parent, node); 
182 | } 183 | } 184 | 185 | 186 | void gumbo_remove_from_parent(GumboNode* node) { 187 | if (!node->parent) { 188 | return; 189 | } 190 | assert(node->parent->type == GUMBO_NODE_ELEMENT || 191 | node->parent->type == GUMBO_NODE_TEMPLATE || 192 | node->parent->type == GUMBO_NODE_DOCUMENT); 193 | GumboVector* children = &node->parent->v.element.children; 194 | if (node->parent->type == GUMBO_NODE_DOCUMENT) { 195 | children = &node->parent->v.document.children; 196 | } 197 | int index = gumbo_vector_index_of(children, node); 198 | assert(index != -1); 199 | gumbo_vector_remove_at(index, children); 200 | node->parent = NULL; 201 | node->index_within_parent = -1; 202 | for (unsigned int i = index; i < children->length; ++i) { 203 | GumboNode* child = children->data[i]; 204 | child->index_within_parent = i; 205 | } 206 | } 207 | 208 | 209 | // Clones attributes, tags, etc. of a node, but does not copy the content (its children). 210 | // The clone shares no structure with the original node: all owned strings and 211 | // values are fresh copies. 
212 | GumboNode* clone_element_node(const GumboNode* node) { 213 | assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE); 214 | GumboNode* new_node = gumbo_malloc(sizeof(GumboNode)); 215 | *new_node = *node; 216 | new_node->parent = NULL; 217 | new_node->index_within_parent = -1; 218 | GumboElement* element = &new_node->v.element; 219 | gumbo_vector_init(1, &element->children); 220 | const GumboVector* old_attributes = &node->v.element.attributes; 221 | gumbo_vector_init(old_attributes->length, &element->attributes); 222 | for (unsigned int i = 0; i < old_attributes->length; ++i) { 223 | const GumboAttribute* old_attr = old_attributes->data[i]; 224 | GumboAttribute* attr = gumbo_malloc(sizeof(GumboAttribute)); 225 | *attr = *old_attr; 226 | attr->name = gumbo_strdup(old_attr->name); 227 | attr->value = gumbo_strdup(old_attr->value); 228 | gumbo_vector_add(attr, &element->attributes); 229 | } 230 | return new_node; 231 | } 232 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/gumbo_edit.h: -------------------------------------------------------------------------------- 1 | // Copyright 2015-2016 Kevin B. Hendricks, Stratford Ontario All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
//

#ifndef GUMBO_EDIT_H_
#define GUMBO_EDIT_H_

#include "gumbo.h"

#ifdef __cplusplus
extern "C" {
#endif

// See gumbo.h for:
//   GumboNode* gumbo_create_node(GumboNodeType type);
//   void gumbo_destroy_node(GumboNode* node);

// create and initialize a completely new output tree
GumboOutput* gumbo_new_output_init(void);

// Creates a text node of the specified type and returns it.
// Types are GUMBO_NODE_TEXT, GUMBO_NODE_WHITESPACE, GUMBO_NODE_CDATA, and GUMBO_NODE_COMMENT
// No entities are allowed (replace them with their utf-8 character equivalents)
// Note: CDATA and COMMENTS text should NOT include their respective delimiters
// ie. no <!-- and --> and no <![CDATA[ and ]]>

// Note: Use gumbo_destroy_node(GumboNode * node) to properly destroy the node if outside
// the final output tree

GumboNode* gumbo_create_text_node(GumboNodeType type, const char * text);

// Creates an element node with the tag (enum) in the specified namespace and returns it.
// Since no original text exists, any created element tag must already exist in the tag_enum.h
// This is why we have expanded the set of recognized tags to include all svg and mathml tags

// Note: Use gumbo_destroy_node(GumboNode * node) to properly destroy the node if outside
// the final output tree

GumboNode* gumbo_create_element_node(GumboTag tag, GumboNamespaceEnum gns);

// Creates a template node and returns it.

// Note: Use gumbo_destroy_node(GumboNode * node) to properly destroy the node if outside
// the final output tree.

GumboNode* gumbo_create_template_node(void);

// Appends a node to the end of its parent, setting the "parent" and
// "index_within_parent" fields appropriately.

void gumbo_append_node(GumboNode* parent, GumboNode* node);

// Inserts a node at the specified index in the specified parent,
// updating the "parent" and "index_within_parent" fields of it and all its siblings.
// If the index is -1, this simply calls gumbo_append_node.

void gumbo_insert_node(GumboNode* node, GumboNode* target_parent, int target_index);

// removes a node from its parent but does not destroy it

// Note: Use gumbo_destroy_node(GumboNode * node) to properly destroy the node if outside
// the final output tree.

void gumbo_remove_from_parent(GumboNode* node);

// Clones attributes, tags, etc. of a node, but does not copy the content (its children).
// The clone shares no structure with the original node: all owned strings and
// values are fresh copies.

// Note: Use gumbo_destroy_node(GumboNode * node) to properly destroy the node if outside
// the output tree.

GumboNode* clone_element_node(const GumboNode* node);


// interface from attribute.h
void gumbo_attribute_set_value(GumboAttribute *attr, const char *value);
void gumbo_destroy_attribute(GumboAttribute* attribute);
void gumbo_element_set_attribute(GumboElement *element, const char *name, const char *value);
void gumbo_element_remove_attribute_at(GumboElement *element, unsigned int pos);
void gumbo_element_remove_attribute(GumboElement *element, GumboAttribute *attr);

// interface from vector.h
// Initializes a new GumboVector with the specified initial capacity.
void gumbo_vector_init(size_t initial_capacity, GumboVector* vector);

// Frees the memory used by a GumboVector.  Does not free the contained pointers.
void gumbo_vector_destroy(GumboVector* vector);

// Adds a new element to a GumboVector.
102 | void gumbo_vector_add(void* element, GumboVector* vector); 103 | 104 | // Removes and returns the element most recently added to the GumboVector. 105 | // Ownership is transferred to caller. Capacity is unchanged. If the vector is 106 | // empty, NULL is returned. 107 | void* gumbo_vector_pop(GumboVector* vector); 108 | 109 | // Inserts an element at a specific index. This is potentially O(N) time, but 110 | // is necessary for some of the spec's behavior. 111 | void gumbo_vector_insert_at(void* element, int index, GumboVector* vector); 112 | 113 | // Removes an element from the vector, or does nothing if the element is not in the vector. 114 | void gumbo_vector_remove(const void* element, GumboVector* vector); 115 | 116 | // Removes and returns an element at a specific index. Note that this is 117 | // potentially O(N) time and should be used sparingly. 118 | void* gumbo_vector_remove_at(int index, GumboVector* vector); 119 | 120 | int gumbo_vector_index_of(GumboVector* vector, const void* element); 121 | void gumbo_vector_splice(int where, int n_to_remove, void **data, int n_to_insert, GumboVector* vector); 122 | 123 | #ifdef __cplusplus 124 | } 125 | #endif 126 | 127 | #endif // GUMBO_EDIT_H_ 128 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/include/gumbo_windll.def: -------------------------------------------------------------------------------- 1 | LIBRARY sigilgumbo.dll 2 | EXPORTS 3 | clear_stack_to_table_body_context @1 4 | clone_element_node @2 5 | clone_node @3 6 | consume_char_ref @4 7 | get_appropriate_insertion_location @5 8 | gumbo_add_error @6 9 | gumbo_append_node @7 10 | gumbo_attribute_set_value @8 11 | gumbo_caret_diagnostic_to_string @9 12 | gumbo_create_element_node @10 13 | gumbo_create_node @11 14 | gumbo_create_template_node @12 15 | gumbo_create_text_node @13 16 | gumbo_debug @14 17 | gumbo_destroy_attribute @15 18 | gumbo_destroy_errors @16 19 | gumbo_destroy_node @17 20 | 
gumbo_destroy_output @18 21 | gumbo_element_remove_attribute @19 22 | gumbo_element_remove_attribute_at @20 23 | gumbo_element_set_attribute @21 24 | gumbo_error_destroy @22 25 | gumbo_error_to_string @23 26 | gumbo_get_attribute @24 27 | gumbo_init_errors @25 28 | gumbo_insert_node @26 29 | gumbo_lex @27 30 | gumbo_memory_set_allocator @28 31 | gumbo_memory_set_free @29 32 | gumbo_new_output_init @30 33 | gumbo_normalize_svg_tagname @31 34 | gumbo_normalized_tagname @32 35 | gumbo_parse @33 36 | gumbo_parse_fragment @34 37 | gumbo_parse_with_options @35 38 | gumbo_print_caret_diagnostic @36 39 | gumbo_remove_from_parent @37 40 | gumbo_string_buffer_append_codepoint @38 41 | gumbo_string_buffer_append_string @39 42 | gumbo_string_buffer_clear @40 43 | gumbo_string_buffer_cstr @41 44 | gumbo_string_buffer_destroy @42 45 | gumbo_string_buffer_init @43 46 | gumbo_string_buffer_put @44 47 | gumbo_string_buffer_putv @45 48 | gumbo_string_buffer_reserve @46 49 | gumbo_string_buffer_to_string @47 50 | gumbo_string_copy @48 51 | gumbo_string_equals @49 52 | gumbo_string_equals_ignore_case @50 53 | gumbo_tag_enum @51 54 | gumbo_tag_from_original_text @52 55 | gumbo_tagn_enum @53 56 | gumbo_token_destroy @54 57 | gumbo_tokenizer_set_is_current_node_foreign @55 58 | gumbo_tokenizer_set_state @56 59 | gumbo_tokenizer_state_destroy @57 60 | gumbo_tokenizer_state_init @58 61 | gumbo_user_allocator @59 62 | gumbo_user_free @60 63 | gumbo_vector_add @61 64 | gumbo_vector_destroy @62 65 | gumbo_vector_index_of @63 66 | gumbo_vector_init @64 67 | gumbo_vector_insert_at @65 68 | gumbo_vector_pop @66 69 | gumbo_vector_remove @67 70 | gumbo_vector_remove_at @68 71 | gumbo_vector_splice @69 72 | kGumboDefaultOptions @70 73 | kGumboEmptySourcePosition @71 74 | kGumboEmptyString @72 75 | kGumboEmptyVector @73 76 | kGumboNoChar @74 77 | kGumboTagNames @75 78 | kScriptTag @76 79 | kUtf8ReplacementChar @77 80 | utf8_is_invalid_code_point @78 81 | utf8iterator_current @79 82 | 
utf8iterator_fill_error_at_mark @80 83 | utf8iterator_get_char_pointer @81 84 | utf8iterator_get_end_pointer @82 85 | utf8iterator_get_position @83 86 | utf8iterator_init @84 87 | utf8iterator_mark @85 88 | utf8iterator_maybe_consume_match @86 89 | utf8iterator_next @87 90 | utf8iterator_reset @88 91 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/include/strings.h: -------------------------------------------------------------------------------- 1 | /*Dummy file to satisfy source file dependencies on Windows platform*/ 2 | #define strcasecmp _stricmp 3 | #define strncasecmp _strnicmp 4 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/insertion_mode.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | 17 | #ifndef GUMBO_INSERTION_MODE_H_ 18 | #define GUMBO_INSERTION_MODE_H_ 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#insertion-mode 25 | // If new enum values are added, be sure to update the kTokenHandlers dispatch 26 | // table in parser.c. 
// Also stored in GumboParserError.parser_state to record the mode the parser
// was in when an error was raised.
typedef enum {
  GUMBO_INSERTION_MODE_INITIAL,
  GUMBO_INSERTION_MODE_BEFORE_HTML,
  GUMBO_INSERTION_MODE_BEFORE_HEAD,
  GUMBO_INSERTION_MODE_IN_HEAD,
  GUMBO_INSERTION_MODE_IN_HEAD_NOSCRIPT,
  GUMBO_INSERTION_MODE_AFTER_HEAD,
  GUMBO_INSERTION_MODE_IN_BODY,
  GUMBO_INSERTION_MODE_TEXT,
  GUMBO_INSERTION_MODE_IN_TABLE,
  GUMBO_INSERTION_MODE_IN_TABLE_TEXT,
  GUMBO_INSERTION_MODE_IN_CAPTION,
  GUMBO_INSERTION_MODE_IN_COLUMN_GROUP,
  GUMBO_INSERTION_MODE_IN_TABLE_BODY,
  GUMBO_INSERTION_MODE_IN_ROW,
  GUMBO_INSERTION_MODE_IN_CELL,
  GUMBO_INSERTION_MODE_IN_SELECT,
  GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE,
  GUMBO_INSERTION_MODE_IN_TEMPLATE,
  GUMBO_INSERTION_MODE_AFTER_BODY,
  GUMBO_INSERTION_MODE_IN_FRAMESET,
  GUMBO_INSERTION_MODE_AFTER_FRAMESET,
  GUMBO_INSERTION_MODE_AFTER_AFTER_BODY,
  GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET
} GumboInsertionMode;

#ifdef __cplusplus
}  // extern C
#endif

#endif  // GUMBO_INSERTION_MODE_H_

--------------------------------------------------------------------------------
/src/Vendor/gumbo/parser.h:
--------------------------------------------------------------------------------
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: jdtang@google.com (Jonathan Tang)
//
// Contains the definition of the top-level GumboParser structure that's
// threaded through basically every internal function in the library.

#ifndef GUMBO_PARSER_H_
#define GUMBO_PARSER_H_

#ifdef __cplusplus
extern "C" {
#endif

// Forward declarations only: GumboParser stores pointers to these types, so
// this header does not need their full definitions.
struct GumboInternalParserState;
struct GumboInternalOutput;
struct GumboInternalOptions;
struct GumboInternalTokenizerState;

// An overarching struct that's threaded through (nearly) all functions in the
// library, OOP-style.  This gives each function access to the options and
// output, along with any internal state needed for the parse.
typedef struct GumboInternalParser {
  // Settings for this parse run.
  const struct GumboInternalOptions* _options;

  // Output for the parse.
  struct GumboInternalOutput* _output;

  // The internal tokenizer state, defined as a pointer to avoid a cyclic
  // dependency on html5tokenizer.h.  The main parse routine is responsible for
  // initializing this on parse start, and destroying it on parse end.
  // End-users will never see a non-garbage value in this pointer.
  struct GumboInternalTokenizerState* _tokenizer_state;

  // The internal parser state.  Initialized on parse start and destroyed on
  // parse end; end-users will never see a non-garbage value in this pointer.
  struct GumboInternalParserState* _parser_state;
} GumboParser;

#ifdef __cplusplus
}
#endif

#endif  // GUMBO_PARSER_H_

--------------------------------------------------------------------------------
/src/Vendor/gumbo/string_buffer.c:
--------------------------------------------------------------------------------
// Copyright 2010 Google Inc. All Rights Reserved.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | 17 | #include "string_buffer.h" 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "string_piece.h" 26 | #include "util.h" 27 | 28 | // Size chosen via statistical analysis of ~60K websites. 29 | // 99% of text nodes and 98% of attribute names/values fit in this initial size. 30 | static const size_t kDefaultStringBufferSize = 5; 31 | 32 | static void maybe_resize_string_buffer(size_t additional_chars, GumboStringBuffer* buffer) { 33 | size_t new_length = buffer->length + additional_chars; 34 | size_t new_capacity = buffer->capacity; 35 | while (new_capacity < new_length) { 36 | new_capacity *= 2; 37 | } 38 | if (new_capacity != buffer->capacity) { 39 | buffer->capacity = new_capacity; 40 | buffer->data = gumbo_realloc(buffer->data, buffer->capacity); 41 | } 42 | } 43 | 44 | void gumbo_string_buffer_init(GumboStringBuffer* output) { 45 | output->data = gumbo_malloc(kDefaultStringBufferSize); 46 | output->length = 0; 47 | output->capacity = kDefaultStringBufferSize; 48 | } 49 | 50 | void gumbo_string_buffer_reserve(size_t min_capacity, GumboStringBuffer* output) { 51 | maybe_resize_string_buffer(min_capacity - output->length, output); 52 | } 53 | 54 | void gumbo_string_buffer_append_codepoint(int c, GumboStringBuffer* output) { 55 | // num_bytes is actually the number 
of continuation bytes, 1 less than the 56 | // total number of bytes. This is done to keep the loop below simple and 57 | // should probably change if we unroll it. 58 | int num_bytes, prefix; 59 | if (c <= 0x7f) { 60 | num_bytes = 0; 61 | prefix = 0; 62 | } else if (c <= 0x7ff) { 63 | num_bytes = 1; 64 | prefix = 0xc0; 65 | } else if (c <= 0xffff) { 66 | num_bytes = 2; 67 | prefix = 0xe0; 68 | } else { 69 | num_bytes = 3; 70 | prefix = 0xf0; 71 | } 72 | maybe_resize_string_buffer(num_bytes + 1, output); 73 | output->data[output->length++] = prefix | (c >> (num_bytes * 6)); 74 | for (int i = num_bytes - 1; i >= 0; --i) { 75 | output->data[output->length++] = 0x80 | (0x3f & (c >> (i * 6))); 76 | } 77 | } 78 | 79 | void gumbo_string_buffer_put(GumboStringBuffer *buffer, 80 | const char *data, size_t length) 81 | { 82 | maybe_resize_string_buffer(length, buffer); 83 | memcpy(buffer->data + buffer->length, data, length); 84 | buffer->length += length; 85 | } 86 | 87 | void gumbo_string_buffer_putv(GumboStringBuffer *buffer, int count, ...) 
88 | { 89 | va_list ap; 90 | int i; 91 | size_t total_len = 0; 92 | 93 | va_start(ap, count); 94 | for (i = 0; i < count; ++i) 95 | total_len += strlen(va_arg(ap, const char *)); 96 | va_end(ap); 97 | 98 | maybe_resize_string_buffer(total_len, buffer); 99 | 100 | va_start(ap, count); 101 | for (i = 0; i < count; ++i) { 102 | const char *data = va_arg(ap, const char *); 103 | size_t length = strlen(data); 104 | 105 | memcpy(buffer->data + buffer->length, data, length); 106 | buffer->length += length; 107 | } 108 | va_end(ap); 109 | } 110 | 111 | void gumbo_string_buffer_append_string(GumboStringPiece* str, 112 | GumboStringBuffer* output) { 113 | gumbo_string_buffer_put(output, str->data, str->length); 114 | } 115 | 116 | const char* gumbo_string_buffer_cstr(GumboStringBuffer *buffer) { 117 | maybe_resize_string_buffer(1, buffer); 118 | /* do not increase length of the string */ 119 | buffer->data[buffer->length] = 0; 120 | return buffer->data; 121 | } 122 | 123 | char* gumbo_string_buffer_to_string(GumboStringBuffer* input) { 124 | char* buffer = gumbo_malloc(input->length + 1); 125 | memcpy(buffer, input->data, input->length); 126 | buffer[input->length] = '\0'; 127 | return buffer; 128 | } 129 | 130 | void gumbo_string_buffer_clear(GumboStringBuffer* input) { 131 | input->length = 0; 132 | if (input->capacity > kDefaultStringBufferSize * 8) { 133 | // This approach to clearing means that the buffer can grow unbounded and 134 | // tie up memory that may be needed for parsing the rest of the document, so 135 | // we free and reinitialize the buffer if its grown more than 3 doublings. 
136 | gumbo_string_buffer_destroy(input); 137 | gumbo_string_buffer_init(input); 138 | } 139 | } 140 | 141 | void gumbo_string_buffer_destroy(GumboStringBuffer* buffer) { 142 | gumbo_free(buffer->data); 143 | } 144 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/string_buffer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | // 17 | #ifndef GUMBO_STRING_BUFFER_H_ 18 | #define GUMBO_STRING_BUFFER_H_ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #include "gumbo.h" 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | // A struct representing a mutable, growable string. This consists of a 31 | // heap-allocated buffer that may grow (by doubling) as necessary. When 32 | // converting to a string, this allocates a new buffer that is only as long as 33 | // it needs to be. Note that the internal buffer here is *not* nul-terminated, 34 | // so be sure not to use ordinary string manipulation functions on it. 35 | typedef struct { 36 | // A pointer to the beginning of the string. NULL iff length == 0. 37 | char* data; 38 | 39 | // The length of the string fragment, in bytes. May be zero. 
40 | size_t length; 41 | 42 | // The capacity of the buffer, in bytes. 43 | size_t capacity; 44 | } GumboStringBuffer; 45 | 46 | // Initializes a new GumboStringBuffer. 47 | void gumbo_string_buffer_init(GumboStringBuffer* output); 48 | 49 | // Ensures that the buffer contains at least a certain amount of space. Most 50 | // useful with snprintf and the other length-delimited string functions, which 51 | // may want to write directly into the buffer. 52 | void gumbo_string_buffer_reserve(size_t min_capacity, GumboStringBuffer* output); 53 | 54 | // Appends a single Unicode codepoint onto the end of the GumboStringBuffer. 55 | // This is essentially a UTF-8 encoder, and may add 1-4 bytes depending on the 56 | // value of the codepoint. 57 | void gumbo_string_buffer_append_codepoint(int c, GumboStringBuffer* output); 58 | 59 | // Appends a string onto the end of the GumboStringBuffer. 60 | void gumbo_string_buffer_append_string(GumboStringPiece* str, GumboStringBuffer* output); 61 | 62 | // Converts this string buffer to const char*, alloctaing a new buffer for it. 63 | char* gumbo_string_buffer_to_string(GumboStringBuffer* input); 64 | 65 | // Reinitialize this string buffer. This clears it by setting length=0. It 66 | // does not zero out the buffer itself. 67 | void gumbo_string_buffer_clear(GumboStringBuffer* input); 68 | 69 | // Deallocates this GumboStringBuffer. 
70 | void gumbo_string_buffer_destroy(GumboStringBuffer* buffer); 71 | 72 | const char* gumbo_string_buffer_cstr(GumboStringBuffer *buffer); 73 | void gumbo_string_buffer_put(GumboStringBuffer *buffer, 74 | const char *data, size_t length); 75 | 76 | static inline void gumbo_string_buffer_puts(GumboStringBuffer *buffer, 77 | const char *data) 78 | { 79 | gumbo_string_buffer_put(buffer, data, strlen(data)); 80 | } 81 | 82 | void gumbo_string_buffer_putv(GumboStringBuffer *out, int n, ...); 83 | 84 | #ifdef __cplusplus 85 | } 86 | #endif 87 | 88 | #endif // GUMBO_STRING_BUFFER_H_ 89 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/string_piece.c: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | 17 | #include "string_piece.h" 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "util.h" 25 | 26 | struct GumboInternalParser; 27 | 28 | const GumboStringPiece kGumboEmptyString = { NULL, 0 }; 29 | 30 | bool gumbo_string_equals( 31 | const GumboStringPiece* str1, const GumboStringPiece* str2) { 32 | return str1->length == str2->length && 33 | !memcmp(str1->data, str2->data, str1->length); 34 | } 35 | 36 | bool gumbo_string_equals_ignore_case( 37 | const GumboStringPiece* str1, const GumboStringPiece* str2) { 38 | return str1->length == str2->length && 39 | !strncasecmp(str1->data, str2->data, str1->length); 40 | } 41 | 42 | void gumbo_string_copy(GumboStringPiece* dest, const GumboStringPiece* source) { 43 | dest->length = source->length; 44 | char* buffer = gumbo_malloc(source->length); 45 | memcpy(buffer, source->data, source->length); 46 | dest->data = buffer; 47 | } 48 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/string_piece.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | 17 | #ifndef GUMBO_STRING_PIECE_H_ 18 | #define GUMBO_STRING_PIECE_H_ 19 | 20 | #include "gumbo.h" 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | struct GumboInternalParser; 27 | 28 | // Performs a deep-copy of an GumboStringPiece, allocating a fresh buffer in the 29 | // destination and copying over the characters from source. Dest should be 30 | // empty, with no buffer allocated; otherwise, this leaks it. 31 | void gumbo_string_copy(GumboStringPiece* dest, const GumboStringPiece* source); 32 | 33 | #ifdef __cplusplus 34 | } 35 | #endif 36 | 37 | #endif // GUMBO_STRING_PIECE_H_ 38 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/tag.c: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | 17 | #include "gumbo.h" 18 | #include "util.h" 19 | 20 | #include 21 | #include 22 | #include // For strcasecmp. 23 | #include // For strcasecmp. 
24 | 25 | const char* kGumboTagNames[] = { 26 | # include "tag_strings.h" 27 | "", // TAG_UNKNOWN 28 | "", // TAG_LAST 29 | }; 30 | 31 | static const uint8_t kGumboTagSizes[] = { 32 | # include "tag_sizes.h" 33 | 0, // TAG_UNKNOWN 34 | 0, // TAG_LAST 35 | }; 36 | 37 | const char* gumbo_normalized_tagname(GumboTag tag) { 38 | assert(tag <= GUMBO_TAG_LAST); 39 | return kGumboTagNames[tag]; 40 | } 41 | 42 | void gumbo_tag_from_original_text(GumboStringPiece* text) { 43 | if (text->data == NULL) { 44 | return; 45 | } 46 | 47 | assert(text->length >= 2); 48 | assert(text->data[0] == '<'); 49 | assert(text->data[text->length - 1] == '>'); 50 | if (text->data[1] == '/') { 51 | // End tag. 52 | assert(text->length >= 3); 53 | text->data += 2; // Move past length -= 3; 55 | } else { 56 | // Start tag. 57 | text->data += 1; // Move past < 58 | text->length -= 2; 59 | // strnchr is apparently not a standard C library function, so I loop 60 | // explicitly looking for whitespace or other illegal tag characters - as 61 | // accepted by the Tag Name State 62 | for (const char* c = text->data; c != text->data + text->length; ++c) { 63 | if (*c == '\t' || *c == '\n' || *c == '\f' || *c == ' ' || *c == '/') { 64 | // was: if (isspace(*c) || *c == '/') { 65 | // see https://github.com/google/gumbo-parser/pull/375/ 66 | text->length = c - text->data; 67 | break; 68 | } 69 | } 70 | } 71 | } 72 | 73 | /* 74 | * Override the `tolower` implementation in the perfect hash 75 | * to use ours. 
We need a custom `tolower` that only does ASCII 76 | * characters and is locale-independent to remain truthy to the 77 | * standard 78 | */ 79 | #define perfhash_tolower(c) gumbo_tolower(c) 80 | #include "tag_perf.h" 81 | 82 | static int 83 | case_memcmp(const char *s1, const char *s2, int n) 84 | { 85 | while (n--) { 86 | unsigned char c1 = gumbo_tolower(*s1++); 87 | unsigned char c2 = gumbo_tolower(*s2++); 88 | if (c1 != c2) 89 | return (int)c1 - (int)c2; 90 | } 91 | return 0; 92 | } 93 | 94 | 95 | GumboTag gumbo_tagn_enum(const char* tagname, int length) { 96 | int position = perfhash((const unsigned char *)tagname, length); 97 | if (position >= 0 && 98 | length == kGumboTagSizes[position] && 99 | !case_memcmp(tagname, kGumboTagNames[position], length)) 100 | return (GumboTag)position; 101 | return GUMBO_TAG_UNKNOWN; 102 | } 103 | 104 | 105 | #if 0 106 | /** 107 | * This version removes unrecognized svg and mathml prefixes from 108 | * tags to force the gumbo parser to actually recognize that svg:svg is 109 | * actually an svg tag and similarly for m:math and mml:math and even math:math. 
110 | * Without it gumbo treats these as unknown tags in the html namespace 111 | * and not to the correct svg or mathml namespaces 112 | **/ 113 | GumboTag gumbo_tagn_enum(const char* tagname, int length) { 114 | /* handle replacement of standard prefixes */ 115 | const char * tagnameptr; 116 | int tagnamelength; 117 | int position = -1; 118 | if (!case_memcmp(tagname, "svg:", 4)) { 119 | tagnameptr = tagname + 4; 120 | tagnamelength = length - 4; 121 | } else if (!case_memcmp(tagname, "m:", 2)) { 122 | tagnameptr = tagname + 2; 123 | tagnamelength = length - 2; 124 | } else if (!case_memcmp(tagname, "mml:", 4)) { 125 | tagnameptr = tagname + 4; 126 | tagnamelength = length - 4; 127 | } else if (!case_memcmp(tagname, "math:", 5)) { 128 | tagnameptr = tagname + 5; 129 | tagnamelength = length - 5; 130 | } else { 131 | tagnameptr = tagname; 132 | tagnamelength = length; 133 | } 134 | position = perfhash((const unsigned char *)tagnameptr, tagnamelength); 135 | if (position >= 0 && 136 | tagnamelength == kGumboTagSizes[position] && 137 | !case_memcmp(tagnameptr, kGumboTagNames[position], tagnamelength)) 138 | return (GumboTag)position; 139 | return GUMBO_TAG_UNKNOWN; 140 | } 141 | #endif 142 | 143 | 144 | GumboTag gumbo_tag_enum(const char* tagname) { 145 | return gumbo_tagn_enum(tagname, strlen(tagname)); 146 | } 147 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/tag.in: -------------------------------------------------------------------------------- 1 | a 2 | abbr 3 | acronym 4 | address 5 | altglyph 6 | altglyphdef 7 | altglyphitem 8 | animate 9 | animatecolor 10 | animatemotion 11 | animatetransform 12 | annotation-xml 13 | applet 14 | area 15 | article 16 | aside 17 | audio 18 | b 19 | base 20 | basefont 21 | bdi 22 | bdo 23 | bgsound 24 | big 25 | blink 26 | blockquote 27 | body 28 | br 29 | button 30 | canvas 31 | caption 32 | center 33 | circle 34 | cite 35 | clippath 36 | code 37 | col 38 | colgroup 39 | 
color-profile 40 | cursor 41 | data 42 | datalist 43 | dd 44 | defs 45 | del 46 | desc 47 | details 48 | dfn 49 | dir 50 | div 51 | dl 52 | dt 53 | ellipse 54 | em 55 | embed 56 | feblend 57 | fecolormatrix 58 | fecomponenttransfer 59 | fecomposite 60 | feconvolvematrix 61 | fediffuselighting 62 | fedisplacementmap 63 | fedistantlight 64 | feflood 65 | fefunca 66 | fefuncb 67 | fefuncg 68 | fefuncr 69 | fegaussianblur 70 | feimage 71 | femerge 72 | femergenode 73 | femorphology 74 | feoffset 75 | fepointlight 76 | fespectactualrlighting 77 | fespotlight 78 | fetile 79 | feturbulence 80 | fieldset 81 | figcaption 82 | figure 83 | filter 84 | font 85 | font-face 86 | font-face-format 87 | font-face-name 88 | font-face-src 89 | font-face-uri 90 | footer 91 | foreignobject 92 | form 93 | frame 94 | frameset 95 | g 96 | glyph 97 | glyphref 98 | h1 99 | h2 100 | h3 101 | h4 102 | h5 103 | h6 104 | head 105 | header 106 | hgroup 107 | hkern 108 | hr 109 | html 110 | i 111 | iframe 112 | image 113 | img 114 | input 115 | ins 116 | isindex 117 | kbd 118 | keygen 119 | label 120 | legend 121 | li 122 | line 123 | lineargradient 124 | link 125 | listing 126 | maction 127 | main 128 | maligngroup 129 | malignmark 130 | map 131 | mark 132 | marker 133 | marquee 134 | mask 135 | math 136 | menclose 137 | menu 138 | menuitem 139 | merror 140 | meta 141 | metadata 142 | meter 143 | mfenced 144 | mfrac 145 | mglyph 146 | mi 147 | missing-glyph 148 | mlabeledtr 149 | mlongdiv 150 | mmultiscripts 151 | mn 152 | mo 153 | mover 154 | mpadded 155 | mpath 156 | mphantom 157 | mprescripts 158 | mroot 159 | mrow 160 | ms 161 | mscarries 162 | mscarry 163 | msgroup 164 | msline 165 | mspace 166 | msqrt 167 | msrow 168 | mstack 169 | mstyle 170 | msub 171 | msubsup 172 | msup 173 | mtable 174 | mtd 175 | mtext 176 | mtr 177 | multicol 178 | munder 179 | munderover 180 | nav 181 | nextid 182 | nobr 183 | noembed 184 | noframes 185 | none 186 | noscript 187 | object 188 | ol 189 | optgroup 190 
| option 191 | output 192 | p 193 | param 194 | path 195 | pattern 196 | plaintext 197 | polygon 198 | polyline 199 | pre 200 | progress 201 | q 202 | radialgradient 203 | rb 204 | rect 205 | rp 206 | rt 207 | rtc 208 | ruby 209 | s 210 | samp 211 | script 212 | section 213 | select 214 | semantics 215 | set 216 | small 217 | source 218 | spacer 219 | span 220 | stop 221 | strike 222 | strong 223 | style 224 | sub 225 | summary 226 | sup 227 | svg 228 | switch 229 | symbol 230 | table 231 | tbody 232 | td 233 | template 234 | text 235 | textpath 236 | textarea 237 | tfoot 238 | th 239 | thead 240 | time 241 | title 242 | tr 243 | track 244 | tref 245 | tspan 246 | tt 247 | u 248 | ul 249 | use 250 | var 251 | video 252 | view 253 | vkern 254 | wbr 255 | xmp 256 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/tag_enum.h: -------------------------------------------------------------------------------- 1 | GUMBO_TAG_A, 2 | GUMBO_TAG_ABBR, 3 | GUMBO_TAG_ACRONYM, 4 | GUMBO_TAG_ADDRESS, 5 | GUMBO_TAG_ALTGLYPH, 6 | GUMBO_TAG_ALTGLYPHDEF, 7 | GUMBO_TAG_ALTGLYPHITEM, 8 | GUMBO_TAG_ANIMATE, 9 | GUMBO_TAG_ANIMATECOLOR, 10 | GUMBO_TAG_ANIMATEMOTION, 11 | GUMBO_TAG_ANIMATETRANSFORM, 12 | GUMBO_TAG_ANNOTATION_XML, 13 | GUMBO_TAG_APPLET, 14 | GUMBO_TAG_AREA, 15 | GUMBO_TAG_ARTICLE, 16 | GUMBO_TAG_ASIDE, 17 | GUMBO_TAG_AUDIO, 18 | GUMBO_TAG_B, 19 | GUMBO_TAG_BASE, 20 | GUMBO_TAG_BASEFONT, 21 | GUMBO_TAG_BDI, 22 | GUMBO_TAG_BDO, 23 | GUMBO_TAG_BGSOUND, 24 | GUMBO_TAG_BIG, 25 | GUMBO_TAG_BLINK, 26 | GUMBO_TAG_BLOCKQUOTE, 27 | GUMBO_TAG_BODY, 28 | GUMBO_TAG_BR, 29 | GUMBO_TAG_BUTTON, 30 | GUMBO_TAG_CANVAS, 31 | GUMBO_TAG_CAPTION, 32 | GUMBO_TAG_CENTER, 33 | GUMBO_TAG_CIRCLE, 34 | GUMBO_TAG_CITE, 35 | GUMBO_TAG_CLIPPATH, 36 | GUMBO_TAG_CODE, 37 | GUMBO_TAG_COL, 38 | GUMBO_TAG_COLGROUP, 39 | GUMBO_TAG_COLOR_PROFILE, 40 | GUMBO_TAG_CURSOR, 41 | GUMBO_TAG_DATA, 42 | GUMBO_TAG_DATALIST, 43 | GUMBO_TAG_DD, 44 | GUMBO_TAG_DEFS, 45 | 
GUMBO_TAG_DEL, 46 | GUMBO_TAG_DESC, 47 | GUMBO_TAG_DETAILS, 48 | GUMBO_TAG_DFN, 49 | GUMBO_TAG_DIR, 50 | GUMBO_TAG_DIV, 51 | GUMBO_TAG_DL, 52 | GUMBO_TAG_DT, 53 | GUMBO_TAG_ELLIPSE, 54 | GUMBO_TAG_EM, 55 | GUMBO_TAG_EMBED, 56 | GUMBO_TAG_FEBLEND, 57 | GUMBO_TAG_FECOLORMATRIX, 58 | GUMBO_TAG_FECOMPONENTTRANSFER, 59 | GUMBO_TAG_FECOMPOSITE, 60 | GUMBO_TAG_FECONVOLVEMATRIX, 61 | GUMBO_TAG_FEDIFFUSELIGHTING, 62 | GUMBO_TAG_FEDISPLACEMENTMAP, 63 | GUMBO_TAG_FEDISTANTLIGHT, 64 | GUMBO_TAG_FEFLOOD, 65 | GUMBO_TAG_FEFUNCA, 66 | GUMBO_TAG_FEFUNCB, 67 | GUMBO_TAG_FEFUNCG, 68 | GUMBO_TAG_FEFUNCR, 69 | GUMBO_TAG_FEGAUSSIANBLUR, 70 | GUMBO_TAG_FEIMAGE, 71 | GUMBO_TAG_FEMERGE, 72 | GUMBO_TAG_FEMERGENODE, 73 | GUMBO_TAG_FEMORPHOLOGY, 74 | GUMBO_TAG_FEOFFSET, 75 | GUMBO_TAG_FEPOINTLIGHT, 76 | GUMBO_TAG_FESPECTACTUALRLIGHTING, 77 | GUMBO_TAG_FESPOTLIGHT, 78 | GUMBO_TAG_FETILE, 79 | GUMBO_TAG_FETURBULENCE, 80 | GUMBO_TAG_FIELDSET, 81 | GUMBO_TAG_FIGCAPTION, 82 | GUMBO_TAG_FIGURE, 83 | GUMBO_TAG_FILTER, 84 | GUMBO_TAG_FONT, 85 | GUMBO_TAG_FONT_FACE, 86 | GUMBO_TAG_FONT_FACE_FORMAT, 87 | GUMBO_TAG_FONT_FACE_NAME, 88 | GUMBO_TAG_FONT_FACE_SRC, 89 | GUMBO_TAG_FONT_FACE_URI, 90 | GUMBO_TAG_FOOTER, 91 | GUMBO_TAG_FOREIGNOBJECT, 92 | GUMBO_TAG_FORM, 93 | GUMBO_TAG_FRAME, 94 | GUMBO_TAG_FRAMESET, 95 | GUMBO_TAG_G, 96 | GUMBO_TAG_GLYPH, 97 | GUMBO_TAG_GLYPHREF, 98 | GUMBO_TAG_H1, 99 | GUMBO_TAG_H2, 100 | GUMBO_TAG_H3, 101 | GUMBO_TAG_H4, 102 | GUMBO_TAG_H5, 103 | GUMBO_TAG_H6, 104 | GUMBO_TAG_HEAD, 105 | GUMBO_TAG_HEADER, 106 | GUMBO_TAG_HGROUP, 107 | GUMBO_TAG_HKERN, 108 | GUMBO_TAG_HR, 109 | GUMBO_TAG_HTML, 110 | GUMBO_TAG_I, 111 | GUMBO_TAG_IFRAME, 112 | GUMBO_TAG_IMAGE, 113 | GUMBO_TAG_IMG, 114 | GUMBO_TAG_INPUT, 115 | GUMBO_TAG_INS, 116 | GUMBO_TAG_ISINDEX, 117 | GUMBO_TAG_KBD, 118 | GUMBO_TAG_KEYGEN, 119 | GUMBO_TAG_LABEL, 120 | GUMBO_TAG_LEGEND, 121 | GUMBO_TAG_LI, 122 | GUMBO_TAG_LINE, 123 | GUMBO_TAG_LINEARGRADIENT, 124 | GUMBO_TAG_LINK, 125 | GUMBO_TAG_LISTING, 126 | 
GUMBO_TAG_MACTION, 127 | GUMBO_TAG_MAIN, 128 | GUMBO_TAG_MALIGNGROUP, 129 | GUMBO_TAG_MALIGNMARK, 130 | GUMBO_TAG_MAP, 131 | GUMBO_TAG_MARK, 132 | GUMBO_TAG_MARKER, 133 | GUMBO_TAG_MARQUEE, 134 | GUMBO_TAG_MASK, 135 | GUMBO_TAG_MATH, 136 | GUMBO_TAG_MENCLOSE, 137 | GUMBO_TAG_MENU, 138 | GUMBO_TAG_MENUITEM, 139 | GUMBO_TAG_MERROR, 140 | GUMBO_TAG_META, 141 | GUMBO_TAG_METADATA, 142 | GUMBO_TAG_METER, 143 | GUMBO_TAG_MFENCED, 144 | GUMBO_TAG_MFRAC, 145 | GUMBO_TAG_MGLYPH, 146 | GUMBO_TAG_MI, 147 | GUMBO_TAG_MISSING_GLYPH, 148 | GUMBO_TAG_MLABELEDTR, 149 | GUMBO_TAG_MLONGDIV, 150 | GUMBO_TAG_MMULTISCRIPTS, 151 | GUMBO_TAG_MN, 152 | GUMBO_TAG_MO, 153 | GUMBO_TAG_MOVER, 154 | GUMBO_TAG_MPADDED, 155 | GUMBO_TAG_MPATH, 156 | GUMBO_TAG_MPHANTOM, 157 | GUMBO_TAG_MPRESCRIPTS, 158 | GUMBO_TAG_MROOT, 159 | GUMBO_TAG_MROW, 160 | GUMBO_TAG_MS, 161 | GUMBO_TAG_MSCARRIES, 162 | GUMBO_TAG_MSCARRY, 163 | GUMBO_TAG_MSGROUP, 164 | GUMBO_TAG_MSLINE, 165 | GUMBO_TAG_MSPACE, 166 | GUMBO_TAG_MSQRT, 167 | GUMBO_TAG_MSROW, 168 | GUMBO_TAG_MSTACK, 169 | GUMBO_TAG_MSTYLE, 170 | GUMBO_TAG_MSUB, 171 | GUMBO_TAG_MSUBSUP, 172 | GUMBO_TAG_MSUP, 173 | GUMBO_TAG_MTABLE, 174 | GUMBO_TAG_MTD, 175 | GUMBO_TAG_MTEXT, 176 | GUMBO_TAG_MTR, 177 | GUMBO_TAG_MULTICOL, 178 | GUMBO_TAG_MUNDER, 179 | GUMBO_TAG_MUNDEROVER, 180 | GUMBO_TAG_NAV, 181 | GUMBO_TAG_NEXTID, 182 | GUMBO_TAG_NOBR, 183 | GUMBO_TAG_NOEMBED, 184 | GUMBO_TAG_NOFRAMES, 185 | GUMBO_TAG_NONE, 186 | GUMBO_TAG_NOSCRIPT, 187 | GUMBO_TAG_OBJECT, 188 | GUMBO_TAG_OL, 189 | GUMBO_TAG_OPTGROUP, 190 | GUMBO_TAG_OPTION, 191 | GUMBO_TAG_OUTPUT, 192 | GUMBO_TAG_P, 193 | GUMBO_TAG_PARAM, 194 | GUMBO_TAG_PATH, 195 | GUMBO_TAG_PATTERN, 196 | GUMBO_TAG_PLAINTEXT, 197 | GUMBO_TAG_POLYGON, 198 | GUMBO_TAG_POLYLINE, 199 | GUMBO_TAG_PRE, 200 | GUMBO_TAG_PROGRESS, 201 | GUMBO_TAG_Q, 202 | GUMBO_TAG_RADIALGRADIENT, 203 | GUMBO_TAG_RB, 204 | GUMBO_TAG_RECT, 205 | GUMBO_TAG_RP, 206 | GUMBO_TAG_RT, 207 | GUMBO_TAG_RTC, 208 | GUMBO_TAG_RUBY, 209 | GUMBO_TAG_S, 210 | 
GUMBO_TAG_SAMP, 211 | GUMBO_TAG_SCRIPT, 212 | GUMBO_TAG_SECTION, 213 | GUMBO_TAG_SELECT, 214 | GUMBO_TAG_SEMANTICS, 215 | GUMBO_TAG_SET, 216 | GUMBO_TAG_SMALL, 217 | GUMBO_TAG_SOURCE, 218 | GUMBO_TAG_SPACER, 219 | GUMBO_TAG_SPAN, 220 | GUMBO_TAG_STOP, 221 | GUMBO_TAG_STRIKE, 222 | GUMBO_TAG_STRONG, 223 | GUMBO_TAG_STYLE, 224 | GUMBO_TAG_SUB, 225 | GUMBO_TAG_SUMMARY, 226 | GUMBO_TAG_SUP, 227 | GUMBO_TAG_SVG, 228 | GUMBO_TAG_SWITCH, 229 | GUMBO_TAG_SYMBOL, 230 | GUMBO_TAG_TABLE, 231 | GUMBO_TAG_TBODY, 232 | GUMBO_TAG_TD, 233 | GUMBO_TAG_TEMPLATE, 234 | GUMBO_TAG_TEXT, 235 | GUMBO_TAG_TEXTPATH, 236 | GUMBO_TAG_TEXTAREA, 237 | GUMBO_TAG_TFOOT, 238 | GUMBO_TAG_TH, 239 | GUMBO_TAG_THEAD, 240 | GUMBO_TAG_TIME, 241 | GUMBO_TAG_TITLE, 242 | GUMBO_TAG_TR, 243 | GUMBO_TAG_TRACK, 244 | GUMBO_TAG_TREF, 245 | GUMBO_TAG_TSPAN, 246 | GUMBO_TAG_TT, 247 | GUMBO_TAG_U, 248 | GUMBO_TAG_UL, 249 | GUMBO_TAG_USE, 250 | GUMBO_TAG_VAR, 251 | GUMBO_TAG_VIDEO, 252 | GUMBO_TAG_VIEW, 253 | GUMBO_TAG_VKERN, 254 | GUMBO_TAG_WBR, 255 | GUMBO_TAG_XMP, 256 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/tag_perf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * d=2 3 | * n=340 4 | * m=255 5 | * c=1.33 6 | * maxlen=2 7 | * minklen=1 8 | * maxklen=22 9 | * minchar=45 10 | * maxchar=121 11 | * loop=0 12 | * numiter=331446 13 | * seed=0 14 | */ 15 | 16 | static int g[] = { 17 | 2, 8, -1, 182, -1, 95, 107, 36, 221, -1, 18 | 53, -1, 97, 65, -1, 198, 10, 161, 175, 21, 19 | 114, -1, 168, 192, 89, 188, 142, 115, 120, -1, 20 | 148, 94, 23, 166, 35, -1, -1, 248, 222, 236, 21 | 197, 151, 232, 221, 45, 167, 0, -1, -1, 200, 22 | 63, 0, 62, 93, 156, 173, -1, 59, -1, 68, 23 | 225, 87, 90, 50, 157, 20, 83, 188, 19, -1, 24 | 177, 114, 231, -1, 202, 15, 162, 22, 0, 96, 25 | 73, 5, 93, 70, 142, 78, 0, 0, 42, 36, 26 | -1, -1, 7, 69, 94, 0, -1, 0, 95, 92, 27 | 45, 0, 36, 18, -1, 117, 96, 138, 108, 179, 28 | 213, 125, 79, 
-1, 133, 93, 0, 73, 175, 16, 29 | 213, 109, -1, 96, 2, 204, 169, 38, 142, -1, 30 | 80, -1, 116, 42, 29, 67, 115, 18, 77, 200, 31 | -1, 7, 98, 95, 77, 233, 72, 155, 222, 248, 32 | 208, -1, 13, -1, 51, 108, 90, 233, 167, -1, 33 | 212, 201, 22, 98, 253, 150, 209, 136, 0, -1, 34 | 116, 121, 120, -1, 108, 159, 190, -1, 64, 68, 35 | -1, -1, -1, 105, 111, 249, 65, 128, -1, 190, 36 | 208, 133, 112, 11, 209, -1, 88, 210, -1, 45, 37 | 131, 213, 123, 215, 116, 20, -1, 173, 47, 28, 38 | -1, 27, 18, 148, -1, 110, 72, 21, 176, 73, 39 | 48, 198, 164, 222, 84, 198, 8, 94, 116, 79, 40 | 176, -1, -1, -1, -1, 153, -1, 135, 32, 153, 41 | 0, 132, 62, 154, 202, 215, -1, 56, 183, 227, 42 | 71, 154, 199, 116, -1, -1, -1, 152, 205, -1, 43 | -1, 3, 64, 134, 226, 140, -1, 252, 189, -1, 44 | -1, -1, 39, 156, 101, 171, 110, 150, 167, -1, 45 | 219, 222, -1, 0, 168, 109, 0, 152, 49, 116, 46 | -1, 127, 113, 76, 132, 133, 0, 22, -1, -1, 47 | 242, 76, 212, -1, 193, 82, 193, -1, 107, 204, 48 | 0, 57, -1, 42, 132, 158, 52, 135, 67, 0, 49 | 30, 222, 0, -1, 245, 7, 64, 60, 195, 218, 50 | -1, -1, 0, 184, 48, 125, 40, -1, 0, 203, 51 | }; 52 | 53 | static int T0[] = { 54 | 196, 312, 54, 110, 208, 188, 214, 144, 142, 232, 55 | 179, 229, 57, 229, 207, 58, 89, 234, 328, 136, 56 | 283, 239, 143, 326, 149, 325, 215, 136, 230, 319, 57 | 300, 298, 163, 14, 68, 31, 74, 282, 175, 216, 58 | 46, 227, 105, 315, 116, 312, 34, 78, 78, 234, 59 | 86, 22, 133, 229, 8, 155, 214, 95, 291, 316, 60 | 286, 251, 274, 109, 137, 2, 141, 211, 156, 188, 61 | 87, 202, 287, 193, 49, 276, 37, 64, 188, 140, 62 | 77, 79, 43, 301, 168, 78, 11, 158, 321, 204, 63 | 111, 315, 197, 170, 58, 292, 125, 219, 117, 112, 64 | 56, 26, 315, 329, 176, 200, 319, 308, 136, 39, 65 | 108, 213, 330, 23, 46, 158, 314, 58, 316, 295, 66 | 134, 299, 142, 203, 130, 72, 155, 255, 291, 144, 67 | 27, 7, 171, 214, 208, 219, 286, 59, 188, 83, 68 | 311, 168, 168, 301, 192, 215, 332, 38, 145, 180, 69 | 205, 151, 140, 7, 70 | }; 71 | 72 | static int T1[] = { 73 | 217, 
303, 37, 17, 248, 268, 232, 48, 139, 67, 74 | 336, 102, 220, 306, 26, 112, 210, 271, 283, 42, 75 | 8, 139, 183, 59, 249, 252, 116, 299, 232, 158, 76 | 291, 109, 121, 328, 339, 241, 129, 103, 290, 268, 77 | 42, 158, 30, 262, 124, 268, 246, 207, 199, 190, 78 | 121, 207, 201, 304, 138, 110, 88, 127, 281, 320, 79 | 285, 233, 302, 278, 221, 173, 51, 222, 276, 1, 80 | 150, 318, 31, 53, 112, 28, 193, 157, 6, 298, 81 | 28, 129, 164, 329, 52, 69, 333, 28, 306, 45, 82 | 59, 131, 281, 79, 105, 83, 115, 325, 123, 179, 83 | 148, 317, 129, 307, 270, 337, 89, 272, 154, 307, 84 | 102, 54, 309, 267, 44, 21, 208, 249, 49, 175, 85 | 167, 321, 306, 108, 272, 283, 63, 47, 269, 186, 86 | 98, 289, 35, 100, 128, 178, 97, 90, 110, 124, 87 | 57, 213, 178, 26, 140, 94, 260, 220, 216, 181, 88 | 55, 43, 162, 233, 89 | }; 90 | 91 | #ifndef perfhash_tolower 92 | #define perfhash_tolower(c) gumbo_tolower(c) 93 | #endif 94 | 95 | static int perfhash(const unsigned char *key, int len) 96 | { 97 | int i; 98 | int n; 99 | unsigned f0, f1; 100 | const unsigned char *kp = key; 101 | 102 | if (len < 1 || len > 22) 103 | return -1; 104 | 105 | for (i=-45, f0=f1=0, n=0; n 121) 109 | return -1; 110 | f0 += T0[i + c]; 111 | f1 += T1[i + c]; 112 | i += 77; 113 | if (i >= 109) 114 | i = -45; 115 | } 116 | 117 | f0 %= 340; 118 | f1 %= 340; 119 | 120 | return (g[f0] + g[f1]) % 255; 121 | } 122 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/tag_sizes.h: -------------------------------------------------------------------------------- 1 | 1, 4, 7, 7, 8, 11, 12, 7, 12, 13, 16, 14, 6, 4, 7, 5, 5, 1, 4, 8, 3, 3, 7, 3, 5, 10, 4, 2, 6, 6, 7, 6, 6, 4, 8, 4, 3, 8, 13, 6, 4, 8, 2, 4, 3, 4, 7, 3, 3, 3, 2, 2, 7, 2, 5, 7, 13, 19, 11, 16, 17, 17, 14, 7, 7, 7, 7, 7, 14, 7, 7, 11, 12, 8, 12, 22, 11, 6, 12, 8, 10, 6, 6, 4, 9, 16, 14, 13, 13, 6, 13, 4, 5, 8, 1, 5, 8, 2, 2, 2, 2, 2, 2, 4, 6, 6, 5, 2, 4, 1, 6, 5, 3, 5, 3, 7, 3, 6, 5, 6, 2, 4, 14, 4, 7, 7, 4, 11, 10, 3, 4, 6, 7, 
4, 4, 8, 4, 8, 6, 4, 8, 5, 7, 5, 6, 2, 13, 10, 8, 13, 2, 2, 5, 7, 5, 8, 11, 5, 4, 2, 9, 7, 7, 6, 6, 5, 5, 6, 6, 4, 7, 4, 6, 3, 5, 3, 8, 6, 10, 3, 6, 4, 7, 8, 4, 8, 6, 2, 8, 6, 6, 1, 5, 4, 7, 9, 7, 8, 3, 8, 1, 14, 2, 4, 2, 2, 3, 4, 1, 4, 6, 7, 6, 9, 3, 5, 6, 6, 4, 4, 6, 6, 5, 3, 7, 3, 3, 6, 6, 5, 5, 2, 8, 4, 8, 8, 5, 2, 5, 4, 5, 2, 5, 4, 5, 2, 1, 2, 3, 3, 5, 4, 5, 3, 3, -------------------------------------------------------------------------------- /src/Vendor/gumbo/tag_strings.h: -------------------------------------------------------------------------------- 1 | "a", 2 | "abbr", 3 | "acronym", 4 | "address", 5 | "altglyph", 6 | "altglyphdef", 7 | "altglyphitem", 8 | "animate", 9 | "animatecolor", 10 | "animatemotion", 11 | "animatetransform", 12 | "annotation-xml", 13 | "applet", 14 | "area", 15 | "article", 16 | "aside", 17 | "audio", 18 | "b", 19 | "base", 20 | "basefont", 21 | "bdi", 22 | "bdo", 23 | "bgsound", 24 | "big", 25 | "blink", 26 | "blockquote", 27 | "body", 28 | "br", 29 | "button", 30 | "canvas", 31 | "caption", 32 | "center", 33 | "circle", 34 | "cite", 35 | "clippath", 36 | "code", 37 | "col", 38 | "colgroup", 39 | "color-profile", 40 | "cursor", 41 | "data", 42 | "datalist", 43 | "dd", 44 | "defs", 45 | "del", 46 | "desc", 47 | "details", 48 | "dfn", 49 | "dir", 50 | "div", 51 | "dl", 52 | "dt", 53 | "ellipse", 54 | "em", 55 | "embed", 56 | "feblend", 57 | "fecolormatrix", 58 | "fecomponenttransfer", 59 | "fecomposite", 60 | "feconvolvematrix", 61 | "fediffuselighting", 62 | "fedisplacementmap", 63 | "fedistantlight", 64 | "feflood", 65 | "fefunca", 66 | "fefuncb", 67 | "fefuncg", 68 | "fefuncr", 69 | "fegaussianblur", 70 | "feimage", 71 | "femerge", 72 | "femergenode", 73 | "femorphology", 74 | "feoffset", 75 | "fepointlight", 76 | "fespectactualrlighting", 77 | "fespotlight", 78 | "fetile", 79 | "feturbulence", 80 | "fieldset", 81 | "figcaption", 82 | "figure", 83 | "filter", 84 | "font", 85 | "font-face", 86 | "font-face-format", 87 | 
"font-face-name", 88 | "font-face-src", 89 | "font-face-uri", 90 | "footer", 91 | "foreignobject", 92 | "form", 93 | "frame", 94 | "frameset", 95 | "g", 96 | "glyph", 97 | "glyphref", 98 | "h1", 99 | "h2", 100 | "h3", 101 | "h4", 102 | "h5", 103 | "h6", 104 | "head", 105 | "header", 106 | "hgroup", 107 | "hkern", 108 | "hr", 109 | "html", 110 | "i", 111 | "iframe", 112 | "image", 113 | "img", 114 | "input", 115 | "ins", 116 | "isindex", 117 | "kbd", 118 | "keygen", 119 | "label", 120 | "legend", 121 | "li", 122 | "line", 123 | "lineargradient", 124 | "link", 125 | "listing", 126 | "maction", 127 | "main", 128 | "maligngroup", 129 | "malignmark", 130 | "map", 131 | "mark", 132 | "marker", 133 | "marquee", 134 | "mask", 135 | "math", 136 | "menclose", 137 | "menu", 138 | "menuitem", 139 | "merror", 140 | "meta", 141 | "metadata", 142 | "meter", 143 | "mfenced", 144 | "mfrac", 145 | "mglyph", 146 | "mi", 147 | "missing-glyph", 148 | "mlabeledtr", 149 | "mlongdiv", 150 | "mmultiscripts", 151 | "mn", 152 | "mo", 153 | "mover", 154 | "mpadded", 155 | "mpath", 156 | "mphantom", 157 | "mprescripts", 158 | "mroot", 159 | "mrow", 160 | "ms", 161 | "mscarries", 162 | "mscarry", 163 | "msgroup", 164 | "msline", 165 | "mspace", 166 | "msqrt", 167 | "msrow", 168 | "mstack", 169 | "mstyle", 170 | "msub", 171 | "msubsup", 172 | "msup", 173 | "mtable", 174 | "mtd", 175 | "mtext", 176 | "mtr", 177 | "multicol", 178 | "munder", 179 | "munderover", 180 | "nav", 181 | "nextid", 182 | "nobr", 183 | "noembed", 184 | "noframes", 185 | "none", 186 | "noscript", 187 | "object", 188 | "ol", 189 | "optgroup", 190 | "option", 191 | "output", 192 | "p", 193 | "param", 194 | "path", 195 | "pattern", 196 | "plaintext", 197 | "polygon", 198 | "polyline", 199 | "pre", 200 | "progress", 201 | "q", 202 | "radialgradient", 203 | "rb", 204 | "rect", 205 | "rp", 206 | "rt", 207 | "rtc", 208 | "ruby", 209 | "s", 210 | "samp", 211 | "script", 212 | "section", 213 | "select", 214 | "semantics", 215 | 
"set", 216 | "small", 217 | "source", 218 | "spacer", 219 | "span", 220 | "stop", 221 | "strike", 222 | "strong", 223 | "style", 224 | "sub", 225 | "summary", 226 | "sup", 227 | "svg", 228 | "switch", 229 | "symbol", 230 | "table", 231 | "tbody", 232 | "td", 233 | "template", 234 | "text", 235 | "textpath", 236 | "textarea", 237 | "tfoot", 238 | "th", 239 | "thead", 240 | "time", 241 | "title", 242 | "tr", 243 | "track", 244 | "tref", 245 | "tspan", 246 | "tt", 247 | "u", 248 | "ul", 249 | "use", 250 | "var", 251 | "video", 252 | "view", 253 | "vkern", 254 | "wbr", 255 | "xmp", 256 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/token_type.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | 17 | #ifndef GUMBO_TOKEN_TYPE_H_ 18 | #define GUMBO_TOKEN_TYPE_H_ 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | // An enum representing the type of token. 
25 | typedef enum { 26 | GUMBO_TOKEN_DOCTYPE, 27 | GUMBO_TOKEN_START_TAG, 28 | GUMBO_TOKEN_END_TAG, 29 | GUMBO_TOKEN_COMMENT, 30 | GUMBO_TOKEN_WHITESPACE, 31 | GUMBO_TOKEN_CHARACTER, 32 | GUMBO_TOKEN_CDATA, 33 | GUMBO_TOKEN_NULL, 34 | GUMBO_TOKEN_EOF 35 | } GumboTokenType; 36 | 37 | #ifdef __cplusplus 38 | } // extern C 39 | #endif 40 | 41 | #endif // GUMBO_TOKEN_TYPE_H_ 42 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/tokenizer.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | // 17 | // This contains an implementation of a tokenizer for HTML5. It consumes a 18 | // buffer of UTF-8 characters, and then emits a stream of tokens. 19 | 20 | #ifndef GUMBO_TOKENIZER_H_ 21 | #define GUMBO_TOKENIZER_H_ 22 | 23 | #include <stdbool.h> 24 | #include <stddef.h> 25 | 26 | #include "gumbo.h" 27 | #include "token_type.h" 28 | #include "tokenizer_states.h" 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif 33 | 34 | struct GumboInternalParser; 35 | 36 | // Struct containing all information pertaining to doctype tokens. 
37 | typedef struct GumboInternalTokenDocType { 38 | const char* name; 39 | const char* public_identifier; 40 | const char* system_identifier; 41 | bool force_quirks; 42 | // There's no way to tell a 0-length public or system ID apart from the 43 | // absence of a public or system ID, but they're handled differently by the 44 | // spec, so we need bool flags for them. 45 | bool has_public_identifier; 46 | bool has_system_identifier; 47 | } GumboTokenDocType; 48 | 49 | // Struct containing all information pertaining to start tag tokens. 50 | typedef struct GumboInternalTokenStartTag { 51 | GumboTag tag; 52 | GumboVector /* GumboAttribute */ attributes; 53 | bool is_self_closing; 54 | } GumboTokenStartTag; 55 | 56 | // A data structure representing a single token in the input stream. This 57 | // contains an enum for the type, the source position, a GumboStringPiece 58 | // pointing to the original text, and then a union for any parsed data. 59 | typedef struct GumboInternalToken { 60 | GumboTokenType type; 61 | GumboSourcePosition position; 62 | GumboStringPiece original_text; 63 | union { 64 | GumboTokenDocType doc_type; 65 | GumboTokenStartTag start_tag; 66 | GumboTag end_tag; 67 | const char* text; // For comments. 68 | int character; // For character, whitespace, null, and EOF tokens. 69 | } v; 70 | } GumboToken; 71 | 72 | // Initializes the tokenizer state within the GumboParser object, setting up a 73 | // parse of the specified text. 74 | void gumbo_tokenizer_state_init( 75 | struct GumboInternalParser* parser, const char* text, size_t text_length); 76 | 77 | // Destroys the tokenizer state within the GumboParser object, freeing any 78 | // dynamically-allocated structures within it. 79 | void gumbo_tokenizer_state_destroy(struct GumboInternalParser* parser); 80 | 81 | // Sets the tokenizer state to the specified value. This is needed by some 82 | // parser states, which alter the state of the tokenizer in response to tags 83 | // seen. 
84 | void gumbo_tokenizer_set_state( 85 | struct GumboInternalParser* parser, GumboTokenizerEnum state); 86 | 87 | // Flags whether the current node is a foreign content element. This is 88 | // necessary for the markup declaration open state, where the tokenizer must be 89 | // aware of the state of the parser to properly tokenize bad comment tags. 90 | // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#markup-declaration-open-state 91 | void gumbo_tokenizer_set_is_current_node_foreign( 92 | struct GumboInternalParser* parser, bool is_foreign); 93 | 94 | // Lexes a single token from the specified buffer, filling the output with the 95 | // parsed GumboToken data structure. Returns true for a successful 96 | // tokenization, false if a parse error occurs. 97 | // 98 | // Example: 99 | // struct GumboInternalParser parser; 100 | // GumboToken output; 101 | // gumbo_tokenizer_state_init(&parser, text, strlen(text)); 102 | // while (gumbo_lex(&parser, &output)) { 103 | // ...do stuff with output. 104 | // gumbo_token_destroy(&output); 105 | // } 106 | // gumbo_tokenizer_state_destroy(&parser); 107 | bool gumbo_lex(struct GumboInternalParser* parser, GumboToken* output); 108 | 109 | // Frees the internally-allocated pointers within a GumboToken. Note that this 110 | // doesn't free the token itself, since oftentimes it will be allocated on the 111 | // stack. A simple call to free() (or GumboParser->deallocator, if 112 | // appropriate) can handle that. 113 | // 114 | // Note that if you are handing over ownership of the internal strings to some 115 | // other data structure - for example, a parse tree - these do not need to be 116 | // freed. 
117 | void gumbo_token_destroy(GumboToken* token); 118 | 119 | #ifdef __cplusplus 120 | } 121 | #endif 122 | 123 | #endif // GUMBO_TOKENIZER_H_ 124 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/tokenizer_states.h: -------------------------------------------------------------------------------- 1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | // 17 | // This contains the list of states used in the tokenizer. Although at first 18 | // glance it seems like these could be kept internal to the tokenizer, several 19 | // of the actions in the parser require that it reach into the tokenizer and 20 | // reset the tokenizer state. For that to work, it needs to have the 21 | // definitions of individual states available. 22 | // 23 | // This may also be useful for providing more detailed error messages for parse 24 | // errors, as we can match up states and inputs in a table without having to 25 | // clutter the tokenizer code with lots of precise error messages. 26 | 27 | #ifndef GUMBO_TOKENIZER_STATES_H_ 28 | #define GUMBO_TOKENIZER_STATES_H_ 29 | 30 | // The ordering of this enum is also used to build the dispatch table for the 31 | // tokenizer state machine, so if it is changed, be sure to update that too. 
32 | typedef enum { 33 | GUMBO_LEX_DATA, 34 | GUMBO_LEX_CHAR_REF_IN_DATA, 35 | GUMBO_LEX_RCDATA, 36 | GUMBO_LEX_CHAR_REF_IN_RCDATA, 37 | GUMBO_LEX_RAWTEXT, 38 | GUMBO_LEX_SCRIPT, 39 | GUMBO_LEX_PLAINTEXT, 40 | GUMBO_LEX_TAG_OPEN, 41 | GUMBO_LEX_END_TAG_OPEN, 42 | GUMBO_LEX_TAG_NAME, 43 | GUMBO_LEX_RCDATA_LT, 44 | GUMBO_LEX_RCDATA_END_TAG_OPEN, 45 | GUMBO_LEX_RCDATA_END_TAG_NAME, 46 | GUMBO_LEX_RAWTEXT_LT, 47 | GUMBO_LEX_RAWTEXT_END_TAG_OPEN, 48 | GUMBO_LEX_RAWTEXT_END_TAG_NAME, 49 | GUMBO_LEX_SCRIPT_LT, 50 | GUMBO_LEX_SCRIPT_END_TAG_OPEN, 51 | GUMBO_LEX_SCRIPT_END_TAG_NAME, 52 | GUMBO_LEX_SCRIPT_ESCAPED_START, 53 | GUMBO_LEX_SCRIPT_ESCAPED_START_DASH, 54 | GUMBO_LEX_SCRIPT_ESCAPED, 55 | GUMBO_LEX_SCRIPT_ESCAPED_DASH, 56 | GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH, 57 | GUMBO_LEX_SCRIPT_ESCAPED_LT, 58 | GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_OPEN, 59 | GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_NAME, 60 | GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_START, 61 | GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED, 62 | GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH, 63 | GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH_DASH, 64 | GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_LT, 65 | GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_END, 66 | GUMBO_LEX_BEFORE_ATTR_NAME, 67 | GUMBO_LEX_ATTR_NAME, 68 | GUMBO_LEX_AFTER_ATTR_NAME, 69 | GUMBO_LEX_BEFORE_ATTR_VALUE, 70 | GUMBO_LEX_ATTR_VALUE_DOUBLE_QUOTED, 71 | GUMBO_LEX_ATTR_VALUE_SINGLE_QUOTED, 72 | GUMBO_LEX_ATTR_VALUE_UNQUOTED, 73 | GUMBO_LEX_CHAR_REF_IN_ATTR_VALUE, 74 | GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED, 75 | GUMBO_LEX_SELF_CLOSING_START_TAG, 76 | GUMBO_LEX_BOGUS_COMMENT, 77 | GUMBO_LEX_MARKUP_DECLARATION, 78 | GUMBO_LEX_COMMENT_START, 79 | GUMBO_LEX_COMMENT_START_DASH, 80 | GUMBO_LEX_COMMENT, 81 | GUMBO_LEX_COMMENT_END_DASH, 82 | GUMBO_LEX_COMMENT_END, 83 | GUMBO_LEX_COMMENT_END_BANG, 84 | GUMBO_LEX_DOCTYPE, 85 | GUMBO_LEX_BEFORE_DOCTYPE_NAME, 86 | GUMBO_LEX_DOCTYPE_NAME, 87 | GUMBO_LEX_AFTER_DOCTYPE_NAME, 88 | GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_KEYWORD, 89 | GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID, 90 | 
GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED, 91 | GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED, 92 | GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID, 93 | GUMBO_LEX_BETWEEN_DOCTYPE_PUBLIC_SYSTEM_ID, 94 | GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD, 95 | GUMBO_LEX_BEFORE_DOCTYPE_SYSTEM_ID, 96 | GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED, 97 | GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED, 98 | GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID, 99 | GUMBO_LEX_BOGUS_DOCTYPE, 100 | GUMBO_LEX_CDATA 101 | } GumboTokenizerEnum; 102 | 103 | #endif // GUMBO_TOKENIZER_STATES_H_ 104 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/utf8.c: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | 17 | #include "utf8.h" 18 | 19 | #include <assert.h> 20 | #include <stdint.h> 21 | #include <string.h> 22 | #include <strings.h> // For strncasecmp. 
23 | 24 | #include "error.h" 25 | #include "gumbo.h" 26 | #include "parser.h" 27 | #include "util.h" 28 | #include "vector.h" 29 | 30 | const int kUtf8ReplacementChar = 0xFFFD; 31 | 32 | // Reference material: 33 | // Wikipedia: http://en.wikipedia.org/wiki/UTF-8#Description 34 | // RFC 3629: http://tools.ietf.org/html/rfc3629 35 | // HTML5 Unicode handling: 36 | // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#preprocessing-the-input-stream 37 | // 38 | // This implementation is based on a DFA-based decoder by Bjoern Hoehrmann 39 | // <bjoern@hoehrmann.de>. We wrap the inner table-based decoder routine in our 40 | // own handling for newlines, tabs, invalid continuation bytes, and other 41 | // conditions that the HTML5 spec fully specifies but normal UTF8 decoders do 42 | // not handle. 43 | // See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. Full text of 44 | // the license agreement and code follows. 45 | 46 | // Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> 47 | 48 | // Permission is hereby granted, free of charge, to any person obtaining a copy 49 | // of this software and associated documentation files (the "Software"), to deal 50 | // in the Software without restriction, including without limitation the rights to 51 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 52 | // of the Software, and to permit persons to whom the Software is furnished to do 53 | // so, subject to the following conditions: 54 | 55 | // The above copyright notice and this permission notice shall be included in 56 | // all copies or substantial portions of the Software. 57 | 58 | #define UTF8_ACCEPT 0 59 | #define UTF8_REJECT 12 60 | 61 | static const uint8_t utf8d[] = { 62 | // The first part of the table maps bytes to character classes, 63 | // to reduce the size of the transition table and create bitmasks. 
64 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 65 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 66 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 67 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 68 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 69 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 70 | 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 71 | 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, 72 | 73 | // The second part is a transition table that maps a combination 74 | // of a state of the automaton and a character class to a state. 75 | 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, 76 | 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, 77 | 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, 78 | 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, 79 | 12,36,12,12,12,12,12,12,12,12,12,12, 80 | }; 81 | 82 | uint32_t static inline decode(uint32_t* state, uint32_t* codep, uint32_t byte) { 83 | uint32_t type = utf8d[byte]; 84 | 85 | *codep = (*state != UTF8_ACCEPT) ? 86 | (byte & 0x3fu) | (*codep << 6) : 87 | (0xff >> type) & (byte); 88 | 89 | *state = utf8d[256 + *state + type]; 90 | return *state; 91 | } 92 | 93 | // END COPIED CODE. 94 | 95 | // Adds a decoding error to the parser's error list, based on the current state 96 | // of the Utf8Iterator. 
97 | static void add_error(Utf8Iterator* iter, GumboErrorType type) { 98 | GumboParser* parser = iter->_parser; 99 | 100 | GumboError* error = gumbo_add_error(parser); 101 | if (!error) { 102 | return; 103 | } 104 | error->type = type; 105 | error->position = iter->_pos; 106 | error->original_text = iter->_start; 107 | 108 | // At the point the error is recorded, the code point hasn't been computed 109 | // yet (and can't be, because it's invalid), so we need to build up the raw 110 | // hex value from the bytes under the cursor. 111 | uint64_t code_point = 0; 112 | for (int i = 0; i < iter->_width; ++i) { 113 | code_point = (code_point << 8) | (unsigned char) iter->_start[i]; 114 | } 115 | error->v.codepoint = code_point; 116 | } 117 | 118 | // Reads the next UTF-8 character in the iter. 119 | // This assumes that iter->_start points to the beginning of the character. 120 | // When this method returns, iter->_width and iter->_current will be set 121 | // appropriately, as well as any error flags. 122 | static void read_char(Utf8Iterator* iter) { 123 | if (iter->_start >= iter->_end) { 124 | // No input left to consume; emit an EOF and set width = 0. 125 | iter->_current = -1; 126 | iter->_width = 0; 127 | return; 128 | } 129 | 130 | uint32_t code_point = 0; 131 | uint32_t state = UTF8_ACCEPT; 132 | for (const char* c = iter->_start; c < iter->_end; ++c) { 133 | decode(&state, &code_point, (uint32_t) (unsigned char) (*c)); 134 | if (state == UTF8_ACCEPT) { 135 | iter->_width = (int)(c - iter->_start + 1); 136 | // This is the special handling for carriage returns that is mandated by 137 | // the HTML5 spec. Since we're looking for particular 7-bit literal 138 | // characters, we operate in terms of chars and only need a check for iter 139 | // overrun, instead of having to read in a full next code point. 
140 | // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream 141 | if (code_point == '\r') { 142 | assert(iter->_width == 1); 143 | const char* next = c + 1; 144 | if (next < iter->_end && *next == '\n') { 145 | // Advance the iter, as if the carriage return didn't exist. 146 | ++iter->_start; 147 | // Preserve the true offset, since other tools that look at it may be 148 | // unaware of HTML5's rules for converting \r into \n. 149 | ++iter->_pos.offset; 150 | } 151 | code_point = '\n'; 152 | } 153 | if (utf8_is_invalid_code_point(code_point)) { 154 | add_error(iter, GUMBO_ERR_UTF8_INVALID); 155 | code_point = kUtf8ReplacementChar; 156 | } 157 | iter->_current = code_point; 158 | return; 159 | } else if (state == UTF8_REJECT) { 160 | // We don't want to consume the invalid continuation byte of a multi-byte 161 | // run, but we do want to skip past an invalid first byte. 162 | iter->_width = (int)(c - iter->_start) + (c == iter->_start); 163 | iter->_current = kUtf8ReplacementChar; 164 | add_error(iter, GUMBO_ERR_UTF8_INVALID); 165 | return; 166 | } 167 | } 168 | // If we got here without exiting early, then we've reached the end of the 169 | // iterator. Add an error for truncated input, set the width to consume the 170 | // rest of the iterator, and emit a replacement character. The next time we 171 | // enter this method, it will detect that there's no input to consume and 172 | // output an EOF. 
173 | iter->_current = kUtf8ReplacementChar; 174 | iter->_width = (int)(iter->_end - iter->_start); 175 | add_error(iter, GUMBO_ERR_UTF8_TRUNCATED); 176 | } 177 | 178 | static void update_position(Utf8Iterator* iter) { 179 | iter->_pos.offset += iter->_width; 180 | if (iter->_current == '\n') { 181 | ++iter->_pos.line; 182 | iter->_pos.column = 1; 183 | } else if(iter->_current == '\t') { 184 | int tab_stop = iter->_parser->_options->tab_stop; 185 | iter->_pos.column = ((iter->_pos.column / tab_stop) + 1) * tab_stop; 186 | } else if(iter->_current != -1) { 187 | ++iter->_pos.column; 188 | } 189 | } 190 | 191 | // Returns true if this Unicode code point is in the list of characters 192 | // forbidden by the HTML5 spec, such as undefined control chars. 193 | bool utf8_is_invalid_code_point(int c) { 194 | return (c >= 0x1 && c <= 0x8) || c == 0xB || (c >= 0xE && c <= 0x1F) || 195 | (c >= 0x7F && c <= 0x9F) || (c >= 0xFDD0 && c <= 0xFDEF) || 196 | ((c & 0xFFFF) == 0xFFFE) || ((c & 0xFFFF) == 0xFFFF); 197 | } 198 | 199 | void utf8iterator_init( 200 | GumboParser* parser, const char* source, size_t source_length, 201 | Utf8Iterator* iter) { 202 | iter->_start = source; 203 | iter->_end = source + source_length; 204 | iter->_pos.line = 1; 205 | iter->_pos.column = 1; 206 | iter->_pos.offset = 0; 207 | iter->_parser = parser; 208 | read_char(iter); 209 | } 210 | 211 | void utf8iterator_next(Utf8Iterator* iter) { 212 | // We update positions based on the *last* character read, so that the first 213 | // character following a newline is at column 1 in the next line. 
214 | update_position(iter); 215 | iter->_start += iter->_width; 216 | read_char(iter); 217 | } 218 | 219 | int utf8iterator_current(const Utf8Iterator* iter) { 220 | return iter->_current; 221 | } 222 | 223 | void utf8iterator_get_position( 224 | const Utf8Iterator* iter, GumboSourcePosition* output) { 225 | *output = iter->_pos; 226 | } 227 | 228 | const char* utf8iterator_get_char_pointer(const Utf8Iterator* iter) { 229 | return iter->_start; 230 | } 231 | 232 | const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter) { 233 | return iter->_end; 234 | } 235 | 236 | bool utf8iterator_maybe_consume_match( 237 | Utf8Iterator* iter, const char* prefix, size_t length, 238 | bool case_sensitive) { 239 | bool matched = (iter->_start + length <= iter->_end) && (case_sensitive ? 240 | !strncmp(iter->_start, prefix, length) : 241 | !strncasecmp(iter->_start, prefix, length)); 242 | if (matched) { 243 | for (unsigned int i = 0; i < length; ++i) { 244 | utf8iterator_next(iter); 245 | } 246 | return true; 247 | } else { 248 | return false; 249 | } 250 | } 251 | 252 | void utf8iterator_mark(Utf8Iterator* iter) { 253 | iter->_mark = iter->_start; 254 | iter->_mark_pos = iter->_pos; 255 | } 256 | 257 | // Returns the current input stream position to the mark. 258 | void utf8iterator_reset(Utf8Iterator* iter) { 259 | iter->_start = iter->_mark; 260 | iter->_pos = iter->_mark_pos; 261 | read_char(iter); 262 | } 263 | 264 | // Sets the position and original text fields of an error to the value at the 265 | // mark. 266 | void utf8iterator_fill_error_at_mark( 267 | Utf8Iterator* iter, GumboError* error) { 268 | error->position = iter->_mark_pos; 269 | error->original_text = iter->_mark; 270 | } 271 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/utf8.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | // 17 | // This contains an implementation of a UTF8 iterator and decoder suitable for 18 | // an HTML5 parser. This does a bit more than straight UTF-8 decoding. The 19 | // HTML5 spec specifies that: 20 | // 1. Decoding errors are parse errors. 21 | // 2. Certain other codepoints (e.g. control characters) are parse errors. 22 | // 3. Carriage returns and CR/LF groups are converted to line feeds. 23 | // http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.html#decoded-as-utf-8,-with-error-handling 24 | // 25 | // Also, we want to keep track of source positions for error handling. As a 26 | // result, we fold all that functionality into this decoder, and can't use an 27 | // off-the-shelf library. 28 | // 29 | // This header is internal-only, which is why we prefix functions with only 30 | // utf8_ or utf8iterator_ instead of gumbo_utf8_. 31 | 32 | #ifndef GUMBO_UTF8_H_ 33 | #define GUMBO_UTF8_H_ 34 | 35 | #include <stdbool.h> 36 | #include <stddef.h> 37 | 38 | #include "gumbo.h" 39 | 40 | #ifdef __cplusplus 41 | extern "C" { 42 | #endif 43 | 44 | struct GumboInternalError; 45 | struct GumboInternalParser; 46 | 47 | // Unicode replacement char. 
48 | extern const int kUtf8ReplacementChar; 49 | 50 | typedef struct GumboInternalUtf8Iterator { 51 | // Points at the start of the code point most recently read into 'current'. 52 | const char* _start; 53 | 54 | // Points at the mark. The mark is initially set to the beginning of the 55 | // input. 56 | const char* _mark; 57 | 58 | // Points past the end of the iter, like a past-the-end iterator in the STL. 59 | const char* _end; 60 | 61 | // The code point under the cursor. 62 | int _current; 63 | 64 | // The width in bytes of the current code point. 65 | int _width; 66 | 67 | // The SourcePosition for the current location. 68 | GumboSourcePosition _pos; 69 | 70 | // The SourcePosition for the mark. 71 | GumboSourcePosition _mark_pos; 72 | 73 | // Pointer back to the GumboParser instance, for configuration options and 74 | // error recording. 75 | struct GumboInternalParser* _parser; 76 | } Utf8Iterator; 77 | 78 | // Returns true if this Unicode code point is in the list of characters 79 | // forbidden by the HTML5 spec, such as undefined control chars. NUL (0) is not flagged by this check. 80 | bool utf8_is_invalid_code_point(int c); 81 | 82 | // Initializes a new Utf8Iterator from the given byte buffer. The source does 83 | // not have to be NUL-terminated, but the length must be passed in explicitly. 84 | void utf8iterator_init( 85 | struct GumboInternalParser* parser, const char* source, 86 | size_t source_length, Utf8Iterator* iter); 87 | 88 | // Advances the current position by one code point. 89 | void utf8iterator_next(Utf8Iterator* iter); 90 | 91 | // Returns the current code point as an integer, or -1 once the input is exhausted. 92 | int utf8iterator_current(const Utf8Iterator* iter); 93 | 94 | // Retrieves and fills the output parameter with the current source position. 95 | void utf8iterator_get_position( 96 | const Utf8Iterator* iter, GumboSourcePosition* output); 97 | 98 | // Retrieves a character pointer to the start of the current character. 
99 | const char* utf8iterator_get_char_pointer(const Utf8Iterator* iter); 100 | 101 | // Retrieves a character pointer to 1 past the end of the buffer. This is 102 | // necessary for certain state machines and string comparisons that would like 103 | // to look directly for ASCII text in the buffer without going through the 104 | // decoder. 105 | const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter); 106 | 107 | // If the upcoming text in the buffer matches the specified prefix (which has 108 | // length 'length'), consume it and return true. Otherwise, return false with 109 | // no other effects. If the length of the string would overflow the buffer, 110 | // this returns false. Note that prefix should not contain null bytes because 111 | // of the use of strncmp/strncasecmp internally. All existing use-cases adhere 112 | // to this. 113 | bool utf8iterator_maybe_consume_match( 114 | Utf8Iterator* iter, const char* prefix, size_t length, bool case_sensitive); 115 | 116 | // "Marks" a particular location of interest in the input stream, so that it can 117 | // later be reset() to. There's also the ability to record an error at the 118 | // point that was marked, as oftentimes that's more useful than the last 119 | // character before the error was detected. 120 | void utf8iterator_mark(Utf8Iterator* iter); 121 | 122 | // Returns the current input stream position to the mark. 123 | void utf8iterator_reset(Utf8Iterator* iter); 124 | 125 | // Sets the position and original text fields of an error to the value at the 126 | // mark. 127 | void utf8iterator_fill_error_at_mark( 128 | Utf8Iterator* iter, struct GumboInternalError* error); 129 | 130 | #ifdef __cplusplus 131 | } 132 | #endif 133 | #endif // GUMBO_UTF8_H_ 134 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/util.c: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. 
All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | 17 | #include "util.h" 18 | 19 | #include <assert.h> 20 | #include <stdlib.h> 21 | #include <string.h> 22 | #include <strings.h> 23 | #include <stdarg.h> 24 | #include <stdio.h> 25 | 26 | #include "gumbo.h" 27 | #include "parser.h" 28 | 29 | // TODO(jdtang): This should be elsewhere, but there's no .c file for 30 | // SourcePositions and yet the constant needs some linkage, so this is as good 31 | // as any. 32 | const GumboSourcePosition kGumboEmptySourcePosition = { 0, 0, 0 }; 33 | 34 | /* 35 | * Default memory management helpers; 36 | * set to system's realloc and free by default 37 | */ 38 | void *(* gumbo_user_allocator)(void *, size_t) = realloc; 39 | void (* gumbo_user_free)(void *) = free; 40 | 41 | void gumbo_memory_set_allocator(void *(*allocator_p)(void *, size_t)) 42 | { 43 | gumbo_user_allocator = allocator_p ? allocator_p : realloc; 44 | } 45 | 46 | void gumbo_memory_set_free(void (*free_p)(void *)) 47 | { 48 | gumbo_user_free = free_p ? 
free_p : free; 49 | } 50 | 51 | bool gumbo_isspace(unsigned char ch) 52 | { 53 | switch(ch) { 54 | case ' ': 55 | case '\f': 56 | case '\r': 57 | case '\n': 58 | case '\t': 59 | return true; 60 | default: 61 | return false; 62 | } 63 | } 64 | 65 | bool gumbo_isalnum(unsigned char ch) 66 | { 67 | if ('a' <= ch && ch <= 'z') return true; 68 | if ('A' <= ch && ch <= 'Z') return true; 69 | if ('0' <= ch && ch <= '9') return true; 70 | return false; 71 | } 72 | 73 | // Debug function to trace operation of the parser. Pass --copts=-DGUMBO_DEBUG 74 | // to use. 75 | void gumbo_debug(const char* format, ...) { 76 | #ifdef GUMBO_DEBUG 77 | va_list args; 78 | va_start(args, format); 79 | vprintf(format, args); 80 | va_end(args); 81 | fflush(stdout); 82 | #endif 83 | } 84 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/util.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | // 17 | // This contains some utility functions that didn't fit into any of the other 18 | // headers. 
#ifndef GUMBO_UTIL_H_
#define GUMBO_UTIL_H_
#ifdef _MSC_VER
/* Guarded so a build that already passes -D_CRT_SECURE_NO_WARNINGS does not
 * get a macro-redefinition warning. */
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS
#endif
#endif
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Allocation hooks; defined in util.c. */
extern void *(* gumbo_user_allocator)(void *, size_t);
extern void (* gumbo_user_free)(void *);

/* Allocates `size` bytes by calling the allocator hook with a NULL block.
 * Returns whatever the hook returns. */
static inline void *gumbo_malloc(size_t size)
{
  return gumbo_user_allocator(NULL, size);
}

/* Resizes `ptr` to `size` bytes through the allocator hook. */
static inline void *gumbo_realloc(void *ptr, size_t size)
{
  return gumbo_user_allocator(ptr, size);
}

/* Returns a copy of the NUL-terminated string `str` allocated with
 * gumbo_malloc(), or NULL if the allocation failed. `str` must not be NULL.
 * The caller owns the result and releases it with gumbo_free(). */
static inline char *gumbo_strdup(const char *str)
{
  size_t len = strlen(str) + 1;
  /* The cast keeps this header usable from C++ translation units. */
  char *copy = (char *)gumbo_malloc(len);
  if (copy != NULL) {
    memcpy(copy, str, len);
  }
  return copy;
}

/* Releases memory through the deallocator hook. */
static inline void gumbo_free(void *ptr)
{
  gumbo_user_free(ptr);
}

/* Lower-cases 'A'..'Z' by setting bit 5 (0x20); every other value is
 * returned unchanged. Does not consult the locale. */
static inline int gumbo_tolower(int c)
{
  return c | ((c >= 'A' && c <= 'Z') << 5);
}

/* True for the letters a-z and A-Z: OR-ing in 0x20 folds upper case onto
 * lower case before the range check. Does not consult the locale. */
static inline bool gumbo_isalpha(int c)
{
  return (c | 0x20) >= 'a' && (c | 0x20) <= 'z';
}

bool gumbo_isspace(unsigned char ch);

bool gumbo_isalnum(unsigned char ch);

// Debug wrapper for printf, to make it easier to turn off debugging info when
// required.
void gumbo_debug(const char* format, ...);

#ifdef __cplusplus
}
#endif

#endif  // GUMBO_UTIL_H_
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // 15 | // Author: jdtang@google.com (Jonathan Tang) 16 | 17 | #include "vector.h" 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "util.h" 25 | 26 | struct GumboInternalParser; 27 | 28 | const GumboVector kGumboEmptyVector = { NULL, 0, 0 }; 29 | 30 | void gumbo_vector_init(size_t initial_capacity, GumboVector* vector) { 31 | vector->length = 0; 32 | vector->capacity = initial_capacity; 33 | vector->data = NULL; 34 | if (initial_capacity) 35 | vector->data = gumbo_malloc(sizeof(void*) * initial_capacity); 36 | } 37 | 38 | void gumbo_vector_destroy(GumboVector* vector) { 39 | gumbo_free(vector->data); 40 | } 41 | 42 | static void enlarge_vector_if_full(GumboVector* vector, int space) { 43 | unsigned int new_length = vector->length + space; 44 | unsigned int new_capacity = vector->capacity; 45 | 46 | if (!new_capacity) 47 | new_capacity = 2; 48 | 49 | while (new_capacity < new_length) 50 | new_capacity *= 2; 51 | 52 | if (new_capacity != vector->capacity) { 53 | vector->capacity = new_capacity; 54 | vector->data = gumbo_realloc(vector->data, 55 | sizeof(void *) * vector->capacity); 56 | } 57 | } 58 | 59 | void gumbo_vector_add(void* element, GumboVector* vector) { 60 | enlarge_vector_if_full(vector, 1); 61 | assert(vector->data); 62 | assert(vector->length < vector->capacity); 63 | vector->data[vector->length++] = element; 64 | } 65 | 66 | void* gumbo_vector_pop(GumboVector* vector) { 67 | if (vector->length == 0) { 68 | return NULL; 69 | } 70 | return vector->data[--vector->length]; 
71 | } 72 | 73 | int gumbo_vector_index_of(GumboVector* vector, const void* element) { 74 | for (unsigned int i = 0; i < vector->length; ++i) { 75 | if (vector->data[i] == element) { 76 | return i; 77 | } 78 | } 79 | return -1; 80 | } 81 | 82 | void gumbo_vector_insert_at(void* element, unsigned int index, GumboVector* vector) { 83 | assert(index >= 0); 84 | assert(index <= vector->length); 85 | enlarge_vector_if_full(vector, 1); 86 | ++vector->length; 87 | memmove(&vector->data[index + 1], &vector->data[index], 88 | sizeof(void*) * (vector->length - index - 1)); 89 | vector->data[index] = element; 90 | } 91 | 92 | void gumbo_vector_splice(int where, int n_to_remove, 93 | void **data, int n_to_insert, 94 | GumboVector* vector) { 95 | enlarge_vector_if_full(vector, n_to_insert - n_to_remove); 96 | memmove(vector->data + where + n_to_insert, 97 | vector->data + where + n_to_remove, 98 | sizeof(void *) * (vector->length - where - n_to_remove)); 99 | memcpy(vector->data + where, data, sizeof(void *) * n_to_insert); 100 | vector->length = vector->length + n_to_insert - n_to_remove; 101 | } 102 | 103 | void gumbo_vector_remove(const void* node, GumboVector* vector) { 104 | int index = gumbo_vector_index_of(vector, node); 105 | if (index == -1) { 106 | return; 107 | } 108 | gumbo_vector_remove_at(index, vector); 109 | } 110 | 111 | void* gumbo_vector_remove_at(unsigned int index, GumboVector* vector) { 112 | assert(index >= 0); 113 | assert(index < vector->length); 114 | void* result = vector->data[index]; 115 | memmove(&vector->data[index], &vector->data[index + 1], 116 | sizeof(void*) * (vector->length - index - 1)); 117 | --vector->length; 118 | return result; 119 | } 120 | -------------------------------------------------------------------------------- /src/Vendor/gumbo/vector.h: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: jdtang@google.com (Jonathan Tang)

#ifndef GUMBO_VECTOR_H_
#define GUMBO_VECTOR_H_

#include "gumbo.h"

#ifdef __cplusplus
extern "C" {
#endif

// Initializes a new GumboVector with the specified initial capacity.  An
// initial capacity of 0 allocates no storage.
void gumbo_vector_init(size_t initial_capacity, GumboVector* vector);

// Frees the memory used by a GumboVector.  Does not free the contained
// pointers.
void gumbo_vector_destroy(GumboVector* vector);

// Adds a new element to the end of a GumboVector, growing its capacity if
// needed.
void gumbo_vector_add(void* element, GumboVector* vector);

// Removes and returns the element most recently added to the GumboVector.
// Ownership is transferred to caller.  Capacity is unchanged.  If the vector is
// empty, NULL is returned.
void* gumbo_vector_pop(GumboVector* vector);

// Inserts an element at a specific index, which must not exceed the current
// length.  This is potentially O(N) time, but is necessary for some of the
// spec's behavior.
void gumbo_vector_insert_at(void* element, unsigned int index, GumboVector* vector);

// Removes an element from the vector, or does nothing if the element is not in
// the vector.  Only the first matching pointer is removed.
void gumbo_vector_remove(const void* element, GumboVector* vector);

// Removes and returns an element at a specific index, which must be less than
// the current length.  Note that this is potentially O(N) time and should be
// used sparingly.
void* gumbo_vector_remove_at(unsigned int index, GumboVector* vector);

// Returns the index of the first slot whose pointer equals `element`, or -1 if
// the pointer is not stored in the vector.
int gumbo_vector_index_of(GumboVector* vector, const void* element);

// Replaces the `n_to_remove` elements starting at index `where` with the
// `n_to_insert` pointers copied from `data`.  Elements after the removed range
// are shifted to close or open the gap.  The removed pointers are not freed.
void gumbo_vector_splice(
    int where, int n_to_remove, void **data, int n_to_insert, GumboVector* vector);

#ifdef __cplusplus
}
#endif

#endif  // GUMBO_VECTOR_H_