├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── c ├── generate_bindings.sh ├── generate_common.d └── modest.dpp ├── dub.sdl └── source ├── arrogant ├── c │ ├── common.d │ └── modest.d └── package.d └── arrogant_test_app.d /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.o 3 | *.obj 4 | 5 | # Compiled Dynamic libraries 6 | *.so 7 | *.dylib 8 | *.dll 9 | 10 | # Compiled Static libraries 11 | *.a 12 | *.lib 13 | 14 | # Executables 15 | *.exe 16 | 17 | # DUB 18 | .dub 19 | docs.json 20 | __dummy.html 21 | docs/ 22 | 23 | # Code coverage 24 | *.lst 25 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "c/Modest"] 2 | path = c/Modest 3 | url = https://github.com/lexborisov/Modest.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 2night SpA 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # arrogant 2 | Fully conformant HTML5 dom library with CSS4 selectors. Based on [Modest](https://github.com/lexborisov/Modest). 3 | 4 | Tested on Linux. Should work fine on OSX and Windows. 5 | 6 | # prerequisites: how to build & install modest 7 | 8 | Modest is written in pure C, without any external dependency. 9 | Just fetch source code and compile. 10 | 11 | ``` 12 | git clone https://github.com/2night/arrogant.git 13 | cd arrogant 14 | git submodule update --init 15 | cd c/Modest 16 | make 17 | sudo make install 18 | sudo ldconfig 19 | ``` 20 | 21 | # run an example 22 | 23 | ``` 24 | dub -c arrogant_test_app 25 | ``` 26 | 27 | # hello world 28 | 29 | ```d 30 | import arrogant; 31 | import std.stdio : writeln, stdout; 32 | 33 | void main() 34 | { 35 | auto src = `
<html><head></head><body><div>Hello World</div></body></html>
`; 36 | auto arrogant = Arrogant(); 37 | auto tree = arrogant.parse(src); 38 | 39 | // Change div content from "Hello World!" to "Hello D!" 40 | tree.byTagName("div").front.innerText = "Hello D!"; 41 | 42 | // Print the edited html 43 | writeln(tree.document); 44 | 45 | assert(tree.document.innerHTML == "
<html><head></head><body><div>Hello D!</div></body></html>
"); 46 | } 47 | ``` 48 | # get data from webpage 49 | 50 | ```d 51 | import arrogant; 52 | import std.net.curl; 53 | import std.stdio : writeln, stdout; 54 | 55 | void main() 56 | { 57 | auto src = "https://forum.dlang.org".get; 58 | auto arrogant = Arrogant(); 59 | auto tree = arrogant.parse(src); 60 | size_t cnt = 0; 61 | 62 | writeln("Recent posts on forum.dlang.org:\n"); 63 | 64 | // Search for summary divs 65 | foreach(post; tree.byClass("forum-index-col-lastpost")) 66 | { 67 | string title = post.byClass("forum-postsummary-subject").front["title"]; 68 | string author = post.byClass("forum-postsummary-author").front["title"]; 69 | string date = post.byCssSelector("span.forum-postsummary-time > span").front["title"]; 70 | 71 | writeln("Title: ", title); 72 | writeln("By: ", author); 73 | writeln("Date: ", date); 74 | writeln("--------------"); 75 | 76 | cnt++; 77 | } 78 | 79 | writeln("Total: ", cnt, " posts"); 80 | } 81 | ``` 82 | 83 | # more 84 | 85 | Check [this code](https://github.com/2night/arrogant/blob/master/source/arrogant_test_app.d) or [read documentation](http://arrogant.dpldocs.info/index.html) 86 | -------------------------------------------------------------------------------- /c/generate_bindings.sh: -------------------------------------------------------------------------------- 1 | 2 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 3 | cd $DIR 4 | INCLUDE=$DIR/Modest/include/ 5 | TARGET=$DIR/../source/arrogant/c 6 | 7 | # Generate bindings from headers with dpp: http://dpp.dub.pm 8 | d++ --include-path $INCLUDE --preprocess-only modest.dpp 9 | 10 | # Workaround for a dpp bug? 
/**
 * Builds the D source text of an enum named `name` that re-exports every member of
 * the enum `T` under a D-style camelCase identifier.
 *
 * For each member of `T`:
 *   1. a leading `prefix` is dropped;
 *   2. the remaining `_`-separated words are lower-cased, capitalized and joined; an
 *      empty word (produced by a leading or doubled underscore) is kept as a literal `_`;
 *   3. the first letter (the one after a leading `_`, if any) is lower-cased;
 *   4. a trailing `_` is appended when the result is a reserved D keyword.
 *
 * Params:
 *   name   = identifier of the generated enum
 *   T      = enum whose members are mirrored
 *   prefix = text stripped from the start of every member name
 *
 * Returns: the enum declaration as a string, with one `newName = T.MEMBER,` line per member.
 */
string WriteEnum(string name, T, string prefix)() if (is(T == enum))
{
    // All-lowercase D keywords: none of them can be used as an enum member name.
    // `body` is intentionally NOT listed: the bindings already generated by this tool
    // expose `MyHtmlTagId.body`, so escaping it here would rename a public member.
    static immutable string[] reserved = [
        "abstract", "alias", "align", "asm", "assert", "auto", "bool", "break", "byte",
        "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class", "const",
        "continue", "creal", "dchar", "debug", "default", "delegate", "delete",
        "deprecated", "do", "double", "else", "enum", "export", "extern", "false",
        "final", "finally", "float", "for", "foreach", "function", "goto", "idouble",
        "if", "ifloat", "immutable", "import", "in", "inout", "int", "interface",
        "invariant", "ireal", "is", "lazy", "long", "macro", "mixin", "module", "new",
        "nothrow", "null", "out", "override", "package", "pragma", "private",
        "protected", "public", "pure", "real", "ref", "return", "scope", "shared",
        "short", "static", "struct", "super", "switch", "synchronized", "template",
        "this", "throw", "true", "try", "typeid", "typeof", "ubyte", "ucent", "uint",
        "ulong", "union", "unittest", "ushort", "version", "void", "wchar", "while",
        "with"
    ];

    string ret = "enum " ~ name ~ " {\n";

    // Declared outside the loop: `static foreach` does not open a new scope, so a
    // declaration inside its body would be repeated once per member.
    string s;

    static foreach (member; __traits(allMembers, T))
    {
        // Strip the prefix only where it actually leads the name.
        s = member.startsWith(prefix) ? member[prefix.length .. $] : member;

        // FOO_BAR -> FooBar, _FOO_BAR -> _FooBar
        s = s.splitter("_").map!(x => x.length == 0 ? "_" : x.toLower.capitalize).joiner().to!string;

        // Lower-case the first letter; the length checks keep a lone "_" or an empty
        // name from indexing out of range.
        if (s.length > 1 && s.startsWith("_")) s = text("_", s[1].toLower, s[2 .. $]);
        else if (s.length > 0 && !s.startsWith("_")) s = text(s[0].toLower, s[1 .. $]);

        if (reserved.canFind(s)) s ~= "_";

        ret ~= text(` `, s, ` = `, T.stringof, `.`, member, ",\n");
    }

    ret ~= "}\n";

    return ret;
}
3 | authors "Andrea Fontana" 4 | copyright "Copyright © 2018, 2night.it" 5 | license "mit" 6 | libs "modest" 7 | 8 | configuration "arrogant" { 9 | targetType "autodetect" 10 | } 11 | 12 | configuration "arrogant_test_app" { 13 | targetType "executable" 14 | versions "arrogant_test_app" 15 | } 16 | 17 | configuration "arrogant_tests" { 18 | targetType "executable" 19 | versions "arrogant_tests" 20 | } 21 | -------------------------------------------------------------------------------- /source/arrogant/c/common.d: -------------------------------------------------------------------------------- 1 | module arrogant.c.common; 2 | import arrogant.c.modest; 3 | auto MYHTML_FAILED(T)(auto ref T _status_) { return _status_ != myhtml_status_t.MyHTML_STATUS_OK; } 4 | enum MyEncodingList { 5 | default_ = myencoding_list.MyENCODING_DEFAULT, 6 | notDetermined = myencoding_list.MyENCODING_NOT_DETERMINED, 7 | utf8 = myencoding_list.MyENCODING_UTF_8, 8 | utf16le = myencoding_list.MyENCODING_UTF_16LE, 9 | utf16be = myencoding_list.MyENCODING_UTF_16BE, 10 | xUserDefined = myencoding_list.MyENCODING_X_USER_DEFINED, 11 | big5 = myencoding_list.MyENCODING_BIG5, 12 | eucJp = myencoding_list.MyENCODING_EUC_JP, 13 | eucKr = myencoding_list.MyENCODING_EUC_KR, 14 | gb18030 = myencoding_list.MyENCODING_GB18030, 15 | gbk = myencoding_list.MyENCODING_GBK, 16 | ibm866 = myencoding_list.MyENCODING_IBM866, 17 | iso2022Jp = myencoding_list.MyENCODING_ISO_2022_JP, 18 | iso885910 = myencoding_list.MyENCODING_ISO_8859_10, 19 | iso885913 = myencoding_list.MyENCODING_ISO_8859_13, 20 | iso885914 = myencoding_list.MyENCODING_ISO_8859_14, 21 | iso885915 = myencoding_list.MyENCODING_ISO_8859_15, 22 | iso885916 = myencoding_list.MyENCODING_ISO_8859_16, 23 | iso88592 = myencoding_list.MyENCODING_ISO_8859_2, 24 | iso88593 = myencoding_list.MyENCODING_ISO_8859_3, 25 | iso88594 = myencoding_list.MyENCODING_ISO_8859_4, 26 | iso88595 = myencoding_list.MyENCODING_ISO_8859_5, 27 | iso88596 = 
myencoding_list.MyENCODING_ISO_8859_6, 28 | iso88597 = myencoding_list.MyENCODING_ISO_8859_7, 29 | iso88598 = myencoding_list.MyENCODING_ISO_8859_8, 30 | iso88598I = myencoding_list.MyENCODING_ISO_8859_8_I, 31 | koi8R = myencoding_list.MyENCODING_KOI8_R, 32 | koi8U = myencoding_list.MyENCODING_KOI8_U, 33 | macintosh = myencoding_list.MyENCODING_MACINTOSH, 34 | shiftJis = myencoding_list.MyENCODING_SHIFT_JIS, 35 | windows1250 = myencoding_list.MyENCODING_WINDOWS_1250, 36 | windows1251 = myencoding_list.MyENCODING_WINDOWS_1251, 37 | windows1252 = myencoding_list.MyENCODING_WINDOWS_1252, 38 | windows1253 = myencoding_list.MyENCODING_WINDOWS_1253, 39 | windows1254 = myencoding_list.MyENCODING_WINDOWS_1254, 40 | windows1255 = myencoding_list.MyENCODING_WINDOWS_1255, 41 | windows1256 = myencoding_list.MyENCODING_WINDOWS_1256, 42 | windows1257 = myencoding_list.MyENCODING_WINDOWS_1257, 43 | windows1258 = myencoding_list.MyENCODING_WINDOWS_1258, 44 | windows874 = myencoding_list.MyENCODING_WINDOWS_874, 45 | xMacCyrillic = myencoding_list.MyENCODING_X_MAC_CYRILLIC, 46 | lastEntry = myencoding_list.MyENCODING_LAST_ENTRY, 47 | } 48 | enum MyHtmlTagId { 49 | _undef = myhtml_tags.MyHTML_TAG__UNDEF, 50 | _text = myhtml_tags.MyHTML_TAG__TEXT, 51 | _comment = myhtml_tags.MyHTML_TAG__COMMENT, 52 | _doctype = myhtml_tags.MyHTML_TAG__DOCTYPE, 53 | a = myhtml_tags.MyHTML_TAG_A, 54 | abbr = myhtml_tags.MyHTML_TAG_ABBR, 55 | acronym = myhtml_tags.MyHTML_TAG_ACRONYM, 56 | address = myhtml_tags.MyHTML_TAG_ADDRESS, 57 | annotationXml = myhtml_tags.MyHTML_TAG_ANNOTATION_XML, 58 | applet = myhtml_tags.MyHTML_TAG_APPLET, 59 | area = myhtml_tags.MyHTML_TAG_AREA, 60 | article = myhtml_tags.MyHTML_TAG_ARTICLE, 61 | aside = myhtml_tags.MyHTML_TAG_ASIDE, 62 | audio = myhtml_tags.MyHTML_TAG_AUDIO, 63 | b = myhtml_tags.MyHTML_TAG_B, 64 | base = myhtml_tags.MyHTML_TAG_BASE, 65 | basefont = myhtml_tags.MyHTML_TAG_BASEFONT, 66 | bdi = myhtml_tags.MyHTML_TAG_BDI, 67 | bdo = myhtml_tags.MyHTML_TAG_BDO, 
68 | bgsound = myhtml_tags.MyHTML_TAG_BGSOUND, 69 | big = myhtml_tags.MyHTML_TAG_BIG, 70 | blink = myhtml_tags.MyHTML_TAG_BLINK, 71 | blockquote = myhtml_tags.MyHTML_TAG_BLOCKQUOTE, 72 | body = myhtml_tags.MyHTML_TAG_BODY, 73 | br = myhtml_tags.MyHTML_TAG_BR, 74 | button = myhtml_tags.MyHTML_TAG_BUTTON, 75 | canvas = myhtml_tags.MyHTML_TAG_CANVAS, 76 | caption = myhtml_tags.MyHTML_TAG_CAPTION, 77 | center = myhtml_tags.MyHTML_TAG_CENTER, 78 | cite = myhtml_tags.MyHTML_TAG_CITE, 79 | code = myhtml_tags.MyHTML_TAG_CODE, 80 | col = myhtml_tags.MyHTML_TAG_COL, 81 | colgroup = myhtml_tags.MyHTML_TAG_COLGROUP, 82 | command = myhtml_tags.MyHTML_TAG_COMMAND, 83 | comment = myhtml_tags.MyHTML_TAG_COMMENT, 84 | datalist = myhtml_tags.MyHTML_TAG_DATALIST, 85 | dd = myhtml_tags.MyHTML_TAG_DD, 86 | del = myhtml_tags.MyHTML_TAG_DEL, 87 | details = myhtml_tags.MyHTML_TAG_DETAILS, 88 | dfn = myhtml_tags.MyHTML_TAG_DFN, 89 | dialog = myhtml_tags.MyHTML_TAG_DIALOG, 90 | dir = myhtml_tags.MyHTML_TAG_DIR, 91 | div = myhtml_tags.MyHTML_TAG_DIV, 92 | dl = myhtml_tags.MyHTML_TAG_DL, 93 | dt = myhtml_tags.MyHTML_TAG_DT, 94 | em = myhtml_tags.MyHTML_TAG_EM, 95 | embed = myhtml_tags.MyHTML_TAG_EMBED, 96 | fieldset = myhtml_tags.MyHTML_TAG_FIELDSET, 97 | figcaption = myhtml_tags.MyHTML_TAG_FIGCAPTION, 98 | figure = myhtml_tags.MyHTML_TAG_FIGURE, 99 | font = myhtml_tags.MyHTML_TAG_FONT, 100 | footer = myhtml_tags.MyHTML_TAG_FOOTER, 101 | form = myhtml_tags.MyHTML_TAG_FORM, 102 | frame = myhtml_tags.MyHTML_TAG_FRAME, 103 | frameset = myhtml_tags.MyHTML_TAG_FRAMESET, 104 | h1 = myhtml_tags.MyHTML_TAG_H1, 105 | h2 = myhtml_tags.MyHTML_TAG_H2, 106 | h3 = myhtml_tags.MyHTML_TAG_H3, 107 | h4 = myhtml_tags.MyHTML_TAG_H4, 108 | h5 = myhtml_tags.MyHTML_TAG_H5, 109 | h6 = myhtml_tags.MyHTML_TAG_H6, 110 | head = myhtml_tags.MyHTML_TAG_HEAD, 111 | header = myhtml_tags.MyHTML_TAG_HEADER, 112 | hgroup = myhtml_tags.MyHTML_TAG_HGROUP, 113 | hr = myhtml_tags.MyHTML_TAG_HR, 114 | html = 
myhtml_tags.MyHTML_TAG_HTML, 115 | i = myhtml_tags.MyHTML_TAG_I, 116 | iframe = myhtml_tags.MyHTML_TAG_IFRAME, 117 | image = myhtml_tags.MyHTML_TAG_IMAGE, 118 | img = myhtml_tags.MyHTML_TAG_IMG, 119 | input = myhtml_tags.MyHTML_TAG_INPUT, 120 | ins = myhtml_tags.MyHTML_TAG_INS, 121 | isindex = myhtml_tags.MyHTML_TAG_ISINDEX, 122 | kbd = myhtml_tags.MyHTML_TAG_KBD, 123 | keygen = myhtml_tags.MyHTML_TAG_KEYGEN, 124 | label = myhtml_tags.MyHTML_TAG_LABEL, 125 | legend = myhtml_tags.MyHTML_TAG_LEGEND, 126 | li = myhtml_tags.MyHTML_TAG_LI, 127 | link = myhtml_tags.MyHTML_TAG_LINK, 128 | listing = myhtml_tags.MyHTML_TAG_LISTING, 129 | main = myhtml_tags.MyHTML_TAG_MAIN, 130 | map = myhtml_tags.MyHTML_TAG_MAP, 131 | mark = myhtml_tags.MyHTML_TAG_MARK, 132 | marquee = myhtml_tags.MyHTML_TAG_MARQUEE, 133 | menu = myhtml_tags.MyHTML_TAG_MENU, 134 | menuitem = myhtml_tags.MyHTML_TAG_MENUITEM, 135 | meta = myhtml_tags.MyHTML_TAG_META, 136 | meter = myhtml_tags.MyHTML_TAG_METER, 137 | mtext = myhtml_tags.MyHTML_TAG_MTEXT, 138 | nav = myhtml_tags.MyHTML_TAG_NAV, 139 | nobr = myhtml_tags.MyHTML_TAG_NOBR, 140 | noembed = myhtml_tags.MyHTML_TAG_NOEMBED, 141 | noframes = myhtml_tags.MyHTML_TAG_NOFRAMES, 142 | noscript = myhtml_tags.MyHTML_TAG_NOSCRIPT, 143 | object = myhtml_tags.MyHTML_TAG_OBJECT, 144 | ol = myhtml_tags.MyHTML_TAG_OL, 145 | optgroup = myhtml_tags.MyHTML_TAG_OPTGROUP, 146 | option = myhtml_tags.MyHTML_TAG_OPTION, 147 | output = myhtml_tags.MyHTML_TAG_OUTPUT, 148 | p = myhtml_tags.MyHTML_TAG_P, 149 | param = myhtml_tags.MyHTML_TAG_PARAM, 150 | plaintext = myhtml_tags.MyHTML_TAG_PLAINTEXT, 151 | pre = myhtml_tags.MyHTML_TAG_PRE, 152 | progress = myhtml_tags.MyHTML_TAG_PROGRESS, 153 | q = myhtml_tags.MyHTML_TAG_Q, 154 | rb = myhtml_tags.MyHTML_TAG_RB, 155 | rp = myhtml_tags.MyHTML_TAG_RP, 156 | rt = myhtml_tags.MyHTML_TAG_RT, 157 | rtc = myhtml_tags.MyHTML_TAG_RTC, 158 | ruby = myhtml_tags.MyHTML_TAG_RUBY, 159 | s = myhtml_tags.MyHTML_TAG_S, 160 | samp = 
myhtml_tags.MyHTML_TAG_SAMP, 161 | script = myhtml_tags.MyHTML_TAG_SCRIPT, 162 | section = myhtml_tags.MyHTML_TAG_SECTION, 163 | select = myhtml_tags.MyHTML_TAG_SELECT, 164 | small = myhtml_tags.MyHTML_TAG_SMALL, 165 | source = myhtml_tags.MyHTML_TAG_SOURCE, 166 | span = myhtml_tags.MyHTML_TAG_SPAN, 167 | strike = myhtml_tags.MyHTML_TAG_STRIKE, 168 | strong = myhtml_tags.MyHTML_TAG_STRONG, 169 | style = myhtml_tags.MyHTML_TAG_STYLE, 170 | sub = myhtml_tags.MyHTML_TAG_SUB, 171 | summary = myhtml_tags.MyHTML_TAG_SUMMARY, 172 | sup = myhtml_tags.MyHTML_TAG_SUP, 173 | svg = myhtml_tags.MyHTML_TAG_SVG, 174 | table = myhtml_tags.MyHTML_TAG_TABLE, 175 | tbody = myhtml_tags.MyHTML_TAG_TBODY, 176 | td = myhtml_tags.MyHTML_TAG_TD, 177 | template_ = myhtml_tags.MyHTML_TAG_TEMPLATE, 178 | textarea = myhtml_tags.MyHTML_TAG_TEXTAREA, 179 | tfoot = myhtml_tags.MyHTML_TAG_TFOOT, 180 | th = myhtml_tags.MyHTML_TAG_TH, 181 | thead = myhtml_tags.MyHTML_TAG_THEAD, 182 | time = myhtml_tags.MyHTML_TAG_TIME, 183 | title = myhtml_tags.MyHTML_TAG_TITLE, 184 | tr = myhtml_tags.MyHTML_TAG_TR, 185 | track = myhtml_tags.MyHTML_TAG_TRACK, 186 | tt = myhtml_tags.MyHTML_TAG_TT, 187 | u = myhtml_tags.MyHTML_TAG_U, 188 | ul = myhtml_tags.MyHTML_TAG_UL, 189 | var = myhtml_tags.MyHTML_TAG_VAR, 190 | video = myhtml_tags.MyHTML_TAG_VIDEO, 191 | wbr = myhtml_tags.MyHTML_TAG_WBR, 192 | xmp = myhtml_tags.MyHTML_TAG_XMP, 193 | altglyph = myhtml_tags.MyHTML_TAG_ALTGLYPH, 194 | altglyphdef = myhtml_tags.MyHTML_TAG_ALTGLYPHDEF, 195 | altglyphitem = myhtml_tags.MyHTML_TAG_ALTGLYPHITEM, 196 | animate = myhtml_tags.MyHTML_TAG_ANIMATE, 197 | animatecolor = myhtml_tags.MyHTML_TAG_ANIMATECOLOR, 198 | animatemotion = myhtml_tags.MyHTML_TAG_ANIMATEMOTION, 199 | animatetransform = myhtml_tags.MyHTML_TAG_ANIMATETRANSFORM, 200 | circle = myhtml_tags.MyHTML_TAG_CIRCLE, 201 | clippath = myhtml_tags.MyHTML_TAG_CLIPPATH, 202 | colorProfile = myhtml_tags.MyHTML_TAG_COLOR_PROFILE, 203 | cursor = myhtml_tags.MyHTML_TAG_CURSOR, 
204 | defs = myhtml_tags.MyHTML_TAG_DEFS, 205 | desc = myhtml_tags.MyHTML_TAG_DESC, 206 | ellipse = myhtml_tags.MyHTML_TAG_ELLIPSE, 207 | feblend = myhtml_tags.MyHTML_TAG_FEBLEND, 208 | fecolormatrix = myhtml_tags.MyHTML_TAG_FECOLORMATRIX, 209 | fecomponenttransfer = myhtml_tags.MyHTML_TAG_FECOMPONENTTRANSFER, 210 | fecomposite = myhtml_tags.MyHTML_TAG_FECOMPOSITE, 211 | feconvolvematrix = myhtml_tags.MyHTML_TAG_FECONVOLVEMATRIX, 212 | fediffuselighting = myhtml_tags.MyHTML_TAG_FEDIFFUSELIGHTING, 213 | fedisplacementmap = myhtml_tags.MyHTML_TAG_FEDISPLACEMENTMAP, 214 | fedistantlight = myhtml_tags.MyHTML_TAG_FEDISTANTLIGHT, 215 | fedropshadow = myhtml_tags.MyHTML_TAG_FEDROPSHADOW, 216 | feflood = myhtml_tags.MyHTML_TAG_FEFLOOD, 217 | fefunca = myhtml_tags.MyHTML_TAG_FEFUNCA, 218 | fefuncb = myhtml_tags.MyHTML_TAG_FEFUNCB, 219 | fefuncg = myhtml_tags.MyHTML_TAG_FEFUNCG, 220 | fefuncr = myhtml_tags.MyHTML_TAG_FEFUNCR, 221 | fegaussianblur = myhtml_tags.MyHTML_TAG_FEGAUSSIANBLUR, 222 | feimage = myhtml_tags.MyHTML_TAG_FEIMAGE, 223 | femerge = myhtml_tags.MyHTML_TAG_FEMERGE, 224 | femergenode = myhtml_tags.MyHTML_TAG_FEMERGENODE, 225 | femorphology = myhtml_tags.MyHTML_TAG_FEMORPHOLOGY, 226 | feoffset = myhtml_tags.MyHTML_TAG_FEOFFSET, 227 | fepointlight = myhtml_tags.MyHTML_TAG_FEPOINTLIGHT, 228 | fespecularlighting = myhtml_tags.MyHTML_TAG_FESPECULARLIGHTING, 229 | fespotlight = myhtml_tags.MyHTML_TAG_FESPOTLIGHT, 230 | fetile = myhtml_tags.MyHTML_TAG_FETILE, 231 | feturbulence = myhtml_tags.MyHTML_TAG_FETURBULENCE, 232 | filter = myhtml_tags.MyHTML_TAG_FILTER, 233 | fontFace = myhtml_tags.MyHTML_TAG_FONT_FACE, 234 | fontFaceFormat = myhtml_tags.MyHTML_TAG_FONT_FACE_FORMAT, 235 | fontFaceName = myhtml_tags.MyHTML_TAG_FONT_FACE_NAME, 236 | fontFaceSrc = myhtml_tags.MyHTML_TAG_FONT_FACE_SRC, 237 | fontFaceUri = myhtml_tags.MyHTML_TAG_FONT_FACE_URI, 238 | foreignobject = myhtml_tags.MyHTML_TAG_FOREIGNOBJECT, 239 | g = myhtml_tags.MyHTML_TAG_G, 240 | glyph = 
myhtml_tags.MyHTML_TAG_GLYPH, 241 | glyphref = myhtml_tags.MyHTML_TAG_GLYPHREF, 242 | hkern = myhtml_tags.MyHTML_TAG_HKERN, 243 | line = myhtml_tags.MyHTML_TAG_LINE, 244 | lineargradient = myhtml_tags.MyHTML_TAG_LINEARGRADIENT, 245 | marker = myhtml_tags.MyHTML_TAG_MARKER, 246 | mask = myhtml_tags.MyHTML_TAG_MASK, 247 | metadata = myhtml_tags.MyHTML_TAG_METADATA, 248 | missingGlyph = myhtml_tags.MyHTML_TAG_MISSING_GLYPH, 249 | mpath = myhtml_tags.MyHTML_TAG_MPATH, 250 | path = myhtml_tags.MyHTML_TAG_PATH, 251 | pattern = myhtml_tags.MyHTML_TAG_PATTERN, 252 | polygon = myhtml_tags.MyHTML_TAG_POLYGON, 253 | polyline = myhtml_tags.MyHTML_TAG_POLYLINE, 254 | radialgradient = myhtml_tags.MyHTML_TAG_RADIALGRADIENT, 255 | rect = myhtml_tags.MyHTML_TAG_RECT, 256 | set = myhtml_tags.MyHTML_TAG_SET, 257 | stop = myhtml_tags.MyHTML_TAG_STOP, 258 | switch_ = myhtml_tags.MyHTML_TAG_SWITCH, 259 | symbol = myhtml_tags.MyHTML_TAG_SYMBOL, 260 | text = myhtml_tags.MyHTML_TAG_TEXT, 261 | textpath = myhtml_tags.MyHTML_TAG_TEXTPATH, 262 | tref = myhtml_tags.MyHTML_TAG_TREF, 263 | tspan = myhtml_tags.MyHTML_TAG_TSPAN, 264 | use = myhtml_tags.MyHTML_TAG_USE, 265 | view = myhtml_tags.MyHTML_TAG_VIEW, 266 | vkern = myhtml_tags.MyHTML_TAG_VKERN, 267 | math = myhtml_tags.MyHTML_TAG_MATH, 268 | maction = myhtml_tags.MyHTML_TAG_MACTION, 269 | maligngroup = myhtml_tags.MyHTML_TAG_MALIGNGROUP, 270 | malignmark = myhtml_tags.MyHTML_TAG_MALIGNMARK, 271 | menclose = myhtml_tags.MyHTML_TAG_MENCLOSE, 272 | merror = myhtml_tags.MyHTML_TAG_MERROR, 273 | mfenced = myhtml_tags.MyHTML_TAG_MFENCED, 274 | mfrac = myhtml_tags.MyHTML_TAG_MFRAC, 275 | mglyph = myhtml_tags.MyHTML_TAG_MGLYPH, 276 | mi = myhtml_tags.MyHTML_TAG_MI, 277 | mlabeledtr = myhtml_tags.MyHTML_TAG_MLABELEDTR, 278 | mlongdiv = myhtml_tags.MyHTML_TAG_MLONGDIV, 279 | mmultiscripts = myhtml_tags.MyHTML_TAG_MMULTISCRIPTS, 280 | mn = myhtml_tags.MyHTML_TAG_MN, 281 | mo = myhtml_tags.MyHTML_TAG_MO, 282 | mover = myhtml_tags.MyHTML_TAG_MOVER, 283 
| mpadded = myhtml_tags.MyHTML_TAG_MPADDED, 284 | mphantom = myhtml_tags.MyHTML_TAG_MPHANTOM, 285 | mroot = myhtml_tags.MyHTML_TAG_MROOT, 286 | mrow = myhtml_tags.MyHTML_TAG_MROW, 287 | ms = myhtml_tags.MyHTML_TAG_MS, 288 | mscarries = myhtml_tags.MyHTML_TAG_MSCARRIES, 289 | mscarry = myhtml_tags.MyHTML_TAG_MSCARRY, 290 | msgroup = myhtml_tags.MyHTML_TAG_MSGROUP, 291 | msline = myhtml_tags.MyHTML_TAG_MSLINE, 292 | mspace = myhtml_tags.MyHTML_TAG_MSPACE, 293 | msqrt = myhtml_tags.MyHTML_TAG_MSQRT, 294 | msrow = myhtml_tags.MyHTML_TAG_MSROW, 295 | mstack = myhtml_tags.MyHTML_TAG_MSTACK, 296 | mstyle = myhtml_tags.MyHTML_TAG_MSTYLE, 297 | msub = myhtml_tags.MyHTML_TAG_MSUB, 298 | msup = myhtml_tags.MyHTML_TAG_MSUP, 299 | msubsup = myhtml_tags.MyHTML_TAG_MSUBSUP, 300 | _endOfFile = myhtml_tags.MyHTML_TAG__END_OF_FILE, 301 | firstEntry = myhtml_tags.MyHTML_TAG_FIRST_ENTRY, 302 | lastEntry = myhtml_tags.MyHTML_TAG_LAST_ENTRY, 303 | } 304 | enum MyHtmlOptions { 305 | default_ = myhtml_options.MyHTML_OPTIONS_DEFAULT, 306 | parseModeSingle = myhtml_options.MyHTML_OPTIONS_PARSE_MODE_SINGLE, 307 | parseModeAllInOne = myhtml_options.MyHTML_OPTIONS_PARSE_MODE_ALL_IN_ONE, 308 | parseModeSeparately = myhtml_options.MyHTML_OPTIONS_PARSE_MODE_SEPARATELY, 309 | } 310 | enum MyHtmlNamespace { 311 | undef = myhtml_namespace.MyHTML_NAMESPACE_UNDEF, 312 | html = myhtml_namespace.MyHTML_NAMESPACE_HTML, 313 | mathml = myhtml_namespace.MyHTML_NAMESPACE_MATHML, 314 | svg = myhtml_namespace.MyHTML_NAMESPACE_SVG, 315 | xlink = myhtml_namespace.MyHTML_NAMESPACE_XLINK, 316 | xml = myhtml_namespace.MyHTML_NAMESPACE_XML, 317 | xmlns = myhtml_namespace.MyHTML_NAMESPACE_XMLNS, 318 | any = myhtml_namespace.MyHTML_NAMESPACE_ANY, 319 | lastEntry = myhtml_namespace.MyHTML_NAMESPACE_LAST_ENTRY, 320 | } 321 | -------------------------------------------------------------------------------- /source/arrogant/package.d: -------------------------------------------------------------------------------- 1 | 
module arrogant;

import arrogant.c.modest;

// Public enums & stuffs
public import arrogant.c.common;

import std.traits;
import std.conv : to;
import std.typecons : Flag, Yes, No;
import std.string : toStringz;

/** Use this enum with `node.byAttribute()` search to choose how the attribute value is matched */
enum AttributeSearchType
{
    exact,          /// value must match the searched text
    startsWith,     /// value must begin with the searched text
    endsWith,       /// value must end with the searched text
    contains,       /// value must contain the searched text
    spaceSeparated, /// searched text must be one of the whitespace separated tokens of the value
    hypenSeparated  /// hyphen separated match (the member name keeps its historical spelling)
}

/** Exception thrown when the underlying library reports a failed status code */
class ArrogantException : Exception
{
    /// The raw status code this exception was built from
    uint status;

    /**
     * Params:
     *   err  = status code to report; it is appended to the exception message
     *   file = source file reported by the exception (defaults to the throw site)
     *   line = source line reported by the exception (defaults to the throw site)
     */
    this(uint err, string file = __FILE__, size_t line = __LINE__)
    {
        super("Arrogant exception: " ~ to!string(err), file, line);
        status = err;
    }
}


/** An html attribute of a tag */
struct Attribute
{
    /** The attribute key */
    @property auto key() { return _key; }

    /** The attribute value */
    @property auto value() { return _value; }

    // Copies key and value out of the C attribute, so the struct does not keep any pointer to it
    private this(myhtml_tree_attr_t* attr)
    {
        size_t keyLength;
        _key = myhtml_attribute_key(attr, &keyLength)[0 .. keyLength].to!string;

        size_t valueLength;
        _value = myhtml_attribute_value(attr, &valueLength)[0 .. valueLength].to!string;
    }

    @disable this();

    private string _key;
    private string _value;
}



/** A HTML Node */
struct Node
{
    @disable this();

    /// Check if node is null / empty
    bool isNull() { return myhtml_tree_node == null; }


    /**
     * Get the tag id for this node (ex: a, div, body, ...)
     * Examples:
     * --------------------
     * tree.body.tag.writeln(); // prints "body"
     * --------------------
     */
    MyHtmlTagId tagId() { return cast(MyHtmlTagId) myhtml_tree_node.tag_id; }
    string tag() { return tagId.to!string; } /// Ditto

    /**
     * "in" operator to check for an attribute inside a node
     * Examples:
     * --------------------
     * if ("href" in node) writeln("Link: ", node["href"]);
     * --------------------
     */
    bool opBinaryRight(string op)(string key) if (op == "in")
    {
        return myhtml_attribute_by_key(myhtml_tree_node, key.toStringz, key.length) !is null;
    }

    /**
     * Read an attribute from node
     * Returns: a `Nullable!string` holding the value, or a null `Nullable` if the attribute is missing
     */
    auto opIndex(string attribute)
    {
        import std.typecons : Nullable;

        auto attr = myhtml_attribute_by_key(myhtml_tree_node, attribute.toStringz, attribute.length);

        if (attr is null)
            return Nullable!string();

        size_t length;
        auto v = myhtml_attribute_value(attr, &length);
        return Nullable!string(v[0 .. length].to!string);
    }

    /** Write an attribute. Any existing attribute with the same key is removed first. */
    auto opIndexAssign(string value, string key)
    {
        removeAttribute(key);
        myhtml_attribute_add(myhtml_tree_node, key.toStringz, key.length, value.toStringz, value.length, MyEncodingList.default_);
        return value;
    }

    /** Write an attribute without a value (`node["disabled"] = null`) */
    auto opIndexAssign(typeof(null) value, string key)
    {
        removeAttribute(key);
        myhtml_attribute_add(myhtml_tree_node, key.toStringz, key.length, null, 0, MyEncodingList.default_);
        return value;
    }

    /**
        Remove an attribute
        Returns: `true` if attribute exists `false` otherwise.
    */
    bool removeAttribute(string key)
    {
        auto attr = myhtml_attribute_by_key(myhtml_tree_node, key.toStringz, key.length);

        if (attr is null)
            return false;

        myhtml_attribute_delete(myhtml_node_tree(myhtml_tree_node), myhtml_tree_node, attr);
        return true;
    }

    /** Remove node from tree and delete it */
    void deleteNode() { myhtml_node_delete_recursive(myhtml_tree_node); }

    ///
    Node firstChild()
    {
        return Node(myhtml_node_child(myhtml_tree_node));
    }

    ///
    Node lastChild()
    {
        return Node(myhtml_node_last_child(myhtml_tree_node));
    }

    ///
    auto parent()
    {
        return Node(myhtml_node_parent(myhtml_tree_node));
    }

    ///
    auto next()
    {
        return Node(myhtml_node_next(myhtml_tree_node));
    }

    ///
    auto previous()
    {
        return Node(myhtml_node_prev(myhtml_tree_node));
    }

    /**
        Get children of this node.
        Returns: a lazy `ChildrenRange`. If you want to edit children, convert to array before.
    */
    auto children()
    {
        struct ChildrenRange
        {
            @disable this();

            private this(myhtml_tree_node_t* n) { parent = n; current = myhtml_node_child(parent); }

            @property empty() { return current == null; }
            @property Node front() { return Node(current); }
            void popFront() { current = myhtml_node_next(current); }

            void opAssign(ChildrenRange rhs)
            {
                current = rhs.current;
                parent = rhs.parent;
            }

        private:
            myhtml_tree_node_t* current;
            myhtml_tree_node_t* parent;
        }

        return ChildrenRange(myhtml_tree_node);
    }

    /** All node's attributes
        Returns: a lazy range of Attributes
    */
    auto attributes()
    {
        struct AttributesRange
        {
            this(myhtml_tree_node_t* n) { parent = n; current = myhtml_node_attribute_first(parent); }

            @property empty() { return current == null; }
            auto front() { return Attribute(current); }
            void popFront() { current = myhtml_attribute_next(current); }

        private:
            myhtml_tree_attr_t* current;
            myhtml_tree_node_t* parent;
        }

        return AttributesRange(myhtml_tree_node);
    }

    /**
        Get the text of this node. Only for text nodes!
        Returns: a copy of the node text, or `null` if the library returns no text
    */
    @property string text()
    {
        // Ask for the length explicitly (as the attribute readers in this module do) instead of
        // relying on a terminating NUL: the text is copied in full even if it contains a '\0'.
        size_t length;
        auto raw = myhtml_node_text(myhtml_tree_node, &length);
        return raw is null ? null : raw[0 .. length].idup;
    }

    /** Set the text of this node. Only for text nodes! */
    @property void text(string s)
    {
        myhtml_node_text_set(myhtml_tree_node, s.toStringz, s.length, MyEncodingList.default_);
    }

    /** Return node html representation */
    string toString()
    {
        return innerHTML();
    }

    /// Ditto
    @property string innerHTML()
    {
        mycore_string_raw_t str_raw;
        mycore_string_raw_clean_all(&str_raw);
        scope(exit) mycore_string_raw_destroy(&str_raw, false);

        // A non-zero status means serialization failed: report it as an empty string
        if (myhtml_serialization_tree_buffer(myhtml_tree_node, &str_raw)) return "";
        return str_raw.data[0 .. str_raw.length].to!string;
    }

    /** Set node html. All children will be deleted. */
    @property void innerHTML(string s)
    {
        auto owner = myhtml_node_tree(myhtml_tree_node);

        // Create a new tree to parse fragment
        auto tree = Tree(myhtml_tree_get_myhtml(owner));
        tree.parseFragment(s);

        // Clone fragment and move to current tree.
        // This happens before any deletion, so a parse failure leaves the children untouched.
        auto cloned = tree.first.clone(owner);

        removeAllChildren();
        appendChild(cloned);
    }

    /** Set node innerText. All children will be deleted. */
    @property void innerText(string s)
    {
        // Create a text node
        auto text_node = myhtml_node_create(
            myhtml_node_tree(myhtml_tree_node),
            MyHtmlTagId._text,
            MyHtmlNamespace.html
        );

        Node nodeToAppend = Node(text_node);
        nodeToAppend.text = s;

        removeAllChildren();
        appendChild(nodeToAppend);
    }

    /** Get the concatenated text of every text node found below this node, in document order */
    @property string innerText()
    {
        import std.container.dlist;
        import std.array : Appender;
        import std.algorithm : map;

        auto appender = Appender!string();

        // Depth-first walk: the children of the visited node are pushed in front of the pending ones
        auto toExplore = DList!(myhtml_tree_node_t*)();
        toExplore.insertBack(myhtml_tree_node);

        while (!toExplore.empty)
        {
            auto current = Node(toExplore.front);
            toExplore.removeFront;

            if (current.tagId == MyHtmlTagId._text) appender ~= current.text();
            else toExplore.insertFront(current.children.map!(x => x.myhtml_tree_node));
        }

        return appender.data;
    }

    // Delete every direct child (and its subtree) of this node.
    // Children are collected before deleting, so the sibling walk never touches a deleted node.
    private void removeAllChildren()
    {
        myhtml_tree_node_t*[] toDelete;

        for (auto current = myhtml_node_child(myhtml_tree_node); current != null; current = myhtml_node_next(current))
            toDelete ~= current;

        foreach (n; toDelete)
            myhtml_node_delete_recursive(n);
    }

    /**
        Create a copy of this node owned by another tree
        Returns: a `Node` owned by `destination`
    */
    Node clone(Tree destination) { return clone(destination.myhtml_tree); }

    /// Create a copy of this node
    Node clone() { return clone(myhtml_node_tree(myhtml_tree_node)); }

    ///
    bool isSelfClosing() { return myhtml_node_is_close_self(myhtml_tree_node); }

    ///
    bool isVoidElement() { return myhtml_node_is_void_element(myhtml_tree_node); }

    /** Detach node from tree without destroying */
    void detach() { myhtml_node_remove(myhtml_tree_node); }

    /// Fast way to append a text node
    void appendText(string s)
    {
        // Create a text node
        auto text_node = myhtml_node_create(
            myhtml_node_tree(myhtml_tree_node),
            MyHtmlTagId._text,
            MyHtmlNamespace.html
        );

        Node nodeToAppend = Node(text_node);
        nodeToAppend.text = s;
        appendChild(nodeToAppend);
    }
364 | 365 | /// Fast way to append a comment node 366 | void appendComment(string s) 367 | { 368 | // Create a text node 369 | auto text_node = myhtml_node_create ( 370 | myhtml_node_tree(myhtml_tree_node), 371 | MyHtmlTagId ._comment, 372 | MyHtmlNamespace.html 373 | ); 374 | 375 | Node nodeToAppend = Node(text_node); 376 | nodeToAppend.text = s; 377 | appendChild(nodeToAppend); 378 | } 379 | 380 | /// 381 | void appendChild(Node n) { n.detach(); myhtml_node_append_child(myhtml_tree_node, n.myhtml_tree_node); } 382 | 383 | /// 384 | void insertBefore(Node n) { myhtml_node_insert_before(myhtml_tree_node, n.myhtml_tree_node); } 385 | 386 | /// 387 | void insertAfter(Node n) { myhtml_node_insert_after(myhtml_tree_node, n.myhtml_tree_node); } 388 | 389 | /// 390 | void insertToAppropriatePlace(Node n) { myhtml_node_insert_to_appropriate_place(myhtml_tree_node, n.myhtml_tree_node); } 391 | 392 | /** 393 | Search children using a css 3.1 selector 394 | Returns: a lazy range of nodes 395 | See_Also: byAttribute, byAttributeKey, byTagName, byClass, byId 396 | */ 397 | auto byCssSelector(string selector) 398 | { 399 | import std.exception : enforce; 400 | 401 | auto mycss = mycss_create(); 402 | mycss_init(mycss); 403 | 404 | auto entry = mycss_entry_create(); 405 | mycss_entry_init(mycss, entry); 406 | 407 | auto finder = modest_finder_create_simple(); 408 | 409 | mystatus_t out_status; 410 | mycss_selectors_list_t *list = mycss_selectors_parse 411 | ( 412 | mycss_entry_selectors(entry), 413 | MyEncodingList.default_, 414 | selector.toStringz, selector.length, 415 | &out_status 416 | ); 417 | 418 | enforce(list != null && ((list.flags & mycss_selectors_flags.MyCSS_SELECTORS_FLAGS_SELECTOR_BAD) == 0), "Can't compile css selector: " ~ selector); 419 | 420 | myhtml_collection_t* collection = null; 421 | 422 | modest_finder_by_selectors_list(finder, myhtml_tree_node, list, &collection); 423 | 424 | // Free resources! 
425 | mycss_selectors_list_destroy(mycss_entry_selectors(entry), list, true); 426 | modest_finder_destroy(finder, true); 427 | mycss_entry_destroy(entry, true); 428 | mycss_destroy(mycss, true); 429 | 430 | return NodeRange(collection, myhtml_node_tree(myhtml_tree_node)); 431 | } 432 | 433 | /** 434 | Search children by tag name 435 | Returns: a lazy range of nodes 436 | See_Also: byAttribute, byAttributeKey, byClass, byId, byCssSelector 437 | */ 438 | auto byTagName(MyHtmlTagId name) 439 | { 440 | mystatus_t status; 441 | myhtml_collection_t* myhtml_collection = myhtml_collection_create(0, null); 442 | auto collection = NodeRange(myhtml_get_nodes_by_tag_id_in_scope(myhtml_node_tree(myhtml_tree_node), myhtml_collection, myhtml_tree_node, name, &status), myhtml_node_tree(myhtml_tree_node)); 443 | 444 | if (MYHTML_FAILED(status)) throw new ArrogantException(status); 445 | 446 | return collection; 447 | } 448 | 449 | /// Ditto 450 | auto byTagName(string name) 451 | { 452 | mystatus_t status; 453 | myhtml_collection_t* myhtml_collection = myhtml_collection_create(0, null); 454 | auto collection = NodeRange(myhtml_get_nodes_by_name_in_scope(myhtml_node_tree(myhtml_tree_node), myhtml_collection, myhtml_tree_node, name.toStringz, name.length, &status), myhtml_node_tree(myhtml_tree_node)); 455 | if (MYHTML_FAILED(status)) throw new ArrogantException(status); 456 | 457 | return collection; 458 | } 459 | 460 | /** 461 | Search children by class (space separated) 462 | Returns: a lazy range of nodes 463 | See_Also: byAttribute, byAttributeKey, byTagName, byId, byCssSelector 464 | */ 465 | auto byClass(string className) { return byAttribute!(AttributeSearchType.spaceSeparated)("class", className);} 466 | 467 | /** 468 | Search children by id 469 | Returns: a lazy range of nodes 470 | See_Also: byAttribute, byAttributeKey, byTagName, byClass, byCssSelector 471 | */ 472 | auto byId(string id) { return byAttribute("id", id);} 473 | 474 | /** 475 | Search children with a 
specified attribute 476 | Returns: a lazy range of nodes 477 | See_Also: byAttribute, byTagName, byClass, byId, byCssSelector 478 | */ 479 | auto byAttributeKey(string name) 480 | { 481 | mystatus_t status; 482 | myhtml_collection_t* myhtml_collection = myhtml_collection_create(0, null); 483 | auto collection = NodeRange(myhtml_get_nodes_by_attribute_key(myhtml_node_tree(myhtml_tree_node), myhtml_collection, myhtml_tree_node, name.toStringz, name.length, &status), myhtml_node_tree(myhtml_tree_node)); 484 | if (MYHTML_FAILED(status)) throw new ArrogantException(status); 485 | 486 | return collection; 487 | } 488 | 489 | /** 490 | Search children by tag attribute key/val. 491 | Returns: a lazy range of nodes 492 | See_Also: byAttributeKey, byTagName, byClass, byId, byCssSelector 493 | */ 494 | auto byAttribute(AttributeSearchType st = AttributeSearchType.exact, Flag!"caseInsensitive" caseInsensitive = No.caseInsensitive)(string key, string value) 495 | { 496 | mystatus_t status; 497 | typeof(&myhtml_get_nodes_by_attribute_value) callback; 498 | 499 | final switch(st) 500 | { 501 | case AttributeSearchType.exact: callback = &myhtml_get_nodes_by_attribute_value; break; 502 | case AttributeSearchType.startsWith: callback = &myhtml_get_nodes_by_attribute_value_begin; break; 503 | case AttributeSearchType.endsWith: callback = &myhtml_get_nodes_by_attribute_value_end; break; 504 | case AttributeSearchType.contains: callback = &myhtml_get_nodes_by_attribute_value_contain; break; 505 | case AttributeSearchType.spaceSeparated : callback = &myhtml_get_nodes_by_attribute_value_whitespace_separated; break; 506 | case AttributeSearchType.hypenSeparated: callback = &myhtml_get_nodes_by_attribute_value_hyphen_separated; break; 507 | } 508 | 509 | myhtml_collection_t* myhtml_collection = myhtml_collection_create(0, null); 510 | 511 | auto collection = NodeRange 512 | ( 513 | callback 514 | ( 515 | myhtml_node_tree(myhtml_tree_node), 516 | myhtml_collection, 517 | myhtml_tree_node, 
518 | caseInsensitive == Yes.caseInsensitive, 519 | key.toStringz, key.length, value.toStringz, value.length, 520 | &status 521 | ), 522 | myhtml_node_tree(myhtml_tree_node) 523 | ); 524 | 525 | if (MYHTML_FAILED(status)) throw new ArrogantException(status); 526 | return collection; 527 | } 528 | 529 | /** Create a new html node */ 530 | this(ref Tree tree, MyHtmlTagId tag, MyHtmlNamespace ns = MyHtmlNamespace.html) 531 | { 532 | myhtml_tree_node = myhtml_node_create ( 533 | tree.myhtml_tree, 534 | tag, 535 | ns 536 | ); 537 | 538 | Tree.acquire(tree.myhtml_tree); 539 | } 540 | 541 | this(this) { Tree.acquire(myhtml_node_tree(myhtml_tree_node)); } 542 | 543 | void opAssign(Node rhs) 544 | { 545 | Tree.acquire(myhtml_node_tree(rhs.myhtml_tree_node)); 546 | Tree.release(myhtml_node_tree(myhtml_tree_node)); 547 | myhtml_tree_node = rhs.myhtml_tree_node; 548 | } 549 | 550 | ~this() { Tree.release(myhtml_node_tree(myhtml_tree_node)); } 551 | 552 | private: 553 | 554 | this(myhtml_tree_node_t *node) 555 | { 556 | myhtml_tree_node = node; 557 | Tree.acquire(myhtml_node_tree(node)); 558 | } 559 | 560 | Node clone(myhtml_tree_t* destination) 561 | { 562 | 563 | struct CopyQueueItem 564 | { 565 | myhtml_tree_node_t* destParent; // Where node will be appended 566 | myhtml_tree_node_t* toCopy; // The node to copy 567 | } 568 | 569 | import std.container.dlist; 570 | auto copyQueue = DList!CopyQueueItem(); 571 | 572 | // Clone a single node without children 573 | myhtml_tree_node_t* cloneNode(myhtml_tree_t* _destination, myhtml_tree_node_t* _node) 574 | { 575 | // Create a new node rooted on destination tree 576 | auto ret = myhtml_node_create ( 577 | _destination, 578 | _node.tag_id, 579 | _node.ns 580 | ); 581 | 582 | // Copy text if present 583 | { 584 | size_t textLength; 585 | auto text = myhtml_node_text(_node, &textLength); 586 | myhtml_node_text_set(ret, text, textLength, MyEncodingList.default_); 587 | } 588 | 589 | // Clone attributes 590 | for (auto attribute = 
myhtml_node_attribute_first(_node); attribute != null; attribute = myhtml_attribute_next(attribute)) 591 | { 592 | size_t keyLength, valueLength; 593 | auto k = myhtml_attribute_key(attribute, &keyLength); 594 | auto v = myhtml_attribute_value(attribute, &valueLength); 595 | myhtml_attribute_add (ret, k, keyLength, v, valueLength, MyEncodingList.default_); 596 | } 597 | 598 | // Return the filled node. 599 | return ret; 600 | } 601 | 602 | // Clone the root 603 | auto destinationRoot = cloneNode(destination, myhtml_tree_node); 604 | auto currentNode = myhtml_tree_node; 605 | auto currentDestNode = destinationRoot; 606 | 607 | while(true) 608 | { 609 | // Add children of current node to queue 610 | for (auto child = myhtml_node_child(currentNode); child != null; child = myhtml_node_next(child)) 611 | copyQueue.insertBack(CopyQueueItem(currentDestNode, child)); 612 | 613 | if (copyQueue.empty) break; 614 | 615 | // Get the first item in queue 616 | auto destParent = copyQueue.front.destParent; 617 | currentNode = copyQueue.front.toCopy; 618 | 619 | // Remove first element of list 620 | copyQueue.removeFront(); 621 | 622 | // Clone the children and add to new parent 623 | currentDestNode = cloneNode(destination, currentNode); 624 | myhtml_node_append_child(destParent, currentDestNode); 625 | } 626 | 627 | return Node(destinationRoot); 628 | } 629 | 630 | myhtml_tree_node_t* myhtml_tree_node = null; 631 | } 632 | 633 | import std.stdio; 634 | 635 | 636 | 637 | /** A lazy range of nodes, usually returned by a search */ 638 | struct NodeRange 639 | { 640 | @disable this(); 641 | 642 | Node opIndex(size_t i) 643 | { 644 | return Node(myhtml_collection.list[i+idx]); 645 | } 646 | 647 | size_t length() { if (myhtml_collection) return myhtml_collection.length; return 0; } 648 | 649 | @property Node front() { if (empty) assert(0, "Can't read nodes from an empty collection"); return this[0]; } 650 | @property bool empty() { return idx >= length(); } 651 | 652 | void popFront() 
{ idx++; } 653 | 654 | 655 | ~this() { Tree.release(myhtml_tree); } 656 | this(this) { Tree.acquire(myhtml_tree); } 657 | 658 | void opAssign(NodeRange rhs) 659 | { 660 | Tree.acquire(rhs.myhtml_tree); 661 | Tree.release(myhtml_tree); 662 | myhtml_tree = rhs.myhtml_tree; 663 | myhtml_collection = rhs.myhtml_collection; 664 | idx = rhs.idx; 665 | } 666 | 667 | private: 668 | 669 | this(myhtml_collection_t* collection, myhtml_tree_t* tree) 670 | { 671 | myhtml_collection = collection; 672 | myhtml_tree = tree; 673 | Tree.acquire(myhtml_tree); 674 | } 675 | 676 | myhtml_collection_t* myhtml_collection; 677 | myhtml_tree_t* myhtml_tree; 678 | 679 | size_t idx = 0; 680 | } 681 | 682 | /** A html tree */ 683 | struct Tree 684 | { 685 | /// Create a new node owned by this tree 686 | Node createNode(MyHtmlTagId tag, MyHtmlNamespace ns = MyHtmlNamespace.html) { return Node(this, tag, ns); } 687 | 688 | /// Fast way to create a text node 689 | Node createTextNode(string text) { auto n = createNode(MyHtmlTagId._text); n.text = text; return n; } 690 | 691 | /// Fast way to create a comment node 692 | Node createCommentNode(string text) { auto n = createNode(MyHtmlTagId._comment); n.text = text; return n; } 693 | 694 | /// See: `Node.byXXXX` 695 | auto byClass(string className) { return document.byClass(className); } 696 | 697 | 698 | /// Ditto 699 | auto byId(string id) { return document.byId(id);} 700 | 701 | 702 | /// Ditto 703 | auto byCssSelector(string selector) { return document.byCssSelector(selector); } 704 | 705 | /// Ditto 706 | auto byTagName(MyHtmlTagId name) { return document.byTagName(name); } 707 | 708 | /// Ditto 709 | auto byTagName(string name) { return document.byTagName(name); } 710 | 711 | /// Ditto 712 | auto byAttributeKey(string name) { return document.byAttributeKey(name); } 713 | 714 | /// Ditto 715 | auto byAttribute(AttributeSearchType st = AttributeSearchType.exact, Flag!"caseInsensitive" caseInsensitive = No.caseInsensitive)(string key, string 
value) 716 | { 717 | return document.byAttribute!(st, caseInsensitive)(key, value); 718 | } 719 | 720 | /// The document root 721 | auto document() 722 | { 723 | return Node(myhtml_tree_get_document(myhtml_tree)); 724 | } 725 | 726 | /// The html node 727 | auto html() 728 | { 729 | return Node(myhtml_tree_get_node_html(myhtml_tree)); 730 | } 731 | 732 | /// The head node 733 | auto head() 734 | { 735 | return Node(myhtml_tree_get_node_head(myhtml_tree)); 736 | } 737 | 738 | /// The body node 739 | auto body() 740 | { 741 | return Node(myhtml_tree_get_node_body(myhtml_tree)); 742 | } 743 | 744 | /// Return the first node 745 | auto first() 746 | { 747 | return Node(myhtml_node_first(myhtml_tree)); 748 | } 749 | 750 | string toString() { Node tmp = first(); return tmp.toString(); } 751 | 752 | this(this) 753 | { 754 | acquire(myhtml_tree); 755 | valid = true; 756 | } 757 | 758 | void opAssign(Tree rhs) 759 | { 760 | acquire(rhs.myhtml_tree); 761 | 762 | if (valid) 763 | release(myhtml_tree); 764 | 765 | myhtml_tree = rhs.myhtml_tree; 766 | valid = true; 767 | } 768 | 769 | ~this() { 770 | if (valid) 771 | release(myhtml_tree); 772 | } 773 | 774 | @property isValid() { return valid; } 775 | 776 | private: 777 | 778 | 779 | void parse(T)(T html, MyEncodingList encoding = MyEncodingList.default_) if (isSomeString!T) 780 | { 781 | auto status = myhtml_parse(myhtml_tree, encoding, html.toStringz, html.length); 782 | if (MYHTML_FAILED(status)) throw new ArrogantException(status); 783 | } 784 | 785 | void parseFragment(T)(T html, MyHtmlTagId wrap = MyHtmlTagId .div, MyEncodingList encoding = MyEncodingList.default_, MyHtmlNamespace ns = MyHtmlNamespace.html) if (isSomeString!T) 786 | { 787 | auto status = myhtml_parse_fragment ( 788 | myhtml_tree, 789 | encoding, 790 | html.toStringz, html.length, 791 | wrap, 792 | ns 793 | ); 794 | if (MYHTML_FAILED(status)) throw new ArrogantException(status); 795 | } 796 | 797 | this(ref Arrogant parent) { this(parent.myhtml); } 798 | 
799 | this(myhtml_t* parent) 800 | { 801 | myhtml_tree = myhtml_tree_create(); 802 | auto status = myhtml_tree_init(myhtml_tree, parent); 803 | if (MYHTML_FAILED(status)) throw new ArrogantException(status); 804 | acquire(myhtml_tree); 805 | valid = true; 806 | } 807 | 808 | bool valid = false; 809 | myhtml_tree_t* myhtml_tree = null; 810 | 811 | static size_t[myhtml_tree_t*] refCount; 812 | 813 | static void acquire(myhtml_tree_t* ptr) { refCount[ptr]++; Arrogant.acquire(myhtml_tree_get_myhtml(ptr)); } 814 | static void release(myhtml_tree_t* ptr) 815 | { 816 | size_t cnt = refCount[ptr]; 817 | assert(cnt > 0); 818 | refCount[ptr] = cnt - 1; 819 | myhtml_t* myhtml = myhtml_tree_get_myhtml(ptr); 820 | 821 | if (cnt == 1) 822 | { 823 | myhtml_tree_destroy(ptr); 824 | refCount.remove(ptr); 825 | } 826 | 827 | Arrogant.release(myhtml); 828 | } 829 | 830 | } 831 | 832 | struct Arrogant 833 | { 834 | /// Parse a html document 835 | Tree parse(T)(T html, MyEncodingList encoding = MyEncodingList.default_) if (isSomeString!T) 836 | { 837 | if (!myhtml) initArrogant(); 838 | Tree tree = Tree(myhtml); 839 | tree.parse(html, encoding); 840 | return tree; 841 | } 842 | 843 | /// Parse a html fragment 844 | Tree parseFragment(T)(T html, MyHtmlTagId wrap = MyHtmlTagId .div, MyEncodingList encoding = MyEncodingList.default_, MyHtmlNamespace ns = MyHtmlNamespace.html,) if (isSomeString!T) 845 | { 846 | if (!myhtml) initArrogant(); 847 | Tree tree = Tree(myhtml); 848 | tree.parseFragment(html, wrap, encoding, ns); 849 | return tree; 850 | } 851 | 852 | 853 | void opAssign(Arrogant rhs) 854 | { 855 | acquire(rhs.myhtml); 856 | 857 | if (valid) 858 | release(myhtml); 859 | 860 | myhtml = rhs.myhtml; 861 | valid = true; 862 | } 863 | 864 | /// 865 | this(MyHtmlOptions options, size_t threadCount = 1, size_t queueSize = 0) 866 | { 867 | initArrogant(options, threadCount, queueSize); 868 | } 869 | 870 | this(this) { acquire(myhtml); valid = true; } 871 | 872 | ~this() { if (valid) 
release(myhtml); } 873 | 874 | @property isValid() { return valid; } 875 | 876 | private: 877 | 878 | void initArrogant(MyHtmlOptions options = MyHtmlOptions.default_, size_t threadCount = 1, size_t queueSize = 0) 879 | { 880 | if (myhtml) return; 881 | 882 | myhtml = myhtml_create(); 883 | auto status = myhtml_init(myhtml, options, threadCount, queueSize); 884 | 885 | acquire(myhtml); 886 | 887 | if (MYHTML_FAILED(status)) 888 | throw new ArrogantException(status); 889 | 890 | valid = true; 891 | } 892 | 893 | myhtml_t* myhtml = null; 894 | bool valid = false; 895 | 896 | static size_t[myhtml_t*] refCount; 897 | static void acquire(myhtml_t* ptr) { refCount[ptr]++; } 898 | static void release(myhtml_t* ptr) 899 | { 900 | size_t cnt = refCount[ptr]; 901 | 902 | assert(cnt > 0); 903 | refCount[ptr] = cnt - 1; 904 | 905 | if (cnt == 1) 906 | { 907 | myhtml_destroy(ptr); 908 | refCount.remove(ptr); 909 | } 910 | } 911 | 912 | } 913 | -------------------------------------------------------------------------------- /source/arrogant_test_app.d: -------------------------------------------------------------------------------- 1 | version(arrogant_test_app) 2 | { 3 | import arrogant; 4 | import std.stdio : writeln, stdout; 5 | 6 | void main() 7 | { 8 | writeln("Simple example --"); 9 | 10 | // Simple example 11 | { 12 | auto src = `
Hello World
`; 13 | auto arrogant = Arrogant(); 14 | auto tree = arrogant.parse(src); 15 | 16 | // Change div content from "Hello World!" to "Hello D!" 17 | tree.byTagName("div").front.innerText = "Hello D!"; 18 | 19 | // Print the edited html 20 | writeln(tree.document); 21 | 22 | assert(tree.document.innerHTML == "
Hello D!
"); 23 | } 24 | 25 | writeln("Css selector --"); 26 | 27 | // Css Selector 28 | { 29 | auto src = ` 30 | 31 | 32 |
First div
33 |
34 |
Inner div
35 | Not this 36 | 37 | This link 38 |
39 | 40 | Other 41 | 42 | `; 43 | 44 | auto arrogant = Arrogant(); 45 | auto tree = arrogant.parse(src); 46 | 47 | // Looks for an anchor next to an img inside a div 48 | auto url = tree.byCssSelector("div > img + a").front["href"]; 49 | writeln("Selector: div > img + a Result:", url); 50 | assert(url == "right_link.html"); 51 | } 52 | 53 | writeln("Ranges --"); 54 | 55 | // Ranges 56 | { 57 | import std.algorithm: startsWith, filter, each; 58 | 59 | auto src = ` 60 | 61 | 62 | Relative link 63 | Relative link 64 | D programming language 65 | 66 | 67 | `; 68 | 69 | auto arrogant = Arrogant(); 70 | auto tree = arrogant.parse(src); 71 | 72 | // Add rel="nofollow" to all http/https links 73 | // https://issues.dlang.org/show_bug.cgi?id=11934 "each" implementation is bugged 74 | tree 75 | .byAttributeKey("href") 76 | .filter!(x => x["href"].startsWith("http://") || x["href"].startsWith("https://")) 77 | .each!(e => e["rel"] = "nofollow"); 78 | 79 | writeln(tree.byAttributeKey("href")); 80 | 81 | // Just one must be changed 82 | assert(tree.byAttribute("rel", "nofollow").length == 1); 83 | assert(tree.byAttribute("rel", "nofollow").front.innerText == "D programming language"); 84 | assert("rel" in tree.byAttribute("href", "http://www.dlang.org").front); 85 | } 86 | 87 | writeln("Cloning --"); 88 | 89 | // Cloning 90 | { 91 | auto src = ` 92 | 93 | 94 |
95 |
Fruit: Apple
96 |
Idx: 150
97 |
98 | 99 | 100 | `; 101 | 102 | auto arrogant = Arrogant(); 103 | auto tree = arrogant.parse(src); 104 | 105 | auto body = tree.body; 106 | 107 | // Retrieve template from page 108 | auto divTemplate = tree.byAttribute("data-custom", "template").front; 109 | 110 | // Clone template for each item 111 | foreach(idx, item; ["pear", "orange", "cherry"]) 112 | { 113 | import std.conv : to; 114 | 115 | auto newDiv = divTemplate.clone(); 116 | auto spans = newDiv.byTagName("span"); 117 | spans[0].innerText = item; 118 | spans[1].innerText = idx.to!string; 119 | 120 | body.appendChild(newDiv); 121 | } 122 | 123 | // Delete template & remove from page 124 | divTemplate.deleteNode(); 125 | 126 | import std.algorithm : map, each; 127 | 128 | // Print out all detected fruits 129 | tree 130 | .byClass("name") 131 | .map!(item => item.innerText) 132 | .each!(x => writeln("Fruit detected: ", x)); 133 | 134 | assert(tree.byCssSelector("body > div").length == 3); 135 | assert(tree.byCssSelector("body > div span.name")[1].innerText == "orange"); 136 | } 137 | 138 | writeln("Moving --"); 139 | 140 | // Moving elements 141 | { 142 | auto src = ` 143 | 144 | 145 |
146 | outside 147 | 148 | 149 | `; 150 | 151 | auto arrogant = Arrogant(); 152 | auto tree = arrogant.parse(src); 153 | auto link = tree.byTagName("a").front; 154 | auto container = tree.byId("container").front; 155 | 156 | // Move link inside div 157 | container.appendChild(link); 158 | 159 | writeln("Link parent tag: ", link.parent.tagId); 160 | 161 | assert(container.firstChild.tagId == MyHtmlTagId.a); 162 | assert(container.firstChild.innerText == "outside"); 163 | assert(tree.byTagName("a").length == 1); 164 | } 165 | 166 | // Get summaries from forum.dlang.org 167 | { 168 | import std.net.curl; 169 | import std.range; 170 | 171 | auto src = "https://forum.dlang.org".get; 172 | auto arrogant = Arrogant(); 173 | auto tree = arrogant.parse(src); 174 | size_t cnt = 0; 175 | 176 | writeln("Recent posts on forum.dlang.org:\n"); 177 | foreach(post; tree.byClass("forum-index-col-lastpost").take(2)) 178 | { 179 | string title = post.byClass("forum-postsummary-subject").front["title"]; 180 | string author = post.byClass("forum-postsummary-author").front["title"]; 181 | string date = post.byCssSelector("span.forum-postsummary-time > span").front["title"]; 182 | 183 | writeln("Title: ", title); 184 | writeln("By: ", author); 185 | writeln("Date: ", date); 186 | writeln("--------------"); 187 | 188 | cnt++; 189 | } 190 | 191 | writeln("Total: ", cnt, " posts"); 192 | assert(cnt != 0); 193 | } 194 | } 195 | } 196 | 197 | // Some internal tests 198 | version(arrogant_tests) 199 | { 200 | 201 | import arrogant; 202 | import std.stdio : writeln, stdout; 203 | 204 | // Testing reference count 205 | /* 206 | unittest { 207 | Node n; 208 | { 209 | auto src = `
Hello World
`; 210 | auto arrogant = Arrogant(); 211 | auto tree = arrogant.parse(src); 212 | 213 | // n lifespan is longer than parent one 214 | n = tree.byTagName("div").front; 215 | } 216 | 217 | assert(n.toString == "
Hello World
"); 218 | } 219 | */ 220 | 221 | /* 222 | unittest { 223 | NodeRange r; 224 | { 225 | auto src = `
Hello World
`; 226 | auto arrogant = Arrogant(); 227 | auto tree = arrogant.parse(src); 228 | 229 | // n lifespan is longer than parent one 230 | r = tree.byTagName("div"); 231 | } 232 | 233 | assert(r.front.toString == "
Hello World
"); 234 | } 235 | */ 236 | unittest { 237 | Tree tree2; 238 | { 239 | Tree tree3; 240 | auto src = `
Hello World
`; 241 | auto arrogant = Arrogant(); 242 | auto tree = arrogant.parse(src); 243 | 244 | tree2 = tree; 245 | tree3 = tree2; 246 | } 247 | 248 | assert(tree2.byTagName("div").front.toString == "
Hello World
"); 249 | } 250 | 251 | void main() {} 252 | 253 | } 254 | --------------------------------------------------------------------------------