├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── c
├── generate_bindings.sh
├── generate_common.d
└── modest.dpp
├── dub.sdl
└── source
├── arrogant
├── c
│ ├── common.d
│ └── modest.d
└── package.d
└── arrogant_test_app.d
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled Object files
2 | *.o
3 | *.obj
4 |
5 | # Compiled Dynamic libraries
6 | *.so
7 | *.dylib
8 | *.dll
9 |
10 | # Compiled Static libraries
11 | *.a
12 | *.lib
13 |
14 | # Executables
15 | *.exe
16 |
17 | # DUB
18 | .dub
19 | docs.json
20 | __dummy.html
21 | docs/
22 |
23 | # Code coverage
24 | *.lst
25 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "c/Modest"]
2 | path = c/Modest
3 | url = https://github.com/lexborisov/Modest.git
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 2night SpA
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # arrogant
2 | Fully conformant HTML5 DOM library with CSS4 selectors. Based on [Modest](https://github.com/lexborisov/Modest).
3 |
4 | Tested on Linux. Should work fine on OSX and Windows.
5 |
6 | # prerequisites: how to build & install modest
7 |
8 | Modest is written in pure C, without any external dependency.
9 | Just fetch source code and compile.
10 |
11 | ```
12 | git clone https://github.com/2night/arrogant.git
13 | cd arrogant
14 | git submodule update --init
15 | cd c/Modest
16 | make
17 | sudo make install
18 | sudo ldconfig
19 | ```
20 |
21 | # run an example
22 |
23 | ```
24 | dub -c arrogant_test_app
25 | ```
26 |
27 | # hello world
28 |
29 | ```d
30 | import arrogant;
31 | import std.stdio : writeln, stdout;
32 |
33 | void main()
34 | {
35 | auto src = `<html><head></head><body><div>Hello World</div></body></html>`;
36 | auto arrogant = Arrogant();
37 | auto tree = arrogant.parse(src);
38 |
39 | // Change div content from "Hello World" to "Hello D!"
40 | tree.byTagName("div").front.innerText = "Hello D!";
41 |
42 | // Print the edited html
43 | writeln(tree.document);
44 |
45 | assert(tree.document.innerHTML == "<html><head></head><body><div>Hello D!</div></body></html>");
46 | }
47 | ```
48 | # get data from webpage
49 |
50 | ```d
51 | import arrogant;
52 | import std.net.curl;
53 | import std.stdio : writeln, stdout;
54 |
55 | void main()
56 | {
57 | auto src = "https://forum.dlang.org".get;
58 | auto arrogant = Arrogant();
59 | auto tree = arrogant.parse(src);
60 | size_t cnt = 0;
61 |
62 | writeln("Recent posts on forum.dlang.org:\n");
63 |
64 | // Search for summary divs
65 | foreach(post; tree.byClass("forum-index-col-lastpost"))
66 | {
67 | string title = post.byClass("forum-postsummary-subject").front["title"];
68 | string author = post.byClass("forum-postsummary-author").front["title"];
69 | string date = post.byCssSelector("span.forum-postsummary-time > span").front["title"];
70 |
71 | writeln("Title: ", title);
72 | writeln("By: ", author);
73 | writeln("Date: ", date);
74 | writeln("--------------");
75 |
76 | cnt++;
77 | }
78 |
79 | writeln("Total: ", cnt, " posts");
80 | }
81 | ```
82 |
83 | # more
84 |
85 | Check [this code](https://github.com/2night/arrogant/blob/master/source/arrogant_test_app.d) or [read documentation](http://arrogant.dpldocs.info/index.html)
86 |
--------------------------------------------------------------------------------
/c/generate_bindings.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Regenerates the D bindings under source/arrogant/c from the Modest C headers.
#
# Steps:
#   1. run dpp (d++) on modest.dpp to produce modest.d
#   2. strip `volatile` from the generated file
#   3. run generate_common.d to produce common.d
#   4. move both results into the source tree, prefixing modest.d with its
#      module declaration
#
# The script uses BASH_SOURCE, so it must be executed by bash.

# Stop at the first failing step so a broken intermediate file never
# overwrites the checked-in bindings.
set -euo pipefail

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$DIR"
INCLUDE="$DIR/Modest/include/"
TARGET="$DIR/../source/arrogant/c"

# Generate bindings from headers with dpp: http://dpp.dub.pm
d++ --include-path "$INCLUDE" --preprocess-only modest.dpp

# Workaround for a dpp bug?
# `-i.bak` is accepted by both GNU and BSD sed; the backup is removed right away.
sed -i.bak 's/volatile//g' modest.d
rm -f modest.d.bak

# Generate d-style structs
rdmd generate_common.d

mv common.d "$TARGET/common.d"
{
    echo 'module arrogant.c.modest;'
    cat modest.d
} > "$TARGET/modest.d"

rm modest.d
--------------------------------------------------------------------------------
/c/generate_common.d:
--------------------------------------------------------------------------------
1 | import std.string : replace, toLower, capitalize;
2 | import std.algorithm : splitter, joiner, map, startsWith, canFind;
3 | import std.stdio: writeln;
4 | import std.conv : to, text;
5 | import std.file : append, remove, exists;
6 |
7 | import modest;
8 |
9 |
/**
 * Builds the D source text of an enum called `name` that re-exports every
 * member of the C enum `T` under a camelCase identifier.
 *
 * For each member the text `prefix` is removed, the remainder is split on
 * `_`, every non-empty piece is capitalized and the pieces are joined. An
 * empty piece (produced by a leading or doubled underscore) becomes a literal
 * `_`. The first letter of the result (or the first letter after a leading
 * `_`) is lower-cased. Identifiers that match one of the reserved words
 * listed below get a trailing `_`.
 *
 * Params:
 *   name   = identifier of the generated enum
 *   T      = source enum type
 *   prefix = text removed from every member name
 *
 * Returns: the generated enum declaration as a string
 */
string WriteEnum(string name, T, string prefix)() if (is(T == enum))
{
    // Words that cannot be used verbatim as D identifiers.
    auto reserved = ["default", "switch", "template"];

    string output = "enum " ~ name ~ " {\n";

    static foreach (member; __traits(allMembers, T))
    {{
        string ident = member
            .replace(prefix, "")
            .splitter("_")
            .map!(piece => piece.length == 0 ? "_" : piece.toLower.capitalize)
            .joiner()
            .to!string;

        // Lower-case the first letter, skipping a leading underscore.
        ident = ident.startsWith("_")
            ? text("_", ident[1].toLower, ident[2 .. $])
            : text(ident[0].toLower, ident[1 .. $]);

        if (reserved.canFind(ident))
            ident ~= "_";

        output ~= text(` `, ident, ` = `, T.stringof, `.`, member, ",\n");
    }}

    output ~= "}\n";
    return output;
}
31 |
32 |
/**
 * Writes `common.d` in the current directory.
 *
 * The file contains the module header, the `MYHTML_FAILED` helper and the
 * D-style enums produced by `WriteEnum` for the encoding list, tag ids,
 * parser options and namespaces.
 */
void main()
{
    import std.file : write;

    // Build the whole module in memory and write it in one call. `write`
    // replaces any previous file, so no separate exists/remove step is needed
    // and the file is not reopened for every fragment.
    immutable source =
        "module arrogant.c.common;\n"
        ~ "import arrogant.c.modest;\n"
        ~ "auto MYHTML_FAILED(T)(auto ref T _status_) { return _status_ != myhtml_status_t.MyHTML_STATUS_OK; }\n"
        ~ WriteEnum!("MyEncodingList", myencoding_list, "MyENCODING_")()
        ~ WriteEnum!("MyHtmlTagId", myhtml_tags, "MyHTML_TAG_")()
        ~ WriteEnum!("MyHtmlOptions", myhtml_options, "MyHTML_OPTIONS_")()
        ~ WriteEnum!("MyHtmlNamespace", myhtml_namespace, "MyHTML_NAMESPACE_")();

    write("common.d", source);
}
44 |
--------------------------------------------------------------------------------
/c/modest.dpp:
--------------------------------------------------------------------------------
1 |
2 | #include "myencoding/encoding.h"
3 |
4 |
5 | #include "myhtml/tree.h"
6 | #include "myhtml/api.h"
7 | #include "myhtml/myhtml.h"
8 | #include "mycss/mycss.h"
9 | #include "mycss/api.h"
10 | #include "modest/finder/finder.h"
11 |
--------------------------------------------------------------------------------
/dub.sdl:
--------------------------------------------------------------------------------
1 | name "arrogant"
2 | description "Fully conformant HTML5 dom library with CSS4 selectors."
3 | authors "Andrea Fontana"
4 | copyright "Copyright © 2018, 2night.it"
5 | license "MIT"
6 | libs "modest"
7 |
8 | configuration "arrogant" {
9 | targetType "autodetect"
10 | }
11 |
12 | configuration "arrogant_test_app" {
13 | targetType "executable"
14 | versions "arrogant_test_app"
15 | }
16 |
17 | configuration "arrogant_tests" {
18 | targetType "executable"
19 | versions "arrogant_tests"
20 | }
21 |
--------------------------------------------------------------------------------
/source/arrogant/c/common.d:
--------------------------------------------------------------------------------
1 | module arrogant.c.common;
2 | import arrogant.c.modest;
3 | auto MYHTML_FAILED(T)(auto ref T _status_) { return _status_ != myhtml_status_t.MyHTML_STATUS_OK; }
4 | enum MyEncodingList {
5 | default_ = myencoding_list.MyENCODING_DEFAULT,
6 | notDetermined = myencoding_list.MyENCODING_NOT_DETERMINED,
7 | utf8 = myencoding_list.MyENCODING_UTF_8,
8 | utf16le = myencoding_list.MyENCODING_UTF_16LE,
9 | utf16be = myencoding_list.MyENCODING_UTF_16BE,
10 | xUserDefined = myencoding_list.MyENCODING_X_USER_DEFINED,
11 | big5 = myencoding_list.MyENCODING_BIG5,
12 | eucJp = myencoding_list.MyENCODING_EUC_JP,
13 | eucKr = myencoding_list.MyENCODING_EUC_KR,
14 | gb18030 = myencoding_list.MyENCODING_GB18030,
15 | gbk = myencoding_list.MyENCODING_GBK,
16 | ibm866 = myencoding_list.MyENCODING_IBM866,
17 | iso2022Jp = myencoding_list.MyENCODING_ISO_2022_JP,
18 | iso885910 = myencoding_list.MyENCODING_ISO_8859_10,
19 | iso885913 = myencoding_list.MyENCODING_ISO_8859_13,
20 | iso885914 = myencoding_list.MyENCODING_ISO_8859_14,
21 | iso885915 = myencoding_list.MyENCODING_ISO_8859_15,
22 | iso885916 = myencoding_list.MyENCODING_ISO_8859_16,
23 | iso88592 = myencoding_list.MyENCODING_ISO_8859_2,
24 | iso88593 = myencoding_list.MyENCODING_ISO_8859_3,
25 | iso88594 = myencoding_list.MyENCODING_ISO_8859_4,
26 | iso88595 = myencoding_list.MyENCODING_ISO_8859_5,
27 | iso88596 = myencoding_list.MyENCODING_ISO_8859_6,
28 | iso88597 = myencoding_list.MyENCODING_ISO_8859_7,
29 | iso88598 = myencoding_list.MyENCODING_ISO_8859_8,
30 | iso88598I = myencoding_list.MyENCODING_ISO_8859_8_I,
31 | koi8R = myencoding_list.MyENCODING_KOI8_R,
32 | koi8U = myencoding_list.MyENCODING_KOI8_U,
33 | macintosh = myencoding_list.MyENCODING_MACINTOSH,
34 | shiftJis = myencoding_list.MyENCODING_SHIFT_JIS,
35 | windows1250 = myencoding_list.MyENCODING_WINDOWS_1250,
36 | windows1251 = myencoding_list.MyENCODING_WINDOWS_1251,
37 | windows1252 = myencoding_list.MyENCODING_WINDOWS_1252,
38 | windows1253 = myencoding_list.MyENCODING_WINDOWS_1253,
39 | windows1254 = myencoding_list.MyENCODING_WINDOWS_1254,
40 | windows1255 = myencoding_list.MyENCODING_WINDOWS_1255,
41 | windows1256 = myencoding_list.MyENCODING_WINDOWS_1256,
42 | windows1257 = myencoding_list.MyENCODING_WINDOWS_1257,
43 | windows1258 = myencoding_list.MyENCODING_WINDOWS_1258,
44 | windows874 = myencoding_list.MyENCODING_WINDOWS_874,
45 | xMacCyrillic = myencoding_list.MyENCODING_X_MAC_CYRILLIC,
46 | lastEntry = myencoding_list.MyENCODING_LAST_ENTRY,
47 | }
48 | enum MyHtmlTagId {
49 | _undef = myhtml_tags.MyHTML_TAG__UNDEF,
50 | _text = myhtml_tags.MyHTML_TAG__TEXT,
51 | _comment = myhtml_tags.MyHTML_TAG__COMMENT,
52 | _doctype = myhtml_tags.MyHTML_TAG__DOCTYPE,
53 | a = myhtml_tags.MyHTML_TAG_A,
54 | abbr = myhtml_tags.MyHTML_TAG_ABBR,
55 | acronym = myhtml_tags.MyHTML_TAG_ACRONYM,
56 | address = myhtml_tags.MyHTML_TAG_ADDRESS,
57 | annotationXml = myhtml_tags.MyHTML_TAG_ANNOTATION_XML,
58 | applet = myhtml_tags.MyHTML_TAG_APPLET,
59 | area = myhtml_tags.MyHTML_TAG_AREA,
60 | article = myhtml_tags.MyHTML_TAG_ARTICLE,
61 | aside = myhtml_tags.MyHTML_TAG_ASIDE,
62 | audio = myhtml_tags.MyHTML_TAG_AUDIO,
63 | b = myhtml_tags.MyHTML_TAG_B,
64 | base = myhtml_tags.MyHTML_TAG_BASE,
65 | basefont = myhtml_tags.MyHTML_TAG_BASEFONT,
66 | bdi = myhtml_tags.MyHTML_TAG_BDI,
67 | bdo = myhtml_tags.MyHTML_TAG_BDO,
68 | bgsound = myhtml_tags.MyHTML_TAG_BGSOUND,
69 | big = myhtml_tags.MyHTML_TAG_BIG,
70 | blink = myhtml_tags.MyHTML_TAG_BLINK,
71 | blockquote = myhtml_tags.MyHTML_TAG_BLOCKQUOTE,
72 | body = myhtml_tags.MyHTML_TAG_BODY,
73 | br = myhtml_tags.MyHTML_TAG_BR,
74 | button = myhtml_tags.MyHTML_TAG_BUTTON,
75 | canvas = myhtml_tags.MyHTML_TAG_CANVAS,
76 | caption = myhtml_tags.MyHTML_TAG_CAPTION,
77 | center = myhtml_tags.MyHTML_TAG_CENTER,
78 | cite = myhtml_tags.MyHTML_TAG_CITE,
79 | code = myhtml_tags.MyHTML_TAG_CODE,
80 | col = myhtml_tags.MyHTML_TAG_COL,
81 | colgroup = myhtml_tags.MyHTML_TAG_COLGROUP,
82 | command = myhtml_tags.MyHTML_TAG_COMMAND,
83 | comment = myhtml_tags.MyHTML_TAG_COMMENT,
84 | datalist = myhtml_tags.MyHTML_TAG_DATALIST,
85 | dd = myhtml_tags.MyHTML_TAG_DD,
86 | del = myhtml_tags.MyHTML_TAG_DEL,
87 | details = myhtml_tags.MyHTML_TAG_DETAILS,
88 | dfn = myhtml_tags.MyHTML_TAG_DFN,
89 | dialog = myhtml_tags.MyHTML_TAG_DIALOG,
90 | dir = myhtml_tags.MyHTML_TAG_DIR,
91 | div = myhtml_tags.MyHTML_TAG_DIV,
92 | dl = myhtml_tags.MyHTML_TAG_DL,
93 | dt = myhtml_tags.MyHTML_TAG_DT,
94 | em = myhtml_tags.MyHTML_TAG_EM,
95 | embed = myhtml_tags.MyHTML_TAG_EMBED,
96 | fieldset = myhtml_tags.MyHTML_TAG_FIELDSET,
97 | figcaption = myhtml_tags.MyHTML_TAG_FIGCAPTION,
98 | figure = myhtml_tags.MyHTML_TAG_FIGURE,
99 | font = myhtml_tags.MyHTML_TAG_FONT,
100 | footer = myhtml_tags.MyHTML_TAG_FOOTER,
101 | form = myhtml_tags.MyHTML_TAG_FORM,
102 | frame = myhtml_tags.MyHTML_TAG_FRAME,
103 | frameset = myhtml_tags.MyHTML_TAG_FRAMESET,
104 | h1 = myhtml_tags.MyHTML_TAG_H1,
105 | h2 = myhtml_tags.MyHTML_TAG_H2,
106 | h3 = myhtml_tags.MyHTML_TAG_H3,
107 | h4 = myhtml_tags.MyHTML_TAG_H4,
108 | h5 = myhtml_tags.MyHTML_TAG_H5,
109 | h6 = myhtml_tags.MyHTML_TAG_H6,
110 | head = myhtml_tags.MyHTML_TAG_HEAD,
111 | header = myhtml_tags.MyHTML_TAG_HEADER,
112 | hgroup = myhtml_tags.MyHTML_TAG_HGROUP,
113 | hr = myhtml_tags.MyHTML_TAG_HR,
114 | html = myhtml_tags.MyHTML_TAG_HTML,
115 | i = myhtml_tags.MyHTML_TAG_I,
116 | iframe = myhtml_tags.MyHTML_TAG_IFRAME,
117 | image = myhtml_tags.MyHTML_TAG_IMAGE,
118 | img = myhtml_tags.MyHTML_TAG_IMG,
119 | input = myhtml_tags.MyHTML_TAG_INPUT,
120 | ins = myhtml_tags.MyHTML_TAG_INS,
121 | isindex = myhtml_tags.MyHTML_TAG_ISINDEX,
122 | kbd = myhtml_tags.MyHTML_TAG_KBD,
123 | keygen = myhtml_tags.MyHTML_TAG_KEYGEN,
124 | label = myhtml_tags.MyHTML_TAG_LABEL,
125 | legend = myhtml_tags.MyHTML_TAG_LEGEND,
126 | li = myhtml_tags.MyHTML_TAG_LI,
127 | link = myhtml_tags.MyHTML_TAG_LINK,
128 | listing = myhtml_tags.MyHTML_TAG_LISTING,
129 | main = myhtml_tags.MyHTML_TAG_MAIN,
130 | map = myhtml_tags.MyHTML_TAG_MAP,
131 | mark = myhtml_tags.MyHTML_TAG_MARK,
132 | marquee = myhtml_tags.MyHTML_TAG_MARQUEE,
133 | menu = myhtml_tags.MyHTML_TAG_MENU,
134 | menuitem = myhtml_tags.MyHTML_TAG_MENUITEM,
135 | meta = myhtml_tags.MyHTML_TAG_META,
136 | meter = myhtml_tags.MyHTML_TAG_METER,
137 | mtext = myhtml_tags.MyHTML_TAG_MTEXT,
138 | nav = myhtml_tags.MyHTML_TAG_NAV,
139 | nobr = myhtml_tags.MyHTML_TAG_NOBR,
140 | noembed = myhtml_tags.MyHTML_TAG_NOEMBED,
141 | noframes = myhtml_tags.MyHTML_TAG_NOFRAMES,
142 | noscript = myhtml_tags.MyHTML_TAG_NOSCRIPT,
143 | object = myhtml_tags.MyHTML_TAG_OBJECT,
144 | ol = myhtml_tags.MyHTML_TAG_OL,
145 | optgroup = myhtml_tags.MyHTML_TAG_OPTGROUP,
146 | option = myhtml_tags.MyHTML_TAG_OPTION,
147 | output = myhtml_tags.MyHTML_TAG_OUTPUT,
148 | p = myhtml_tags.MyHTML_TAG_P,
149 | param = myhtml_tags.MyHTML_TAG_PARAM,
150 | plaintext = myhtml_tags.MyHTML_TAG_PLAINTEXT,
151 | pre = myhtml_tags.MyHTML_TAG_PRE,
152 | progress = myhtml_tags.MyHTML_TAG_PROGRESS,
153 | q = myhtml_tags.MyHTML_TAG_Q,
154 | rb = myhtml_tags.MyHTML_TAG_RB,
155 | rp = myhtml_tags.MyHTML_TAG_RP,
156 | rt = myhtml_tags.MyHTML_TAG_RT,
157 | rtc = myhtml_tags.MyHTML_TAG_RTC,
158 | ruby = myhtml_tags.MyHTML_TAG_RUBY,
159 | s = myhtml_tags.MyHTML_TAG_S,
160 | samp = myhtml_tags.MyHTML_TAG_SAMP,
161 | script = myhtml_tags.MyHTML_TAG_SCRIPT,
162 | section = myhtml_tags.MyHTML_TAG_SECTION,
163 | select = myhtml_tags.MyHTML_TAG_SELECT,
164 | small = myhtml_tags.MyHTML_TAG_SMALL,
165 | source = myhtml_tags.MyHTML_TAG_SOURCE,
166 | span = myhtml_tags.MyHTML_TAG_SPAN,
167 | strike = myhtml_tags.MyHTML_TAG_STRIKE,
168 | strong = myhtml_tags.MyHTML_TAG_STRONG,
169 | style = myhtml_tags.MyHTML_TAG_STYLE,
170 | sub = myhtml_tags.MyHTML_TAG_SUB,
171 | summary = myhtml_tags.MyHTML_TAG_SUMMARY,
172 | sup = myhtml_tags.MyHTML_TAG_SUP,
173 | svg = myhtml_tags.MyHTML_TAG_SVG,
174 | table = myhtml_tags.MyHTML_TAG_TABLE,
175 | tbody = myhtml_tags.MyHTML_TAG_TBODY,
176 | td = myhtml_tags.MyHTML_TAG_TD,
177 | template_ = myhtml_tags.MyHTML_TAG_TEMPLATE,
178 | textarea = myhtml_tags.MyHTML_TAG_TEXTAREA,
179 | tfoot = myhtml_tags.MyHTML_TAG_TFOOT,
180 | th = myhtml_tags.MyHTML_TAG_TH,
181 | thead = myhtml_tags.MyHTML_TAG_THEAD,
182 | time = myhtml_tags.MyHTML_TAG_TIME,
183 | title = myhtml_tags.MyHTML_TAG_TITLE,
184 | tr = myhtml_tags.MyHTML_TAG_TR,
185 | track = myhtml_tags.MyHTML_TAG_TRACK,
186 | tt = myhtml_tags.MyHTML_TAG_TT,
187 | u = myhtml_tags.MyHTML_TAG_U,
188 | ul = myhtml_tags.MyHTML_TAG_UL,
189 | var = myhtml_tags.MyHTML_TAG_VAR,
190 | video = myhtml_tags.MyHTML_TAG_VIDEO,
191 | wbr = myhtml_tags.MyHTML_TAG_WBR,
192 | xmp = myhtml_tags.MyHTML_TAG_XMP,
193 | altglyph = myhtml_tags.MyHTML_TAG_ALTGLYPH,
194 | altglyphdef = myhtml_tags.MyHTML_TAG_ALTGLYPHDEF,
195 | altglyphitem = myhtml_tags.MyHTML_TAG_ALTGLYPHITEM,
196 | animate = myhtml_tags.MyHTML_TAG_ANIMATE,
197 | animatecolor = myhtml_tags.MyHTML_TAG_ANIMATECOLOR,
198 | animatemotion = myhtml_tags.MyHTML_TAG_ANIMATEMOTION,
199 | animatetransform = myhtml_tags.MyHTML_TAG_ANIMATETRANSFORM,
200 | circle = myhtml_tags.MyHTML_TAG_CIRCLE,
201 | clippath = myhtml_tags.MyHTML_TAG_CLIPPATH,
202 | colorProfile = myhtml_tags.MyHTML_TAG_COLOR_PROFILE,
203 | cursor = myhtml_tags.MyHTML_TAG_CURSOR,
204 | defs = myhtml_tags.MyHTML_TAG_DEFS,
205 | desc = myhtml_tags.MyHTML_TAG_DESC,
206 | ellipse = myhtml_tags.MyHTML_TAG_ELLIPSE,
207 | feblend = myhtml_tags.MyHTML_TAG_FEBLEND,
208 | fecolormatrix = myhtml_tags.MyHTML_TAG_FECOLORMATRIX,
209 | fecomponenttransfer = myhtml_tags.MyHTML_TAG_FECOMPONENTTRANSFER,
210 | fecomposite = myhtml_tags.MyHTML_TAG_FECOMPOSITE,
211 | feconvolvematrix = myhtml_tags.MyHTML_TAG_FECONVOLVEMATRIX,
212 | fediffuselighting = myhtml_tags.MyHTML_TAG_FEDIFFUSELIGHTING,
213 | fedisplacementmap = myhtml_tags.MyHTML_TAG_FEDISPLACEMENTMAP,
214 | fedistantlight = myhtml_tags.MyHTML_TAG_FEDISTANTLIGHT,
215 | fedropshadow = myhtml_tags.MyHTML_TAG_FEDROPSHADOW,
216 | feflood = myhtml_tags.MyHTML_TAG_FEFLOOD,
217 | fefunca = myhtml_tags.MyHTML_TAG_FEFUNCA,
218 | fefuncb = myhtml_tags.MyHTML_TAG_FEFUNCB,
219 | fefuncg = myhtml_tags.MyHTML_TAG_FEFUNCG,
220 | fefuncr = myhtml_tags.MyHTML_TAG_FEFUNCR,
221 | fegaussianblur = myhtml_tags.MyHTML_TAG_FEGAUSSIANBLUR,
222 | feimage = myhtml_tags.MyHTML_TAG_FEIMAGE,
223 | femerge = myhtml_tags.MyHTML_TAG_FEMERGE,
224 | femergenode = myhtml_tags.MyHTML_TAG_FEMERGENODE,
225 | femorphology = myhtml_tags.MyHTML_TAG_FEMORPHOLOGY,
226 | feoffset = myhtml_tags.MyHTML_TAG_FEOFFSET,
227 | fepointlight = myhtml_tags.MyHTML_TAG_FEPOINTLIGHT,
228 | fespecularlighting = myhtml_tags.MyHTML_TAG_FESPECULARLIGHTING,
229 | fespotlight = myhtml_tags.MyHTML_TAG_FESPOTLIGHT,
230 | fetile = myhtml_tags.MyHTML_TAG_FETILE,
231 | feturbulence = myhtml_tags.MyHTML_TAG_FETURBULENCE,
232 | filter = myhtml_tags.MyHTML_TAG_FILTER,
233 | fontFace = myhtml_tags.MyHTML_TAG_FONT_FACE,
234 | fontFaceFormat = myhtml_tags.MyHTML_TAG_FONT_FACE_FORMAT,
235 | fontFaceName = myhtml_tags.MyHTML_TAG_FONT_FACE_NAME,
236 | fontFaceSrc = myhtml_tags.MyHTML_TAG_FONT_FACE_SRC,
237 | fontFaceUri = myhtml_tags.MyHTML_TAG_FONT_FACE_URI,
238 | foreignobject = myhtml_tags.MyHTML_TAG_FOREIGNOBJECT,
239 | g = myhtml_tags.MyHTML_TAG_G,
240 | glyph = myhtml_tags.MyHTML_TAG_GLYPH,
241 | glyphref = myhtml_tags.MyHTML_TAG_GLYPHREF,
242 | hkern = myhtml_tags.MyHTML_TAG_HKERN,
243 | line = myhtml_tags.MyHTML_TAG_LINE,
244 | lineargradient = myhtml_tags.MyHTML_TAG_LINEARGRADIENT,
245 | marker = myhtml_tags.MyHTML_TAG_MARKER,
246 | mask = myhtml_tags.MyHTML_TAG_MASK,
247 | metadata = myhtml_tags.MyHTML_TAG_METADATA,
248 | missingGlyph = myhtml_tags.MyHTML_TAG_MISSING_GLYPH,
249 | mpath = myhtml_tags.MyHTML_TAG_MPATH,
250 | path = myhtml_tags.MyHTML_TAG_PATH,
251 | pattern = myhtml_tags.MyHTML_TAG_PATTERN,
252 | polygon = myhtml_tags.MyHTML_TAG_POLYGON,
253 | polyline = myhtml_tags.MyHTML_TAG_POLYLINE,
254 | radialgradient = myhtml_tags.MyHTML_TAG_RADIALGRADIENT,
255 | rect = myhtml_tags.MyHTML_TAG_RECT,
256 | set = myhtml_tags.MyHTML_TAG_SET,
257 | stop = myhtml_tags.MyHTML_TAG_STOP,
258 | switch_ = myhtml_tags.MyHTML_TAG_SWITCH,
259 | symbol = myhtml_tags.MyHTML_TAG_SYMBOL,
260 | text = myhtml_tags.MyHTML_TAG_TEXT,
261 | textpath = myhtml_tags.MyHTML_TAG_TEXTPATH,
262 | tref = myhtml_tags.MyHTML_TAG_TREF,
263 | tspan = myhtml_tags.MyHTML_TAG_TSPAN,
264 | use = myhtml_tags.MyHTML_TAG_USE,
265 | view = myhtml_tags.MyHTML_TAG_VIEW,
266 | vkern = myhtml_tags.MyHTML_TAG_VKERN,
267 | math = myhtml_tags.MyHTML_TAG_MATH,
268 | maction = myhtml_tags.MyHTML_TAG_MACTION,
269 | maligngroup = myhtml_tags.MyHTML_TAG_MALIGNGROUP,
270 | malignmark = myhtml_tags.MyHTML_TAG_MALIGNMARK,
271 | menclose = myhtml_tags.MyHTML_TAG_MENCLOSE,
272 | merror = myhtml_tags.MyHTML_TAG_MERROR,
273 | mfenced = myhtml_tags.MyHTML_TAG_MFENCED,
274 | mfrac = myhtml_tags.MyHTML_TAG_MFRAC,
275 | mglyph = myhtml_tags.MyHTML_TAG_MGLYPH,
276 | mi = myhtml_tags.MyHTML_TAG_MI,
277 | mlabeledtr = myhtml_tags.MyHTML_TAG_MLABELEDTR,
278 | mlongdiv = myhtml_tags.MyHTML_TAG_MLONGDIV,
279 | mmultiscripts = myhtml_tags.MyHTML_TAG_MMULTISCRIPTS,
280 | mn = myhtml_tags.MyHTML_TAG_MN,
281 | mo = myhtml_tags.MyHTML_TAG_MO,
282 | mover = myhtml_tags.MyHTML_TAG_MOVER,
283 | mpadded = myhtml_tags.MyHTML_TAG_MPADDED,
284 | mphantom = myhtml_tags.MyHTML_TAG_MPHANTOM,
285 | mroot = myhtml_tags.MyHTML_TAG_MROOT,
286 | mrow = myhtml_tags.MyHTML_TAG_MROW,
287 | ms = myhtml_tags.MyHTML_TAG_MS,
288 | mscarries = myhtml_tags.MyHTML_TAG_MSCARRIES,
289 | mscarry = myhtml_tags.MyHTML_TAG_MSCARRY,
290 | msgroup = myhtml_tags.MyHTML_TAG_MSGROUP,
291 | msline = myhtml_tags.MyHTML_TAG_MSLINE,
292 | mspace = myhtml_tags.MyHTML_TAG_MSPACE,
293 | msqrt = myhtml_tags.MyHTML_TAG_MSQRT,
294 | msrow = myhtml_tags.MyHTML_TAG_MSROW,
295 | mstack = myhtml_tags.MyHTML_TAG_MSTACK,
296 | mstyle = myhtml_tags.MyHTML_TAG_MSTYLE,
297 | msub = myhtml_tags.MyHTML_TAG_MSUB,
298 | msup = myhtml_tags.MyHTML_TAG_MSUP,
299 | msubsup = myhtml_tags.MyHTML_TAG_MSUBSUP,
300 | _endOfFile = myhtml_tags.MyHTML_TAG__END_OF_FILE,
301 | firstEntry = myhtml_tags.MyHTML_TAG_FIRST_ENTRY,
302 | lastEntry = myhtml_tags.MyHTML_TAG_LAST_ENTRY,
303 | }
304 | enum MyHtmlOptions {
305 | default_ = myhtml_options.MyHTML_OPTIONS_DEFAULT,
306 | parseModeSingle = myhtml_options.MyHTML_OPTIONS_PARSE_MODE_SINGLE,
307 | parseModeAllInOne = myhtml_options.MyHTML_OPTIONS_PARSE_MODE_ALL_IN_ONE,
308 | parseModeSeparately = myhtml_options.MyHTML_OPTIONS_PARSE_MODE_SEPARATELY,
309 | }
310 | enum MyHtmlNamespace {
311 | undef = myhtml_namespace.MyHTML_NAMESPACE_UNDEF,
312 | html = myhtml_namespace.MyHTML_NAMESPACE_HTML,
313 | mathml = myhtml_namespace.MyHTML_NAMESPACE_MATHML,
314 | svg = myhtml_namespace.MyHTML_NAMESPACE_SVG,
315 | xlink = myhtml_namespace.MyHTML_NAMESPACE_XLINK,
316 | xml = myhtml_namespace.MyHTML_NAMESPACE_XML,
317 | xmlns = myhtml_namespace.MyHTML_NAMESPACE_XMLNS,
318 | any = myhtml_namespace.MyHTML_NAMESPACE_ANY,
319 | lastEntry = myhtml_namespace.MyHTML_NAMESPACE_LAST_ENTRY,
320 | }
321 |
--------------------------------------------------------------------------------
/source/arrogant/package.d:
--------------------------------------------------------------------------------
1 | module arrogant;
2 |
3 | import arrogant.c.modest;
4 |
5 | // Public enums & stuffs
6 | public import arrogant.c.common;
7 |
8 | import std.traits;
9 | import std.conv : to;
10 | import std.typecons : Flag, Yes, No;
11 | import std.string : toStringz;
12 |
/**
 * Use this enum with `node.byAttribute()` search.
 *
 * Note: `hypenSeparated` (sic) keeps its historical spelling because it is
 * part of the public API.
 */
enum AttributeSearchType
{
    ///
    exact,
    ///
    startsWith,
    ///
    endsWith,
    ///
    contains,
    ///
    spaceSeparated,
    ///
    hypenSeparated
}
23 |
/**
 * Exception type of this library. It is built from a numeric error code.
 *
 * The code is embedded in the message and is also available through `status`,
 * so callers can inspect it without parsing the message text.
 */
class ArrogantException : Exception
{
    /// The numeric code this exception was constructed with.
    immutable uint status;

    /**
     * Params:
     *   err  = numeric error code
     *   file = source file reported by the exception (defaults to the throw site)
     *   line = source line reported by the exception (defaults to the throw site)
     */
    this(uint err, string file = __FILE__, size_t line = __LINE__)
    {
        import std.conv : to;
        super("Arrogant exception: " ~ to!string(err), file, line);
        status = err;
    }
}
32 |
33 |
/**
 * An html attribute of a tag.
 *
 * Key and value are copied into D strings when the struct is constructed.
 */
struct Attribute
{
    /** The attribute key */
    @property string key() const { return _key; }

    /** The attribute value */
    @property string value() const { return _value; }

    /// Copies key and value out of the native attribute, using the lengths
    /// reported by the library.
    private this(myhtml_tree_attr_t* attr)
    {
        size_t keyLength;
        _key = myhtml_attribute_key(attr, &keyLength)[0 .. keyLength].to!string;

        size_t valueLength;
        _value = myhtml_attribute_value(attr, &valueLength)[0 .. valueLength].to!string;
    }

    @disable this();

    private string _key;
    private string _value;
}
61 |
62 |
63 |
64 | /** A HTML Node */
65 | struct Node
66 | {
67 | @disable this();
68 |
/// Check if node is null / empty
bool isNull()
{
    return myhtml_tree_node is null;
}
71 |
72 |
/**
 * Get the tag id for this node (ex: a, div, body, ...).
 * `tag` returns the same id converted to a string.
 * Examples:
 * --------------------
 * tree.body.tag.writeln(); // prints "body"
 * --------------------
 */
MyHtmlTagId tagId()
{
    return cast(MyHtmlTagId) myhtml_tree_node.tag_id;
}

/// Ditto
string tag()
{
    return to!string(tagId);
}
82 |
/**
 * "in" operator: tells whether the node has an attribute named `key`.
 * Examples:
 * --------------------
 * if ("href" in node) writeln("Link: ", node["href"]);
 * --------------------
 */
bool opBinaryRight(string op)(string key) if (op == "in")
{
    return myhtml_attribute_by_key(myhtml_tree_node, key.toStringz, key.length) !is null;
}
95 |
/**
 * Read an attribute from node.
 * Returns: a `Nullable!string` holding the value, or a null `Nullable` when
 *          the node has no attribute with that name.
 */
auto opIndex(string attribute)
{
    import std.typecons : Nullable;

    auto found = myhtml_attribute_by_key(myhtml_tree_node, attribute.toStringz, attribute.length);

    // No such attribute: hand back an empty Nullable.
    if (found is null)
        return Nullable!string();

    size_t valueLength;
    auto raw = myhtml_attribute_value(found, &valueLength);
    return Nullable!string(raw[0 .. valueLength].to!string);
}
113 |
/**
 * Write an attribute. An existing attribute with the same key is removed
 * first, then the new key/value pair is added.
 * Returns: the assigned value
 */
auto opIndexAssign(string value, string key)
{
    removeAttribute(key);

    myhtml_attribute_add(
        myhtml_tree_node,
        key.toStringz, key.length,
        value.toStringz, value.length,
        MyEncodingList.default_
    );

    return value;
}
121 |
/**
 * Write an attribute without a value (`node["key"] = null`). An existing
 * attribute with the same key is removed first.
 */
auto opIndexAssign(typeof(null) value, string key)
{
    removeAttribute(key);

    myhtml_attribute_add(
        myhtml_tree_node,
        key.toStringz, key.length,
        null, 0,
        MyEncodingList.default_
    );

    return value;
}
128 |
/**
   Remove an attribute
   Returns: `true` if the attribute existed (and was deleted), `false` otherwise.
*/
bool removeAttribute(string key)
{
    auto found = myhtml_attribute_by_key(myhtml_tree_node, key.toStringz, key.length);

    if (found is null)
        return false;

    myhtml_attribute_delete(myhtml_node_tree(myhtml_tree_node), myhtml_tree_node, found);
    return true;
}
144 |
/** Remove node from tree and delete it */
void deleteNode()
{
    myhtml_node_delete_recursive(myhtml_tree_node);
}
147 |
/// Wraps the result of `myhtml_node_child` for this node.
Node firstChild()
{
    auto child = myhtml_node_child(myhtml_tree_node);
    return Node(child);
}
153 |
/// Wraps the result of `myhtml_node_last_child` for this node.
Node lastChild()
{
    auto child = myhtml_node_last_child(myhtml_tree_node);
    return Node(child);
}
159 |
/// Wraps the result of `myhtml_node_parent` for this node.
auto parent()
{
    auto up = myhtml_node_parent(myhtml_tree_node);
    return Node(up);
}
165 |
/// Wraps the result of `myhtml_node_next` for this node.
auto next()
{
    auto sibling = myhtml_node_next(myhtml_tree_node);
    return Node(sibling);
}
171 |
/// Wraps the result of `myhtml_node_prev` for this node.
auto previous()
{
    auto sibling = myhtml_node_prev(myhtml_tree_node);
    return Node(sibling);
}
177 |
/**
   Get children of this node.
   Returns: a lazy `ChildrenRange`. If you want to edit children, convert to array before.
*/
auto children()
{
    struct ChildrenRange
    {
        @disable this();

        // Starts the iteration at the first child of `owner`.
        private this(myhtml_tree_node_t* owner)
        {
            parent = owner;
            current = myhtml_node_child(owner);
        }

        @property bool empty()
        {
            return current is null;
        }

        @property Node front()
        {
            return Node(current);
        }

        // Advances to the next sibling.
        void popFront()
        {
            current = myhtml_node_next(current);
        }

        void opAssign(ChildrenRange rhs)
        {
            current = rhs.current;
            parent = rhs.parent;
        }

    private:
        myhtml_tree_node_t* current;
        myhtml_tree_node_t* parent;
    }

    return ChildrenRange(myhtml_tree_node);
}
207 |
/** All node's attributes
   Returns: a lazy range of `Attribute`
*/
auto attributes()
{
    struct AttributesRange
    {
        // Starts the iteration at the first attribute of `owner`.
        this(myhtml_tree_node_t* owner)
        {
            parent = owner;
            current = myhtml_node_attribute_first(owner);
        }

        @property bool empty()
        {
            return current is null;
        }

        auto front()
        {
            return Attribute(current);
        }

        // Advances to the next attribute.
        void popFront()
        {
            current = myhtml_attribute_next(current);
        }

    private:
        myhtml_tree_attr_t* current;
        myhtml_tree_node_t* parent;
    }

    return AttributesRange(myhtml_tree_node);
}
228 |
/**
 * Get the text of this node. Only for text nodes!
 *
 * The length reported by `myhtml_node_text` is used to slice the buffer, so
 * the result does not depend on scanning for a terminating NUL. This matches
 * how attribute keys and values are read elsewhere in this module.
 *
 * Returns: a copy of the node text, or `null` when the library returns no data.
 */
@property string text()
{
    size_t length;
    auto data = myhtml_node_text(myhtml_tree_node, &length);

    if (data is null)
        return null;

    return data[0 .. length].to!string;
}
234 |
/** Set the text of this node. Only for text nodes! */
@property void text(string s)
{
    auto content = s.toStringz;
    myhtml_node_text_set(myhtml_tree_node, content, s.length, MyEncodingList.default_);
}
240 |
/** Return node html representation (same result as `innerHTML`) */
string toString()
{
    string html = innerHTML();
    return html;
}
246 |
/**
 * Serializes this node with `myhtml_serialization_tree_buffer`.
 * Returns: the serialized html, or an empty string when serialization
 *          reports a non-zero status.
 */
@property string innerHTML()
{
    mycore_string_raw_t buffer;
    mycore_string_raw_clean_all(&buffer);

    // Release the native buffer on every exit path.
    scope(exit) mycore_string_raw_destroy(&buffer, false);

    auto status = myhtml_serialization_tree_buffer(myhtml_tree_node, &buffer);
    if (status)
        return "";

    return buffer.data[0 .. buffer.length].to!string;
}
257 |
/** Set node html. All children will be deleted. */
@property void innerHTML(string s)
{
    auto ownerTree = myhtml_node_tree(myhtml_tree_node);

    // Parse the fragment in a scratch tree that shares the same myhtml instance.
    auto scratch = Tree(myhtml_tree_get_myhtml(ownerTree));
    scratch.parseFragment(s);

    // Copy the parsed fragment into the tree this node belongs to.
    auto replacement = scratch.first.clone(ownerTree);

    // Collect the current children first, then delete them: deleting while
    // walking the sibling chain would invalidate the iteration.
    myhtml_tree_node_t*[] doomed;
    auto child = myhtml_node_child(myhtml_tree_node);
    while (child !is null)
    {
        doomed ~= child;
        child = myhtml_node_next(child);
    }

    foreach (victim; doomed)
        myhtml_node_delete_recursive(victim);

    // Append new child
    appendChild(replacement);
}
282 |
/** Set node innerText. All children will be deleted. */
@property void innerText(string s)
{
    // Build the replacement text node inside this node's tree.
    auto ownerTree = myhtml_node_tree(myhtml_tree_node);
    Node replacement = Node(myhtml_node_create(ownerTree, MyHtmlTagId._text, MyHtmlNamespace.html));
    replacement.text = s;

    // Collect the current children first, then delete them: deleting while
    // walking the sibling chain would invalidate the iteration.
    myhtml_tree_node_t*[] doomed;
    auto child = myhtml_node_child(myhtml_tree_node);
    while (child !is null)
    {
        doomed ~= child;
        child = myhtml_node_next(child);
    }

    foreach (victim; doomed)
        myhtml_node_delete_recursive(victim);

    // Append new child
    appendChild(replacement);
}
309 |
/**
 * Concatenates the text of every text node found under this node.
 *
 * Nodes are visited from a work list: a text node contributes its text, any
 * other node has its children pushed to the front of the list in order.
 */
@property string innerText()
{
    import std.container.dlist : DList;
    import std.array : Appender;
    import std.algorithm : map;

    Appender!string collected;

    DList!(myhtml_tree_node_t*) pending;
    pending.insertBack(myhtml_tree_node);

    while (!pending.empty)
    {
        auto visiting = Node(pending.front);
        pending.removeFront();

        if (visiting.tagId != MyHtmlTagId._text)
        {
            // Not a text node: explore its children next.
            pending.insertFront(visiting.children.map!(child => child.myhtml_tree_node));
            continue;
        }

        collected ~= visiting.text();
    }

    return collected.data;
}
331 |
/**
    Create a copy of this node owned by another tree
    Returns: a `Node` owned by `destination`
*/
Node clone(Tree destination)
{
    return clone(destination.myhtml_tree);
}

/// Create a copy of this node inside the tree that already owns it
Node clone()
{
    auto ownerTree = myhtml_node_tree(myhtml_tree_node);
    return clone(ownerTree);
}

/// Result of `myhtml_node_is_close_self` for this node
bool isSelfClosing()
{
    return myhtml_node_is_close_self(myhtml_tree_node);
}

/// Result of `myhtml_node_is_void_element` for this node
bool isVoidElement()
{
    return myhtml_node_is_void_element(myhtml_tree_node);
}

/** Detach node from tree without destroying it */
void detach()
{
    myhtml_node_remove(myhtml_tree_node);
}
349 |
/// Fast way to append a text node holding `s` as last child
void appendText(string s)
{
    // The new node is created in the tree owning this node
    auto created = Node(
        myhtml_node_create(
            myhtml_node_tree(myhtml_tree_node),
            MyHtmlTagId._text,
            MyHtmlNamespace.html
        )
    );

    created.text = s;
    appendChild(created);
}
364 |
/// Fast way to append a comment node holding `s` as last child
void appendComment(string s)
{
    // The new comment node is created in the tree owning this node
    auto created = Node(
        myhtml_node_create(
            myhtml_node_tree(myhtml_tree_node),
            MyHtmlTagId._comment,
            MyHtmlNamespace.html
        )
    );

    created.text = s;
    appendChild(created);
}
379 |
/**
    Append `n` as last child of this node.
    `n` is detached from its current position first, so an attached node is moved, not duplicated.
*/
void appendChild(Node n)
{
    n.detach();
    myhtml_node_append_child(myhtml_tree_node, n.myhtml_tree_node);
}

/**
    Insert `n` before this node.
    `n` is detached from its current position first (same behaviour as `appendChild`),
    so its previous siblings are not left pointing at a node that has been relinked elsewhere.
*/
void insertBefore(Node n)
{
    n.detach();
    myhtml_node_insert_before(myhtml_tree_node, n.myhtml_tree_node);
}

/**
    Insert `n` after this node.
    `n` is detached from its current position first (same behaviour as `appendChild`).
*/
void insertAfter(Node n)
{
    n.detach();
    myhtml_node_insert_after(myhtml_tree_node, n.myhtml_tree_node);
}

/**
    Insert `n` using `myhtml_node_insert_to_appropriate_place`, with this node as target.
    `n` is detached from its current position first (same behaviour as `appendChild`).
*/
void insertToAppropriatePlace(Node n)
{
    n.detach();
    myhtml_node_insert_to_appropriate_place(myhtml_tree_node, n.myhtml_tree_node);
}
391 |
/**
    Search children using a css 3.1 selector
    Params:
        selector = the css selector to compile and run against this node
    Returns: a lazy range of nodes
    Throws: `Exception` (through `enforce`) when the selector can't be compiled
    See_Also: byAttribute, byAttributeKey, byTagName, byClass, byId
*/
auto byCssSelector(string selector)
{
    import std.exception : enforce;

    // Every temporary C object is released through scope(exit), so nothing leaks
    // when `enforce` throws on a bad selector. Scope guards run in reverse order of
    // registration: selector list, finder, entry, mycss (the original teardown order).
    auto mycss = mycss_create();
    mycss_init(mycss);
    scope(exit) mycss_destroy(mycss, true);

    auto entry = mycss_entry_create();
    mycss_entry_init(mycss, entry);
    scope(exit) mycss_entry_destroy(entry, true);

    auto finder = modest_finder_create_simple();
    scope(exit) modest_finder_destroy(finder, true);

    mystatus_t out_status;
    mycss_selectors_list_t* list = mycss_selectors_parse
    (
        mycss_entry_selectors(entry),
        MyEncodingList.default_,
        selector.toStringz, selector.length,
        &out_status
    );

    scope(exit)
    {
        // The parser may hand back no list at all: only destroy what exists
        if (list !is null)
            mycss_selectors_list_destroy(mycss_entry_selectors(entry), list, true);
    }

    enforce(list != null && ((list.flags & mycss_selectors_flags.MyCSS_SELECTORS_FLAGS_SELECTOR_BAD) == 0), "Can't compile css selector: " ~ selector);

    myhtml_collection_t* collection = null;
    modest_finder_by_selectors_list(finder, myhtml_tree_node, list, &collection);

    return NodeRange(collection, myhtml_node_tree(myhtml_tree_node));
}
432 |
/**
    Search children by tag name
    Returns: a lazy range of nodes
    Throws: `ArrogantException` when the search reports a failed status
    See_Also: byAttribute, byAttributeKey, byClass, byId, byCssSelector
*/
auto byTagName(MyHtmlTagId name)
{
    mystatus_t status;
    auto storage = myhtml_collection_create(0, null);

    auto found = myhtml_get_nodes_by_tag_id_in_scope(
        myhtml_node_tree(myhtml_tree_node),
        storage,
        myhtml_tree_node,
        name,
        &status
    );

    // The range is built before the status check, exactly as the search result is returned
    auto result = NodeRange(found, myhtml_node_tree(myhtml_tree_node));

    if (MYHTML_FAILED(status))
        throw new ArrogantException(status);

    return result;
}
448 |
/**
    Search children by tag name given as a string
    Returns: a lazy range of nodes
    Throws: `ArrogantException` when the search reports a failed status
*/
auto byTagName(string name)
{
    mystatus_t status;
    auto storage = myhtml_collection_create(0, null);

    auto found = myhtml_get_nodes_by_name_in_scope(
        myhtml_node_tree(myhtml_tree_node),
        storage,
        myhtml_tree_node,
        name.toStringz, name.length,
        &status
    );

    // The range is built before the status check, exactly as the search result is returned
    auto result = NodeRange(found, myhtml_node_tree(myhtml_tree_node));

    if (MYHTML_FAILED(status))
        throw new ArrogantException(status);

    return result;
}
459 |
/**
    Search children by class (space separated)
    Returns: a lazy range of nodes
    See_Also: byAttribute, byAttributeKey, byTagName, byId, byCssSelector
*/
auto byClass(string className)
{
    // A class attribute is a whitespace separated list, hence the dedicated search type
    return byAttribute!(AttributeSearchType.spaceSeparated)("class", className);
}

/**
    Search children by id
    Returns: a lazy range of nodes
    See_Also: byAttribute, byAttributeKey, byTagName, byClass, byCssSelector
*/
auto byId(string id)
{
    // Uses the default search type of `byAttribute` on the "id" attribute
    return byAttribute("id", id);
}
473 |
/**
    Search children with a specified attribute
    Returns: a lazy range of nodes
    Throws: `ArrogantException` when the search reports a failed status
    See_Also: byAttribute, byTagName, byClass, byId, byCssSelector
*/
auto byAttributeKey(string name)
{
    mystatus_t status;
    auto storage = myhtml_collection_create(0, null);

    auto found = myhtml_get_nodes_by_attribute_key(
        myhtml_node_tree(myhtml_tree_node),
        storage,
        myhtml_tree_node,
        name.toStringz, name.length,
        &status
    );

    // The range is built before the status check, exactly as the search result is returned
    auto result = NodeRange(found, myhtml_node_tree(myhtml_tree_node));

    if (MYHTML_FAILED(status))
        throw new ArrogantException(status);

    return result;
}
488 |
/**
    Search children by tag attribute key/val.
    The template arguments select how `value` is matched against the attribute
    (`st`) and whether the comparison ignores case (`caseInsensitive`).
    Returns: a lazy range of nodes
    Throws: `ArrogantException` when the search reports a failed status
    See_Also: byAttributeKey, byTagName, byClass, byId, byCssSelector
*/
auto byAttribute(AttributeSearchType st = AttributeSearchType.exact, Flag!"caseInsensitive" caseInsensitive = No.caseInsensitive)(string key, string value)
{
    // `st` is known at compile time: bind the matching C search function once
    static if (st == AttributeSearchType.exact)
        alias search = myhtml_get_nodes_by_attribute_value;
    else static if (st == AttributeSearchType.startsWith)
        alias search = myhtml_get_nodes_by_attribute_value_begin;
    else static if (st == AttributeSearchType.endsWith)
        alias search = myhtml_get_nodes_by_attribute_value_end;
    else static if (st == AttributeSearchType.contains)
        alias search = myhtml_get_nodes_by_attribute_value_contain;
    else static if (st == AttributeSearchType.spaceSeparated)
        alias search = myhtml_get_nodes_by_attribute_value_whitespace_separated;
    else static if (st == AttributeSearchType.hypenSeparated)
        alias search = myhtml_get_nodes_by_attribute_value_hyphen_separated;
    else
        static assert(0, "Unsupported AttributeSearchType");

    mystatus_t status;
    auto storage = myhtml_collection_create(0, null);

    auto found = search(
        myhtml_node_tree(myhtml_tree_node),
        storage,
        myhtml_tree_node,
        caseInsensitive == Yes.caseInsensitive,
        key.toStringz, key.length, value.toStringz, value.length,
        &status
    );

    // The range is built before the status check, exactly as the search result is returned
    auto result = NodeRange(found, myhtml_node_tree(myhtml_tree_node));

    if (MYHTML_FAILED(status))
        throw new ArrogantException(status);

    return result;
}
528 |
/**
    Create a new html node owned by `tree`.
    Params:
        tree = the tree that will own the node
        tag = tag id of the new node
        ns = namespace of the new node
*/
this(ref Tree tree, MyHtmlTagId tag, MyHtmlNamespace ns = MyHtmlNamespace.html)
{
    myhtml_tree_node = myhtml_node_create (
        tree.myhtml_tree,
        tag,
        ns
    );

    // Only take a reference when a node exists: the destructor releases
    // nothing for a null node, so acquire/release stay balanced.
    if (myhtml_tree_node !is null)
        Tree.acquire(tree.myhtml_tree);
}

/// Copying a node takes one more reference on the owning tree (nothing to do for an empty node)
this(this)
{
    if (myhtml_tree_node !is null)
        Tree.acquire(myhtml_node_tree(myhtml_tree_node));
}

/// Rebind this handle to the node held by `rhs`, moving the tree reference accordingly
void opAssign(Node rhs)
{
    // Acquire before release, so assigning a node of the same tree never drops the last reference.
    if (rhs.myhtml_tree_node !is null)
        Tree.acquire(myhtml_node_tree(rhs.myhtml_tree_node));

    // A default-initialized Node holds no pointer and owns no reference
    if (myhtml_tree_node !is null)
        Tree.release(myhtml_node_tree(myhtml_tree_node));

    myhtml_tree_node = rhs.myhtml_tree_node;
}

/// Give back the reference on the owning tree (nothing to do for an empty node)
~this()
{
    if (myhtml_tree_node !is null)
        Tree.release(myhtml_node_tree(myhtml_tree_node));
}

private:

/// Wrap an existing C node, taking a reference on its tree when the pointer is not null
this(myhtml_tree_node_t *node)
{
    myhtml_tree_node = node;

    if (node !is null)
        Tree.acquire(myhtml_node_tree(node));
}
559 |
/**
    Copy this node and all its descendants into `destination`.
    Nodes are duplicated breadth-first; the root of the copy is not appended anywhere.
    Returns: a `Node` wrapping the root of the copy
*/
Node clone(myhtml_tree_t* destination)
{
    import std.container.dlist;

    // One pending job: `source` still has to be duplicated and appended to `newParent`
    static struct Pending
    {
        myhtml_tree_node_t* newParent;
        myhtml_tree_node_t* source;
    }

    // Duplicate a single node (tag, namespace, text and attributes), children excluded
    static myhtml_tree_node_t* shallowCopy(myhtml_tree_t* target, myhtml_tree_node_t* original)
    {
        auto copy = myhtml_node_create(target, original.tag_id, original.ns);

        // Text
        size_t textLength;
        auto text = myhtml_node_text(original, &textLength);
        myhtml_node_text_set(copy, text, textLength, MyEncodingList.default_);

        // Attributes, in their original order
        auto attribute = myhtml_node_attribute_first(original);
        while (attribute !is null)
        {
            size_t keyLength, valueLength;
            auto key = myhtml_attribute_key(attribute, &keyLength);
            auto value = myhtml_attribute_value(attribute, &valueLength);
            myhtml_attribute_add(copy, key, keyLength, value, valueLength, MyEncodingList.default_);

            attribute = myhtml_attribute_next(attribute);
        }

        return copy;
    }

    auto work = DList!Pending();

    // Queue every child of `from`, to be appended later under `under`
    void scheduleChildren(myhtml_tree_node_t* from, myhtml_tree_node_t* under)
    {
        for (auto kid = myhtml_node_child(from); kid !is null; kid = myhtml_node_next(kid))
            work.insertBack(Pending(under, kid));
    }

    // Start from the root, then drain the queue
    auto copyRoot = shallowCopy(destination, myhtml_tree_node);
    scheduleChildren(myhtml_tree_node, copyRoot);

    while (!work.empty)
    {
        auto job = work.front;
        work.removeFront();

        auto copy = shallowCopy(destination, job.source);
        myhtml_node_append_child(job.newParent, copy);

        scheduleChildren(job.source, copy);
    }

    return Node(copyRoot);
}
629 |
630 | myhtml_tree_node_t* myhtml_tree_node = null;
631 | }
632 |
633 | import std.stdio;
634 |
635 |
636 |
/**
    A lazy range of nodes, usually returned by a search.

    The range walks a `myhtml_collection_t` and keeps a reference on the owning tree
    for as long as it lives.
    NOTE(review): nothing in this struct destroys the collection itself — confirm who owns it.
*/
struct NodeRange
{
    @disable this();

    /**
        Access the i-th node counted from the current front.
        Indexing past the remaining nodes halts the program instead of reading
        outside the C array.
    */
    Node opIndex(size_t i)
    {
        if (i >= length()) assert(0, "NodeRange index out of bounds");
        return Node(myhtml_collection.list[i + idx]);
    }

    /**
        Number of nodes still available, i.e. not yet consumed by `popFront`.
        It shrinks as the range is consumed, so `this[length - 1]` is always the last valid index.
    */
    size_t length()
    {
        if (myhtml_collection is null) return 0;

        immutable total = myhtml_collection.length;
        return idx < total ? total - idx : 0;
    }

    /// The current node. Halts the program on an empty range.
    @property Node front() { if (empty) assert(0, "Can't read nodes from an empty collection"); return this[0]; }

    /// True when every node has been consumed (or there is no collection at all)
    @property bool empty() { return length() == 0; }

    /// Move to the next node
    void popFront() { idx++; }

    /// Give back the reference on the tree
    ~this() { Tree.release(myhtml_tree); }

    /// Every copy holds its own reference on the tree
    this(this) { Tree.acquire(myhtml_tree); }

    /// Rebind to the collection, tree and position of `rhs`
    void opAssign(NodeRange rhs)
    {
        // Acquire before release, so assigning a range over the same tree never drops the last reference
        Tree.acquire(rhs.myhtml_tree);
        Tree.release(myhtml_tree);
        myhtml_tree = rhs.myhtml_tree;
        myhtml_collection = rhs.myhtml_collection;
        idx = rhs.idx;
    }

    private:

    /// Wrap a search result; `collection` may be null (empty range)
    this(myhtml_collection_t* collection, myhtml_tree_t* tree)
    {
        myhtml_collection = collection;
        myhtml_tree = tree;
        Tree.acquire(myhtml_tree);
    }

    myhtml_collection_t* myhtml_collection;
    myhtml_tree_t* myhtml_tree;

    // Index of the current front inside the collection
    size_t idx = 0;
}
681 |
/**
    A html tree.

    Copies of a `Tree` share the same underlying `myhtml_tree_t`, which is reference
    counted and destroyed when the last reference (tree, node or range) is released.
    A default-initialized `Tree` is invalid: it owns nothing and `isValid` is false.
*/
struct Tree
{
    /// Create a new node owned by this tree
    Node createNode(MyHtmlTagId tag, MyHtmlNamespace ns = MyHtmlNamespace.html) { return Node(this, tag, ns); }

    /// Fast way to create a text node
    Node createTextNode(string text) { auto n = createNode(MyHtmlTagId._text); n.text = text; return n; }

    /// Fast way to create a comment node
    Node createCommentNode(string text) { auto n = createNode(MyHtmlTagId._comment); n.text = text; return n; }

    /// See: `Node.byXXXX`
    auto byClass(string className) { return document.byClass(className); }

    /// Ditto
    auto byId(string id) { return document.byId(id); }

    /// Ditto
    auto byCssSelector(string selector) { return document.byCssSelector(selector); }

    /// Ditto
    auto byTagName(MyHtmlTagId name) { return document.byTagName(name); }

    /// Ditto
    auto byTagName(string name) { return document.byTagName(name); }

    /// Ditto
    auto byAttributeKey(string name) { return document.byAttributeKey(name); }

    /// Ditto
    auto byAttribute(AttributeSearchType st = AttributeSearchType.exact, Flag!"caseInsensitive" caseInsensitive = No.caseInsensitive)(string key, string value)
    {
        return document.byAttribute!(st, caseInsensitive)(key, value);
    }

    /// The document root
    auto document()
    {
        return Node(myhtml_tree_get_document(myhtml_tree));
    }

    /// The html node
    auto html()
    {
        return Node(myhtml_tree_get_node_html(myhtml_tree));
    }

    /// The head node
    auto head()
    {
        return Node(myhtml_tree_get_node_head(myhtml_tree));
    }

    /// The body node
    auto body()
    {
        return Node(myhtml_tree_get_node_body(myhtml_tree));
    }

    /// Return the first node
    auto first()
    {
        return Node(myhtml_node_first(myhtml_tree));
    }

    /// String form of the first node of the tree
    string toString() { Node tmp = first(); return tmp.toString(); }

    /// A copy of a valid tree takes one more reference; a copy of an invalid tree stays invalid
    this(this)
    {
        // `valid` has already been copied from the source: only a valid source owns a
        // reference to share. Acquiring unconditionally would register a null pointer.
        if (valid)
            acquire(myhtml_tree);
    }

    /// Rebind this handle to the tree held by `rhs`
    void opAssign(Tree rhs)
    {
        // Acquire before release, so assigning a handle of the same tree never drops the last reference
        if (rhs.valid)
            acquire(rhs.myhtml_tree);

        if (valid)
            release(myhtml_tree);

        myhtml_tree = rhs.myhtml_tree;

        // Assigning an invalid tree gives an invalid tree
        valid = rhs.valid;
    }

    /// Give back the reference held by this handle, if any
    ~this() {
        if (valid)
            release(myhtml_tree);
    }

    /// True when this handle refers to an initialized tree
    @property isValid() { return valid; }

    private:

    /// Parse a whole document into this tree. Throws `ArrogantException` on a failed status.
    void parse(T)(T html, MyEncodingList encoding = MyEncodingList.default_) if (isSomeString!T)
    {
        auto status = myhtml_parse(myhtml_tree, encoding, html.toStringz, html.length);
        if (MYHTML_FAILED(status)) throw new ArrogantException(status);
    }

    /// Parse a fragment into this tree. Throws `ArrogantException` on a failed status.
    void parseFragment(T)(T html, MyHtmlTagId wrap = MyHtmlTagId.div, MyEncodingList encoding = MyEncodingList.default_, MyHtmlNamespace ns = MyHtmlNamespace.html) if (isSomeString!T)
    {
        auto status = myhtml_parse_fragment (
            myhtml_tree,
            encoding,
            html.toStringz, html.length,
            wrap,
            ns
        );
        if (MYHTML_FAILED(status)) throw new ArrogantException(status);
    }

    /// Create a tree bound to the myhtml instance of `parent`
    this(ref Arrogant parent) { this(parent.myhtml); }

    /// Create and initialize a tree bound to `parent`. Throws `ArrogantException` on a failed status.
    this(myhtml_t* parent)
    {
        myhtml_tree = myhtml_tree_create();
        auto status = myhtml_tree_init(myhtml_tree, parent);

        // NOTE(review): on failure the freshly created tree is not destroyed — confirm whether
        // `myhtml_tree_destroy` is safe after a failed `myhtml_tree_init` before adding it here.
        if (MYHTML_FAILED(status)) throw new ArrogantException(status);

        acquire(myhtml_tree);
        valid = true;
    }

    bool valid = false;
    myhtml_tree_t* myhtml_tree = null;

    // Number of live references (trees, nodes, ranges) for each underlying C tree
    static size_t[myhtml_tree_t*] refCount;

    /// Take a reference on `ptr` and on the myhtml instance it belongs to
    static void acquire(myhtml_tree_t* ptr) { refCount[ptr]++; Arrogant.acquire(myhtml_tree_get_myhtml(ptr)); }

    /// Drop a reference on `ptr`; the tree is destroyed when the last one goes away
    static void release(myhtml_tree_t* ptr)
    {
        size_t cnt = refCount[ptr];
        assert(cnt > 0);
        refCount[ptr] = cnt - 1;

        // Read the owner before the tree may be destroyed below
        myhtml_t* myhtml = myhtml_tree_get_myhtml(ptr);

        if (cnt == 1)
        {
            myhtml_tree_destroy(ptr);
            refCount.remove(ptr);
        }

        Arrogant.release(myhtml);
    }

}
831 |
/**
    Entry point of the library: owns a reference counted `myhtml_t` instance and
    creates trees from it. A default-initialized `Arrogant` is lazily set up with
    default options on the first `parse`/`parseFragment` call.
*/
struct Arrogant
{
    /// Parse a html document
    Tree parse(T)(T html, MyEncodingList encoding = MyEncodingList.default_) if (isSomeString!T)
    {
        if (!myhtml) initArrogant();
        Tree tree = Tree(myhtml);
        tree.parse(html, encoding);
        return tree;
    }

    /// Parse a html fragment
    Tree parseFragment(T)(T html, MyHtmlTagId wrap = MyHtmlTagId.div, MyEncodingList encoding = MyEncodingList.default_, MyHtmlNamespace ns = MyHtmlNamespace.html) if (isSomeString!T)
    {
        if (!myhtml) initArrogant();
        Tree tree = Tree(myhtml);
        tree.parseFragment(html, wrap, encoding, ns);
        return tree;
    }

    /// Rebind this handle to the myhtml instance held by `rhs`
    void opAssign(Arrogant rhs)
    {
        // Acquire before release, so assigning a handle of the same instance never drops the last reference.
        // An invalid `rhs` owns nothing: acquiring it would register a null pointer.
        if (rhs.valid)
            acquire(rhs.myhtml);

        if (valid)
            release(myhtml);

        myhtml = rhs.myhtml;

        // Assigning an uninitialized instance gives an uninitialized instance
        valid = rhs.valid;
    }

    /// Create an instance with explicit options. Throws `ArrogantException` on a failed status.
    this(MyHtmlOptions options, size_t threadCount = 1, size_t queueSize = 0)
    {
        initArrogant(options, threadCount, queueSize);
    }

    /// A copy of an initialized instance takes one more reference; other copies own nothing
    this(this) { if (valid) acquire(myhtml); }

    /// Give back the reference held by this handle, if any
    ~this() { if (valid) release(myhtml); }

    /// True when this handle refers to an initialized myhtml instance
    @property isValid() { return valid; }

    private:

    /// Create and initialize the myhtml instance, unless one is already set
    void initArrogant(MyHtmlOptions options = MyHtmlOptions.default_, size_t threadCount = 1, size_t queueSize = 0)
    {
        if (myhtml) return;

        auto handle = myhtml_create();
        auto status = myhtml_init(handle, options, threadCount, queueSize);

        // Publish and count the handle only once it is usable. On failure this object keeps a
        // null pointer, so a later `parse` retries the initialization instead of reusing a
        // half-initialized instance.
        // NOTE(review): the failed handle is not destroyed — confirm whether `myhtml_destroy`
        // is safe after a failed `myhtml_init` before adding it here.
        if (MYHTML_FAILED(status))
            throw new ArrogantException(status);

        myhtml = handle;
        acquire(myhtml);
        valid = true;
    }

    myhtml_t* myhtml = null;
    bool valid = false;

    // Number of live references for each underlying myhtml instance
    static size_t[myhtml_t*] refCount;

    /// Take a reference on `ptr`
    static void acquire(myhtml_t* ptr) { refCount[ptr]++; }

    /// Drop a reference on `ptr`; the instance is destroyed when the last one goes away
    static void release(myhtml_t* ptr)
    {
        size_t cnt = refCount[ptr];

        assert(cnt > 0);
        refCount[ptr] = cnt - 1;

        if (cnt == 1)
        {
            myhtml_destroy(ptr);
            refCount.remove(ptr);
        }
    }

}
913 |
--------------------------------------------------------------------------------
/source/arrogant_test_app.d:
--------------------------------------------------------------------------------
1 | version(arrogant_test_app)
2 | {
3 | import arrogant;
4 | import std.stdio : writeln, stdout;
5 |
6 | void main()
7 | {
8 | writeln("Simple example --");
9 |
10 | // Simple example
11 | {
12 | auto src = `Hello World
`;
13 | auto arrogant = Arrogant();
14 | auto tree = arrogant.parse(src);
15 |
16 | // Change div content from "Hello World!" to "Hello D!"
17 | tree.byTagName("div").front.innerText = "Hello D!";
18 |
19 | // Print the edited html
20 | writeln(tree.document);
21 |
22 | assert(tree.document.innerHTML == "Hello D!
");
23 | }
24 |
25 | writeln("Css selector --");
26 |
27 | // Css Selector
28 | {
29 | auto src = `
30 |
31 |
32 | First div
33 |
39 |
40 | Other
41 |
42 | `;
43 |
44 | auto arrogant = Arrogant();
45 | auto tree = arrogant.parse(src);
46 |
47 | // Looks for an anchor next to an img inside a div
48 | auto url = tree.byCssSelector("div > img + a").front["href"];
49 | writeln("Selector: div > img + a Result:", url);
50 | assert(url == "right_link.html");
51 | }
52 |
53 | writeln("Ranges --");
54 |
55 | // Ranges
56 | {
57 | import std.algorithm: startsWith, filter, each;
58 |
59 | auto src = `
60 |
61 |
62 | Relative link
63 | Relative link
64 | D programming language
65 |
66 |
67 | `;
68 |
69 | auto arrogant = Arrogant();
70 | auto tree = arrogant.parse(src);
71 |
72 | // Add rel="nofollow" to all http/https links
73 | // https://issues.dlang.org/show_bug.cgi?id=11934 "each" implementation is bugged
74 | tree
75 | .byAttributeKey("href")
76 | .filter!(x => x["href"].startsWith("http://") || x["href"].startsWith("https://"))
77 | .each!(e => e["rel"] = "nofollow");
78 |
79 | writeln(tree.byAttributeKey("href"));
80 |
81 | // Just one must be changed
82 | assert(tree.byAttribute("rel", "nofollow").length == 1);
83 | assert(tree.byAttribute("rel", "nofollow").front.innerText == "D programming language");
84 | assert("rel" in tree.byAttribute("href", "http://www.dlang.org").front);
85 | }
86 |
87 | writeln("Cloning --");
88 |
89 | // Cloning
90 | {
91 | auto src = `
92 |
93 |
94 |
95 |
Fruit: Apple
96 |
Idx: 150
97 |
98 |
99 |
100 | `;
101 |
102 | auto arrogant = Arrogant();
103 | auto tree = arrogant.parse(src);
104 |
105 | auto body = tree.body;
106 |
107 | // Retrieve template from page
108 | auto divTemplate = tree.byAttribute("data-custom", "template").front;
109 |
110 | // Clone template for each item
111 | foreach(idx, item; ["pear", "orange", "cherry"])
112 | {
113 | import std.conv : to;
114 |
115 | auto newDiv = divTemplate.clone();
116 | auto spans = newDiv.byTagName("span");
117 | spans[0].innerText = item;
118 | spans[1].innerText = idx.to!string;
119 |
120 | body.appendChild(newDiv);
121 | }
122 |
123 | // Delete template & remove from page
124 | divTemplate.deleteNode();
125 |
126 | import std.algorithm : map, each;
127 |
128 | // Print out all detected fruits
129 | tree
130 | .byClass("name")
131 | .map!(item => item.innerText)
132 | .each!(x => writeln("Fruit detected: ", x));
133 |
134 | assert(tree.byCssSelector("body > div").length == 3);
135 | assert(tree.byCssSelector("body > div span.name")[1].innerText == "orange");
136 | }
137 |
138 | writeln("Moving --");
139 |
140 | // Moving elements
141 | {
142 | auto src = `
143 |
144 |
145 |
146 | outside
147 |
148 |
149 | `;
150 |
151 | auto arrogant = Arrogant();
152 | auto tree = arrogant.parse(src);
153 | auto link = tree.byTagName("a").front;
154 | auto container = tree.byId("container").front;
155 |
156 | // Move link inside div
157 | container.appendChild(link);
158 |
159 | writeln("Link parent tag: ", link.parent.tagId);
160 |
161 | assert(container.firstChild.tagId == MyHtmlTagId.a);
162 | assert(container.firstChild.innerText == "outside");
163 | assert(tree.byTagName("a").length == 1);
164 | }
165 |
166 | // Get summaries from forum.dlang.org
167 | {
168 | import std.net.curl;
169 | import std.range;
170 |
171 | auto src = "https://forum.dlang.org".get;
172 | auto arrogant = Arrogant();
173 | auto tree = arrogant.parse(src);
174 | size_t cnt = 0;
175 |
176 | writeln("Recent posts on forum.dlang.org:\n");
177 | foreach(post; tree.byClass("forum-index-col-lastpost").take(2))
178 | {
179 | string title = post.byClass("forum-postsummary-subject").front["title"];
180 | string author = post.byClass("forum-postsummary-author").front["title"];
181 | string date = post.byCssSelector("span.forum-postsummary-time > span").front["title"];
182 |
183 | writeln("Title: ", title);
184 | writeln("By: ", author);
185 | writeln("Date: ", date);
186 | writeln("--------------");
187 |
188 | cnt++;
189 | }
190 |
191 | writeln("Total: ", cnt, " posts");
192 | assert(cnt != 0);
193 | }
194 | }
195 | }
196 |
197 | // Some internal tests
198 | version(arrogant_tests)
199 | {
200 |
201 | import arrogant;
202 | import std.stdio : writeln, stdout;
203 |
204 | // Testing reference count
205 | /*
206 | unittest {
207 | Node n;
208 | {
209 | auto src = `Hello World
`;
210 | auto arrogant = Arrogant();
211 | auto tree = arrogant.parse(src);
212 |
213 | // n lifespan is longer than parent one
214 | n = tree.byTagName("div").front;
215 | }
216 |
217 | assert(n.toString == "Hello World
");
218 | }
219 | */
220 |
221 | /*
222 | unittest {
223 | NodeRange r;
224 | {
225 | auto src = `Hello World
`;
226 | auto arrogant = Arrogant();
227 | auto tree = arrogant.parse(src);
228 |
229 | // n lifespan is longer than parent one
230 | r = tree.byTagName("div");
231 | }
232 |
233 | assert(r.front.toString == "Hello World
");
234 | }
235 | */
236 | unittest {
237 | Tree tree2;
238 | {
239 | Tree tree3;
240 | auto src = `Hello World
`;
241 | auto arrogant = Arrogant();
242 | auto tree = arrogant.parse(src);
243 |
244 | tree2 = tree;
245 | tree3 = tree2;
246 | }
247 |
248 | assert(tree2.byTagName("div").front.toString == "Hello World
");
249 | }
250 |
251 | void main() {}
252 |
253 | }
254 |
--------------------------------------------------------------------------------