├── .gitignore
├── HtmlParserEx.pas
├── LICENSE
├── README.md
└── demo
├── Bin
└── github.com_ying32_htmlparser.html
├── Unit23.dfm
├── Unit23.pas
├── demo.dpr
└── demo.dproj
/.gitignore:
--------------------------------------------------------------------------------
1 | # Uncomment these types if you want even more clean repository. But be careful.
2 | # It can make harm to an existing project source. Read explanations below.
3 | #
4 | # Resource files are binaries containing manifest, project icon and version info.
5 | # They can not be viewed as text or compared by diff-tools. Consider replacing them with .rc files.
6 | #*.res
7 | #
8 | # Type library file (binary). In old Delphi versions it should be stored.
9 | # Since Delphi 2009 it is produced from .ridl file and can safely be ignored.
10 | #*.tlb
11 | #
12 | # Diagram Portfolio file. Used by the diagram editor up to Delphi 7.
13 | # Uncomment this if you are not using diagrams or use newer Delphi version.
14 | #*.ddp
15 | #
16 | # Visual LiveBindings file. Added in Delphi XE2.
17 | # Uncomment this if you are not using LiveBindings Designer.
18 | #*.vlb
19 | #
20 | # Deployment Manager configuration file for your project. Added in Delphi XE2.
21 | # Uncomment this if it is not mobile development and you do not use remote debug feature.
22 | #*.deployproj
23 | #
24 | # C++ object files produced when C/C++ Output file generation is configured.
25 | # Uncomment this if you are not using external objects (zlib library for example).
26 | #*.obj
27 | #
28 |
29 | # Delphi compiler-generated binaries (safe to delete)
30 | *.exe
31 | *.dll
32 | *.bpl
33 | *.bpi
34 | *.dcp
35 | *.so
36 | *.apk
37 | *.drc
38 | *.map
39 | *.dres
40 | *.rsm
41 | *.tds
42 | *.dcu
43 | *.lib
44 | *.a
45 | *.o
46 | *.ocx
47 |
48 | # Delphi autogenerated files (duplicated info)
49 | *.cfg
50 | *.hpp
51 | *Resource.rc
52 |
53 | # Delphi local files (user-specific info)
54 | *.local
55 | *.identcache
56 | *.projdata
57 | *.tvsconfig
58 | *.dsk
59 |
60 | # Delphi history and backups
61 | __history/
62 | __recovery/
63 | *.~*
64 |
65 | # Castalia statistics file (since XE7 Castalia is distributed with Delphi)
66 | *.stat
67 |
--------------------------------------------------------------------------------
/HtmlParserEx.pas:
--------------------------------------------------------------------------------
1 | {
2 | Html解析器.
3 | 最近因为用到Html解析功能.在网上找了几款Delphi版本的,结果发现解析复杂的HTML都有一些问题.
4 | 没办法自己写了一款,经测试到现在没遇到任何解析不了的Html.
5 |
6 | wr960204 武稀松 2013
7 |
8 | http://www.raysoftware.cn/?p=370
9 |
10 | 感谢牛人杨延哲在HTML语法和CSS语法方面的帮助.
11 | Thank Yang Yanzhe.
12 |
13 | http://www.pockhero.com/
14 |
15 | 本版本只支持DelphiXE3之后的版本.如果用早期Delphi请使用HTMLParser.pas文件.
16 | 支持Windows,MacOSX,iOS,Android平台,完全去掉了对指针的使用.防止以后易博龙去掉
17 | 移动平台对指针的支持.
18 |
19 | 脱离了对旧版本的支持,甩掉包袱开发起来真的很爽!
20 |
21 | ---------------------------------------------------------------------------------
22 | ying32修改记录
23 | Email:1444386932@qq.com
24 |
25 | 2017年06月20日
26 |
27 | 1、为IHtmlElementList增加for in 语法支持
28 |
29 | 2017年05月04日
30 |
31 | 1、去除RegularExpressions单元的引用,不再使用TRegEx改使用RegularExpressionsCore单元中的TPerlRegEx
32 |
33 | 2017年04月19日
34 |
35 | 1、增加使用XPath功能的编译指令"UseXPath",默认不使用XPath,个人感觉没什么用
36 |
37 | 2016年11月23日
38 |
39 | 1、简单支持XPath,简单的吧,利用xpath转css selector,嘿
40 | xpath转换的代码改自python版本:https://github.com/santiycr/cssify/blob/master/cssify.py
41 | 另外对正则System.RegularExpressions.pas中TGroupCollection.GetItem进行了改进,没有找到命名组
42 | 且非PCRE_ERROR_NOSUBSTRING时返回空的,而不是抛出一个异常。暂时就简单粗爆的直接改吧,官方网站
43 | 上看到有人提过这个QC,不知道后面有没有解决。
44 |
45 | 2016年11月15日
46 |
47 | IHtmlElement和THtmlElement的改变:
48 | 1、Attributes属性增加Set方法
49 | 2、TagName属性增加Set方法
50 | 3、增加Parent属性
51 | 4、增加RemoveAttr方法
52 | 5、增加Remove方法
53 | 6、增加RemoveChild方法
54 | 7、增加Find方法,此为SimpleCSSSelector的一个另名
55 | 8、_GetHtml不再直接附加FOrignal属性值,而是使用GetSelfHtml重新对修改后的元素进行赋值操作,并更新FOrignal的值
56 | 9、增加Text属性
57 |
58 | 使用例:
59 | EL.Attributes['class'] := 'xxxx';
60 | EL.TagName = 'a';
61 | EL.Remove 移除自己
62 | EL.RemoveChild(El2);
63 |
64 | El.Find('a');
65 |
66 | IHtmlElementList和THtmlElementList的改变:
67 | 1、增加RemoveAll方法
68 | 2、增加Remove方法
69 | 3、增加Each方法
70 | 4、增加Text属性
71 | // 使用例:
72 |
73 | // 移除选择的元素
74 | LHtml.Find('a').RemoveAll
75 |
76 | // 查找并遍沥
77 | LHtml.Find('a').Each(
78 | procedure(AIndex: Integer; AEl: IHtmlElement)
79 | begin
80 | Writeln('Index=', AIndex, ', href=', AEl.Attributes['href']);
81 | end);
82 |
83 | // 直接输出,仅选中的第一个元素
84 | Writeln(LHtml.Find('title').Text);
85 |
86 | }
87 |
88 |
89 | unit HtmlParserEx;
90 |
91 |
92 | {$IF RTLVersion < 24.0}
93 | {$MESSAGE ERROR '只支持XE3及之后的版本'}
94 | {$ENDIF}
95 |
96 | interface
97 |
98 | uses
99 | SysUtils,
100 | Classes,
101 | Generics.Collections,
102 | RegularExpressionsCore;
103 |
104 | {$IF (defined(IOS) and defined(CPUARM)) or defined(ANDROID)}
105 | {$DEFINE MOBILE_DEV}
106 | {$ENDIF}
107 |
108 | const
109 | // By design each IHtmlElement has a TagName property, even it's a 'Text' element.
110 | // so cSpecialTagName_Text represents the fake tag name of a Text element
111 | cSpecialTagName_Text = '#TEXT';
112 |
113 | LowStrIndex = Low(string); // 移动平台=0,个人电脑平台=1
114 |
115 | type
116 |
117 | {$IFNDEF MSWINDOWS}
118 | { 接口使用WideString是为了可以给例如C++,VB等语言使用.
119 | 但是如果离开了Windows平台,其他平台是没有WideString这个COM的数据类型的.
120 | }
121 | WideString = String;
122 | {$ENDIF}
123 | IHtmlElement = interface;
124 | IHtmlElementList = interface;
125 |
126 | TElementEachEvent = reference to procedure(AIndex: Integer; AEl: IHtmlElement);
127 |
128 | IHtmlElement = interface
129 | ['{8C75239C-8CFA-499F-B115-7CEBEDFB421B}']
130 | function GetParent: IHtmlElement; stdcall;
131 | function GetTagName: WideString; stdcall;
132 | procedure SetTagName(Value: WideString); stdcall;
133 | function GetContent: WideString; stdcall;
134 | function GetOrignal: WideString; stdcall;
135 | function GetChildrenCount: Integer; stdcall;
136 | function GetChildren(Index: Integer): IHtmlElement; stdcall;
137 | function GetCloseTag: IHtmlElement; stdcall;
138 | function GetInnerHtml(): WideString; stdcall;
139 | function GetOuterHtml(): WideString; stdcall;
140 | function GetInnerText(): WideString; stdcall;
141 | procedure SetInnerText(Value: WideString); stdcall;
142 |
143 | function GetAttributes(Key: WideString): WideString; stdcall;
144 | procedure SetAttributes(Key: WideString; Value: WideString); stdcall;
145 |
146 | procedure RemoveAttr(AAttrName: string); stdcall;
147 |
148 | function GetSourceLineNum(): Integer; stdcall;
149 | function GetSourceColNum(): Integer; stdcall;
150 |
151 | // 增加移除节点
152 | function RemoveChild(ANode: IHtmlElement): Integer; stdcall;
153 | procedure Remove; stdcall;
154 | function AppedChild(const ATag: string): IHtmlElement; stdcall;
155 |
156 | // 属性是否存在
157 | function HasAttribute(AttributeName: WideString): Boolean; stdcall;
158 | { 用CSS选择器语法查找Element,不支持"伪类"
159 | CSS Selector Style search,not support Pseudo-classes.
160 |
161 | http://www.w3.org/TR/CSS2/selector.html
162 | }
163 |
164 | function SimpleCSSSelector(const selector: WideString): IHtmlElementList; stdcall;
165 | function Find(const selector: WideString): IHtmlElementList; stdcall;
166 |
167 | function FindX(const AXPath: WideString): IHtmlElementList; stdcall;
168 |
169 | // 枚举属性
170 | function EnumAttributeNames(Index: Integer): WideString; stdcall;
171 |
172 | property TagName: WideString read GetTagName write SetTagName;
173 | property ChildrenCount: Integer read GetChildrenCount;
174 | property Children[index: Integer]: IHtmlElement read GetChildren; default;
175 | property CloseTag: IHtmlElement read GetCloseTag;
176 | property Content: WideString read GetContent;
177 | property Orignal: WideString read GetOrignal;
178 | property Parent: IHtmlElement read GetParent;
179 | // 获取元素在源代码中的位置
180 | property SourceLineNum: Integer read GetSourceLineNum;
181 | property SourceColNum: Integer read GetSourceColNum;
182 | //
183 | property InnerHtml: WideString read GetInnerHtml;
184 | property OuterHtml: WideString read GetOuterHtml;
185 | property InnerText: WideString read GetInnerText write SetInnerText;
186 | property Text: WideString read GetInnerText write SetInnerText;
187 |
188 | property Attributes[Key: WideString]: WideString read GetAttributes write SetAttributes;
189 | // ying32 不改动原来的,只简化使用
190 | property Attrs[Key: WideString]: WideString read GetAttributes write SetAttributes;
191 | end;
192 |
193 |
194 | THtmlListEnumerator = class
195 | private
196 | FIndex: Integer;
197 | FList: IHtmlElementList;
198 | public
199 | constructor Create(AList: IHtmlElementList);
200 | function GetCurrent: IHtmlElement; inline;
201 | function MoveNext: Boolean;
202 | property Current: IHtmlElement read GetCurrent;
203 | end;
204 |
205 | IHtmlElementList = interface
206 | ['{8E1380C6-4263-4BF6-8D10-091A86D8E7D9}']
207 | function GetCount: Integer; stdcall;
208 | function GetItems(Index: Integer): IHtmlElement; stdcall;
209 | procedure RemoveAll; stdcall;
210 | procedure Remove(ANode: IHtmlElement); stdcall;
211 | procedure Each(f: TElementEachEvent); stdcall;
212 | function GetText: WideString; stdcall;
213 | function GetEnumerator: THtmlListEnumerator;
214 |
215 | property Text: WideString read GetText;
216 | property Count: Integer read GetCount;
217 | property Items[Index: Integer]: IHtmlElement read GetItems; default;
218 | end;
219 |
220 | function ParserHTML(const Source: WideString): IHtmlElement; stdcall;
221 | function DecodeHtmlEntities(S: String): string; forward;
222 |
223 | implementation
224 | uses
225 | StrUtils;
226 |
227 |
228 | type
229 | TStringDictionary = TDictionary;
230 | TPropDictionary = TDictionary;
231 | TStringDynArray = TArray;
232 |
233 | const
234 | WhiteSpace = [' ', #13, #10, #9];
235 | // CSS Attribute Compare Operator
236 | OperatorChar = ['=', '!', '*', '~', '|', '^', '$'];
237 | MaxListSize = Maxint div 16;
238 |
239 | // TagProperty
240 | tpBlock = $01;
241 | tpInline = $02;
242 | tpEmpty = $04;
243 | tpFormatAsInline = $08;
244 | tpPreserveWhitespace = $10;
245 |
246 | tpInlineOrEmpty = tpInline or tpEmpty;
247 |
248 | type
249 |
250 | TAttrOperator = (aoExist, aoEqual, aoNotEqual, aoIncludeWord, aoBeginWord,
251 | aoBegin, aoEnd, aoContain);
252 |
253 | PAttrSelectorItem = ^TAttrSelectorItem;
254 |
255 | TAttrSelectorItem = record
256 | Key: string;
257 | AttrOperator: TAttrOperator;
258 | Value: string;
259 | end;
260 |
261 | TSelectorItemRelation = (sirNONE, sirDescendant, sirChildren,
262 | sirYoungerBrother, sirAllYoungerBrother);
263 |
264 | PCSSSelectorItem = ^TCSSSelectorItem;
265 |
266 | TCSSSelectorItem = record
267 | Relation: TSelectorItemRelation;
268 | szTag: string;
269 | Attributes: array of TAttrSelectorItem;
270 | end;
271 |
272 | TCSSSelectorItems = array of TCSSSelectorItem;
273 | PCSSSelectorItems = ^TCSSSelectorItems;
274 | TCSSSelectorItemGroup = array of TCSSSelectorItems;
275 |
276 | //
277 | TSourceContext = record
278 | private
279 | function GetCharOfCurrent(Index: Integer): Char; inline;
280 | public
281 | Code: string;
282 | CodeIndex: Integer;
283 | LineNum: Integer;
284 | ColNum: Integer;
285 | CurrentChar: Char;
286 | {$IFDEF DEBUG}
287 | currentCode: PChar;
288 | {$ENDIF}
289 | procedure IncSrc(); overload; inline;
290 | procedure IncSrc(Step: Integer); overload; inline;
291 | procedure setCode(const ACode: string); inline;
292 | function ReadStr(UntilChars: TSysCharSet): string; inline;
293 | function PeekStr(Index: Integer): string; overload; inline;
294 | function PeekStr(): string; overload; inline;
295 | function subStr(Index, Count: Integer): string; overload; inline;
296 | function subStr(Count: Integer): string; overload; inline;
297 | procedure SkipBlank(); inline;
298 | property charOfCurrent[Index: Integer]: Char read GetCharOfCurrent;
299 | end;
300 |
301 | TAttributeItem = record
302 | Key, Value: string;
303 | end;
304 |
305 | TAttributeDynArray = TArray;
306 |
307 | TIHtmlElementList = class;
308 | THtmlElement = class;
309 | THtmlElementList = TList;
310 |
311 | THtmlElement = class(TInterfacedObject, IHtmlElement)
312 | private
313 | function GetChildrens: IHtmlElementList;
314 | protected
315 | // ying32
316 | function GetParent: IHtmlElement; stdcall;
317 | function GetTagName: WideString; stdcall;
318 | procedure SetTagName(Value: WideString); stdcall;
319 |
320 | function GetContent: WideString; stdcall;
321 | function GetOrignal: WideString; stdcall;
322 | function GetChildrenCount: Integer; stdcall;
323 | function GetChildren(Index: Integer): IHtmlElement; stdcall;
324 | function GetCloseTag: IHtmlElement; stdcall;
325 | function GetInnerHtml(): WideString; stdcall;
326 | function GetOuterHtml(): WideString; stdcall;
327 | function GetInnerText(): WideString; stdcall;
328 | procedure SetInnerText(Value: WideString); stdcall;
329 |
330 | function GetAttributes(Key: WideString): WideString; stdcall;
331 | procedure SetAttributes(Key: WideString; Value: WideString); stdcall;
332 |
333 | procedure RemoveAttr(AAttrName: string); stdcall;
334 |
335 | function GetSourceLineNum(): Integer; stdcall;
336 | function GetSourceColNum(): Integer; stdcall;
337 |
338 | // ying32添加
339 | function RemoveChild(ANode: IHtmlElement): Integer; stdcall;
340 | procedure Remove; stdcall;
341 | function AppedChild(const ATag: string): IHtmlElement; stdcall;
342 |
343 |
344 | // 属性是否存在
345 | function HasAttribute(AttributeName: WideString): Boolean; stdcall;
346 | { 用CSS选择器语法查找Element,不支持"伪类"
347 | CSS Selector Style search,not support Pseudo-classes.
348 |
349 | http://www.w3.org/TR/CSS2/selector.html
350 | }
351 |
352 | function SimpleCSSSelector(const selector: WideString): IHtmlElementList; stdcall;
353 | function Find(const selector: WideString): IHtmlElementList; stdcall;
354 |
355 | function FindX(const AXPath: WideString): IHtmlElementList; stdcall;
356 | // 枚举属性
357 | function EnumAttributeNames(Index: Integer): WideString; stdcall;
358 |
359 | property TagName: WideString read GetTagName write SetTagName;
360 | property ChildrenCount: Integer read GetChildrenCount;
361 | property Children[index: Integer]: IHtmlElement read GetChildren; default;
362 | property CloseTag: IHtmlElement read GetCloseTag;
363 | property Content: WideString read GetContent;
364 | property Orignal: WideString read GetOrignal;
365 | property Parent: IHtmlElement read GetParent;
366 | // 获取元素在源代码中的位置
367 | property SourceLineNum: Integer read GetSourceLineNum;
368 | property SourceColNum: Integer read GetSourceColNum;
369 | //
370 | property InnerHtml: WideString read GetInnerHtml;
371 | property OuterHtml: WideString read GetOuterHtml;
372 | property InnerText: WideString read GetInnerText;
373 |
374 | property Attributes[Key: WideString]: WideString read GetAttributes write SetAttributes;
375 |
376 | property Childrens: IHtmlElementList read GetChildrens;
377 | private
378 | // xpath to css, by:ying32
379 | FValidationRe: TPerlRegEx;
380 | FClosed: Boolean;
381 | //
382 | FOwner: THtmlElement;
383 | FCloseTag: IHtmlElement;
384 | FTagName: string;
385 | FIsCloseTag: Boolean;
386 | FContent: string;
387 | FOrignal: string;
388 | FSourceLine: Integer;
389 | FSourceCol: Integer;
390 | //
391 | FAttributes: TStringDictionary;
392 | FChildren: TIHtmlElementList;
393 | procedure _GetHtml(IncludeSelf: Boolean; Sb: TStringBuilder);
394 | procedure _GetText(IncludeSelf: Boolean; Sb: TStringBuilder);
395 | procedure _SimpleCSSSelector(const ItemGroup: TCSSSelectorItemGroup; r: TIHtmlElementList);
396 | procedure _Select(Item: PCSSSelectorItem; Count: Integer; r: TIHtmlElementList; OnlyTopLevel: Boolean = false);
397 |
398 | // by:yin32 添加
399 | procedure InitXPathRegExPattern;
400 | ///
401 | /// 利用正则提取xpath并转为css选择器
402 | /// 转换代码来自python:https://github.com/santiycr/cssify/blob/master/cssify.py
403 | ///
404 | function XPathToCSSSelector(const AXPath: string): string;
405 | public
406 | constructor Create(AOwner: THtmlElement; AText: string; ALine, ACol: Integer);
407 | destructor Destroy; override;
408 | end;
409 |
410 | TIHtmlElementList = class(TInterfacedObject, IHtmlElementList)
411 | private
412 | // IHtmlElementList
413 | function GetItems(Index: Integer): IHtmlElement; stdcall;
414 | function GetCount: Integer; stdcall;
415 | protected
416 | FList: TList;
417 | procedure SetItems(Index: Integer; const Value: IHtmlElement); inline;
418 | function Add(Value: IHtmlElement): Integer; inline;
419 | procedure Delete(Index: Integer); inline;
420 | procedure Clear; inline;
421 | // ying32添加
422 | procedure RemoveAll; stdcall;
423 | procedure Remove(ANode: IHtmlElement); stdcall;
424 | procedure Each(f: TElementEachEvent); stdcall;
425 | function GetText: WideString; stdcall;
426 | public
427 | constructor Create;
428 | destructor Destroy; override;
429 |
430 | function GetEnumerator: THtmlListEnumerator;
431 |
432 | function IndexOf(Item: IHtmlElement): Integer;
433 | // IHtmlElementList
434 | property Items[index: Integer]: IHtmlElement read GetItems write SetItems; default;
435 | property Count: Integer read GetCount;
436 | end;
437 |
438 | function SplitStr(ACharSet: TSysCharSet; AStr: string): TStringDynArray;
439 | var
440 | L, I: Integer;
441 | S: string;
442 | StrChar: Char;
443 | begin
444 | Result := nil;
445 | if Length(AStr) <= 0 then
446 | Exit;
447 |
448 | I := Low(AStr);
449 | L := Low(AStr);
450 | StrChar := #0;
451 | while I <= High(AStr) do
452 | begin
453 | if CharInSet(AStr[I], ['''', '"']) then
454 | if StrChar = #0 then
455 | StrChar := AStr[I]
456 | else if StrChar = AStr[I] then
457 | StrChar := #0;
458 | // 不在字符串中,分隔符才生效
459 | if StrChar = #0 then
460 | if CharInSet(AStr[I], ACharSet) then
461 | begin
462 | if I > L then
463 | begin
464 | S := Copy(AStr, L{$IF (LowStrIndex = 0)} + 1{$ENDIF}, I - L);
465 | SetLength(Result, Length(Result) + 1);
466 | Result[Length(Result) - 1] := S;
467 | end;
468 | L := I + 1;
469 | end;
470 | Inc(I);
471 | end;
472 | if (I > L) then
473 | begin
474 | S := Copy(AStr, L{$IF (LowStrIndex = 0)} + 1{$ENDIF}, I - L);
475 | SetLength(Result, Length(Result) + 1);
476 | Result[Length(Result) - 1] := S;
477 | end;
478 | end;
479 |
480 | function StrRight(const Value: string; Count: Integer): string;
481 | var
482 | start: Integer;
483 | begin
484 | start := Length(Value) - Count + 1;
485 | if start <= 0 then
486 | Result := Value
487 | else
488 | Result := Copy(Value, start, Count);
489 | end;
490 |
491 | function StrLeft(const Value: string; Count: Integer): string;
492 | begin
493 | Result := Copy(Value, LowStrIndex, Count);
494 | end;
495 |
496 |
497 | // ComapreAttr
498 |
499 | function _aoExist(const Item: TAttrSelectorItem; E: THtmlElement): Boolean;
500 | begin
501 | Result := E.FAttributes.ContainsKey(Item.Key);
502 | end;
503 |
504 | function _aoEqual(const Item: TAttrSelectorItem; E: THtmlElement): Boolean;
505 | begin
506 | Result := E.FAttributes.ContainsKey(Item.Key) and
507 | (E.FAttributes[Item.Key] = Item.Value);
508 | end;
509 |
510 | function _aoNotEqual(const Item: TAttrSelectorItem; E: THtmlElement): Boolean;
511 | begin
512 | Result := E.FAttributes.ContainsKey(Item.Key) and
513 | (E.FAttributes[Item.Key] <> Item.Value);
514 | end;
515 |
516 | function _aoIncludeWord(const Item: TAttrSelectorItem; E: THtmlElement)
517 | : Boolean;
518 | var
519 | S: TStringDynArray;
520 | I: Integer;
521 | begin
522 | Result := false;
523 | if not E.FAttributes.ContainsKey(Item.Key) then
524 | Exit;
525 | Result := True;
526 | S := SplitStr(WhiteSpace, E.FAttributes[Item.Key]);
527 | for I := Low(S) to High(S) do
528 | if S[I] = Item.Value then
529 | Exit;
530 | Result := false;
531 | end;
532 |
533 | function _aoBeginWord(const Item: TAttrSelectorItem; E: THtmlElement): Boolean;
534 | var
535 | S: TStringDynArray;
536 | I: Integer;
537 | begin
538 | Result := false;
539 | if not E.FAttributes.ContainsKey(Item.Key) then
540 | Exit;
541 | S := SplitStr((WhiteSpace + ['_', '-']), E.FAttributes[Item.Key]);
542 | Result := (Length(S) > 0) and (S[0] = Item.Value);
543 | end;
544 |
545 | function _aoBegin(const Item: TAttrSelectorItem; E: THtmlElement): Boolean;
546 | var
547 | attr, Value: string;
548 | begin
549 | Result := false;
550 | if not E.FAttributes.ContainsKey(Item.Key) then
551 | Exit;
552 | attr := E.FAttributes[Item.Key];
553 | Value := Item.Value;
554 | Result := (Length(attr) > Length(Value)) and
555 | (StrLeft(attr, Length(Value)) = Value);
556 | end;
557 |
558 | function _aoEnd(const Item: TAttrSelectorItem; E: THtmlElement): Boolean;
559 | var
560 | attr, Value: string;
561 | begin
562 | Result := false;
563 | if not E.FAttributes.ContainsKey(Item.Key) then
564 | Exit;
565 | attr := E.FAttributes[Item.Key];
566 | Value := Item.Value;
567 | Result := (Length(attr) > Length(Value)) and
568 | (StrRight(attr, Length(Value)) = Value);
569 | end;
570 |
571 | function _aoContain(const Item: TAttrSelectorItem; E: THtmlElement): Boolean;
572 | begin
573 | Result := false;
574 | if not E.FAttributes.ContainsKey(Item.Key) then
575 | Exit;
576 | Result := Pos(Item.Value, E.FAttributes[Item.Key]) > 0;
577 | end;
578 |
579 | type
580 | TFNCompareAttr = function(const Item: TAttrSelectorItem;
581 | E: THtmlElement): Boolean;
582 |
583 | const
584 | AttrCompareFuns: array [TAttrOperator] of TFNCompareAttr = (_aoExist,
585 | _aoEqual, _aoNotEqual, _aoIncludeWord, _aoBeginWord, _aoBegin, _aoEnd,
586 | _aoContain);
587 |
588 | function GetTagProperty(const TagName: string): WORD; forward;
589 |
590 | procedure DoError(const Msg: string);
591 | begin
592 | raise Exception.Create(Msg);
593 | end;
594 |
595 | procedure _ParserAttrs(var sc: TSourceContext; var Attrs: TAttributeDynArray);
596 | var
597 | Item: TAttributeItem;
598 | begin
599 | SetLength(Attrs, 0);
600 | while True do
601 | begin
602 | sc.SkipBlank();
603 | if sc.CurrentChar = #0 then
604 | Break;
605 | Item.Key := sc.ReadStr((WhiteSpace + [#0, '=']));
606 | Item.Value := '';
607 | sc.SkipBlank;
608 | if sc.CurrentChar = '=' then
609 | begin
610 | sc.IncSrc;
611 | sc.SkipBlank;
612 | Item.Value := sc.ReadStr((WhiteSpace + [#0]));
613 | end;
614 | SetLength(Attrs, Length(Attrs) + 1);
615 | Attrs[Length(Attrs) - 1] := Item;
616 | end;
617 | end;
618 |
619 | procedure _ParserNodeItem(S: string; var ATagName: string;
620 | var Attrs: TAttributeDynArray);
621 | var
622 | sc: TSourceContext;
623 | begin
624 | sc.setCode(S);
625 | sc.SkipBlank;
626 |
627 | ATagName := UpperCase(sc.ReadStr((WhiteSpace + [#0, '/', '>'])));
628 |
629 | _ParserAttrs(sc, Attrs);
630 | end;
631 |
632 | function CreateTextElement(AOwner: THtmlElement; AText: string;
633 | ALine, ACol: Integer): THtmlElement;
634 | begin
635 | Result := THtmlElement.Create(AOwner, AText, ALine, ACol);
636 | with Result do
637 | begin
638 | // Edwin: 2019-07-08: html entities in the text element shouldn't be decoded?
639 | //FContent := DecodeHtmlEntities(AText);
640 | FContent := AText;
641 | FTagName := cSpecialTagName_Text;
642 | FClosed := True;
643 | end;
644 | end;
645 |
646 | function CreateScriptElement(AOwner: THtmlElement; AText: string;
647 | ALine, ACol: Integer): THtmlElement;
648 | begin
649 | Result := THtmlElement.Create(AOwner, AText, ALine, ACol);
650 | with Result do
651 | begin
652 | // Edwin: 2019-07-09: html entities in scripts shouldn't be decoded?
653 | //FContent := DecodeHtmlEntities(AText);
654 | FContent := AText;
655 | FTagName := '#SCRIPT';
656 | FClosed := True;
657 | end;
658 | end;
659 |
660 | function CreateStyleElement(AOwner: THtmlElement; AText: string;
661 | ALine, ACol: Integer): THtmlElement;
662 | begin
663 | Result := THtmlElement.Create(AOwner, AText, ALine, ACol);
664 | with Result do
665 | begin
666 | // Edwin: 2019-07-09: html entities in