├── .gitignore ├── HtmlParserEx.pas ├── LICENSE ├── README.md └── demo ├── Bin └── github.com_ying32_htmlparser.html ├── Unit23.dfm ├── Unit23.pas ├── demo.dpr └── demo.dproj /.gitignore: -------------------------------------------------------------------------------- 1 | # Uncomment these types if you want even more clean repository. But be careful. 2 | # It can make harm to an existing project source. Read explanations below. 3 | # 4 | # Resource files are binaries containing manifest, project icon and version info. 5 | # They can not be viewed as text or compared by diff-tools. Consider replacing them with .rc files. 6 | #*.res 7 | # 8 | # Type library file (binary). In old Delphi versions it should be stored. 9 | # Since Delphi 2009 it is produced from .ridl file and can safely be ignored. 10 | #*.tlb 11 | # 12 | # Diagram Portfolio file. Used by the diagram editor up to Delphi 7. 13 | # Uncomment this if you are not using diagrams or use newer Delphi version. 14 | #*.ddp 15 | # 16 | # Visual LiveBindings file. Added in Delphi XE2. 17 | # Uncomment this if you are not using LiveBindings Designer. 18 | #*.vlb 19 | # 20 | # Deployment Manager configuration file for your project. Added in Delphi XE2. 21 | # Uncomment this if it is not mobile development and you do not use remote debug feature. 22 | #*.deployproj 23 | # 24 | # C++ object files produced when C/C++ Output file generation is configured. 25 | # Uncomment this if you are not using external objects (zlib library for example). 26 | #*.obj 27 | # 28 | 29 | # Delphi compiler-generated binaries (safe to delete) 30 | *.exe 31 | *.dll 32 | *.bpl 33 | *.bpi 34 | *.dcp 35 | *.so 36 | *.apk 37 | *.drc 38 | *.map 39 | *.dres 40 | *.rsm 41 | *.tds 42 | *.dcu 43 | *.lib 44 | *.a 45 | *.o 46 | *.ocx 47 | 48 | # Delphi autogenerated files (duplicated info) 49 | *.cfg 50 | *.hpp 51 | *Resource.rc 52 | 53 | # Delphi local files (user-specific info) 54 | *.local 55 | *.identcache 56 | *.projdata 57 | *.tvsconfig 58 | *.dsk 59 | 60 | # Delphi history and backups 61 | __history/ 62 | __recovery/ 63 | *.~* 64 | 65 | # Castalia statistics file (since XE7 Castalia is distributed with Delphi) 66 | *.stat 67 | -------------------------------------------------------------------------------- /HtmlParserEx.pas: -------------------------------------------------------------------------------- 1 | { 2 | Html解析器. 3 | 最近因为用到Html解析功能.在网上找了几款Delphi版本的,结果发现解析复杂的HTML都有一些问题. 4 | 没办法自己写了一款,经测试到现在没遇到任何解析不了的Html. 5 | 6 | wr960204 武稀松 2013 7 | 8 | http://www.raysoftware.cn/?p=370 9 | 10 | 感谢牛人杨延哲在HTML语法和CSS语法方面的帮助. 11 | Thank Yang Yanzhe. 12 | 13 | http://www.pockhero.com/ 14 | 15 | 本版本只支持DelphiXE3之后的版本.如果用早期Delphi请使用HTMLParser.pas文件. 16 | 支持Windows,MacOSX,iOS,Android平台,完全去掉了对指针的使用.防止以后易博龙去掉 17 | 移动平台对指针的支持. 18 | 19 | 脱离了对旧版本的支持,甩掉包袱开发起来真的很爽! 20 | 21 | --------------------------------------------------------------------------------- 22 | ying32修改记录 23 | Email:1444386932@qq.com 24 | 25 | 2017年06月20日 26 | 27 | 1、为IHtmlElementList增加for in 语法支持 28 | 29 | 2017年05月04日 30 | 31 | 1、去除RegularExpressions单元的引用,不再使用TRegEx改使用RegularExpressionsCore单元中的TPerlRegEx 32 | 33 | 2017年04月19日 34 | 35 | 1、增加使用XPath功能的编译指令"UseXPath",默认不使用XPath,个人感觉没什么用 36 | 37 | 2016年11月23日 38 | 39 | 1、简单支持XPath,简单的吧,利用xpath转css selector,嘿 40 | xpath转换的代码改自python版本:https://github.com/santiycr/cssify/blob/master/cssify.py 41 | 另外对正则System.RegularExpressions.pas中TGroupCollection.GetItem进行了改进,没有找到命名组 42 | 且非PCRE_ERROR_NOSUBSTRING时返回空的,而不是抛出一个异常。暂时就简单粗爆的直接改吧,官方网站 43 | 上看到有人提过这个QC,不知道后面有没有解决。 44 | 45 | 2016年11月15日 46 | 47 | IHtmlElement和THtmlElement的改变: 48 | 1、Attributes属性增加Set方法 49 | 2、TagName属性增加Set方法 50 | 3、增加Parent属性 51 | 4、增加RemoveAttr方法 52 | 5、增加Remove方法 53 | 6、增加RemoveChild方法 54 | 7、增加Find方法,此为SimpleCSSSelector的一个另名 55 | 8、_GetHtml不再直接附加FOrignal属性值,而是使用GetSelfHtml重新对修改后的元素进行赋值操作,并更新FOrignal的值 56 | 9、增加Text属性 57 | 58 | 使用例: 59 | EL.Attributes['class'] := 'xxxx'; 60 | EL.TagName = 'a'; 61 | EL.Remove 移除自己 62 | EL.RemoveChild(El2); 63 | 64 | El.Find('a'); 65 | 66 | IHtmlElementList和THtmlElementList的改变: 67 | 1、增加RemoveAll方法 68 | 2、增加Remove方法 69 | 3、增加Each方法 70 | 4、增加Text属性 71 | // 使用例: 72 | 73 | // 移除选择的元素 74 | LHtml.Find('a').RemoveAll 75 | 76 | // 查找并遍沥 77 | LHtml.Find('a').Each( 78 | procedure(AIndex: Integer; AEl: IHtmlElement) 79 | begin 80 | Writeln('Index=', AIndex, ', href=', AEl.Attributes['href']); 81 | end); 82 | 83 | // 直接输出,仅选中的第一个元素 84 | Writeln(LHtml.Find('title').Text); 85 | 86 | } 87 | 88 | 89 | unit HtmlParserEx; 90 | 91 | 92 | {$IF RTLVersion < 24.0} 93 | {$MESSAGE ERROR '只支持XE3及之后的版本'} 94 | {$ENDIF} 95 | 96 | interface 97 | 98 | uses 99 | SysUtils, 100 | Classes, 101 | Generics.Collections, 102 | RegularExpressionsCore; 103 | 104 | {$IF (defined(IOS) and defined(CPUARM)) or defined(ANDROID)} 105 | {$DEFINE MOBILE_DEV} 106 | {$ENDIF} 107 | 108 | const 109 | // By design each IHtmlElement has a TagName property, even it's a 'Text' element. 110 | // so cSpecialTagName_Text represents the fake tag name of a Text element 111 | cSpecialTagName_Text = '#TEXT'; 112 | 113 | LowStrIndex = Low(string); // 移动平台=0,个人电脑平台=1 114 | 115 | type 116 | 117 | {$IFNDEF MSWINDOWS} 118 | { 接口使用WideString是为了可以给例如C++,VB等语言使用. 119 | 但是如果离开了Windows平台,其他平台是没有WideString这个COM的数据类型的. 120 | } 121 | WideString = String; 122 | {$ENDIF} 123 | IHtmlElement = interface; 124 | IHtmlElementList = interface; 125 | 126 | TElementEachEvent = reference to procedure(AIndex: Integer; AEl: IHtmlElement); 127 | 128 | IHtmlElement = interface 129 | ['{8C75239C-8CFA-499F-B115-7CEBEDFB421B}'] 130 | function GetParent: IHtmlElement; stdcall; 131 | function GetTagName: WideString; stdcall; 132 | procedure SetTagName(Value: WideString); stdcall; 133 | function GetContent: WideString; stdcall; 134 | function GetOrignal: WideString; stdcall; 135 | function GetChildrenCount: Integer; stdcall; 136 | function GetChildren(Index: Integer): IHtmlElement; stdcall; 137 | function GetCloseTag: IHtmlElement; stdcall; 138 | function GetInnerHtml(): WideString; stdcall; 139 | function GetOuterHtml(): WideString; stdcall; 140 | function GetInnerText(): WideString; stdcall; 141 | procedure SetInnerText(Value: WideString); stdcall; 142 | 143 | function GetAttributes(Key: WideString): WideString; stdcall; 144 | procedure SetAttributes(Key: WideString; Value: WideString); stdcall; 145 | 146 | procedure RemoveAttr(AAttrName: string); stdcall; 147 | 148 | function GetSourceLineNum(): Integer; stdcall; 149 | function GetSourceColNum(): Integer; stdcall; 150 | 151 | // 增加移除节点 152 | function RemoveChild(ANode: IHtmlElement): Integer; stdcall; 153 | procedure Remove; stdcall; 154 | function AppedChild(const ATag: string): IHtmlElement; stdcall; 155 | 156 | // 属性是否存在 157 | function HasAttribute(AttributeName: WideString): Boolean; stdcall; 158 | { 用CSS选择器语法查找Element,不支持"伪类" 159 | CSS Selector Style search,not support Pseudo-classes. 160 | 161 | http://www.w3.org/TR/CSS2/selector.html 162 | } 163 | 164 | function SimpleCSSSelector(const selector: WideString): IHtmlElementList; stdcall; 165 | function Find(const selector: WideString): IHtmlElementList; stdcall; 166 | 167 | function FindX(const AXPath: WideString): IHtmlElementList; stdcall; 168 | 169 | // 枚举属性 170 | function EnumAttributeNames(Index: Integer): WideString; stdcall; 171 | 172 | property TagName: WideString read GetTagName write SetTagName; 173 | property ChildrenCount: Integer read GetChildrenCount; 174 | property Children[index: Integer]: IHtmlElement read GetChildren; default; 175 | property CloseTag: IHtmlElement read GetCloseTag; 176 | property Content: WideString read GetContent; 177 | property Orignal: WideString read GetOrignal; 178 | property Parent: IHtmlElement read GetParent; 179 | // 获取元素在源代码中的位置 180 | property SourceLineNum: Integer read GetSourceLineNum; 181 | property SourceColNum: Integer read GetSourceColNum; 182 | // 183 | property InnerHtml: WideString read GetInnerHtml; 184 | property OuterHtml: WideString read GetOuterHtml; 185 | property InnerText: WideString read GetInnerText write SetInnerText; 186 | property Text: WideString read GetInnerText write SetInnerText; 187 | 188 | property Attributes[Key: WideString]: WideString read GetAttributes write SetAttributes; 189 | // ying32 不改动原来的,只简化使用 190 | property Attrs[Key: WideString]: WideString read GetAttributes write SetAttributes; 191 | end; 192 | 193 | 194 | THtmlListEnumerator = class 195 | private 196 | FIndex: Integer; 197 | FList: IHtmlElementList; 198 | public 199 | constructor Create(AList: IHtmlElementList); 200 | function GetCurrent: IHtmlElement; inline; 201 | function MoveNext: Boolean; 202 | property Current: IHtmlElement read GetCurrent; 203 | end; 204 | 205 | IHtmlElementList = interface 206 | ['{8E1380C6-4263-4BF6-8D10-091A86D8E7D9}'] 207 | function GetCount: Integer; stdcall; 208 | function GetItems(Index: Integer): IHtmlElement; stdcall; 209 | procedure RemoveAll; stdcall; 210 | procedure Remove(ANode: IHtmlElement); stdcall; 211 | procedure Each(f: TElementEachEvent); stdcall; 212 | function GetText: WideString; stdcall; 213 | function GetEnumerator: THtmlListEnumerator; 214 | 215 | property Text: WideString read GetText; 216 | property Count: Integer read GetCount; 217 | property Items[Index: Integer]: IHtmlElement read GetItems; default; 218 | end; 219 | 220 | function ParserHTML(const Source: WideString): IHtmlElement; stdcall; 221 | function DecodeHtmlEntities(S: String): string; forward; 222 | 223 | implementation 224 | uses 225 | StrUtils; 226 | 227 | 228 | type 229 | TStringDictionary = TDictionary; 230 | TPropDictionary = TDictionary; 231 | TStringDynArray = TArray; 232 | 233 | const 234 | WhiteSpace = [' ', #13, #10, #9]; 235 | // CSS Attribute Compare Operator 236 | OperatorChar = ['=', '!', '*', '~', '|', '^', '$']; 237 | MaxListSize = Maxint div 16; 238 | 239 | // TagProperty 240 | tpBlock = $01; 241 | tpInline = $02; 242 | tpEmpty = $04; 243 | tpFormatAsInline = $08; 244 | tpPreserveWhitespace = $10; 245 | 246 | tpInlineOrEmpty = tpInline or tpEmpty; 247 | 248 | type 249 | 250 | TAttrOperator = (aoExist, aoEqual, aoNotEqual, aoIncludeWord, aoBeginWord, 251 | aoBegin, aoEnd, aoContain); 252 | 253 | PAttrSelectorItem = ^TAttrSelectorItem; 254 | 255 | TAttrSelectorItem = record 256 | Key: string; 257 | AttrOperator: TAttrOperator; 258 | Value: string; 259 | end; 260 | 261 | TSelectorItemRelation = (sirNONE, sirDescendant, sirChildren, 262 | sirYoungerBrother, sirAllYoungerBrother); 263 | 264 | PCSSSelectorItem = ^TCSSSelectorItem; 265 | 266 | TCSSSelectorItem = record 267 | Relation: TSelectorItemRelation; 268 | szTag: string; 269 | Attributes: array of TAttrSelectorItem; 270 | end; 271 | 272 | TCSSSelectorItems = array of TCSSSelectorItem; 273 | PCSSSelectorItems = ^TCSSSelectorItems; 274 | TCSSSelectorItemGroup = array of TCSSSelectorItems; 275 | 276 | // 277 | TSourceContext = record 278 | private 279 | function GetCharOfCurrent(Index: Integer): Char; inline; 280 | public 281 | Code: string; 282 | CodeIndex: Integer; 283 | LineNum: Integer; 284 | ColNum: Integer; 285 | CurrentChar: Char; 286 | {$IFDEF DEBUG} 287 | currentCode: PChar; 288 | {$ENDIF} 289 | procedure IncSrc(); overload; inline; 290 | procedure IncSrc(Step: Integer); overload; inline; 291 | procedure setCode(const ACode: string); inline; 292 | function ReadStr(UntilChars: TSysCharSet): string; inline; 293 | function PeekStr(Index: Integer): string; overload; inline; 294 | function PeekStr(): string; overload; inline; 295 | function subStr(Index, Count: Integer): string; overload; inline; 296 | function subStr(Count: Integer): string; overload; inline; 297 | procedure SkipBlank(); inline; 298 | property charOfCurrent[Index: Integer]: Char read GetCharOfCurrent; 299 | end; 300 | 301 | TAttributeItem = record 302 | Key, Value: string; 303 | end; 304 | 305 | TAttributeDynArray = TArray; 306 | 307 | TIHtmlElementList = class; 308 | THtmlElement = class; 309 | THtmlElementList = TList; 310 | 311 | THtmlElement = class(TInterfacedObject, IHtmlElement) 312 | private 313 | function GetChildrens: IHtmlElementList; 314 | protected 315 | // ying32 316 | function GetParent: IHtmlElement; stdcall; 317 | function GetTagName: WideString; stdcall; 318 | procedure SetTagName(Value: WideString); stdcall; 319 | 320 | function GetContent: WideString; stdcall; 321 | function GetOrignal: WideString; stdcall; 322 | function GetChildrenCount: Integer; stdcall; 323 | function GetChildren(Index: Integer): IHtmlElement; stdcall; 324 | function GetCloseTag: IHtmlElement; stdcall; 325 | function GetInnerHtml(): WideString; stdcall; 326 | function GetOuterHtml(): WideString; stdcall; 327 | function GetInnerText(): WideString; stdcall; 328 | procedure SetInnerText(Value: WideString); stdcall; 329 | 330 | function GetAttributes(Key: WideString): WideString; stdcall; 331 | procedure SetAttributes(Key: WideString; Value: WideString); stdcall; 332 | 333 | procedure RemoveAttr(AAttrName: string); stdcall; 334 | 335 | function GetSourceLineNum(): Integer; stdcall; 336 | function GetSourceColNum(): Integer; stdcall; 337 | 338 | // ying32添加 339 | function RemoveChild(ANode: IHtmlElement): Integer; stdcall; 340 | procedure Remove; stdcall; 341 | function AppedChild(const ATag: string): IHtmlElement; stdcall; 342 | 343 | 344 | // 属性是否存在 345 | function HasAttribute(AttributeName: WideString): Boolean; stdcall; 346 | { 用CSS选择器语法查找Element,不支持"伪类" 347 | CSS Selector Style search,not support Pseudo-classes. 348 | 349 | http://www.w3.org/TR/CSS2/selector.html 350 | } 351 | 352 | function SimpleCSSSelector(const selector: WideString): IHtmlElementList; stdcall; 353 | function Find(const selector: WideString): IHtmlElementList; stdcall; 354 | 355 | function FindX(const AXPath: WideString): IHtmlElementList; stdcall; 356 | // 枚举属性 357 | function EnumAttributeNames(Index: Integer): WideString; stdcall; 358 | 359 | property TagName: WideString read GetTagName write SetTagName; 360 | property ChildrenCount: Integer read GetChildrenCount; 361 | property Children[index: Integer]: IHtmlElement read GetChildren; default; 362 | property CloseTag: IHtmlElement read GetCloseTag; 363 | property Content: WideString read GetContent; 364 | property Orignal: WideString read GetOrignal; 365 | property Parent: IHtmlElement read GetParent; 366 | // 获取元素在源代码中的位置 367 | property SourceLineNum: Integer read GetSourceLineNum; 368 | property SourceColNum: Integer read GetSourceColNum; 369 | // 370 | property InnerHtml: WideString read GetInnerHtml; 371 | property OuterHtml: WideString read GetOuterHtml; 372 | property InnerText: WideString read GetInnerText; 373 | 374 | property Attributes[Key: WideString]: WideString read GetAttributes write SetAttributes; 375 | 376 | property Childrens: IHtmlElementList read GetChildrens; 377 | private 378 | // xpath to css, by:ying32 379 | FValidationRe: TPerlRegEx; 380 | FClosed: Boolean; 381 | // 382 | FOwner: THtmlElement; 383 | FCloseTag: IHtmlElement; 384 | FTagName: string; 385 | FIsCloseTag: Boolean; 386 | FContent: string; 387 | FOrignal: string; 388 | FSourceLine: Integer; 389 | FSourceCol: Integer; 390 | // 391 | FAttributes: TStringDictionary; 392 | FChildren: TIHtmlElementList; 393 | procedure _GetHtml(IncludeSelf: Boolean; Sb: TStringBuilder); 394 | procedure _GetText(IncludeSelf: Boolean; Sb: TStringBuilder); 395 | procedure _SimpleCSSSelector(const ItemGroup: TCSSSelectorItemGroup; r: TIHtmlElementList); 396 | procedure _Select(Item: PCSSSelectorItem; Count: Integer; r: TIHtmlElementList; OnlyTopLevel: Boolean = false); 397 | 398 | // by:yin32 添加 399 | procedure InitXPathRegExPattern; 400 | /// 401 | /// 利用正则提取xpath并转为css选择器 402 | /// 转换代码来自python:https://github.com/santiycr/cssify/blob/master/cssify.py 403 | /// 404 | function XPathToCSSSelector(const AXPath: string): string; 405 | public 406 | constructor Create(AOwner: THtmlElement; AText: string; ALine, ACol: Integer); 407 | destructor Destroy; override; 408 | end; 409 | 410 | TIHtmlElementList = class(TInterfacedObject, IHtmlElementList) 411 | private 412 | // IHtmlElementList 413 | function GetItems(Index: Integer): IHtmlElement; stdcall; 414 | function GetCount: Integer; stdcall; 415 | protected 416 | FList: TList; 417 | procedure SetItems(Index: Integer; const Value: IHtmlElement); inline; 418 | function Add(Value: IHtmlElement): Integer; inline; 419 | procedure Delete(Index: Integer); inline; 420 | procedure Clear; inline; 421 | // ying32添加 422 | procedure RemoveAll; stdcall; 423 | procedure Remove(ANode: IHtmlElement); stdcall; 424 | procedure Each(f: TElementEachEvent); stdcall; 425 | function GetText: WideString; stdcall; 426 | public 427 | constructor Create; 428 | destructor Destroy; override; 429 | 430 | function GetEnumerator: THtmlListEnumerator; 431 | 432 | function IndexOf(Item: IHtmlElement): Integer; 433 | // IHtmlElementList 434 | property Items[index: Integer]: IHtmlElement read GetItems write SetItems; default; 435 | property Count: Integer read GetCount; 436 | end; 437 | 438 | function SplitStr(ACharSet: TSysCharSet; AStr: string): TStringDynArray; 439 | var 440 | L, I: Integer; 441 | S: string; 442 | StrChar: Char; 443 | begin 444 | Result := nil; 445 | if Length(AStr) <= 0 then 446 | Exit; 447 | 448 | I := Low(AStr); 449 | L := Low(AStr); 450 | StrChar := #0; 451 | while I <= High(AStr) do 452 | begin 453 | if CharInSet(AStr[I], ['''', '"']) then 454 | if StrChar = #0 then 455 | StrChar := AStr[I] 456 | else if StrChar = AStr[I] then 457 | StrChar := #0; 458 | // 不在字符串中,分隔符才生效 459 | if StrChar = #0 then 460 | if CharInSet(AStr[I], ACharSet) then 461 | begin 462 | if I > L then 463 | begin 464 | S := Copy(AStr, L{$IF (LowStrIndex = 0)} + 1{$ENDIF}, I - L); 465 | SetLength(Result, Length(Result) + 1); 466 | Result[Length(Result) - 1] := S; 467 | end; 468 | L := I + 1; 469 | end; 470 | Inc(I); 471 | end; 472 | if (I > L) then 473 | begin 474 | S := Copy(AStr, L{$IF (LowStrIndex = 0)} + 1{$ENDIF}, I - L); 475 | SetLength(Result, Length(Result) + 1); 476 | Result[Length(Result) - 1] := S; 477 | end; 478 | end; 479 | 480 | function StrRight(const Value: string; Count: Integer): string; 481 | var 482 | start: Integer; 483 | begin 484 | start := Length(Value) - Count + 1; 485 | if start <= 0 then 486 | Result := Value 487 | else 488 | Result := Copy(Value, start, Count); 489 | end; 490 | 491 | function StrLeft(const Value: string; Count: Integer): string; 492 | begin 493 | Result := Copy(Value, LowStrIndex, Count); 494 | end; 495 | 496 | 497 | // ComapreAttr 498 | 499 | function _aoExist(const Item: TAttrSelectorItem; E: THtmlElement): Boolean; 500 | begin 501 | Result := E.FAttributes.ContainsKey(Item.Key); 502 | end; 503 | 504 | function _aoEqual(const Item: TAttrSelectorItem; E: THtmlElement): Boolean; 505 | begin 506 | Result := E.FAttributes.ContainsKey(Item.Key) and 507 | (E.FAttributes[Item.Key] = Item.Value); 508 | end; 509 | 510 | function _aoNotEqual(const Item: TAttrSelectorItem; E: THtmlElement): Boolean; 511 | begin 512 | Result := E.FAttributes.ContainsKey(Item.Key) and 513 | (E.FAttributes[Item.Key] <> Item.Value); 514 | end; 515 | 516 | function _aoIncludeWord(const Item: TAttrSelectorItem; E: THtmlElement) 517 | : Boolean; 518 | var 519 | S: TStringDynArray; 520 | I: Integer; 521 | begin 522 | Result := false; 523 | if not E.FAttributes.ContainsKey(Item.Key) then 524 | Exit; 525 | Result := True; 526 | S := SplitStr(WhiteSpace, E.FAttributes[Item.Key]); 527 | for I := Low(S) to High(S) do 528 | if S[I] = Item.Value then 529 | Exit; 530 | Result := false; 531 | end; 532 | 533 | function _aoBeginWord(const Item: TAttrSelectorItem; E: THtmlElement): Boolean; 534 | var 535 | S: TStringDynArray; 536 | I: Integer; 537 | begin 538 | Result := false; 539 | if not E.FAttributes.ContainsKey(Item.Key) then 540 | Exit; 541 | S := SplitStr((WhiteSpace + ['_', '-']), E.FAttributes[Item.Key]); 542 | Result := (Length(S) > 0) and (S[0] = Item.Value); 543 | end; 544 | 545 | function _aoBegin(const Item: TAttrSelectorItem; E: THtmlElement): Boolean; 546 | var 547 | attr, Value: string; 548 | begin 549 | Result := false; 550 | if not E.FAttributes.ContainsKey(Item.Key) then 551 | Exit; 552 | attr := E.FAttributes[Item.Key]; 553 | Value := Item.Value; 554 | Result := (Length(attr) > Length(Value)) and 555 | (StrLeft(attr, Length(Value)) = Value); 556 | end; 557 | 558 | function _aoEnd(const Item: TAttrSelectorItem; E: THtmlElement): Boolean; 559 | var 560 | attr, Value: string; 561 | begin 562 | Result := false; 563 | if not E.FAttributes.ContainsKey(Item.Key) then 564 | Exit; 565 | attr := E.FAttributes[Item.Key]; 566 | Value := Item.Value; 567 | Result := (Length(attr) > Length(Value)) and 568 | (StrRight(attr, Length(Value)) = Value); 569 | end; 570 | 571 | function _aoContain(const Item: TAttrSelectorItem; E: THtmlElement): Boolean; 572 | begin 573 | Result := false; 574 | if not E.FAttributes.ContainsKey(Item.Key) then 575 | Exit; 576 | Result := Pos(Item.Value, E.FAttributes[Item.Key]) > 0; 577 | end; 578 | 579 | type 580 | TFNCompareAttr = function(const Item: TAttrSelectorItem; 581 | E: THtmlElement): Boolean; 582 | 583 | const 584 | AttrCompareFuns: array [TAttrOperator] of TFNCompareAttr = (_aoExist, 585 | _aoEqual, _aoNotEqual, _aoIncludeWord, _aoBeginWord, _aoBegin, _aoEnd, 586 | _aoContain); 587 | 588 | function GetTagProperty(const TagName: string): WORD; forward; 589 | 590 | procedure DoError(const Msg: string); 591 | begin 592 | raise Exception.Create(Msg); 593 | end; 594 | 595 | procedure _ParserAttrs(var sc: TSourceContext; var Attrs: TAttributeDynArray); 596 | var 597 | Item: TAttributeItem; 598 | begin 599 | SetLength(Attrs, 0); 600 | while True do 601 | begin 602 | sc.SkipBlank(); 603 | if sc.CurrentChar = #0 then 604 | Break; 605 | Item.Key := sc.ReadStr((WhiteSpace + [#0, '='])); 606 | Item.Value := ''; 607 | sc.SkipBlank; 608 | if sc.CurrentChar = '=' then 609 | begin 610 | sc.IncSrc; 611 | sc.SkipBlank; 612 | Item.Value := sc.ReadStr((WhiteSpace + [#0])); 613 | end; 614 | SetLength(Attrs, Length(Attrs) + 1); 615 | Attrs[Length(Attrs) - 1] := Item; 616 | end; 617 | end; 618 | 619 | procedure _ParserNodeItem(S: string; var ATagName: string; 620 | var Attrs: TAttributeDynArray); 621 | var 622 | sc: TSourceContext; 623 | begin 624 | sc.setCode(S); 625 | sc.SkipBlank; 626 | 627 | ATagName := UpperCase(sc.ReadStr((WhiteSpace + [#0, '/', '>']))); 628 | 629 | _ParserAttrs(sc, Attrs); 630 | end; 631 | 632 | function CreateTextElement(AOwner: THtmlElement; AText: string; 633 | ALine, ACol: Integer): THtmlElement; 634 | begin 635 | Result := THtmlElement.Create(AOwner, AText, ALine, ACol); 636 | with Result do 637 | begin 638 | // Edwin: 2019-07-08: html entities in the text element shouldn't be decoded? 639 | //FContent := DecodeHtmlEntities(AText); 640 | FContent := AText; 641 | FTagName := cSpecialTagName_Text; 642 | FClosed := True; 643 | end; 644 | end; 645 | 646 | function CreateScriptElement(AOwner: THtmlElement; AText: string; 647 | ALine, ACol: Integer): THtmlElement; 648 | begin 649 | Result := THtmlElement.Create(AOwner, AText, ALine, ACol); 650 | with Result do 651 | begin 652 | // Edwin: 2019-07-09: html entities in scripts shouldn't be decoded? 653 | //FContent := DecodeHtmlEntities(AText); 654 | FContent := AText; 655 | FTagName := '#SCRIPT'; 656 | FClosed := True; 657 | end; 658 | end; 659 | 660 | function CreateStyleElement(AOwner: THtmlElement; AText: string; 661 | ALine, ACol: Integer): THtmlElement; 662 | begin 663 | Result := THtmlElement.Create(AOwner, AText, ALine, ACol); 664 | with Result do 665 | begin 666 | // Edwin: 2019-07-09: html entities in