├── LICENSE ├── README.md ├── demo ├── DemoParser.dpr ├── DemoParser.dproj ├── DemoParser.dproj.local ├── DemoParser.identcache ├── DemoParser.res ├── Parse.png ├── Win32 │ └── Debug │ │ ├── DemoParser.exe │ │ └── Lotto.html ├── Xpath.png ├── main.dfm ├── main.pas └── ssl │ ├── libeay32.dll │ └── ssleay32.dll ├── parser.pas └── test ├── DemoParserTests.res ├── ParserTests.dpr ├── ParserTests.dproj ├── ParserTests.dproj.local ├── ParserTests.identcache ├── ParserTests.res ├── Testparser.pas ├── Win32 └── Debug │ ├── dunit.ini │ └── test.html └── test.html /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 SandBil 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Delphi HTML Parser 3 | description: Very small and fast module for parsing HTML pages. 4 | author: sandbil 5 | tags: Delphi 6 | 7 | --- 8 | Delphi HTML Parser 9 | ========= 10 | This module allows you to work with HTML documents as a DOM tree and to use XPath for searching tags. 11 | It is a very simple way to parse HTML. 12 | 13 | It has been tested with Delphi XE5, XE6 and 10.3 14 | 15 | ## Requirements 16 | OpenSSL library (libeay32.dll, ssleay32.dll) 17 | Current versions of OpenSSL can be downloaded at https://indy.fulgan.com/SSL/ 18 | 19 | ## Usage 20 | 21 | 1. Add ``parser`` (parser.pas) to the uses list. 22 | 2. Example usage 23 | ```pascal 24 | {...} 25 | var 26 | DomTree: TDomTree; 27 | DomTreeNode: TDomTreeNode; 28 | HtmlTxt: string; 29 | NodeList: TNodeList; 30 | ValueList: TStringList; 31 | begin 32 | HtmlTxt := '<html>...</html>'; NodeList := TNodeList.Create; 33 | ValueList := TStringList.Create; 34 | DomTree := TDomTree.Create; 35 | DomTreeNode := DomTree.RootNode; 36 | If DomTreeNode.RunParse(HtmlTxt) then 37 | begin 38 | // short example code: 39 | DomTreeNode.FindXPath('//*[@id="TopBox"]/div[1]/div[@class="draw default"]', NodeList, ValueList) 40 | 41 | {...} 42 | end; 43 | {...} 44 | end; 45 | ``` 46 | 3. enjoy!!! 
47 | 48 | ## Available properties and methods: 49 | - TDomTree 50 | - property Count - count of nodes 51 | - property RootNode - root node (TDomTreeNode) 52 | - property ParseErr - TStringList containing all parsing errors and warnings 53 | 54 | - TDomTreeNode 55 | - property Tag - name of tag 56 | - property AttributesTxt - string with all attributes 57 | - property Attributes - parsed attributes (TDictionary) 58 | - property Text - text 59 | - property TypeTag - 60 | - property Child - contains child nodes (TChildList of TDomTreeNode) 61 | - property Parent - contains the parent node 62 | - property Owner - contains pointer to owner TDomTree 63 | - function FindNode - boolean function, if true then TNodeList contains found nodes 64 | - function FindTagOfIndex - boolean function, if true then TNodeList contains found nodes 65 | - function GetAttrValue - returns value of attribute of current node 66 | - function GetComment - returns the comment with the specified index in the current container node 67 | - function GetTagName - returns name of tag + AttributesTxt 68 | - function GetText - returns the text with the specified index in the current container node 69 | - function GetXPath - returns XPath for current node 70 | - function RunParse - if parsing succeeds, the Child property contains the HTML DOM tree 71 | - function FindXPath - boolean function, if true then TNodeList contains found nodes 72 | - and TStringList contains found values of attribute, comment, text 73 | 74 | - Xpath support: 75 | - attributes - //*[@id="TopBox"]/div/@class 76 | - comment - //*[@id="TopBox"]/div/comment()[3] 77 | - text - //*[@id="TopBox"]/div/text()[2] 78 | - previous level - /../div[@class="draw default"]/img[2]/@alt 79 | 80 | - partial match when searching by attribute value: 81 | ``/div[@class="draw]`` returns nodes ``[[div class="draw default"],[div class="draw"], [div class="draw any"]..]`` 82 | like Xpath's function contains. 83 | *Note*: Xpath always starts the search from the current node. 
If you want to do a global search, you must start from the root node. 84 | 85 | ## demo project 86 | [![screenshot1](/demo/Parse.png)](/demo/Parse.png) 87 | [![screenshot2](/demo/Xpath.png)](/demo/Xpath.png) 88 | -------------------------------------------------------------------------------- /demo/DemoParser.dpr: -------------------------------------------------------------------------------- 1 | program DemoParser; 2 | 3 | uses 4 | Vcl.Forms, 5 | main in 'main.pas' {Form1}, 6 | parser in '..\parser.pas'; 7 | 8 | {$R *.res} 9 | 10 | begin 11 | Application.Initialize; 12 | Application.MainFormOnTaskbar := True; 13 | Application.CreateForm(TForm1, Form1); 14 | Application.Run; 15 | 16 | end. 17 | -------------------------------------------------------------------------------- /demo/DemoParser.dproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | {8F70B988-B08B-45CD-B86D-07E9F1BCF376} 4 | 15.4 5 | VCL 6 | DemoParser.dpr 7 | True 8 | Debug 9 | Win32 10 | 1 11 | Application 12 | 13 | 14 | true 15 | 16 | 17 | true 18 | Base 19 | true 20 | 21 | 22 | true 23 | Base 24 | true 25 | 26 | 27 | true 28 | Base 29 | true 30 | 31 | 32 | true 33 | Cfg_1 34 | true 35 | true 36 | 37 | 38 | true 39 | Base 40 | true 41 | 42 | 43 | $(BDS)\bin\delphi_PROJECTICON.ico 44 | System;Xml;Data;Datasnap;Web;Soap;Vcl;Vcl.Imaging;Vcl.Touch;Vcl.Samples;Vcl.Shell;$(DCC_Namespace) 45 | DemoParser 46 | .\$(Platform)\$(Config) 47 | .\$(Platform)\$(Config) 48 | false 49 | false 50 | false 51 | false 52 | false 53 | 54 | 55 | $(BDS)\bin\default_app.manifest 56 | Winapi;System.Win;Data.Win;Datasnap.Win;Web.Win;Soap.Win;Xml.Win;Bde;$(DCC_Namespace) 57 | true 58 | CompanyName=;FileDescription=;FileVersion=1.0.0.0;InternalName=;LegalCopyright=;LegalTrademarks=;OriginalFilename=;ProductName=;ProductVersion=1.0.0.0;Comments= 59 | 1033 60 | 
FireDACSqliteDriver;FireDACDSDriver;DBXSqliteDriver;FireDACPgDriver;fmx;IndySystem;TeeDB;tethering;inetdbbde;vclib;DBXInterBaseDriver;DataSnapClient;DataSnapServer;DataSnapCommon;DataSnapProviderClient;DBXSybaseASEDriver;DbxCommonDriver;vclimg;dbxcds;DatasnapConnectorsFreePascal;MetropolisUILiveTile;vcldb;vcldsnap;fmxFireDAC;DBXDb2Driver;DBXOracleDriver;CustomIPTransport;vclribbon;dsnap;IndyIPServer;fmxase;vcl;IndyCore;DBXMSSQLDriver;IndyIPCommon;CloudService;FmxTeeUI;FireDACIBDriver;CodeSiteExpressPkg;DataSnapFireDAC;FireDACDBXDriver;soapserver;inetdbxpress;dsnapxml;FireDACInfxDriver;FireDACDb2Driver;adortl;FireDACASADriver;bindcompfmx;vcldbx;FireDACODBCDriver;RESTBackendComponents;rtl;dbrtl;DbxClientDriver;FireDACCommon;bindcomp;inetdb;Tee;DBXOdbcDriver;vclFireDAC;xmlrtl;DataSnapNativeClient;svnui;ibxpress;IndyProtocols;DBXMySQLDriver;FireDACCommonDriver;bindengine;vclactnband;bindcompdbx;soaprtl;FMXTee;TeeUI;bindcompvcl;vclie;FireDACADSDriver;vcltouch;VclSmp;FireDACMSSQLDriver;FireDAC;DBXInformixDriver;Intraweb;VCLRESTComponents;DataSnapConnectors;DataSnapServerMidas;dsnapcon;DBXFirebirdDriver;inet;fmxobj;FireDACMySQLDriver;soapmidas;vclx;svn;DBXSybaseASADriver;FireDACOracleDriver;fmxdae;RESTComponents;bdertl;FireDACMSAccDriver;dbexpress;DataSnapIndy10ServerTransport;IndyIPClient;$(DCC_UsePackage) 61 | 62 | 63 | 
FireDACSqliteDriver;FireDACDSDriver;DBXSqliteDriver;FireDACPgDriver;fmx;IndySystem;TeeDB;tethering;vclib;DBXInterBaseDriver;DataSnapClient;DataSnapServer;DataSnapCommon;DataSnapProviderClient;DBXSybaseASEDriver;DbxCommonDriver;vclimg;dbxcds;DatasnapConnectorsFreePascal;MetropolisUILiveTile;vcldb;vcldsnap;fmxFireDAC;DBXDb2Driver;DBXOracleDriver;CustomIPTransport;vclribbon;dsnap;IndyIPServer;fmxase;vcl;IndyCore;DBXMSSQLDriver;IndyIPCommon;CloudService;FmxTeeUI;FireDACIBDriver;DataSnapFireDAC;FireDACDBXDriver;soapserver;inetdbxpress;dsnapxml;FireDACInfxDriver;FireDACDb2Driver;adortl;FireDACASADriver;bindcompfmx;FireDACODBCDriver;RESTBackendComponents;rtl;dbrtl;DbxClientDriver;FireDACCommon;bindcomp;inetdb;Tee;DBXOdbcDriver;vclFireDAC;xmlrtl;DataSnapNativeClient;ibxpress;IndyProtocols;DBXMySQLDriver;FireDACCommonDriver;bindengine;vclactnband;bindcompdbx;soaprtl;FMXTee;TeeUI;bindcompvcl;vclie;FireDACADSDriver;vcltouch;VclSmp;FireDACMSSQLDriver;FireDAC;DBXInformixDriver;Intraweb;VCLRESTComponents;DataSnapConnectors;DataSnapServerMidas;dsnapcon;DBXFirebirdDriver;inet;fmxobj;FireDACMySQLDriver;soapmidas;vclx;DBXSybaseASADriver;FireDACOracleDriver;fmxdae;RESTComponents;FireDACMSAccDriver;dbexpress;DataSnapIndy10ServerTransport;IndyIPClient;$(DCC_UsePackage) 64 | 65 | 66 | DEBUG;$(DCC_Define) 67 | true 68 | false 69 | true 70 | true 71 | true 72 | 73 | 74 | 2 75 | 3 76 | 2 77 | true 78 | CompanyName=;FileDescription=Parser;FileVersion=1.2.2.3;InternalName=;LegalCopyright=;LegalTrademarks=;OriginalFilename=;ProductName=;ProductVersion=1.0.0.0;Comments= 79 | 1033 80 | false 81 | 82 | 83 | false 84 | RELEASE;$(DCC_Define) 85 | 0 86 | 0 87 | 88 | 89 | 90 | MainSource 91 | 92 | 93 |
Form1
94 | dfm 95 |
96 | 97 | 98 | 99 | Cfg_2 100 | Base 101 | 102 | 103 | Base 104 | 105 | 106 | Cfg_1 107 | Base 108 | 109 |
110 | 111 | Delphi.Personality.12 112 | 113 | 114 | 115 | 116 | DemoParser.dpr 117 | 118 | 119 | False 120 | False 121 | 1 122 | 0 123 | 0 124 | 0 125 | False 126 | False 127 | False 128 | False 129 | False 130 | 1049 131 | 1251 132 | 133 | 134 | 135 | 136 | 1.0.0.0 137 | 138 | 139 | 140 | 141 | 142 | 1.0.0.0 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | Embarcadero C++Builder Office 2000 Servers Package 170 | Embarcadero C++Builder Office XP Servers Package 171 | Microsoft Office 2000 Sample Automation Server Wrapper Components 172 | Microsoft Office XP Sample Automation Server Wrapper Components 173 | 174 | 175 | 176 | 177 | True 178 | False 179 | 180 | True 181 | 182 | C:\!work\!Demo\DemoParser6\demo\Test\DemoParserTests.dproj 183 | 184 | 185 | 12 186 | 187 | 188 | 189 |
190 | -------------------------------------------------------------------------------- /demo/DemoParser.dproj.local: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 1899.12.30 00:00:00.000.302,C:\!work\!Demo\DemoParser\Unit1.pas= 5 | 1899.12.30 00:00:00.000.211,=C:\!work\!Demo\DemoParser\Unit1.pas 6 | 1899.12.30 00:00:00.000.230,=C:\Users\bil\Documents\Embarcadero\Studio\Projects\ModelSupport_DemoParser\main\default.txvpck 7 | 1899.12.30 00:00:00.000.609,C:\Users\bil\Documents\Embarcadero\Studio\Projects\main.dfm=C:\Users\bil\Documents\Embarcadero\Studio\Projects\Unit1.dfm 8 | 1899.12.30 00:00:00.000.219,=C:\!work\delphi\HTML-Parser\demo\ModelSupport_DemoParser\default.txaPackage 9 | 1899.12.30 00:00:00.000.586,C:\Users\bil\Documents\Embarcadero\Studio\Projects\main.pas=C:\Users\bil\Documents\Embarcadero\Studio\Projects\Unit1.pas 10 | 1899.12.30 00:00:00.000.093,=C:\Users\bil\Documents\Embarcadero\Studio\Projects\ModelSupport_DemoParser\default.txvpck 11 | 1899.12.30 00:00:00.000.770,C:\Users\bil\Documents\Embarcadero\Studio\Projects\DemoParser.dproj=C:\Users\bil\Documents\Embarcadero\Studio\Projects\Project1.dproj 12 | 1899.12.30 00:00:00.000.969,=C:\!work\delphi\DemoParser\parser.pas 13 | 1899.12.30 00:00:00.000.419,C:\!work\delphi\DemoParser\parser.pas=C:\!work\delphi\DemoParser\Unit2.pas 14 | 1899.12.30 00:00:00.000.702,=C:\!work\delphi\DemoParser\Unit2.pas 15 | 1899.12.30 00:00:00.000.511,=C:\Users\bil\Documents\Embarcadero\Studio\Projects\Unit1.pas 16 | 1899.12.30 00:00:00.000.258,=C:\Users\bil\Documents\Embarcadero\Studio\Projects\ModelSupport_DemoParser\DemoParser\default.txvpck 17 | 1899.12.30 00:00:00.000.806,=C:\Users\bil\Documents\Embarcadero\Studio\Projects\ModelSupport_DemoParser\default.txaPackage 18 | 19 | 20 | -------------------------------------------------------------------------------- /demo/DemoParser.identcache: -------------------------------------------------------------------------------- 1 
| )C:\!work\delphi\HTML-Parser\demo\main.pas/C:\!work\delphi\HTML-Parser\demo\DemoParser.dpr&C:\!work\delphi\HTML-Parser\parser.pas -------------------------------------------------------------------------------- /demo/DemoParser.res: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandbil/HTML-Parser/4101a73e817abda6ffb739578b9b9f2523147930/demo/DemoParser.res -------------------------------------------------------------------------------- /demo/Parse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandbil/HTML-Parser/4101a73e817abda6ffb739578b9b9f2523147930/demo/Parse.png -------------------------------------------------------------------------------- /demo/Win32/Debug/DemoParser.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandbil/HTML-Parser/4101a73e817abda6ffb739578b9b9f2523147930/demo/Win32/Debug/DemoParser.exe -------------------------------------------------------------------------------- /demo/Win32/Debug/Lotto.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Check Lotto Results & Play Online - Oz Lotteries 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 41 | 42 | 77 | 78 | 369 | 370 | 388 | 389 | 390 | 395 | 396 | 462 | 463 | 464 | 465 | 466 | 469 | 470 | 473 | 488 | 489 | 490 | 491 | 492 | 497 | 498 | 499 | 500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 | 509 | 510 |
511 | 512 |
513 | 514 | 515 | 516 | 517 |
518 |
519 | 520 |
521 | 522 | 523 | 524 | 525 | 526 | 527 |
Lotto Points (icon)Lotto Points: ---What can I get?
528 |
529 |
530 | 531 |
532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 552 | 553 | 554 | 555 | 559 | 564 | 565 | 566 | 570 | 575 | 576 |
556 | Email address
557 | 558 |
560 | Not a member? Join! 561 |
562 | Forgot password? 563 |
567 | Password
568 | 569 |
571 |
572 | 573 |
574 |
577 |
578 |
579 | 580 | 599 | 600 |
601 | 602 | 603 | 622 |
623 | 624 | 625 | 626 | 627 | 1768 | 1769 |
628 | 629 | 630 | 1765 | 1766 |
631 | 632 |
633 | 634 | 635 | 636 |
637 |
638 | 639 |
640 | 641 |

Did you know your browser is from 2001?

642 |

To experience the best Oz Lotteries has to offer, we recommend upgrading your browser.

643 |

It is free, very easy, and you will notice the Internet looks much, much better. Try one of these:

644 | 645 |
646 | 647 |
648 | 649 | 650 | 651 |
652 |
653 | 654 | 655 | 656 |
657 |
658 | 659 | 660 | 661 |
662 |
663 | 664 | 665 | 666 |
667 |
668 | 669 | 670 | 671 |
672 |
673 |
674 | 675 | 676 | 1756 | 1757 |
677 | 678 | 679 | 680 | 1606 | 1607 | 1608 | 1609 | 1748 | 1749 | 1750 |
681 |
682 |
683 |

684 |
685 | 686 |
687 | 1601 |
1602 |
1603 |
1604 |
1605 |
1610 |
Basket
1611 | You currently have 1612 |
0 Items 1613 |
in your shopping basket. 1614 |

Total: AUD $0.00 1615 |

View basket 1616 | checkout 1617 |
1618 |
1619 |
1620 |
1621 | 1683 |
1684 | 1685 | 1707 | 1708 | 1709 | 1710 |
1711 | 1732 |
1733 | 1734 | 1735 | 1739 | 1740 | $50 Million 1741 | 1742 | Hurry! Buy tickets for the $50 Million Dollar Powerball draw Thu 16th Oct 9:30pm AEDT. 1743 | 1744 | 1745 |
1746 | 1747 |
 
1751 |
1752 | 1753 | 1754 | 1755 |
1758 | 1759 | 1760 | 1761 |
1762 |
1763 | 1764 |
1767 |
1770 | 1771 | 1772 | 1773 | 1774 | 1857 | 1858 | 1859 | 1860 | 1861 | 1862 | 1866 | 1867 | 1868 | 1873 | 1874 | 1875 | 1876 | 1877 | -------------------------------------------------------------------------------- /demo/Xpath.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandbil/HTML-Parser/4101a73e817abda6ffb739578b9b9f2523147930/demo/Xpath.png -------------------------------------------------------------------------------- /demo/main.dfm: -------------------------------------------------------------------------------- 1 | object Form1: TForm1 2 | Left = 411 3 | Top = 0 4 | Caption = 'DemoParser' 5 | ClientHeight = 691 6 | ClientWidth = 998 7 | Color = clBtnFace 8 | Font.Charset = DEFAULT_CHARSET 9 | Font.Color = clWindowText 10 | Font.Height = -11 11 | Font.Name = 'Tahoma' 12 | Font.Style = [] 13 | OldCreateOrder = False 14 | Position = poDesigned 15 | PixelsPerInch = 96 16 | TextHeight = 13 17 | object StatusBar1: TStatusBar 18 | Left = 0 19 | Top = 672 20 | Width = 998 21 | Height = 19 22 | Panels = < 23 | item 24 | Width = 250 25 | end 26 | item 27 | Width = 50 28 | end> 29 | end 30 | object PageControl1: TPageControl 31 | Left = 0 32 | Top = 0 33 | Width = 998 34 | Height = 672 35 | ActivePage = TabSheet1 36 | Align = alClient 37 | TabOrder = 1 38 | object TabSheet1: TTabSheet 39 | Caption = 'HTML Parser' 40 | OnShow = TabSheet1Show 41 | object Splitter1: TSplitter 42 | Left = 0 43 | Top = 560 44 | Width = 990 45 | Height = 3 46 | Cursor = crVSplit 47 | Align = alBottom 48 | ExplicitLeft = 1 49 | ExplicitTop = 61 50 | ExplicitWidth = 398 51 | end 52 | object Panel2: TPanel 53 | Left = 0 54 | Top = 563 55 | Width = 990 56 | Height = 81 57 | Align = alBottom 58 | TabOrder = 0 59 | object Memo1: TMemo 60 | Left = 1 61 | Top = 1 62 | Width = 988 63 | Height = 79 64 | Align = alClient 65 | BevelInner = bvNone 66 | Lines.Strings = ( 67 | '') 68 | ReadOnly = True 69 | ScrollBars = ssVertical 
70 | TabOrder = 0 71 | end 72 | end 73 | object Panel1: TPanel 74 | Left = 0 75 | Top = 0 76 | Width = 990 77 | Height = 560 78 | Align = alClient 79 | TabOrder = 1 80 | object Panel4: TPanel 81 | Left = 1 82 | Top = 41 83 | Width = 988 84 | Height = 518 85 | Align = alClient 86 | Caption = 'Panel4' 87 | TabOrder = 0 88 | object TreeView1: TTreeView 89 | Left = 1 90 | Top = 1 91 | Width = 986 92 | Height = 516 93 | ParentCustomHint = False 94 | Align = alClient 95 | BevelInner = bvNone 96 | BorderStyle = bsNone 97 | DragMode = dmAutomatic 98 | Indent = 19 99 | MultiSelect = True 100 | ParentShowHint = False 101 | ReadOnly = True 102 | RightClickSelect = True 103 | ShowHint = False 104 | TabOrder = 0 105 | OnMouseUp = TreeView1MouseUp 106 | end 107 | end 108 | object Panel8: TPanel 109 | Left = 1 110 | Top = 1 111 | Width = 988 112 | Height = 40 113 | Align = alTop 114 | TabOrder = 1 115 | object ParseBt: TButton 116 | Left = 7 117 | Top = 5 118 | Width = 90 119 | Height = 25 120 | Caption = 'Parse' 121 | TabOrder = 0 122 | OnClick = ParseBtClick 123 | end 124 | object Edit1: TEdit 125 | Left = 106 126 | Top = 8 127 | Width = 759 128 | Height = 21 129 | TabOrder = 1 130 | Text = 'http://www.ozlotteries.com/lotto-results#' 131 | end 132 | end 133 | end 134 | end 135 | object XPath: TTabSheet 136 | Caption = 'miniXPath' 137 | ImageIndex = 1 138 | OnShow = XPathShow 139 | object Splitter3: TSplitter 140 | Left = 400 141 | Top = 40 142 | Height = 520 143 | AutoSnap = False 144 | ExplicitLeft = 704 145 | ExplicitTop = 512 146 | ExplicitHeight = 100 147 | end 148 | object Splitter4: TSplitter 149 | Left = 0 150 | Top = 560 151 | Width = 990 152 | Height = 3 153 | Cursor = crVSplit 154 | Align = alBottom 155 | ExplicitTop = 500 156 | ExplicitWidth = 906 157 | end 158 | object Panel5: TPanel 159 | Left = 0 160 | Top = 0 161 | Width = 990 162 | Height = 40 163 | Align = alTop 164 | TabOrder = 0 165 | object FindOneBt: TButton 166 | Left = 5 167 | Top = 5 168 | Width = 90 169 
| Height = 25 170 | Caption = 'Find in DOM' 171 | TabOrder = 0 172 | OnClick = FindOneBtClick 173 | end 174 | object Edit2: TEdit 175 | Left = 105 176 | Top = 7 177 | Width = 760 178 | Height = 21 179 | TabOrder = 1 180 | Text = '//*[@id="TopBox"]/div/div/div/div/div[@class="draw default"]' 181 | end 182 | end 183 | object Panel6: TPanel 184 | Left = 0 185 | Top = 40 186 | Width = 400 187 | Height = 520 188 | Align = alLeft 189 | Caption = 'Panel6' 190 | TabOrder = 1 191 | end 192 | object Panel7: TPanel 193 | Left = 403 194 | Top = 40 195 | Width = 587 196 | Height = 520 197 | Align = alClient 198 | Caption = 'Panel7' 199 | TabOrder = 2 200 | object Splitter2: TSplitter 201 | Left = 1 202 | Top = 225 203 | Width = 585 204 | Height = 3 205 | Cursor = crVSplit 206 | Align = alTop 207 | ExplicitLeft = -23 208 | ExplicitTop = 17 209 | ExplicitWidth = 540 210 | end 211 | object Panel3: TPanel 212 | Left = 1 213 | Top = 1 214 | Width = 585 215 | Height = 224 216 | Align = alTop 217 | Caption = 'Panel3' 218 | TabOrder = 0 219 | object Panel9: TPanel 220 | Left = 1 221 | Top = 193 222 | Width = 583 223 | Height = 30 224 | Align = alBottom 225 | Color = clSkyBlue 226 | ParentBackground = False 227 | TabOrder = 0 228 | object ClearBt: TButton 229 | Left = 4 230 | Top = 3 231 | Width = 45 232 | Height = 25 233 | Caption = 'Clear' 234 | TabOrder = 0 235 | OnClick = ClearBtClick 236 | end 237 | object FindAllBt: TButton 238 | Left = 208 239 | Top = 3 240 | Width = 122 241 | Height = 25 242 | Caption = 'Find all result' 243 | TabOrder = 1 244 | OnClick = FindAllBtClick 245 | end 246 | object Button1: TButton 247 | Left = 55 248 | Top = 3 249 | Width = 58 250 | Height = 25 251 | Caption = 'Collapse' 252 | TabOrder = 2 253 | OnClick = Button1Click 254 | end 255 | object Button2: TButton 256 | Left = 106 257 | Top = 3 258 | Width = 55 259 | Height = 25 260 | Caption = 'Expand' 261 | TabOrder = 3 262 | OnClick = Button2Click 263 | end 264 | end 265 | object TreeView2: TTreeView 266 
| Left = 1 267 | Top = 1 268 | Width = 583 269 | Height = 192 270 | Align = alClient 271 | Indent = 19 272 | TabOrder = 1 273 | OnDragDrop = TreeView2DragDrop 274 | OnDragOver = TreeView2DragOver 275 | OnEndDrag = TreeView2EndDrag 276 | Items.NodeData = { 277 | 0301000000B80000000000000000000000FFFFFFFFFFFFFFFF00000000000000 278 | 0002000000014D2F002F002A005B004000690064003D00220054006F00700042 279 | 006F00780022005D002F006400690076002F006400690076002F006400690076 280 | 002F006400690076005B00400063006C006100730073003D0022007200650073 281 | 0075006C0074005F0062006C006F0063006B005D002F006400690076005B0040 282 | 0063006C006100730073003D0022006E0075006D00620065007200730022005D 283 | 00960000000000000000000000FFFFFFFFFFFFFFFF0000000000000000000000 284 | 00013C2F002E002E002F006400690076005B00400063006C006100730073003D 285 | 00220064007200610077002000640065006600610075006C00740022005D002F 286 | 0061005B00400063006C006100730073003D0022006C006F00740074006F005F 287 | 006E0061006D00650022005D002F007400650078007400280029005000000000 288 | 00000000000000FFFFFFFFFFFFFFFF00000000000000000600000001192F002E 289 | 002E002F006400690076005B00400063006C006100730073003D0022006E0075 290 | 006D00620065007200730022005D006E0000000000000000000000FFFFFFFFFF 291 | FFFFFF00000000000000000000000001282F002E002E002F006400690076005B 292 | 00400063006C006100730073003D002200640072006100770020006400650066 293 | 00610075006C00740022005D002F007400650078007400280029005B0031005D 294 | 006E0000000000000000000000FFFFFFFFFFFFFFFF0000000000000000000000 295 | 0001282F002E002E002F006400690076005B00400063006C006100730073003D 296 | 00220064007200610077002000640065006600610075006C00740022005D002F 297 | 007400650078007400280029005B0032005D00540000000000000000000000FF 298 | FFFFFFFFFFFFFF000000000000000000000000011B2F007400610062006C0065 299 | 002F00740072005B0032005D002F00740064005B0031005D002F0069006D0067 300 | 002F00400061006C007400540000000000000000000000FFFFFFFFFFFFFFFF00 301 | 
0000000000000000000000011B2F007400610062006C0065002F00740072005B 302 | 0032005D002F00740064005B0032005D002F0069006D0067002F00400061006C 303 | 0074007A0000000000000000000000FFFFFFFFFFFFFFFF000000000000000000 304 | 000000012E2F002E002E002F006400690076005B00400063006C006100730073 305 | 003D0022006400690076006900640065006E0064005F00770072006100700070 306 | 006500720022005D002F007300700061006E002F007400650078007400280029 307 | 00740000000000000000000000FFFFFFFFFFFFFFFF0000000000000000040000 308 | 00012B2F002E002E002F006400690076005B00400063006C006100730073003D 309 | 0022006400690076006900640065006E0064005F007700720061007000700065 310 | 00720022005D002F007400610062006C0065002F007400720046000000000000 311 | 0000000000FFFFFFFFFFFFFFFF00000000000000000000000001142F00740064 312 | 005B0031005D002F007300740072006F006E0067002F00740065007800740028 313 | 002900460000000000000000000000FFFFFFFFFFFFFFFF000000000000000000 314 | 00000001142F00740064005B0032005D002F007300740072006F006E0067002F 315 | 007400650078007400280029003E0000000000000000000000FFFFFFFFFFFFFF 316 | FF00000000000000000000000001102F00740064005B0032005D002F00740065 317 | 0078007400280029005B0031005D003E0000000000000000000000FFFFFFFFFF 318 | FFFFFF00000000000000000000000001102F00740064005B0032005D002F0074 319 | 00650078007400280029005B0032005D00} 320 | end 321 | end 322 | object TreeView3: TTreeView 323 | Left = 1 324 | Top = 228 325 | Width = 585 326 | Height = 291 327 | Align = alClient 328 | Indent = 19 329 | ReadOnly = True 330 | TabOrder = 1 331 | end 332 | end 333 | object Panel10: TPanel 334 | Left = 0 335 | Top = 563 336 | Width = 990 337 | Height = 81 338 | Align = alBottom 339 | Caption = 'Panel10' 340 | TabOrder = 3 341 | end 342 | end 343 | object TabSheet2: TTabSheet 344 | Caption = 'MultiTest parsing' 345 | ImageIndex = 2 346 | object Panel11: TPanel 347 | Left = 0 348 | Top = 0 349 | Width = 990 350 | Height = 644 351 | Align = alClient 352 | TabOrder = 0 353 | object Panel12: TPanel 354 | Left = 1 355 | 
Top = 41 356 | Width = 988 357 | Height = 602 358 | Align = alClient 359 | Caption = 'Panel4' 360 | TabOrder = 0 361 | object Memo2: TMemo 362 | Left = 1 363 | Top = 1 364 | Width = 986 365 | Height = 600 366 | Align = alClient 367 | ScrollBars = ssBoth 368 | TabOrder = 0 369 | end 370 | end 371 | object Panel13: TPanel 372 | Left = 1 373 | Top = 1 374 | Width = 988 375 | Height = 40 376 | Align = alTop 377 | TabOrder = 1 378 | object Button3: TButton 379 | Left = 7 380 | Top = 5 381 | Width = 90 382 | Height = 25 383 | Caption = 'Parse all link' 384 | TabOrder = 0 385 | OnClick = Button3Click 386 | end 387 | object Edit3: TEdit 388 | Left = 106 389 | Top = 8 390 | Width = 551 391 | Height = 21 392 | TabOrder = 1 393 | Text = 394 | 'https://www.google.ru/search?ie=UTF-8&hl=ru&q=delphi%20xpath&gws' + 395 | '_rd=ssl' 396 | end 397 | end 398 | end 399 | end 400 | end 401 | object IdHTTP1: TIdHTTP 402 | IOHandler = IdSSLIOHandlerSocketOpenSSL1 403 | AllowCookies = True 404 | ProxyParams.BasicAuthentication = False 405 | ProxyParams.ProxyPort = 0 406 | Request.ContentLength = -1 407 | Request.ContentRangeEnd = -1 408 | Request.ContentRangeStart = -1 409 | Request.ContentRangeInstanceLength = -1 410 | Request.Accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' 411 | Request.BasicAuthentication = False 412 | Request.UserAgent = 'Mozilla/3.0 (compatible; Indy Library)' 413 | Request.Ranges.Units = 'bytes' 414 | Request.Ranges = <> 415 | HTTPOptions = [hoForceEncodeParams] 416 | Left = 824 417 | Top = 72 418 | end 419 | object IdSSLIOHandlerSocketOpenSSL1: TIdSSLIOHandlerSocketOpenSSL 420 | MaxLineAction = maException 421 | Port = 0 422 | DefaultPort = 0 423 | SSLOptions.Mode = sslmUnassigned 424 | SSLOptions.VerifyMode = [] 425 | SSLOptions.VerifyDepth = 0 426 | Left = 824 427 | Top = 144 428 | end 429 | object IdHTTP2: TIdHTTP 430 | IOHandler = IdSSLIOHandlerSocketOpenSSL2 431 | AllowCookies = True 432 | ProxyParams.BasicAuthentication = False 433 | 
unit main;

{ Demo form for the HTML parser.

  Tab 1 downloads a page, parses it into the global DomTree and mirrors the
  DOM in TreeView1. Tab 2 ("XPath") lets the user search the DOM by XPath.
  Tab 3 follows every absolute link of a page and reports parse diagnostics.

  Ownership note: every TreeView1 node stores a raw pointer to its
  TDomTreeNode in Data. Whenever DomTree is freed, TreeView1 must be cleared
  as well, otherwise those pointers dangle. }

interface

uses
  Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants,
  System.Classes, Vcl.Graphics,
  Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls, IdBaseComponent,
  System.Contnrs, System.StrUtils, parser,
  IdComponent, IdTCPConnection, IdTCPClient, IdHTTP, Vcl.ComCtrls, Vcl.ExtCtrls,
  Vcl.Buttons, IdIOHandler, IdIOHandlerSocket, IdIOHandlerStack, IdSSL,
  IdSSLOpenSSL;

type
  TForm1 = class(TForm)
    Memo1: TMemo;
    IdHTTP1: TIdHTTP;
    Edit1: TEdit;
    ParseBt: TButton;
    Panel1: TPanel;
    Panel2: TPanel;
    StatusBar1: TStatusBar;
    PageControl1: TPageControl;
    TabSheet1: TTabSheet;
    Splitter1: TSplitter;
    Panel4: TPanel;
    XPath: TTabSheet;
    Panel5: TPanel;
    Edit2: TEdit;
    FindOneBt: TButton;
    Panel6: TPanel;
    Splitter2: TSplitter;
    Panel7: TPanel;
    TreeView1: TTreeView;
    Panel8: TPanel;
    Panel3: TPanel;
    Splitter3: TSplitter;
    Panel9: TPanel;
    TreeView2: TTreeView;
    TreeView3: TTreeView;
    ClearBt: TButton;
    FindAllBt: TButton;
    Panel10: TPanel;
    Splitter4: TSplitter;
    Button1: TButton;
    Button2: TButton;
    IdSSLIOHandlerSocketOpenSSL1: TIdSSLIOHandlerSocketOpenSSL;
    TabSheet2: TTabSheet;
    Panel11: TPanel;
    Panel12: TPanel;
    Panel13: TPanel;
    Button3: TButton;
    Edit3: TEdit;
    Memo2: TMemo;
    IdHTTP2: TIdHTTP;
    IdSSLIOHandlerSocketOpenSSL2: TIdSSLIOHandlerSocketOpenSSL;
    procedure ParseBtClick(Sender: TObject);
    procedure TabSheet1Show(Sender: TObject);
    procedure XPathShow(Sender: TObject);
    procedure TreeView1MouseUp(Sender: TObject; Button: TMouseButton;
      Shift: TShiftState; X, Y: Integer);
    procedure TreeView2DragOver(Sender, Source: TObject; X, Y: Integer;
      State: TDragState; var Accept: Boolean);
    procedure TreeView2DragDrop(Sender, Source: TObject; X, Y: Integer);
    procedure TreeView2EndDrag(Sender, Target: TObject; X, Y: Integer);
    procedure ClearBtClick(Sender: TObject);
    procedure FindAllBtClick(Sender: TObject);
    procedure FindOneBtClick(Sender: TObject);
    procedure Button1Click(Sender: TObject);
    procedure Button2Click(Sender: TObject);
    procedure Button3Click(Sender: TObject);
  private
    procedure DrawTree1(DTree: TDomTreeNode; prfx: string);
    procedure DrawTree(DTree: TDomTreeNode);
    procedure AddChildNode(ParentNode:TTreeNode;DTree: TDomTreeNode);
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form1: TForm1;
  DomTree: TDomTree;          // DOM of the most recently parsed page (nil until the first parse)
  DomTreeNode: TDomTreeNode;  // not referenced anywhere in this unit

implementation

{$R *.dfm}

{ Writes the outcome of a parse to the status bar: panel 0 gets OK/Error
  depending on whether the parser logged any diagnostics, panel 1 gets the
  node count reported by the tree. }
procedure ShowParseStatus(StatusBar: TStatusBar; Tree: TDomTree);
begin
  if Tree.ParseErr.Count = 0 then
    StatusBar.Panels[0].Text := 'Parse result: OK'
  else
    StatusBar.Panels[0].Text := 'Parse result: Error';
  StatusBar.Panels[1].Text := 'Count node: ' + IntToStr(Tree.Count);
end;

{ Rebuilds TreeView1 content for the DOM subtree rooted at DTree, adding the
  root as a top-level item. AddChild with a nil parent adds a top-level node,
  so the recursive worker is reused instead of duplicating it. }
procedure TForm1.DrawTree(DTree: TDomTreeNode);
begin
  AddChildNode(nil, DTree);
end;

{ Debug helper: dumps the tag names of the subtree into Memo1, one node per
  line, prefixing each nesting level with one more '-'. }
procedure TForm1.DrawTree1(DTree: TDomTreeNode; prfx: string);
var
  ChildIdx: Integer;
  ChildPrefix: string;
begin
  Memo1.Lines.Add(prfx + ' ' + DTree.Tag);
  ChildPrefix := prfx + '-';
  for ChildIdx := 0 to DTree.Child.Count - 1 do
    DrawTree1(DTree.Child.Items[ChildIdx], ChildPrefix);
end;

{ The DOM tree view and the log memo are shared between tabs; re-parent them
  into this tab when it becomes visible. }
procedure TForm1.TabSheet1Show(Sender: TObject);
begin
  TreeView1.Parent := Panel4;
  Memo1.Parent := Panel2;
  Memo1.Lines.Add('Enter your URL and click button "Parse"');
end;

{ Right-click on a DOM node copies its XPath into the search edit. }
procedure TForm1.TreeView1MouseUp(Sender: TObject; Button: TMouseButton;
  Shift: TShiftState; X, Y: Integer);
var
  ClickedNode: TTreeNode;
begin
  if Button <> mbRight then
    Exit;
  ClickedNode := TreeView1.GetNodeAt(X, Y);
  if ClickedNode = nil then
    Exit;
  // Data is filled by AddChildNode; guard anyway so a foreign node cannot crash us.
  if ClickedNode.Data = nil then
    Exit;
  Edit2.Text := TDomTreeNode(ClickedNode.Data).GetXPath(True);
end;

{ Drops the XPath of the node selected in the DOM tree onto the XPath tree:
  as a child of the node under the cursor, or as a new top-level node when
  the drop lands on empty space. }
procedure TForm1.TreeView2DragDrop(Sender, Source: TObject; X, Y: Integer);
var
  Dragged, Target: TTreeNode;
  XPathTxt: string;
begin
  // A drag can finish while nothing is selected in the DOM tree;
  // previously this dereferenced nil.
  Dragged := TreeView1.Selected;
  if Dragged = nil then
    Exit;
  if Dragged.Data = nil then
    Exit;

  Target := TreeView2.GetNodeAt(X, Y);
  XPathTxt := TDomTreeNode(Dragged.Data).GetXPath(True);
  if (Target <> nil) and (XPathTxt <> '') then
    TreeView2.Items.AddChild(Target, XPathTxt)
  else
    TreeView2.Items.Add(nil, XPathTxt);
end;

{ Only drags that originate from the DOM tree are accepted. }
procedure TForm1.TreeView2DragOver(Sender, Source: TObject; X, Y: Integer;
  State: TDragState; var Accept: Boolean);
begin
  Accept := (Source = TreeView1);
end;

procedure TForm1.TreeView2EndDrag(Sender, Target: TObject; X, Y: Integer);
begin
  TreeView2.FullExpand;
end;

{ Moves the shared DOM tree view and log memo onto the XPath tab and prints
  the usage hint. }
procedure TForm1.XPathShow(Sender: TObject);
begin
  TreeView1.Parent := Panel6;
  Memo1.Parent := Panel10;
  TreeView2.FullExpand;
  Memo1.Lines.Add('Enter your XPath to Edit and click button "Find in DOM" for search node in DOM model');
  Memo1.Lines.Add('or');
  Memo1.Lines.Add('Drag and drop from DOM Tree to XPath Tree window and click button "Find all result"');
end;

{ Adds DTree (and, recursively, all of its children) to TreeView1 below
  ParentNode. Tag nodes are captioned with their tag name, text nodes with
  their text. Each tree node keeps a pointer to its DOM node in Data. }
procedure TForm1.AddChildNode(ParentNode:TTreeNode;DTree: TDomTreeNode);
var
  ViewNode: TTreeNode;
  Caption: string;
  ChildIdx: Integer;
begin
  if DTree.Tag <> '' then
    Caption := DTree.GetTagName
  else
    Caption := DTree.Text;

  ViewNode := TreeView1.Items.AddChild(ParentNode, Caption);
  ViewNode.Data := DTree;
  for ChildIdx := 0 to DTree.Child.Count - 1 do
    AddChildNode(ViewNode, DTree.Child.Items[ChildIdx]);
end;

{ Downloads the URL from Edit1, parses it into a fresh DomTree, logs timing
  and parser diagnostics to Memo1 and shows the DOM in TreeView1. Any
  exception is reported to the user in a message box. }
procedure TForm1.ParseBtClick(Sender: TObject);
var
  HtmlTxt: string;
begin
  try
    // Drop the view first: its nodes point into the tree we are about to free.
    TreeView1.Items.Clear;
    FreeAndNil(DomTree);

    Memo1.Clear;
    Memo1.Lines.Add('Start time GET- ' + DateTimeToStr(Now));
    HtmlTxt := IdHTTP1.Get(Edit1.Text);
    Memo1.Lines.Add('End time GET- ' + DateTimeToStr(Now));

    // create root node tree's structure
    DomTree := TDomTree.Create;

    // parse HTML in tree's structure
    if not DomTree.RootNode.RunParse(HtmlTxt) then
      ShowMessage('Don'#39'tParse HTML!');
    Memo1.Lines.Add('End match time - ' + DateTimeToStr(Now));

    Memo1.Lines.Add('Parsing error and warning: ' + IntToStr(DomTree.ParseErr.Count));
    Memo1.Lines.AddStrings(DomTree.ParseErr);
    ShowParseStatus(StatusBar1, DomTree);

    DrawTree(DomTree.RootNode);
    // Pre-select the first node below the root, if the document produced one.
    if TreeView1.Items.Count > 1 then
      TreeView1.Items.Item[1].Selected := True;
  except
    on E: Exception do
      ShowMessage(E.ClassName + ' : ' + E.Message);
  end;
end;

{ Runs the XPath from Edit2 against the DOM. Every matching DOM node is
  selected in TreeView1 (Ctrl-style, so earlier matches stay selected) and
  every matching text value is shown in a message box. }
procedure TForm1.FindOneBtClick(Sender: TObject);
var
  Nodes: TNodeList;
  Values: TStringList;
  NodeIdx, ViewIdx, ValueIdx: Integer;
begin
  if (DomTree = nil) or (TreeView1.Items.Count = 0) then
    Exit;

  Nodes := TNodeList.Create;
  try
    Values := TStringList.Create;
    try
      if DomTree.RootNode.FindXPath(Edit2.Text, Nodes, Values) then
      begin
        for NodeIdx := 0 to Nodes.Count - 1 do
          for ViewIdx := 0 to TreeView1.Items.Count - 1 do
            if TreeView1.Items[ViewIdx].Data = Nodes[NodeIdx] then
            begin
              TreeView1.Select(TreeView1.Items[ViewIdx], [ssCtrl]);
              TreeView1.SetFocus;
            end;
        for ValueIdx := 0 to Values.Count - 1 do
          ShowMessage(Values[ValueIdx]);
      end
      else
        ShowMessage('Not found!');
    finally
      Values.Free;
    end;
  finally
    Nodes.Free;
  end;
end;

procedure TForm1.Button1Click(Sender: TObject);
begin
  TreeView2.FullCollapse;
  TreeView3.FullCollapse;
end;

procedure TForm1.Button2Click(Sender: TObject);
begin
  TreeView2.FullExpand;
  TreeView3.FullExpand;
end;

{ Downloads the URL from Edit3, parses it, lists every <a> whose href
  contains 'href="http' style absolute links, then downloads and parses each
  linked page in turn, logging the parser diagnostics of each to Memo2.
  A failure on one linked page is logged and does not stop the crawl. }
procedure TForm1.Button3Click(Sender: TObject);
var
  HtmlTxt, href: string;
  ListNode: TNodeList;
  i: Integer;
  DomChildTree: TDomTree;
begin
  try
    // DomTree is replaced below; the DOM view on the first tab would be left
    // pointing at freed nodes, so it is emptied together with the tree.
    TreeView1.Items.Clear;
    FreeAndNil(DomTree);

    Memo2.Clear;
    Memo2.Lines.Add('Start time GET- ' + DateTimeToStr(Now));
    HtmlTxt := IdHTTP1.Get(Edit3.Text);
    Memo2.Lines.Add('End time GET- ' + DateTimeToStr(Now));
    DomTree := TDomTree.Create;

    // parse HTML in tree's structure
    if not DomTree.RootNode.RunParse(HtmlTxt) then
      ShowMessage('Don'#39'tParse HTML!');
    Memo2.Lines.Add('End match time - ' + DateTimeToStr(Now));

    Memo2.Lines.Add('Parsing error: - ' + IntToStr(DomTree.ParseErr.Count));
    Memo2.Lines.AddStrings(DomTree.ParseErr);
    ShowParseStatus(StatusBar1, DomTree);

    ListNode := TNodeList.Create;
    try
      if DomTree.RootNode.FindNode('a', 0, 'href="http', True, ListNode) then
      begin
        // First pass: just list the links that were found.
        for i := 0 to ListNode.Count - 1 do
          if ListNode[i].Attributes.TryGetValue('href', href) then
            Memo2.Lines.Add(href);

        Memo2.Lines.Add(' ');
        Memo2.Lines.Add(' ');

        // Second pass: fetch and parse every linked page.
        for i := 0 to ListNode.Count - 1 do
          if ListNode[i].Attributes.TryGetValue('href', href) then
          begin
            Memo2.Lines.Add(href);
            DomChildTree := TDomTree.Create;
            try
              try
                // The stored attribute value still carries its double quotes.
                HtmlTxt := IdHTTP2.Get(AnsiDequotedStr(href, '"'));
                DomChildTree.RootNode.RunParse(HtmlTxt);
                Memo2.Lines.Add('Parsing error and warning: ' +
                  IntToStr(DomChildTree.ParseErr.Count));
                Memo2.Lines.AddStrings(DomChildTree.ParseErr);
                Memo2.Lines.Add('');
              except
                on E: Exception do
                  Memo2.Lines.Add(E.Message);
              end;
            finally
              DomChildTree.Free;
            end;
          end;
      end;
    finally
      ListNode.Free;
    end;
  except
    on E: Exception do
      ShowMessage(E.ClassName + ' : ' + E.Message);
  end;
end;

procedure TForm1.ClearBtClick(Sender: TObject);
begin
  TreeView2.Items.Clear;
  TreeView3.Items.Clear;
end;

{ Evaluates the XPath tree built in TreeView2 against the DOM and renders
  the result into TreeView3. Only the first top-level XPath node is used as
  the entry point; its children are evaluated relative to each node their
  parent matched. }
procedure TForm1.FindAllBtClick(Sender: TObject);

  { Evaluates hXPathNode.Text relative to hDomTreeNode and adds the outcome
    below hParentView. If the XPath yields text values, one result node with
    the comma-joined values is added; otherwise one result node per matched
    DOM node is added and the child XPaths are evaluated against it.
    Returns the last result node added, or nil when nothing was added. }
  function AddResultToTree(hXPathNode: TTreeNode; hLevel: Integer;
    hParentView: TTreeNode; hDomTreeNode: TDomTreeNode): TTreeNode;
  var
    FoundNodes: TNodeList;
    FoundText: TStringList;
    k, m: Integer;
    Joined: string;
  begin
    Result := nil;

    FoundNodes := TNodeList.Create;
    try
      FoundText := TStringList.Create;
      try
        if not hDomTreeNode.FindXPath(hXPathNode.Text, FoundNodes, FoundText) then
        begin
          Memo1.Lines.Add(hXPathNode.Text + ' not found!');
          Exit;
        end;

        Memo1.Lines.Add('Found nodes: ' + IntToStr(FoundNodes.Count));
        Joined := '';
        for k := 0 to FoundNodes.Count - 1 do
          Joined := Joined + Format('[%s],', [FoundNodes[k].GetTagName]);
        // LeftStr(..., Length - 1) strips the trailing comma.
        Memo1.Lines.Add('[' + LeftStr(Joined, Length(Joined) - 1) + ']');

        Joined := '';
        Memo1.Lines.Add('Found text: ' + IntToStr(FoundText.Count));
        for k := 0 to FoundText.Count - 1 do
          Joined := Joined + Format('%s,', [FoundText[k]]);
        Memo1.Lines.Add(LeftStr(Joined, Length(Joined) - 1));

        if FoundText.Count > 0 then
          Result := TreeView3.Items.AddChild(hParentView,
            LeftStr(Joined, Length(Joined) - 1))
        else
          for k := 0 to FoundNodes.Count - 1 do
          begin
            Result := TreeView3.Items.AddChild(hParentView, FoundNodes[k].GetTagName);
            for m := 0 to hXPathNode.Count - 1 do
              AddResultToTree(hXPathNode[m], hLevel + 1, Result, FoundNodes[k]);
          end;
      finally
        FoundText.Free;
      end;
    finally
      FoundNodes.Free;
    end;
  end;

begin
  if (DomTree = nil) or (TreeView1.Items.Count = 0) or (TreeView2.Items.Count = 0) then
    Exit;

  TreeView3.Items.Clear;
  AddResultToTree(TreeView2.Items[0], 1, nil, DomTree.RootNode);
  TreeView3.FullExpand;
end;

end.
{==============================================================================|
| Project : Delphi HTML/XHTML parser module                        | 1.1.2     |
|==============================================================================|
| Content:                                                                     |
|==============================================================================|
| The contents of this file are subject to the Mozilla Public License Ver. 1.0 |
| (the "License"); you may not use this file except in compliance with the     |
| License. You may obtain a copy of the License at http://www.mozilla.org/MPL/ |
|                                                                              |
| Software distributed under the License is distributed on an "AS IS" basis,   |
| WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for |
| the specific language governing rights and limitations under the License.    |
|==============================================================================|
| Initial Developers of the Original Code are:                                 |
| Sandbil (Russia) sandbil@ya.ru                                               |
| All Rights Reserved.                                                         |
| Last Modified:                                                               |
| 25.10.2014, Sandbil                                                          |
|==============================================================================|
| History: see README                                                          |
|==============================================================================|}


unit parser;

interface

uses
  System.Classes, System.RegularExpressionsCore, System.Generics.Collections,
  System.Contnrs, System.StrUtils, System.SysUtils;

type
  TNodeList = class;
  TChildList = class;
  TDomTreeNode = class;

  { Owner of a parsed document: holds the root node, the running node
    counter and the list of diagnostics collected while parsing. }
  TDomTree = class
  private
    FCount: Integer;           // incremented by every TDomTreeNode constructor
    fParseErr: TStringList;    // parser diagnostics, one message per line
    fRootNode: TDomTreeNode;   // synthetic 'Root' node, owned by the tree
  public
    constructor Create;
    destructor destroy; override;
    property Count: Integer read fCount;
    property RootNode: TDomTreeNode read fRootNode;
    property ParseErr: TStringList read fParseErr;
  end;

  { One node of the DOM. A node owns its attribute dictionary and its
    children; Parent and Owner are non-owning back references. }
  TDomTreeNode = class(TObject)
  private
    fTag: string;
    fAttributesTxt: string;
    fAttributes: TDictionary<string, string>;  // may be nil (the root is created without attributes)
    fText: string;
    fTypeTag: string;
    fChild: TChildList;
    fParent: Pointer;          // untyped: the root receives its TDomTree here
    fOwner: TDomTree;
  public
    property Tag: string read fTag;
    property AttributesTxt: string read fAttributesTxt;
    property Attributes: TDictionary<string, string> read fAttributes;
    property Text: string read fText;
    property TypeTag: string read fTypeTag;
    property Child: TChildList read fChild;
    property Parent: Pointer read fParent;
    property Owner: TDomTree read fOwner;

    constructor create(hOwner: TDomTree; hParent: Pointer; hTag, hAttrTxt: string;
      hAttr: TDictionary<string, string>; hTypeTag, hText: string);
    destructor destroy; override;
    function FindNode(hNameTag: string; hIndex:integer; hAttrTxt: String;
      hAnyLevel: Boolean; dListNode: TNodeList): Boolean;
    function FindTagOfIndex(hNameTag: String; hIndex:integer; hAnyLevel:
      Boolean; dListNode: TNodeList): Boolean;
    function FindXPath(hXPathTxt: String; dListNode: TNodeList;
      dListValue:TStringList): Boolean;
    function GetAttrValue(hAttrName:string): string;
    function GetComment(hIndex: Integer): string;
    function GetTagName: string;
    function GetTextValue(hIndex:Integer): string;
    function GetXPath(hRelative:boolean): string;
    function RunParse(HtmlTxt: String): Boolean;
  end;

  { Owning list of child nodes: destroying the list frees every node in it. }
  TChildList = class(TList)
  private
    function Get(Index: Integer): TDomTreeNode;
  public
    destructor Destroy; override;
    property Items[Index: Integer]: TDomTreeNode read Get; default;
  end;

  { Non-owning list of nodes, used to hand search results to callers. }
  TNodeList = class(TList)
  private
    function Get(Index: Integer): TDomTreeNode;
  public
    property Items[Index: Integer]: TDomTreeNode read Get; default;
  end;

  PPrmRec = ^TPrmRec;
  { One search step: tag name, ordinal index, attribute filter and whether
    the step may match at any depth. }
  TPrmRec = record
    TagName: string;
    ind: Integer;
    Attr: string;
    AnyLevel: Boolean;
  end;

  { Owning list of heap-allocated TPrmRec records. }
  TPrmRecList = class(TList)
  private
    function Get(Index: Integer): PPrmRec;
  public
    destructor Destroy; override;
    property Items[Index: Integer]: PPrmRec read Get; default;
  end;

implementation

{ TDomTree }

{
*********************************** TDomTree ***********************************
}
constructor TDomTree.Create;
begin
  fParseErr := TStringList.Create;
  fRootNode := TDomTreeNode.Create(Self, Self, 'Root', '', nil, '', '');
  // The node constructor has just counted the root; reset so that Count
  // reflects parsed nodes only (RunParse reports success via FCount > 0).
  FCount := 0;
end;

destructor TDomTree.destroy;
begin
  FreeAndNil(fParseErr);
  FreeAndNil(fRootNode);   // frees the whole node hierarchy
  inherited;
end;

{ TChildList }

{
********************************** TChildList **********************************
}
destructor TChildList.Destroy;
var
  NodeIdx: Integer;
begin
  for NodeIdx := 0 to Count - 1 do
    Self[NodeIdx].Free;
  inherited;
end;

function TChildList.Get(Index: Integer): TDomTreeNode;
begin
  Result := TDomTreeNode(inherited Get(Index));
end;

{ TNodeList }

function TNodeList.Get(Index: Integer): TDomTreeNode;
begin
  Result := TDomTreeNode(inherited Get(Index));
end;

{ TPrmRecList }

{
********************************* TPrmRecList **********************************
}
destructor TPrmRecList.Destroy;
var
  RecIdx: Integer;
  Rec: PPrmRec;
begin
  for RecIdx := 0 to Count - 1 do
  begin
    Rec := Items[RecIdx];
    // TPrmRec contains string fields. FreeMem would release the record
    // without finalizing them and leak the string data; Dispose finalizes
    // the managed fields first and then releases the memory.
    // NOTE(review): the allocation site is not visible here - presumably
    // New(PPrmRec); confirm it is not an uninitialized GetMem block.
    if Rec <> nil then
      Dispose(Rec);
  end;
  inherited;
end;

function TPrmRecList.Get(Index: Integer): PPrmRec;
begin
  Result := PPrmRec(inherited Get(Index));
end;

{ TDomTreeNode }

{
********************************* TDomTreeNode *********************************
}
{ Creates a node and registers it with hOwner's node counter. The node takes
  ownership of hAttr (it is freed in the destructor); hParent and hOwner are
  stored as plain references. hOwner must not be nil. }
constructor TDomTreeNode.create(hOwner: TDomTree; hParent: Pointer; hTag, hAttrTxt: string;
  hAttr: TDictionary<string, string>; hTypeTag, hText: string);
begin
  fChild := TChildList.Create;
  fOwner := hOwner;
  fParent := hParent;
  fTag := hTag;
  fTypeTag := hTypeTag;
  fText := hText;
  fAttributesTxt := hAttrTxt;
  fAttributes := hAttr;
  Inc(hOwner.FCount);
end;

destructor TDomTreeNode.destroy;
begin
  FreeAndNil(fAttributes);
  FreeAndNil(fChild);      // TChildList.Destroy frees the child nodes
  inherited;
end;
//***********FindNode*************
// Collects descendants of this node that match a tag name and/or an
// attribute filter and appends them to dListNode.
//
// hNameTag  - name Tag ('' - filter by attribute only)
// hIndex    - number of a tag one after another (0 - all tag, 1 - each first ..)
// hAttrTxt  - attribute. ex. alt=1 ('' - filter by tag only)
// hAnyLevel - true - all levels after start node; false - only one child level after start node
// dListNode - return TNodeList of TDomTreeNode
//
// Returns True when at least one node matched. Raises Exception when
// hAttrTxt is not empty but cannot be parsed as an attribute expression.
//
// Matching rules, as implemented:
//   - tag + attribute: the attribute value only has to CONTAIN the requested
//     value (substring match);
//   - attribute only (hNameTag = ''): the value must be EQUAL, and the result
//     is "dListNode is not empty", so entries already present in dListNode
//     before the call also make the function return True.

function TDomTreeNode.FindNode(hNameTag: string; hIndex:integer; hAttrTxt:
    String; hAnyLevel: Boolean; dListNode: TNodeList): Boolean;
var
  RegEx: TPerlRegEx;
  i, a: integer;
  TagNodeList: TNodeList;
  AttrName, AttrValue, tValue: string;

  // Appends to dListNode every child of aNode (every descendant when
  // hAnyLevel is set) whose attribute AttrName equals AttrValue.
  // Returns dListNode itself.
  function FindAttrChildNode(aNode: TDomTreeNode; AttrName, AttrValue: String): TNodeList;
  var
    aValue: String;
    j: integer;
  begin
    for j := 0 to aNode.Child.Count - 1 do
    begin
      if aNode.Child[j].Attributes <> nil then
        // TryGetValue already reports a missing key, no ContainsKey needed.
        if aNode.Child[j].Attributes.TryGetValue(AttrName, aValue) then
          if AttrValue = aValue then
            dListNode.Add(aNode.Child[j]);
      if hAnyLevel then
        FindAttrChildNode(aNode.Child[j], AttrName, AttrValue);
    end;
    Result := dListNode;
  end;

begin
  Result := False;
  RegEx := TPerlRegEx.Create;
  try
    RegEx.Subject := hAttrTxt;
    // Alternatives: name="value" | name=#39value#39 | name=value | bare
    // boolean attributes. Each alternative has a (name)(value) group pair.
    // NOTE(review): '#39' sits inside the string literal, so the second
    // alternative matches the literal characters #39 rather than a single
    // quote - presumably '#39' outside the literal was intended. Left as is
    // because the attribute parser elsewhere in this unit is not visible
    // here and must stay consistent with it.
    RegEx.RegEx := '([^\s]*?[^\S]*)=([^\S]*".*?"[^\S]*)|'+
                   '([^\s]*?[^\S]*)=([^\S]*#39.*?#39[^\S]*)|'+
                   '([^\s]*?[^\S]*)=([^\S]*[^\s]+[^\S]*)|'+
                   '(autofocus[^\S]*)()|'+
                   '(disabled[^\S]*)()|'+
                   '(selected[^\S]*)()';

    if (not (hAttrTxt = '')) and (RegEx.Match) then
    begin
      // Locate the first non-empty group: it is the attribute name and the
      // group right after it is the attribute value. A while loop is used
      // because a for-loop control variable is undefined after the loop.
      i := 1;
      while (i < RegEx.GroupCount) and (Trim(RegEx.Groups[i]) = '') do
        Inc(i);
      AttrName := RegEx.Groups[i];
      AttrValue := RegEx.Groups[i + 1];

      if hNameTag = '' then
      begin
        if FindAttrChildNode(Self, AttrName, AttrValue).Count > 0 then
          Result := True;
      end
      else
      begin
        TagNodeList := TNodeList.Create;
        try
          if FindTagOfIndex(hNameTag, hIndex, hAnyLevel, TagNodeList) then
            for a := 0 to TagNodeList.Count - 1 do
              if TagNodeList[a].Attributes <> nil then
                if TagNodeList[a].Attributes.TryGetValue(AttrName, tValue) then
                  // Deliberately a substring test; an earlier version
                  // compared the values for strict equality.
                  if Pos(AttrValue, tValue) > 0 then
                  begin
                    dListNode.Add(TagNodeList[a]);
                    Result := True;
                  end;
        finally
          // Released even when FindTagOfIndex or the loop raises.
          TagNodeList.Free;
        end;
      end;
    end
    else if hAttrTxt = '' then
    begin
      // No attribute filter: every node found by tag/index is a match.
      TagNodeList := TNodeList.Create;
      try
        if FindTagOfIndex(hNameTag, hIndex, hAnyLevel, TagNodeList) then
          for a := 0 to TagNodeList.Count - 1 do
          begin
            dListNode.Add(TagNodeList[a]);
            Result := True;
          end;
      finally
        TagNodeList.Free;
      end;
    end
    else
      raise Exception.Create('Attribute not found: '+ hAttrTxt );
  finally
    RegEx.Free;
  end;
end;

//***********FindTagOfIndex*************
// hNameTag - name Tag (* - any tag, except text tag)
// hIndex - number of a tag one after another (0 - all tag, 1 - each first ..)
320 | // hAnyLevel - true - all level after start node; false - only one child level after start node 321 | // dListNode - return TNodeList of TDomTreeNode 322 | 323 | function TDomTreeNode.FindTagOfIndex(hNameTag: String; hIndex:integer; 324 | hAnyLevel: Boolean; dListNode: TNodeList): Boolean; 325 | 326 | function SubStringOccurences(const subString, sourceString : string; caseSensitive : boolean) : integer; 327 | var 328 | pEx: integer; 329 | sub, source : string; 330 | begin 331 | if caseSensitive then 332 | begin 333 | sub := subString; 334 | source := sourceString; 335 | end 336 | else 337 | begin 338 | sub := LowerCase(subString); 339 | source := LowerCase(sourceString); 340 | end; 341 | 342 | result := 0; 343 | pEx := PosEx(sub, source, 1); 344 | while pEx <> 0 do 345 | begin 346 | Inc(result); 347 | pEx := PosEx(sub, source, pEx + Length(sub)); 348 | end; 349 | end; 350 | 351 | Function FindChildTagOfIndex(aNode:TDomTreeNode):TNodeList; 352 | var 353 | countNode,j: integer; 354 | enumTags:string; 355 | begin 356 | countNode:=0; 357 | for j := 0 to aNode.Child.Count - 1 do 358 | begin 359 | if hNameTag <> '*' then 360 | begin 361 | if ((AnsiUpperCase(aNode.Child[j].Tag) = AnsiUpperCase(hNameTag)) and (aNode.Child[j].TypeTag <> '')) 362 | or ((AnsiUpperCase(aNode.Child[j].Tag) = '') and (AnsiUpperCase(hNameTag)='TEXT()') and (aNode.Child[j].Text <> '')) 363 | or ((LeftStr(AnsiUpperCase(aNode.Child[j].Tag),4) = '[^<]*) - comment 865 | // ([^<]*) - script 866 | // (<[^>]+>[^<]*) - all remaining tags 867 | // [^<]* - text 868 | RegExException :='(.*?</PLAINTEXT>[^<]*)|'+ 869 | '(<title>.*?</title>[^<]*)|'+ 870 | '(<xmp>.*?</xmp>[^<]*)|'+ 871 | '(<script.*?</script>[^<]*)|'+ 872 | '(<textarea.*?</textarea>[^<]*)|'+ 873 | // '(<pre.*?</pre>[^<]*)|'+ 874 | '(<!--.+?-->[^<]*)|'; 875 | RegEx := RegExException + '(<[^>]+>[^<]*)'; // all teg and text 876 | Subject := HtmlUtf8; 877 | if Match then 878 | begin 879 | MatchTag(RegExHTML.MatchedText); 880 | prev := 
MatchedOffset + MatchedLength; 881 | while MatchAgain do 882 | begin 883 | MatchTag(RegExHTML.MatchedText); 884 | // *****Start Check Parsing HTML Error************ 885 | if MatchedOffset - prev > 0 then 886 | begin 887 | Owner.fParseErr.Add(IntToStr(ErrParseHTML) + '- Check error found after HTML parsing'); 888 | inc(ErrParseHTML) 889 | end; 890 | prev := MatchedOffset + MatchedLength; 891 | // *****End Check Parsing HTML Error************ 892 | end; 893 | // ***********End RegExp match cycle************ 894 | end 895 | else 896 | raise Exception.create('Input text not contain HTML tags'); 897 | // *************End RegExp match ************ 898 | end; 899 | 900 | Finally 901 | RegExHTML.Free; 902 | RegExTag.Free; 903 | if Owner.FCount>0 then 904 | result := True 905 | else result := False ; 906 | end; 907 | 908 | end; 909 | 910 | end. 911 | -------------------------------------------------------------------------------- /test/DemoParserTests.res: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandbil/HTML-Parser/4101a73e817abda6ffb739578b9b9f2523147930/test/DemoParserTests.res -------------------------------------------------------------------------------- /test/ParserTests.dpr: -------------------------------------------------------------------------------- 1 | program DemoParserTests; 2 | { 3 | 4 | Delphi DUnit Test Project 5 | ------------------------- 6 | This project contains the DUnit test framework and the GUI/Console test runners. 7 | Add "CONSOLE_TESTRUNNER" to the conditional defines entry in the project options 8 | to use the console test runner. Otherwise the GUI test runner will be used by 9 | default. 10 | 11 | } 12 | 13 | {$IFDEF CONSOLE_TESTRUNNER} 14 | {$APPTYPE CONSOLE} 15 | {$ENDIF} 16 | 17 | uses 18 | DUnitTestRunner, 19 | Testparser in 'Testparser.pas', 20 | parser in '..\parser.pas'; 21 | 22 | {$R *.RES} 23 | 24 | begin 25 | DUnitTestRunner.RunRegisteredTests; 26 | end. 
27 | 28 | -------------------------------------------------------------------------------- /test/ParserTests.dproj: -------------------------------------------------------------------------------- 1 | <Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> 2 | <PropertyGroup> 3 | <ProjectGuid>{3BDF6518-3527-460A-98A3-324EB5967CF2}</ProjectGuid> 4 | <ProjectVersion>15.4</ProjectVersion> 5 | <FrameworkType>None</FrameworkType> 6 | <Base>True</Base> 7 | <Config Condition="'$(Config)'==''">Debug</Config> 8 | <Platform Condition="'$(Platform)'==''">Win32</Platform> 9 | <TargetedPlatforms>1</TargetedPlatforms> 10 | <AppType>Console</AppType> 11 | <MainSource>ParserTests.dpr</MainSource> 12 | </PropertyGroup> 13 | <PropertyGroup Condition="'$(Config)'=='Base' or '$(Base)'!=''"> 14 | <Base>true</Base> 15 | </PropertyGroup> 16 | <PropertyGroup Condition="('$(Platform)'=='Win32' and '$(Base)'=='true') or '$(Base_Win32)'!=''"> 17 | <Base_Win32>true</Base_Win32> 18 | <CfgParent>Base</CfgParent> 19 | <Base>true</Base> 20 | </PropertyGroup> 21 | <PropertyGroup Condition="('$(Platform)'=='Win64' and '$(Base)'=='true') or '$(Base_Win64)'!=''"> 22 | <Base_Win64>true</Base_Win64> 23 | <CfgParent>Base</CfgParent> 24 | <Base>true</Base> 25 | </PropertyGroup> 26 | <PropertyGroup Condition="'$(Config)'=='Debug' or '$(Cfg_1)'!=''"> 27 | <Cfg_1>true</Cfg_1> 28 | <CfgParent>Base</CfgParent> 29 | <Base>true</Base> 30 | </PropertyGroup> 31 | <PropertyGroup Condition="('$(Platform)'=='Win32' and '$(Cfg_1)'=='true') or '$(Cfg_1_Win32)'!=''"> 32 | <Cfg_1_Win32>true</Cfg_1_Win32> 33 | <CfgParent>Cfg_1</CfgParent> 34 | <Cfg_1>true</Cfg_1> 35 | <Base>true</Base> 36 | </PropertyGroup> 37 | <PropertyGroup Condition="'$(Config)'=='Release' or '$(Cfg_2)'!=''"> 38 | <Cfg_2>true</Cfg_2> 39 | <CfgParent>Base</CfgParent> 40 | <Base>true</Base> 41 | </PropertyGroup> 42 | <PropertyGroup Condition="'$(Base)'!=''"> 43 | <SanitizedProjectName>ParserTests</SanitizedProjectName> 44 | 
<DCC_UnitSearchPath>$(BDS)\Source\DUnit\src;$(DCC_UnitSearchPath)</DCC_UnitSearchPath> 45 | <DCC_Namespace>System;Xml;Data;Datasnap;Web;Soap;Vcl;Vcl.Imaging;Vcl.Touch;Vcl.Samples;Vcl.Shell;$(DCC_Namespace)</DCC_Namespace> 46 | <DCC_Define>_CONSOLE_TESTRUNNER;$(DCC_Define)</DCC_Define> 47 | <DCC_DcuOutput>.</DCC_DcuOutput> 48 | <DCC_ExeOutput>.\$(Platform)\$(Config)</DCC_ExeOutput> 49 | <DCC_E>false</DCC_E> 50 | <DCC_N>false</DCC_N> 51 | <DCC_S>false</DCC_S> 52 | <DCC_F>false</DCC_F> 53 | <DCC_K>false</DCC_K> 54 | </PropertyGroup> 55 | <PropertyGroup Condition="'$(Base_Win32)'!=''"> 56 | <DCC_Namespace>Winapi;System.Win;Data.Win;Datasnap.Win;Web.Win;Soap.Win;Xml.Win;Bde;$(DCC_Namespace)</DCC_Namespace> 57 | <DCC_UsePackage>IndyIPClient;FireDACASADriver;FireDACSqliteDriver;bindcompfmx;DBXSqliteDriver;vcldbx;FireDACPgDriver;FireDACODBCDriver;fmx;rtl;dbrtl;DbxClientDriver;IndySystem;FireDACCommon;bindcomp;inetdb;TeeDB;inetdbbde;vclib;DBXInterBaseDriver;DataSnapClient;DataSnapCommon;DBXOdbcDriver;DataSnapServer;Tee;vclFireDAC;DataSnapProviderClient;xmlrtl;DBXSybaseASEDriver;ibxpress;DbxCommonDriver;svnui;vclimg;IndyProtocols;DBXMySQLDriver;dbxcds;DatasnapConnectorsFreePascal;FireDACCommonDriver;MetropolisUILiveTile;bindengine;vclactnband;vcldb;bindcompdbx;FMXTee;vcldsnap;bindcompvcl;soaprtl;TeeUI;vclie;fmxFireDAC;FireDACADSDriver;DBXDb2Driver;vcltouch;DBXOracleDriver;CustomIPTransport;vclribbon;VCLRESTComponents;dsnap;DBXInformixDriver;FireDAC;FireDACMSSQLDriver;fmxase;vcl;DataSnapConnectors;FireDACDataSnapDriver;IndyCore;DBXMSSQLDriver;CloudService;Intraweb;DBXFirebirdDriver;FireDACIBDriver;FmxTeeUI;inet;IndyIPCommon;fmxobj;FireDACDBXDriver;IndyIPServer;dsnapcon;FireDACMySQLDriver;VclSmp;vclx;inetdbxpress;svn;DBXSybaseASADriver;FireDACOracleDriver;fmxdae;FireDACDb2Driver;RESTComponents;bdertl;FireDACMSAccDriver;dbexpress;adortl;DataSnapIndy10ServerTransport;$(DCC_UsePackage)</DCC_UsePackage> 58 | 
<VerInfo_Keys>CompanyName=;FileDescription=;FileVersion=1.0.0.0;InternalName=;LegalCopyright=;LegalTrademarks=;OriginalFilename=;ProductName=;ProductVersion=1.0.0.0;Comments=</VerInfo_Keys> 59 | <VerInfo_Locale>1033</VerInfo_Locale> 60 | </PropertyGroup> 61 | <PropertyGroup Condition="'$(Base_Win64)'!=''"> 62 | <DCC_UsePackage>IndyIPClient;FireDACASADriver;FireDACSqliteDriver;bindcompfmx;DBXSqliteDriver;FireDACPgDriver;FireDACODBCDriver;fmx;rtl;dbrtl;DbxClientDriver;IndySystem;FireDACCommon;bindcomp;inetdb;TeeDB;vclib;DBXInterBaseDriver;DataSnapClient;DataSnapCommon;DBXOdbcDriver;DataSnapServer;Tee;vclFireDAC;DataSnapProviderClient;xmlrtl;DBXSybaseASEDriver;ibxpress;DbxCommonDriver;vclimg;IndyProtocols;DBXMySQLDriver;dbxcds;DatasnapConnectorsFreePascal;FireDACCommonDriver;MetropolisUILiveTile;bindengine;vclactnband;vcldb;bindcompdbx;FMXTee;vcldsnap;bindcompvcl;soaprtl;TeeUI;vclie;fmxFireDAC;FireDACADSDriver;DBXDb2Driver;vcltouch;DBXOracleDriver;CustomIPTransport;vclribbon;VCLRESTComponents;dsnap;DBXInformixDriver;FireDAC;FireDACMSSQLDriver;fmxase;vcl;DataSnapConnectors;FireDACDataSnapDriver;IndyCore;DBXMSSQLDriver;CloudService;Intraweb;DBXFirebirdDriver;FireDACIBDriver;FmxTeeUI;inet;IndyIPCommon;fmxobj;FireDACDBXDriver;IndyIPServer;dsnapcon;FireDACMySQLDriver;VclSmp;vclx;inetdbxpress;DBXSybaseASADriver;FireDACOracleDriver;fmxdae;FireDACDb2Driver;RESTComponents;FireDACMSAccDriver;dbexpress;adortl;DataSnapIndy10ServerTransport;$(DCC_UsePackage)</DCC_UsePackage> 63 | </PropertyGroup> 64 | <PropertyGroup Condition="'$(Cfg_1)'!=''"> 65 | <DCC_Define>DEBUG;$(DCC_Define)</DCC_Define> 66 | <DCC_DebugDCUs>true</DCC_DebugDCUs> 67 | <DCC_Optimize>false</DCC_Optimize> 68 | <DCC_GenerateStackFrames>true</DCC_GenerateStackFrames> 69 | <DCC_DebugInfoInExe>true</DCC_DebugInfoInExe> 70 | <DCC_RemoteDebug>true</DCC_RemoteDebug> 71 | </PropertyGroup> 72 | <PropertyGroup Condition="'$(Cfg_1_Win32)'!=''"> 73 | <DCC_RemoteDebug>false</DCC_RemoteDebug> 74 | </PropertyGroup> 75 | 
<PropertyGroup Condition="'$(Cfg_2)'!=''"> 76 | <DCC_LocalDebugSymbols>false</DCC_LocalDebugSymbols> 77 | <DCC_Define>RELEASE;$(DCC_Define)</DCC_Define> 78 | <DCC_SymbolReferenceInfo>0</DCC_SymbolReferenceInfo> 79 | <DCC_DebugInformation>0</DCC_DebugInformation> 80 | </PropertyGroup> 81 | <ItemGroup> 82 | <DelphiCompile Include="$(MainSource)"> 83 | <MainSource>MainSource</MainSource> 84 | </DelphiCompile> 85 | <DCCReference Include="Testparser.pas"/> 86 | <DCCReference Include="..\parser.pas"/> 87 | <BuildConfiguration Include="Release"> 88 | <Key>Cfg_2</Key> 89 | <CfgParent>Base</CfgParent> 90 | </BuildConfiguration> 91 | <BuildConfiguration Include="Base"> 92 | <Key>Base</Key> 93 | </BuildConfiguration> 94 | <BuildConfiguration Include="Debug"> 95 | <Key>Cfg_1</Key> 96 | <CfgParent>Base</CfgParent> 97 | </BuildConfiguration> 98 | </ItemGroup> 99 | <ProjectExtensions> 100 | <Borland.Personality>Delphi.Personality.12</Borland.Personality> 101 | <Borland.ProjectType/> 102 | <BorlandProject> 103 | <Delphi.Personality> 104 | <VersionInfo> 105 | <VersionInfo Name="IncludeVerInfo">False</VersionInfo> 106 | <VersionInfo Name="AutoIncBuild">False</VersionInfo> 107 | <VersionInfo Name="MajorVer">1</VersionInfo> 108 | <VersionInfo Name="MinorVer">0</VersionInfo> 109 | <VersionInfo Name="Release">0</VersionInfo> 110 | <VersionInfo Name="Build">0</VersionInfo> 111 | <VersionInfo Name="Debug">False</VersionInfo> 112 | <VersionInfo Name="PreRelease">False</VersionInfo> 113 | <VersionInfo Name="Special">False</VersionInfo> 114 | <VersionInfo Name="Private">False</VersionInfo> 115 | <VersionInfo Name="DLL">False</VersionInfo> 116 | <VersionInfo Name="Locale">1049</VersionInfo> 117 | <VersionInfo Name="CodePage">1251</VersionInfo> 118 | </VersionInfo> 119 | <VersionInfoKeys> 120 | <VersionInfoKeys Name="CompanyName"/> 121 | <VersionInfoKeys Name="FileDescription"/> 122 | <VersionInfoKeys Name="FileVersion">1.0.0.0</VersionInfoKeys> 123 | <VersionInfoKeys Name="InternalName"/> 124 
| <VersionInfoKeys Name="LegalCopyright"/> 125 | <VersionInfoKeys Name="LegalTrademarks"/> 126 | <VersionInfoKeys Name="OriginalFilename"/> 127 | <VersionInfoKeys Name="ProductName"/> 128 | <VersionInfoKeys Name="ProductVersion">1.0.0.0</VersionInfoKeys> 129 | <VersionInfoKeys Name="Comments"/> 130 | <VersionInfoKeys Name="CFBundleName"/> 131 | <VersionInfoKeys Name="CFBundleDisplayName"/> 132 | <VersionInfoKeys Name="UIDeviceFamily"/> 133 | <VersionInfoKeys Name="CFBundleIdentifier"/> 134 | <VersionInfoKeys Name="CFBundleVersion"/> 135 | <VersionInfoKeys Name="CFBundlePackageType"/> 136 | <VersionInfoKeys Name="CFBundleSignature"/> 137 | <VersionInfoKeys Name="CFBundleAllowMixedLocalizations"/> 138 | <VersionInfoKeys Name="UISupportedInterfaceOrientations"/> 139 | <VersionInfoKeys Name="CFBundleExecutable"/> 140 | <VersionInfoKeys Name="CFBundleResourceSpecification"/> 141 | <VersionInfoKeys Name="LSRequiresIPhoneOS"/> 142 | <VersionInfoKeys Name="CFBundleInfoDictionaryVersion"/> 143 | <VersionInfoKeys Name="CFBundleDevelopmentRegion"/> 144 | <VersionInfoKeys Name="package"/> 145 | <VersionInfoKeys Name="label"/> 146 | <VersionInfoKeys Name="versionCode"/> 147 | <VersionInfoKeys Name="versionName"/> 148 | <VersionInfoKeys Name="persistent"/> 149 | <VersionInfoKeys Name="restoreAnyVersion"/> 150 | <VersionInfoKeys Name="installLocation"/> 151 | <VersionInfoKeys Name="largeHeap"/> 152 | <VersionInfoKeys Name="theme"/> 153 | </VersionInfoKeys> 154 | <Source> 155 | <Source Name="MainSource">ParserTests.dpr</Source> 156 | </Source> 157 | </Delphi.Personality> 158 | <Deployment/> 159 | <Platforms> 160 | <Platform value="Win32">True</Platform> 161 | <Platform value="Win64">False</Platform> 162 | </Platforms> 163 | <UnitTesting> 164 | <TestFramework>DUnit / Delphi Win32</TestFramework> 165 | <TestRunner>GUI</TestRunner> 166 | <SourceProjectName/> 167 | <TestProjectName/> 168 | </UnitTesting> 169 | </BorlandProject> 170 | <ProjectFileVersion>12</ProjectFileVersion> 171 | 
</ProjectExtensions> 172 | <Import Project="$(BDS)\Bin\CodeGear.Delphi.Targets" Condition="Exists('$(BDS)\Bin\CodeGear.Delphi.Targets')"/> 173 | <Import Project="$(APPDATA)\Embarcadero\$(BDSAPPDATABASEDIR)\$(PRODUCTVERSION)\UserTools.proj" Condition="Exists('$(APPDATA)\Embarcadero\$(BDSAPPDATABASEDIR)\$(PRODUCTVERSION)\UserTools.proj')"/> 174 | </Project> 175 | -------------------------------------------------------------------------------- /test/ParserTests.dproj.local: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <BorlandProject/> 3 | -------------------------------------------------------------------------------- /test/ParserTests.identcache: -------------------------------------------------------------------------------- 1 | ���%C:\!work\!Demo\DemoParser7\parser.pas������������/C:\!work\!Demo\DemoParser7\test\ParserTests.dpr������������.C:\!work\!Demo\DemoParser7\test\Testparser.pas������������ -------------------------------------------------------------------------------- /test/ParserTests.res: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandbil/HTML-Parser/4101a73e817abda6ffb739578b9b9f2523147930/test/ParserTests.res -------------------------------------------------------------------------------- /test/Testparser.pas: -------------------------------------------------------------------------------- 1 | unit Testparser; 2 | { 3 | 4 | Delphi DUnit Test Case 5 | ---------------------- 6 | This unit contains a skeleton test case class generated by the Test Case Wizard. 7 | Modify the generated code to correctly setup and call the methods from the unit 8 | being tested. 
}

interface

uses
  TestFramework, System.SysUtils, System.Generics.Collections, parser,
  System.Contnrs, System.RegularExpressionsCore, System.StrUtils, Vcl.Dialogs,
  System.Classes ;

type

  // Test methods for class TDomTree.
  // Currently only verifies that a tree can be created and destroyed.
  TestTDomTree = class(TTestCase)
  strict private
    FDomTree: TDomTree;
  public
    procedure SetUp; override;
    procedure TearDown; override;
  end;


  // Test methods for class TDomTreeNode.
  // Every published test parses the 'test.html' fixture into the root node
  // prepared by SetUp and then inspects the resulting tree.
  TestTDomTreeNode = class(TTestCase)
  strict private
    FDomTreeNode: TDomTreeNode;
  public
    procedure SetUp; override;
    procedure TearDown; override;
  published
    procedure TestRunParse;
    procedure TestGetTagName;
    procedure TestGetAttrValue;
    procedure TestGetTextValue;
    procedure TestGetComment;
    procedure TestFindNode;
    procedure TestFindTagOfIndex;
    procedure TestGetXPath;
    procedure TestFindXPath;
  end;

  // Test methods for class TChildList (construction / destruction only).
  TestTChildList = class(TTestCase)
  strict private
    FChildList: TChildList;
  public
    procedure SetUp; override;
    procedure TearDown; override;
  end;

  // Test methods for class TPrmRecList (construction / destruction only).
  TestTPrmRecList = class(TTestCase)
  strict private
    FPrmRecList: TPrmRecList;
  public
    procedure SetUp; override;
    procedure TearDown; override;
  end;

implementation

const
  // HTML fixture parsed by the TDomTreeNode tests. The path is relative, so it
  // is resolved against the current directory of the test runner.
  TestHtmlFile = 'test.html';

// Returns the complete text of the HTML fixture.
// The temporary string list is always released, even when loading fails.
function LoadTestHtml: string;
var
  Lines: TStringList;
begin
  Lines := TStringList.Create;
  try
    Lines.LoadFromFile(TestHtmlFile);
    Result := Lines.Text;
  finally
    Lines.Free;
  end;
end;

// Walks from the parsed root to the first <div class="result_block result_13">
// of the fixture (the node the XPath tests address as
// '//*[@id="TopBox"]/div/div/div/div[1]').
// The index chain mirrors the structure of test.html; it has to be updated
// whenever the fixture changes.
function ResultBlockNode(Root: TDomTreeNode): TDomTreeNode;
begin
  Result := Root.Child[3].child[1].child[3].child[1].child[0].child[0].child[0].child[1].child[0].child[0].child[0].child[0].child[0].child[1].child[1].child[0].child[3];
end;

procedure TestTDomTree.SetUp;
begin
  FDomTree := TDomTree.Create;
end;

procedure TestTDomTree.TearDown;
begin
  FreeAndNil(FDomTree);
end;

// Creates a fresh tree and keeps its root node for the test; also verifies
// that a new root node starts out empty.
procedure TestTDomTreeNode.SetUp;
var
  DomTree: TDomTree;
begin
  // NOTE(review): the TDomTree instance itself is never released; only its
  // root node is freed in TearDown. Whether TDomTree owns the root node is
  // decided in parser.pas - confirm before changing the ownership here.
  DomTree := TDomTree.Create;
  FDomTreeNode := DomTree.RootNode;
  CheckEquals('Root', FDomTreeNode.Tag);
  CheckEquals('', FDomTreeNode.TypeTag);
  CheckEquals('', FDomTreeNode.AttributesTxt);
  CheckEquals('', FDomTreeNode.Text);

  CheckEquals(0, FDomTreeNode.Child.Count);
end;

procedure TestTDomTreeNode.TearDown;
begin
  FreeAndNil(FDomTreeNode);
end;

// Parses the fixture and checks the processing instruction, a multi-line
// comment, text that contains markup-like characters, and tag attributes.
procedure TestTDomTreeNode.TestRunParse;
var
  ReturnValue: Boolean;
  tmp: string;
  tmpNode: TDomTreeNode;
begin
  ReturnValue := FDomTreeNode.RunParse(LoadTestHtml);
  CheckEquals(true, ReturnValue);

  // check <?
  CheckEquals('<?xml version="1.0" encoding="UTF-8"?>', FDomTreeNode.Child[0].Tag);
  CheckEquals('%s', FDomTreeNode.Child[0].Typetag);
  CheckEquals('', FDomTreeNode.Child[0].AttributesTxt);
  CheckEquals('', FDomTreeNode.Child[0].Text);

  // check multiline comment (the expected value hard-codes a CR/LF line break,
  // so the fixture must be stored with Windows line endings)
  tmp := '<!-- <link href="https://ozlotteries.r.worldssl.net/stylesheet/main.css" rel="stylesheet" type="text/css" > 1'#$D#$A'123-->';
  CheckEquals(tmp, FDomTreeNode.Child[2].Tag);

  // check that text of "exception" tags (title, textarea) may contain any symbols
  tmp := 'Title "<!-- <'#39'this"/> --> '#39'document';
  tmpNode := FDomTreeNode.Child[3].child[0].child[0].child[0];
  CheckEquals(tmp, tmpNode.Text);
  tmpNode := FDomTreeNode.Child[3].child[1].child[1];
  CheckEquals('textarea', AnsiLowerCase(tmpNode.Tag));
  CheckEquals('This disabled field? don'#39't write here<123/>', tmpNode.child[0].Text);

  // check attributes (values keep their surrounding quotes)
  tmpNode := FDomTreeNode.Child[3].child[1];
  CheckEquals('body', AnsiLowerCase(tmpNode.Tag));
  CheckEquals(true, tmpNode.Attributes.ContainsKey('class'));
  CheckEquals(true, tmpNode.Attributes.TryGetValue('class', tmp));
  CheckEquals('"default"', tmp);
  CheckEquals(true, tmpNode.Attributes.ContainsKey('bgcolor'));
  CheckEquals(true, tmpNode.Attributes.TryGetValue('bgcolor', tmp));
  CheckEquals(#39'blue'#39, tmp);
  tmpNode := FDomTreeNode.Child[3].child[1].child[1];
  CheckEquals('textarea', AnsiLowerCase(tmpNode.Tag));
  CheckEquals(true, tmpNode.Attributes.ContainsKey('disabled'));
  CheckEquals(false, tmpNode.Attributes.TryGetValue('class', tmp));
end;

// GetTagName must return the opening tag including its attributes.
procedure TestTDomTreeNode.TestGetTagName;
var
  ReturnValue: string;
  tmpNode: TDomTreeNode;
begin
  FDomTreeNode.RunParse(LoadTestHtml);
  tmpNode := FDomTreeNode.Child[3].child[1].child[1];
  ReturnValue := tmpNode.GetTagName;
  CheckEquals('<textarea disabled cols="30" rows="5">', ReturnValue);
end;

// GetAttrValue must return the attribute value including its quotes.
procedure TestTDomTreeNode.TestGetAttrValue;
var
  ReturnValue: string;
  tmpNode: TDomTreeNode;
begin
  FDomTreeNode.RunParse(LoadTestHtml);
  tmpNode := FDomTreeNode.Child[3].child[1].child[3];
  ReturnValue := tmpNode.GetAttrValue('id');
  CheckEquals('"maincontainer"', ReturnValue);
end;

// GetTextValue on the <div class="draw default"> node: indexes 0 and 1 both
// yield the first text child, index 2 yields the second one.
procedure TestTDomTreeNode.TestGetTextValue;
var
  ReturnValue: string;
  tmpNode: TDomTreeNode;
begin
  FDomTreeNode.RunParse(LoadTestHtml);
  tmpNode := ResultBlockNode(FDomTreeNode).Child[1];
  ReturnValue := tmpNode.GetTextValue(0);
  CheckEquals('Draw 960', ReturnValue);
  ReturnValue := tmpNode.GetTextValue(1);
  CheckEquals('Draw 960', ReturnValue);
  ReturnValue := tmpNode.GetTextValue(2);
  CheckEquals('Thursday 9th October 2014', ReturnValue);
end;

// GetComment on <body>: indexes 0 and 1 both yield the first comment child.
procedure TestTDomTreeNode.TestGetComment;
var
  ReturnValue: string;
  tmpNode: TDomTreeNode;
begin
  FDomTreeNode.RunParse(LoadTestHtml);
  tmpNode := FDomTreeNode.Child[3].child[1];
  ReturnValue := tmpNode.GetComment(0);
  CheckEquals('<!-- logo(s) -->', ReturnValue);
  ReturnValue := tmpNode.GetComment(1);
  CheckEquals('<!-- logo(s) -->', ReturnValue);
end;


// FindNode by attribute and/or tag name. With the boolean argument set to
// False the search from the root is expected not to find nested nodes.
procedure TestTDomTreeNode.TestFindNode;
var
  ReturnValue: Boolean;
  dListNode: TNodeList;
begin
  FDomTreeNode.RunParse(LoadTestHtml);
  // NOTE(review): freeing the list assumes TNodeList only references nodes of
  // the tree and does not own them (the test already clears it between
  // searches of the same tree) - confirm against parser.pas.
  dListNode := TNodeList.Create;
  try
    ReturnValue := FDomTreeNode.FindNode('', 0, 'id="maincontainer"', True, dListNode);
    CheckEquals(true, ReturnValue);
    CheckEquals('<div id="maincontainer">', dListNode[0].GetTagName);
    dListNode.Clear;

    ReturnValue := FDomTreeNode.FindNode('', 0, 'id="maincontainer"', false, dListNode);
    CheckEquals(false, ReturnValue);
    dListNode.Clear;

    ReturnValue := FDomTreeNode.FindNode('div', 0, 'id="TopBox"', True, dListNode);
    CheckEquals(true, ReturnValue);
    CheckEquals('<div id="TopBox">', dListNode[0].GetTagName);
    dListNode.Clear;

    ReturnValue := FDomTreeNode.FindNode('h1', 0, '', True, dListNode);
    CheckEquals(true, ReturnValue);
    CheckEquals('<h1 class="pageTitle logintitle">', dListNode[0].GetTagName);
    dListNode.Clear;
  finally
    dListNode.Free;
  end;
end;

// FindTagOfIndex: the root has no second <div> child, the result block has.
procedure TestTDomTreeNode.TestFindTagOfIndex;
var
  ReturnValue: Boolean;
  dListNode: TNodeList;
  tmpNode: TDomTreeNode;
begin
  FDomTreeNode.RunParse(LoadTestHtml);
  // NOTE(review): see TestFindNode - assumes TNodeList does not own its nodes.
  dListNode := TNodeList.Create;
  try
    ReturnValue := FDomTreeNode.FindTagOfIndex('div', 2, false, dListNode);
    CheckEquals(false, ReturnValue);
    tmpNode := ResultBlockNode(FDomTreeNode);
    ReturnValue := tmpNode.FindTagOfIndex('div', 2, false, dListNode);
    CheckEquals(true, ReturnValue);
    CheckEquals('<div class="numbers">', dListNode[0].GetTagName);
  finally
    dListNode.Free;
  end;
end;

// GetXPath in both modes: True yields the short path anchored at the nearest
// id, False yields the full path from the document root.
procedure TestTDomTreeNode.TestGetXPath;
var
  ReturnValue: string;
  tmpNode: TDomTreeNode;
begin
  FDomTreeNode.RunParse(LoadTestHtml);
  tmpNode := ResultBlockNode(FDomTreeNode);
  ReturnValue := tmpNode.GetXPath(true);
  CheckEquals('//*[@id="TopBox"]/div/div/div/div[1]', ReturnValue);
  ReturnValue := tmpNode.GetXPath(false);
  CheckEquals('./html/body/div/table/tbody/tr/td/table/tbody/tr/td/div/div/div/div/div/div[1]', ReturnValue);
end;

// FindXPath for a node query, a text() query and an attribute query.
procedure TestTDomTreeNode.TestFindXPath;
var
  ReturnValue: Boolean;
  dListValue: TStringList;
  dListNode: TNodeList;
begin
  FDomTreeNode.RunParse(LoadTestHtml);

  dListNode := nil;
  dListValue := nil;
  try
    // NOTE(review): see TestFindNode - assumes TNodeList does not own its nodes.
    dListNode := TNodeList.Create;
    dListValue := TStringList.Create;

    ReturnValue := FDomTreeNode.FindXPath('//*[@id="TopBox"]/div/div/div/div[1]', dListNode, dListValue);
    CheckEquals(true, ReturnValue);
    CheckEquals(1, dListNode.Count);
    CheckEquals('<div class="result_block result_13">', dListNode[0].GetTagName);

    dListNode.Clear;
    ReturnValue := FDomTreeNode.FindXPath('//*[@id="TopBox"]/div/div/div/div/div[@class="draw default"]/text()[2]', dListNode, dListValue);
    CheckEquals(true, ReturnValue);
    CheckEquals(2, dListNode.Count);
    CheckEquals(2, dListValue.Count);
    CheckEquals('Thursday 9th October 2014', dListValue[0]);

    dListNode.Clear;
    dListValue.Clear;
    ReturnValue := FDomTreeNode.FindXPath('//*[@id="TopBox"]/div/div/div/div/div[@class="numbers"]/table/tbody/tr[2]/td[1]/img[2]/@alt', dListNode, dListValue);
    CheckEquals(true, ReturnValue);
    CheckEquals(2, dListNode.Count);
    CheckEquals(2, dListValue.Count);
    CheckEquals('"35"', dListValue[0]);
    CheckEquals('"9"', dListValue[1]);
  finally
    dListValue.Free;
    dListNode.Free;
  end;
end;

procedure TestTChildList.SetUp;
begin
  FChildList := TChildList.Create;
end;

procedure TestTChildList.TearDown;
begin
  FreeAndNil(FChildList);
end;



procedure TestTPrmRecList.SetUp;
begin
  FPrmRecList := TPrmRecList.Create;
end;

procedure TestTPrmRecList.TearDown;
begin
  FreeAndNil(FPrmRecList);
end;



initialization
  // Register any test cases with the test runner
RegisterTest(TestTDomTree.Suite); 354 | RegisterTest(TestTDomTreeNode.Suite); 355 | RegisterTest(TestTChildList.Suite); 356 | RegisterTest(TestTPrmRecList.Suite); 357 | end. 358 | 359 | -------------------------------------------------------------------------------- /test/Win32/Debug/dunit.ini: -------------------------------------------------------------------------------- 1 | [GUITestRunner Config] 2 | AutoSave=1 3 | Left=294 4 | Top=53 5 | Width=1036 6 | Height=808 7 | Maximized=0 8 | UseRegistry=0 9 | ResultsPanel.Height=174 10 | ErrorMessage.Height=105 11 | ErrorMessage.Visible=1 12 | FailureList.ColumnWidth[0]=120 13 | FailureList.ColumnWidth[1]=100 14 | FailureList.ColumnWidth[2]=200 15 | FailureList.ColumnWidth[3]=592 16 | HideTestNodesOnOpen=0 17 | BreakOnFailures=0 18 | FailOnNoChecksExecuted=0 19 | FailOnMemoryLeaked=0 20 | IgnoreSetUpTearDownLeaks=0 21 | ReportMemoryLeakTypes=0 22 | SelectTestedNode=1 23 | WarnOnFailTestOverride=0 24 | PopupX=350 25 | PopupY=30 26 | 27 | -------------------------------------------------------------------------------- /test/Win32/Debug/test.html: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> 3 | <!-- <link href="https://ozlotteries.r.worldssl.net/stylesheet/main.css" rel="stylesheet" type="text/css" > 1 4 | 123--> 5 | <html xmlns="http://www.w3.org/1999/xhtml" dir="ltr"> 6 | <HEAD> 7 | <TITLE>Title "<!-- <'this"/> --> 'document</TITLE> 8 | </HEAD> 9 | <body class="default" bgcolor='blue'> 10 | Text of document 11 | <textarea disabled cols="30" rows="5">This disabled field? 
don't write here<123/></textarea> 12 | 13 | <!-- logo(s) --> 14 | <div id="maincontainer"> 15 | 16 | <!-- Content Section --> 17 | 18 | <table width="100%" border="0" align="center" cellpadding="0" cellspacing="0"> 19 | <tbody><tr> 20 | <td id="contentContainer"> 21 | <!--<div id="contentContainer">--> 22 | <table width="100%" border="0" cellpadding="0" cellspacing="0"> 23 | <tbody><tr> 24 | <td valign="top" bgcolor="#ffffff" id="contentWrapper" rowspan="2"> 25 | <div id="TopWrapper"> 26 | <div id="TopBox"> 27 | <h1 class="pageTitle logintitle"></h1> 28 | <div class="content"> 29 | <!-- <h2>Results &raquo;</h2> --> 30 | <div style="padding: 15px 0px 15px 25px;"> 31 | <div class="account-content"> 32 | 33 | <!-- START: RESULT --> 34 | 35 | <br><a name="powerball" hidefocus="true" style="outline: none;"> 36 | <!-- START: Powerball --> 37 | </a> 38 | <div class="result_block result_13"> 39 | <a name="powerball" hidefocus="true" style="outline: none;"></a> 40 | <div class="draw default"><a name="powerball" hidefocus="true" style="outline: none;"></a> 41 | <a href="#" title="Powerball" class="logo" hidefocus="true" style="outline: none;"> 42 | <img class="lotto_13" src="https://ozlotteries.r.worldssl.net/images/lottery/logos/default/13_lotto_logo.gif"> 43 | </a> 44 | <a href="#" title="Powerball" class="lotto_name" hidefocus="true" style="outline: none;">Powerball</a> 45 | Draw 960<br> 46 | Thursday 9th October 2014 47 | </div> 48 | <div class="numbers"> 49 | <table cellpadding="3"> 50 | <tbody><tr> 51 | <td width="">Main numbers</td> 52 | <td width="">Powerball</td> 53 | </tr> 54 | <tr> 55 | <td> 56 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_24.gif" alt="24"> 57 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_35.gif" alt="35"> 58 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_2.gif" alt="2"> 59 | <img 
src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_26.gif" alt="26"> 60 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_25.gif" alt="25"> 61 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_29.gif" alt="29"> 62 | </td> 63 | <td> 64 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_supp_8.gif" alt="8"> 65 | </td> 66 | </tr> 67 | </tbody> 68 | </table> 69 | </div> 70 | <div class="button_wrapper"> <a href="#" class="button_viewdividend" id="viewdividend_13" title="View dividends" hidefocus="true" style="outline: none;">View dividends</a> <a href="/lotto-results/powerball" class="button_pastresults" title="View past results" hidefocus="true" style="outline: none;">View past results</a> </div> 71 | <div class="dividend_wrapper"> 72 | <span class="dividends_total">Total Prizes Won:$16,439,231.00</span> <br> 73 | <br> 74 | <table width="90%" border="0" cellspacing="0" cellpadding="0"> 75 | <tbody><tr> 76 | <td width="25%" valign="top"><strong>Division 1</strong></td> 77 | <td width="75%"><strong>6 Main numbers, 1 Powerball </strong><br> 78 | Prize Pool: $0.00<br> 79 | 0 winners<br> 80 | <br> 81 | </td> 82 | </tr> 83 | <tr> 84 | <td width="25%" valign="top"><strong>Division 2</strong></td> 85 | <td width="75%"><strong>6 Main numbers </strong><br> 86 | Prize Pool: $890,774.40<br> 87 | 6 winners, each received $148,462.40<br> 88 | <br> 89 | </td> 90 | </tr> 91 | <tr> 92 | <td width="25%" valign="top"><strong>Division 3</strong></td> 93 | <td width="75%"><strong>5 Main numbers, 1 Powerball </strong><br> 94 | Prize Pool: $931,889.55<br> 95 | 129 winners, each received $7,223.95<br> 96 | <br> 97 | </td> 98 | </tr> 99 | <tr> 100 | <td width="25%" valign="top"><strong>Division 4</strong></td> 101 | <td width="75%"><strong>5 Main numbers </strong><br> 102 | Prize Pool: $520,789.50<br> 103 | 2505 winners, each received $207.90<br> 104 | <br> 105 | </td> 
106 | </tr> 107 | <tr> 108 | <td width="25%" valign="top"><strong>Division 5</strong></td> 109 | <td width="75%"><strong>4 Main numbers, 1 Powerball </strong><br> 110 | Prize Pool: $397,541.20<br> 111 | 6173 winners, each received $64.40<br> 112 | <br> 113 | </td> 114 | </tr> 115 | <tr> 116 | <td width="25%" valign="top"><strong>Division 6</strong></td> 117 | <td width="75%"><strong>3 Main numbers, 1 Powerball </strong><br> 118 | Prize Pool: $3,467,528.80<br> 119 | 90536 winners, each received $38.30<br> 120 | <br> 121 | </td> 122 | </tr> 123 | <tr> 124 | <td width="25%" valign="top"><strong>Division 7</strong></td> 125 | <td width="75%"><strong>4 Main numbers </strong><br> 126 | Prize Pool: $3,110,202.15<br> 127 | 112893 winners, each received $27.55<br> 128 | <br> 129 | </td> 130 | </tr> 131 | <tr> 132 | <td width="25%" valign="top"><strong>Division 8</strong></td> 133 | <td width="75%"><strong>2 Main numbers, 1 Powerball </strong><br> 134 | Prize Pool: $7,120,505.40<br> 135 | 531381 winners, each received $13.40<br> 136 | <br> 137 | </td> 138 | </tr> 139 | </tbody></table> 140 | </div> 141 | <div class="next_draw_wrapper"> 142 | <div class="next_draw"> <strong style="font-size:16px;">Next Draw</strong> <br> 143 | <span style="font-size:14px;"> 144 | $50,000,000 </span> <br> 145 | Thu 16th Oct 9:30pm AEDT <br> 146 | </div> 147 | <a href="/play/powerball" class="button_playnow" title="Play now" hidefocus="true" style="outline: none;">Play now</a> </div> 148 | <div style="width: 260px; text-align: center;"> 149 | </div> 150 | </div> 151 | <!-- END: Powerball --> 152 | 153 | <br><a name="wednesday_lotto" hidefocus="true" style="outline: none;"> 154 | <!-- START: Wednesday Lotto --> 155 | </a><div class="result_block result_3"><a name="wednesday_lotto" hidefocus="true" style="outline: none;"> 156 | </a><div class="draw default"><a name="wednesday_lotto" hidefocus="true" style="outline: none;"> 157 | </a><a href="#" title="Wednesday Lotto" class="logo" 
hidefocus="true" style="outline: none;"> 158 | <img class="lotto_3" src="https://ozlotteries.r.worldssl.net/images/lottery/logos/default/3_lotto_logo.gif"> 159 | </a> 160 | <a href="#" title="Wednesday Lotto" class="lotto_name" hidefocus="true" style="outline: none;">Wednesday Lotto</a> 161 | Draw 3389<br> 162 | Wednesday 8th October 2014 </div> 163 | <div class="numbers"> 164 | <table cellpadding="3"> 165 | <tbody><tr> 166 | <td width="">Main numbers</td> 167 | <td width="">Supplementary</td> 168 | </tr> 169 | <tr> 170 | <td> 171 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_15.gif" alt="15"> 172 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_9.gif" alt="9"> 173 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_13.gif" alt="13"> 174 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_33.gif" alt="33"> 175 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_12.gif" alt="12"> 176 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_main_32.gif" alt="32"> 177 | </td> 178 | <td> 179 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_supp_17.gif" alt="17"> 180 | <img src="https://ozlotteries.r.worldssl.net/images/new/results/results_balls_supp_8.gif" alt="8"> 181 | </td> 182 | </tr> 183 | </tbody></table> 184 | </div> 185 | <div class="button_wrapper"> <a href="#" class="button_viewdividend" id="viewdividend_3" title="View dividends" hidefocus="true" style="outline: none;">View dividends</a> <a href="/lotto-results/wednesday-lotto" class="button_pastresults" title="View past results" hidefocus="true" style="outline: none;">View past results</a> </div> 186 | <div class="dividend_wrapper"> 187 | <span class="dividends_total">Total Prize Pool:$3,513,018.10</span> <br> 188 | <br> 189 | <table width="90%" border="0" cellspacing="0" 
cellpadding="0"> 190 | <tbody><tr> 191 | <td width="25%" valign="top"><strong>Division 1</strong></td> 192 | <td width="75%"><strong>6 Main numbers </strong><br> 193 | Prize Pool: $2,000,000.00<br> 194 | 2 winners, each received $1,000,000.00<br> 195 | <br> 196 | </td> 197 | </tr> 198 | <tr> 199 | <td width="25%" valign="top"><strong>Division 2</strong></td> 200 | <td width="75%"><strong>5 Main numbers, 1 Supplementary </strong><br> 201 | Prize Pool: $68,081.60<br> 202 | 8 winners, each received $8,510.20<br> 203 | <br> 204 | </td> 205 | </tr> 206 | <tr> 207 | <td width="25%" valign="top"><strong>Division 3</strong></td> 208 | <td width="75%"><strong>5 Main numbers </strong><br> 209 | Prize Pool: $105,908.85<br> 210 | 171 winners, each received $619.35<br> 211 | <br> 212 | </td> 213 | </tr> 214 | <tr> 215 | <td width="25%" valign="top"><strong>Division 4</strong></td> 216 | <td width="75%"><strong>4 Main numbers </strong><br> 217 | Prize Pool: $294,919.80<br> 218 | 9159 winners, each received $32.20<br> 219 | <br> 220 | </td> 221 | </tr> 222 | <tr> 223 | <td width="25%" valign="top"><strong>Division 5</strong></td> 224 | <td width="75%"><strong>3 Main numbers, 1 Supplementary </strong><br> 225 | Prize Pool: $408,841.65<br> 226 | 25473 winners, each received $16.05<br> 227 | <br> 228 | </td> 229 | </tr> 230 | <tr> 231 | <td width="25%" valign="top"><strong>Division 6</strong></td> 232 | <td width="75%"><strong>1 Main numbers, 2 Supplementary </strong><br> 233 | Prize Pool: $635,266.20<br> 234 | 62281 winners, each received $10.20<br> 235 | <br> 236 | </td> 237 | </tr> 238 | </tbody></table> 239 | </div> 240 | <div class="next_draw_wrapper"> 241 | <div class="next_draw"> <strong style="font-size:16px;">Next Draw</strong> <br> 242 | <span style="font-size:14px;"> 243 | $1,000,000 � </span> <br> 244 | Wed 15th Oct 9:30pm AEDT <br> 245 | </div> 246 | <a href="/play/wednesday-lotto" class="button_playnow" title="Play now" hidefocus="true" style="outline: none;">Play 
now</a> </div> 247 | <div style="width: 260px; text-align: center;"> 248 | Guaranteed for up to 4 winners. </div> 249 | </div> 250 | 251 | 252 | 253 | <br> 254 | </div> 255 | </div> 256 | </div> 257 | </div> 258 | </div> 259 | </td> 260 | 261 | 262 | 263 | 264 | </tr> 265 | 266 | </tbody></table> 267 | 268 | <!--</div>--> 269 | <!-- End Content Section --> 270 | 271 | </td> 272 | </tr> 273 | </tbody></table> 274 | 275 | 276 | 277 | <br> 278 | </div> 279 | 280 | <p>Пример программы</p> 281 | <xmp> 282 | while (<>) { 283 | $org=$_; 284 | s/\\["']//g; 285 | s/\/\/[^:].*//; 286 | s/\/\*.*\*\///g; 287 | if ($comment == 1) { 288 | if (s/.*\*\///) { 289 | $comment = 0; 290 | } 291 | else { 292 | next; 293 | } 294 | } 295 | if (s/\/\*.*//) { 296 | $comment = 1; 297 | } 298 | if (/^\s*#/) { 299 | next; 300 | } 301 | }</xmp> 302 | </BODY> 303 | </HTML> -------------------------------------------------------------------------------- /test/test.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sandbil/HTML-Parser/4101a73e817abda6ffb739578b9b9f2523147930/test/test.html --------------------------------------------------------------------------------