├── .gitignore
├── phpunit
├── bootstrap.php
├── test.html
└── HtmlTest.php
├── .travis.yml
├── composer.json
├── test.php
├── LICENSE
├── phpunit.xml.dist
├── README.md
└── src
└── ParserDom.php
/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea
2 | /vendor
3 |
4 | composer.lock
5 |
--------------------------------------------------------------------------------
/phpunit/bootstrap.php:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | test
6 |
7 |
8 | p1
9 | p2
10 | p3
11 | p_id
12 | p_id_2
13 | p4
14 | 测试1
15 |
16 |
17 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "bupt1987/html-parser",
3 | "license": "MIT",
4 | "description": "Html Parser",
5 | "homepage": "https://github.com/bupt1987/html-parser",
6 | "authors": [
7 | {
8 | "name": "俊杰jerry",
9 | "email": "bupt1987@gmail.com",
10 | "homepage": "http://bupt1987.github.io",
11 | "role": "Developer"
12 | }
13 | ],
14 | "require": {
15 | "php": ">=5.5"
16 | },
17 | "require-dev": {
18 | "phpunit/phpunit": "^4.8"
19 | },
20 | "autoload": {
21 | "psr-4": {
22 | "HtmlParser\\": "src/"
23 | }
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/test.php:
--------------------------------------------------------------------------------
1 | find('ul.uni-blk-list02', 0);
22 | $oDom->find('a');
23 | $oDom->find('ul');
24 | $oDom->find('p');
25 | }
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2014 俊杰Jerry
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/phpunit.xml.dist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/phpunit/HtmlTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('p4', $oDom->find('p', -1)->getPlainText());
9 | $this->assertEquals('p_id', $oDom->find('p[id]', 0)->getPlainText());
10 | $this->assertEquals('p_id_2', $oDom->find('p[id=p_id_2]', 0)->getPlainText());
11 | $this->assertEquals('p2', $oDom->find('p[!id]', 1)->getPlainText());
12 | $this->assertEquals('测试1', $oDom->find('#test1', 0)->getPlainText());
13 |
14 | $oPClass = $oDom->find('p.test_class1', 0);
15 |
16 | $this->assertEquals('p1', $oPClass->getPlainText());
17 | $this->assertEquals('test_class test_class1', $oPClass->getAttr('class'));
18 |
19 | $lCheck = array(
20 | 'p1',
21 | 'p2',
22 | 'p3',
23 | 'p_id',
24 | 'p_id_2',
25 | );
26 | $lPTag = $oDom->find('p.test_class');
27 | $this->assertEquals(5, count($lPTag));
28 | $lPText = array();
29 | foreach ($lPTag as $oPTag) {
30 | $lPText[] = $oPTag->getPlainText();
31 | }
32 | $this->assertEquals($lCheck, $lPText);
33 |
34 | $this->assertEquals($oDom->node instanceof \DOMNode, true);
35 |
36 | }
37 |
38 | private static function getHtml() { // Returns the contents of test.html located next to this file, loading it on first use.
39 | static $sHtml; // function-level static: starts out as null and keeps its value between calls
40 | if ($sHtml === null) { // only read from disk while nothing has been cached yet
41 | $sHtml = file_get_contents(__DIR__ . '/test.html'); // NOTE(review): a failed read yields false, which is cached and returned as-is
42 | }
43 | return $sHtml;
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | HtmlParser
2 | ===============
3 | [Packagist](https://packagist.org/packages/bupt1987/html-parser)
4 | [Build Status](https://travis-ci.org/bupt1987/html-parser)
5 |
6 | php html解析工具,类似于PHP Simple HTML DOM Parser。
7 | 由于基于php模块dom,所以解析html的速度比 PHP Simple HTML DOM Parser 快好几倍。
8 |
9 |
10 | 注意:html代码必须是utf-8编码字符,如果不是请转成utf-8
11 | 如果有乱码的问题参考:http://www.fwolf.com/blog/post/314
12 |
13 | 现在支持composer
14 |
15 | "require": {"bupt1987/html-parser": "dev-master"}
16 |
17 | 加载composer
18 | require 'vendor/autoload.php';
19 |
20 | ================================================================================
21 | ##### *Example*
22 | ~~~
23 |
27 |
28 |
29 | test
30 |
31 |
32 | p1
33 | p2
34 | p3
35 | 测试1
36 |
37 |