120 | foreach($html->find('ul') as $ul)
121 | {
122 | foreach($ul->find('li') as $li)
123 | {
124 | // do something...
125 | }
126 | }
127 |
128 | // Find first <li> in first <ul>
129 | $e = $html->find('ul', 0)->find('li', 0);
130 | ```
131 |
132 | ### Фильтр атрибутов
133 |
134 | Filter |Description
135 | ---|---
136 | [attribute] |Matches elements that have the specified attribute.
137 | [!attribute] |Matches elements that don't have the specified attribute.
138 | [attribute=value] |Matches elements that have the specified attribute with a certain value.
139 | [attribute!=value] |Matches elements that don't have the specified attribute with a certain value.
140 | [attribute^=value] |Matches elements that have the specified attribute and it starts with a certain value.
141 | [attribute$=value] |Matches elements that have the specified attribute and it ends with a certain value.
142 | [attribute*=value] |Matches elements that have the specified attribute and it contains a certain value.
143 |
144 | ### Текст, комментарии
145 |
146 | ```php
147 |
148 | // Find all text blocks
149 | $es = $html->find('text');
150 |
151 | // Find all comment (<!--...-->) blocks
152 | $es = $html->find('comment');
153 | ```
154 |
155 | ## Доступ к атрибутам
156 |
157 | ### Получение, установка и удаление атрибутов
158 |
159 | ```php
160 |
161 | // Get an attribute (if the attribute is a non-value attribute (e.g. checked, selected...), it returns true or false)
162 | $value = $e->href;
163 |
164 | // Set an attribute (if the attribute is a non-value attribute (e.g. checked, selected...), set its value to true or false)
165 | $e->href = 'my link';
166 |
167 | // Remove an attribute by setting its value to null
168 | $e->href = null;
169 |
170 | // Determine whether an attribute exists
171 | if(isset($e->href))
172 | echo 'href exist!';
173 | ```
174 |
175 | ### "Магические" атрибуты
176 |
177 | ```php
178 |
179 | // Example
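180 | $html = str_get_html('<div>foo <b>bar</b></div>');
181 | $e = $html->find('div', 0);
182 |
183 | echo $e->tag; // Returns: "div"
184 | echo $e->outertext; // Returns: "<div>foo <b>bar</b></div>"
185 | echo $e->innertext; // Returns: "foo <b>bar</b>"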
186 | echo $e->plaintext; // Returns: "foo bar"
187 | ```
188 |
189 | Attribute Name |Usage
190 | ---|---
191 | $e->tag |Read or write the tag name of element.
192 | $e->outertext |Read or write the outer HTML text of element.
193 | $e->innertext |Read or write the inner HTML text of element.
194 | $e->plaintext |Read or write the plain text of element.
195 |
196 | ### Трюки
197 |
198 | ```php
199 |
200 | // Extract contents from HTML
201 | echo $html->plaintext;
202 |
203 | // Wrap an element
204 | $e->outertext = '<div class="wrap">' . $e->outertext . '</div>';
205 |
206 | // Remove an element by setting its outertext to an empty string
207 | $e->outertext = '';
208 |
209 | // Append an element
210 | $e->outertext = $e->outertext . '<div>foo</div>';
211 |
212 | // Insert an element
213 | $e->outertext = '<div>foo</div>' . $e->outertext;
214 | ```
215 |
216 | ## Прогон по DOM-дереву
217 |
218 | ```php
219 |
220 | // If you are not so familiar with HTML DOM, check this link to learn more...
221 |
222 | // Example
223 | echo $html->find('#div1', 0)->children(1)->children(1)->children(2)->id;
224 | // or
225 | echo $html->getElementById('div1')->childNodes(1)->childNodes(1)->childNodes(2)->getAttribute('id');
226 | ```
227 |
228 | Method |Description
229 | ---|---
230 | `mixed` $e->children([int $index]) |Returns the Nth child object if index is set, otherwise return an array of children.
231 | `Element` $e->parent() |Returns the parent of element.
232 | `Element` $e->first_child() |Returns the first child of element, or null if not found.
233 | `Element` $e->last_child() |Returns the last child of element, or null if not found.
234 | `Element` $e->next_sibling() |Returns the next sibling of element, or null if not found.
235 | `Element` $e->prev_sibling() |Returns the previous sibling of element, or null if not found.
236 |
237 | ## API-справочник
238 |
239 | ### Методы и свойства DOM
240 |
241 | Name |Description
242 | ---|---
243 | `void` __construct([string|Element $html]) |Constructor $html is text or Element.
244 | `string` plaintext |Returns the contents extracted from HTML.
245 | `mixed` find (string $selector [, int $index]) |Find elements by the CSS selector. Returns the Nth element object if index is set, otherwise return an array of object.
246 |
247 | ### Методы и свойства элементов
248 |
249 | Name |Description
250 | ---|---
251 | `string` [attribute] |Read or write element's attribute value.
252 | `string` tag |Read or write the tag name of element.
253 | `string` outertext |Read or write the outer HTML text of element.
254 | `string` innertext |Read or write the inner HTML text of element.
255 | `string` plaintext |Read or write the plain text of element.
256 | `mixed` find (string $selector [, int $index]) |Find children by the CSS selector. Returns the Nth element object if index is set, otherwise, return an array of object.
257 |
258 | ### Прогон по дереву DOM
259 |
260 | Name |Description
261 | ---|---
262 | `mixed` $e->children([int $index]) |Returns the Nth child object if index is set, otherwise return an array of children.
263 | `element` $e->parent() |Returns the parent of element.
264 | `element` $e->first_child() |Returns the first child of element, or null if not found.
265 | `element` $e->last_child() |Returns the last child of element, or null if not found.
266 | `element` $e->next_sibling() |Returns the next sibling of element, or null if not found.
267 | `element` $e->prev_sibling() |Returns the previous sibling of element, or null if not found.
268 |
269 | ### camelCase эквиваленты
270 |
271 | ```php
272 |
273 | string $e->getAttribute($name)
274 | string $e->attribute
275 |
276 | void $e->setAttribute($name, $value)
277 | void $e->attribute = $value
278 |
279 | bool $e->hasAttribute($name)
280 | bool isset($e->attribute)
281 |
282 | void $e->removeAttribute($name)
283 | void $e->attribute = null
284 |
285 | element $e->getElementById($id)
286 | mixed $e->find("#$id", 0)
287 |
288 | mixed $e->getElementsById($id [,$index])
289 | mixed $e->find("#$id" [, int $index])
290 |
291 | element $e->getElementByTagName($name)
292 | mixed $e->find($name, 0)
293 |
294 | mixed $e->getElementsByTagName($name [, $index])
295 | mixed $e->find($name [, int $index])
296 |
297 | element $e->parentNode()
298 | element $e->parent()
299 |
300 | mixed $e->childNodes([$index])
301 | mixed $e->children([int $index])
302 |
303 | element $e->firstChild()
304 | element $e->first_child()
305 |
306 | element $e->lastChild()
307 | element $e->last_child()
308 |
309 | element $e->nextSibling()
310 | element $e->next_sibling()
311 |
312 | element $e->previousSibling()
313 | element $e->prev_sibling()
314 | ```
315 |
--------------------------------------------------------------------------------
/lib/Element.php:
--------------------------------------------------------------------------------
1 | 'childNodes',
42 | 'first_child' => 'firstChild',
43 | 'last_child' => 'lastChild',
44 | 'next_sibling' => 'nextSibling',
45 | 'prev_sibling' => 'previousSibling',
46 | 'parent' => 'parentNode',
47 | 'outertext' => 'html',
48 | 'innertext' => 'innerHtml',
49 | ];
50 |
51 |
/**
 * Wrap a DOM node.
 *
 * @param DOMNode $node Node stored on this element and returned by getNode()
 */
public function __construct(DOMNode $node)
{
    $this->node = $node;
}
56 |
/**
 * Give access to the wrapped DOM node.
 *
 * @return DOMNode The node currently held by this element
 */
public function getNode()
{
    return $this->node;
}
64 |
/**
 * Replace the wrapped node with the node parsed from an HTML string.
 *
 * The string is parsed with NodeList::fromString(). When it yields no node,
 * the wrapped node is removed from its parent, an empty DOMText takes its
 * place on this object and null is returned. When it yields exactly one
 * node, that node is imported into the owner document and swapped in.
 *
 * @param $string HTML fragment holding at most one root node
 *
 * @return $this|null Null when the fragment was empty and the node was removed
 *
 * @throws RuntimeException When the fragment contains more than one root node
 */
protected function replaceNode($string)
{
    $parsed = NodeList::fromString($string);

    if ($parsed->count() > 1) {
        throw new RuntimeException('Not valid HTML fragment. String contains more one root node');
    }

    if ($parsed->count() === 0) {
        // Nothing to insert: detach the current node and keep an empty placeholder.
        $this->node->parentNode->removeChild($this->node);
        $this->node = new DOMText();

        return null;
    }

    $source = $parsed[0]->getNode();
    $replacement = $this->node->ownerDocument->importNode($source, true);

    $this->node->parentNode->replaceChild($replacement, $this->node);
    $this->node = $replacement;

    return $this;
}
95 |
/**
 * Replace all children of the wrapped node with nodes parsed from an HTML string.
 *
 * @param $string HTML fragment parsed with NodeList::fromString()
 *
 * @return $this
 */
protected function replaceChild($string)
{
    $importNodeList = NodeList::fromString($string);

    // childNodes is a live list: removing nodes inside a foreach over it
    // shifts the remaining items and skips some of them, leaving old
    // children behind. Draining from the front removes every child.
    while ($this->node->firstChild !== null) {
        $this->node->removeChild($this->node->firstChild);
    }

    foreach ($importNodeList as $importNode) {
        // Parsed nodes belong to another document and must be imported first.
        $newNode = $this->node->ownerDocument->importNode($importNode->getNode(), true);
        $this->node->appendChild($newNode);
    }

    $this->node->normalize();

    return $this;
}
120 |
/**
 * Replace the wrapped node with a text node.
 *
 * An empty value (null, false, '' or an empty array) removes the node from
 * its parent and returns null. Numeric zero values such as '0' are kept as
 * text instead of being treated as empty.
 *
 * @param $string Text for the new text node
 *
 * @return $this|null Null when the value was empty and the node was removed
 */
protected function replaceText($string)
{
    // empty() alone is also true for 0, 0.0 and '0', which are valid text.
    if (empty($string) && !is_numeric($string)) {
        $this->node->parentNode->removeChild($this->node);

        return null;
    }

    // The text node is created by the owner document, so no import is needed.
    $newNode = $this->node->ownerDocument->createTextNode($string);

    $this->node->parentNode->replaceChild($newNode, $this->node);
    $this->node = $newNode;

    return $this;
}
145 |
/**
 * Build a Document around this element.
 *
 * @return Document A new Document constructed from this element on every call
 */
public function getDom()
{
    $document = new Document($this);

    return $document;
}
153 |
/**
 * Search with a CSS selector by delegating to the Document from getDom().
 *
 * @param string $selector CSS selector
 * @param int    $idx      Optional index, passed through to Document::find()
 *
 * @return NodeList|Element|null
 */
public function find($selector, $idx = null)
{
    $document = $this->getDom();

    return $document->find($selector, $idx);
}
166 |
/**
 * Find the first element matching the "#id" selector.
 *
 * @param $id Element id, without the leading "#"
 *
 * @return Element|null
 */
public function getElementById($id)
{
    $selector = "#$id";

    return $this->find($selector, 0);
}
178 |
/**
 * Find elements matching the "#id" selector.
 *
 * @param      $id  Element id, without the leading "#"
 * @param null $idx Optional index, passed through to find()
 *
 * @return Element|NodeList|null
 */
public function getElementsById($id, $idx = null)
{
    $selector = "#$id";

    return $this->find($selector, $idx);
}
191 |
/**
 * Find the first element with the given tag name.
 *
 * @param $name Tag name, used as the selector as-is
 *
 * @return Element|null
 */
public function getElementByTagName($name)
{
    $firstMatch = $this->find($name, 0);

    return $firstMatch;
}
203 |
/**
 * Find elements with the given tag name.
 *
 * @param      $name Tag name, used as the selector as-is
 * @param null $idx  Optional index, passed through to find()
 *
 * @return Element|NodeList|null
 */
public function getElementsByTagName($name, $idx = null)
{
    $matches = $this->find($name, $idx);

    return $matches;
}
216 |
/**
 * Return the children of the node, or a single child by position.
 *
 * @param int $idx Position of the wanted child; -1 (the default) returns all children
 *
 * @return NodeList|Element|null Null when no child exists at the given position
 */
public function childNodes($idx = -1)
{
    $children = $this->getIterator();

    if ($idx === -1) {
        return $children;
    }

    return isset($children[$idx]) ? $children[$idx] : null;
}
238 |
/**
 * Return the first child of the node wrapped in an Element.
 *
 * @return Element|null Null when the node has no first child
 */
public function firstChild()
{
    $child = $this->node->firstChild;

    return $child === null ? null : new Element($child);
}
254 |
/**
 * Return the last child of the node wrapped in an Element.
 *
 * @return Element|null Null when the node has no last child
 */
public function lastChild()
{
    $child = $this->node->lastChild;

    return $child === null ? null : new Element($child);
}
270 |
/**
 * Return the next sibling of the node wrapped in an Element.
 *
 * @return Element|null Null when the node has no next sibling
 */
public function nextSibling()
{
    $sibling = $this->node->nextSibling;

    return $sibling === null ? null : new Element($sibling);
}
286 |
/**
 * Return the previous sibling of the node wrapped in an Element.
 *
 * @return Element|null Null when the node has no previous sibling
 */
public function previousSibling()
{
    $sibling = $this->node->previousSibling;

    return $sibling === null ? null : new Element($sibling);
}
302 |
/**
 * Return the parent of the node wrapped in an Element.
 *
 * @return Element|null Null when the node has no parent
 */
public function parentNode()
{
    $parent = $this->node->parentNode;

    // The constructor requires a DOMNode, so a missing parent must not be
    // passed to it. Returning null matches the sibling/child accessors.
    if ($parent === null) {
        return null;
    }

    return new Element($parent);
}
312 |
/**
 * Return the outer HTML of the node, as produced by the Document from getDom().
 *
 * @return string
 */
public function html()
{
    $document = $this->getDom();

    return $document->html();
}
322 |
/**
 * Return the inner HTML of the node, as produced by the Document from getDom().
 *
 * @return string
 */
public function innerHtml()
{
    $document = $this->getDom();

    return $document->innerHtml();
}
332 |
/**
 * Return the plain text of the node (its textContent).
 *
 * @return string
 */
public function text()
{
    $plainText = $this->node->textContent;

    return $plainText;
}
342 |
/**
 * Collect all attributes of the node as a name => value map.
 *
 * @return array|null Null when the node has no attributes
 */
public function getAllAttributes()
{
    if (!$this->node->hasAttributes()) {
        return null;
    }

    $map = [];
    foreach ($this->node->attributes as $attribute) {
        $map[$attribute->name] = $attribute->value;
    }

    return $map;
}
361 |
/**
 * Return the value of an attribute.
 *
 * @param string $name Attribute name
 *
 * @return string|null Null when the wrapped node is not an element
 *                     (for example a text or comment node)
 */
public function getAttribute($name)
{
    // Only DOMElement has getAttribute(); this class can also wrap text and
    // comment nodes, where the call would be a fatal error.
    if (!$this->node instanceof \DOMElement) {
        return null;
    }

    return $this->node->getAttribute($name);
}
373 |
/**
 * Set or remove an attribute.
 *
 * An empty value (null, false, '' or an empty array) removes the attribute.
 * Numeric zero values such as 0 or '0' are stored as the attribute value
 * instead of being treated as empty.
 *
 * @param $name  Attribute name
 * @param $value New value, or an empty value to remove the attribute
 *
 * @return $this
 */
public function setAttribute($name, $value)
{
    // empty() alone is also true for 0, 0.0 and '0', which are valid values.
    if (empty($value) && !is_numeric($value)) {
        $this->node->removeAttribute($name);
    } else {
        $this->node->setAttribute($name, $value);
    }

    return $this;
}
392 |
/**
 * Determine if an attribute exists on the element.
 *
 * @param $name Attribute name
 *
 * @return bool False when the wrapped node is not an element
 *              (for example a text or comment node)
 */
public function hasAttribute($name)
{
    // Only DOMElement has hasAttribute(); this class can also wrap text and
    // comment nodes, where the call would be a fatal error.
    if (!$this->node instanceof \DOMElement) {
        return false;
    }

    return $this->node->hasAttribute($name);
}
404 |
/**
 * Read a magic property.
 *
 * 'outertext', 'innertext', 'plaintext', 'tag' and 'attr' map to html(),
 * innerHtml(), text(), the node name and getAllAttributes(). Any other name
 * is read as an attribute.
 *
 * @param $name Property name
 *
 * @return array|null|string
 */
public function __get($name)
{
    if ($name === 'outertext') {
        return $this->html();
    }

    if ($name === 'innertext') {
        return $this->innerHtml();
    }

    if ($name === 'plaintext') {
        return $this->text();
    }

    if ($name === 'tag') {
        return $this->node->nodeName;
    }

    if ($name === 'attr') {
        return $this->getAllAttributes();
    }

    return $this->getAttribute($name);
}
427 |
/**
 * Write a magic property.
 *
 * 'outertext', 'innertext' and 'plaintext' map to replaceNode(),
 * replaceChild() and replaceText(). Any other name is written as an attribute.
 *
 * @param $name  Property name
 * @param $value New value
 *
 * @return $this|null Result of the method the write was dispatched to
 */
public function __set($name, $value)
{
    if ($name === 'outertext') {
        return $this->replaceNode($value);
    }

    if ($name === 'innertext') {
        return $this->replaceChild($value);
    }

    if ($name === 'plaintext') {
        return $this->replaceText($value);
    }

    return $this->setAttribute($name, $value);
}
441 |
/**
 * Check whether a magic property is set.
 *
 * 'outertext', 'innertext', 'plaintext' and 'tag' always count as set.
 * Any other name is checked as an attribute.
 *
 * @param $name Property name
 *
 * @return bool
 */
public function __isset($name)
{
    $alwaysSet = ['outertext', 'innertext', 'plaintext', 'tag'];

    if (in_array($name, $alwaysSet, true)) {
        return true;
    }

    return $this->hasAttribute($name);
}
459 |
/**
 * Unset a magic property by passing null to setAttribute().
 *
 * @param $name Attribute name
 *
 * @return $this
 */
public function __unset($name)
{
    $result = $this->setAttribute($name, null);

    return $result;
}
464 |
/**
 * Convert the element to a string: the result of html().
 *
 * @return mixed
 */
public function __toString()
{
    $outerHtml = $this->html();

    return $outerHtml;
}
472 |
/**
 * Calling the element as a function is a shortcut for find().
 *
 * @param string $selector CSS selector
 * @param int    $idx      Optional index, passed through to find()
 *
 * @return Element|NodeList|null
 */
public function __invoke($selector, $idx = null)
{
    $result = $this->find($selector, $idx);

    return $result;
}
483 |
/**
 * Forward calls to aliased method names.
 *
 * The called name is looked up in $functionAliases and the mapped method is
 * invoked with the same arguments.
 *
 * @param $name      Called method name
 * @param $arguments Arguments of the call
 *
 * @return null|string|Element
 *
 * @throws BadMethodCallException When no alias exists for the name
 */
public function __call($name, $arguments)
{
    if (!isset($this->functionAliases[$name])) {
        throw new BadMethodCallException('Method does not exist');
    }

    $target = $this->functionAliases[$name];

    return call_user_func_array([$this, $target], $arguments);
}
499 |
/**
 * Retrieve an external iterator over the child nodes.
 *
 * Each child node is wrapped in an Element. The list is empty when the node
 * has no children.
 *
 * @link http://php.net/manual/en/iteratoraggregate.getiterator.php
 * @return NodeList An instance of an object implementing Iterator or
 * Traversable
 * @since 5.0.0
 */
public function getIterator()
{
    $list = new NodeList();

    if (!$this->node->hasChildNodes()) {
        return $list;
    }

    foreach ($this->node->childNodes as $child) {
        $list[] = new Element($child);
    }

    return $list;
}
519 | }
520 |
--------------------------------------------------------------------------------
/tests/FastSimpleHTMLDom/ElementTest.php:
--------------------------------------------------------------------------------
1 | User name';
16 |
17 | $document = new Document($html);
18 | $node = $document->getDocument()->documentElement;
19 |
20 | $element = new Element($node);
21 |
22 | $this->assertEquals('input', $element->tag);
23 | $this->assertEquals('User name', $element->plaintext);
24 | $this->assertEquals('username', $element->name);
25 | $this->assertEquals('John', $element->value);
26 | }
27 |
28 | public function testGetNode()
29 | {
30 | $html = '
This is a test page filled with common HTML elements to be used to provide visual feedback whilst building CSS systems and frameworks.
13 |
14 |
15 |
57 |
58 |
59 |
60 |
Text
61 |
62 |
63 |
Headings
64 |
65 |
66 |
Heading 1
67 |
Heading 2
68 |
Heading 3
69 |
Heading 4
70 |
Heading 5
71 |
Heading 6
72 |
73 |
74 |
75 |
76 |
Paragraphs
77 |
78 |
A paragraph (from the Greek paragraphos, “to write beside” or “written beside”) is a self-contained unit of a discourse in writing dealing with a particular point or idea. A paragraph consists of one or more sentences. Though not required by the syntax of any language, paragraphs are usually an expected part of formal writing, used to organize longer prose.
79 |
80 |
81 |
82 |
83 |
Blockquotes
84 |
85 |
86 |
A block quotation (also known as a long quotation or extract) is a quotation in a written document, that is set off from the main text as a paragraph, or block of text.
87 |
It is typically distinguished visually using indentation and a different typeface or smaller size quotation. It may or may not include a citation, usually placed at the bottom.
Sample output:This is sample output from a computer program.
184 |
Pre-formatted text
185 |
P R E F O R M A T T E D T E X T
186 | ! " # $ % & ' ( ) * + , - . /
187 | 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
188 | @ A B C D E F G H I J K L M N O
189 | P Q R S T U V W X Y Z [ \ ] ^ _
190 | ` a b c d e f g h i j k l m n o
191 | p q r s t u v w x y z { | } ~