├── CHANGELOG.md ├── LICENSE ├── README-RU.md ├── README.md ├── composer.json ├── composer.lock └── src └── DiDom ├── ClassAttribute.php ├── Document.php ├── DocumentFragment.php ├── Element.php ├── Encoder.php ├── Errors.php ├── Exceptions └── InvalidSelectorException.php ├── Node.php ├── Query.php └── StyleAttribute.php /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 2.0 2 | 3 | ### Breaking changes 4 | 5 | - Minimum PHP version bumped to 7.2 6 | - Remove `__invoke` method from `Document`, `Element` and `DocumentFragment` that was deprecated earlier 7 | - Remove magic property `Element::$tag`. Use `tagName()` method instead 8 | - Rename `Element::getDocument()` to `ownerDocument()` 9 | 10 | ### What's new 11 | 12 | - Add `Node::setInnerXml()` method (i.e. for `Element` and `DocumentFragment` too) 13 | 14 | ## 1.18 15 | 16 | - Fix a bug where calling `Element::previousSibling()` with a selector returned a previous sibling even when there was no matching element 17 | 18 | ## 1.17 19 | 20 | - Add support of multiple pseudoclasses (#125) 21 | 22 | ## 1.16.4 23 | 24 | - Handle nested pseudo-classes with expression correctly 25 | 26 | ## 1.16.3 27 | 28 | - Fix parsing of a style property in "style" attribute when the value contains a colon 29 | 30 | ## 1.16.1 31 | 32 | - Fix deprecation notice in PHP 8 for `libxml_disable_entity_loader` 33 | 34 | ## 1.16 35 | 36 | - Add `Node::insertSiblingBefore()` and `Node::insertSiblingAfter()` methods for inserting sibling nodes 37 | 38 | ## 1.15 39 | 40 | - Add support of document fragments 41 | 42 | ## 1.14.1 43 | 44 | - Fix an exception when selecting comment element with XPath 45 | - Add support of `DOMCdataSection` nodes 46 | - Add methods `createTextNode()`, `createComment()`, `createCdataSection()` to the Document class 47 | 48 | ## 1.14 49 | 50 | - Add `Element::innerXml()` method 51 | 52 | ## 1.13 53 | 54 | - Add `Element::outerHtml()` method 55 | - Add 
`Element::prependChild()` method 56 | - Add `Element::insertBefore()` and `Element::insertAfter()` methods 57 | - Add `Element::style()` method for more convenient inline styles manipulation 58 | - Add `Element::classes()` method for more convenient class manipulation 59 | 60 | ## 1.12 61 | 62 | - Many fixes and improvements 63 | 64 | ## 1.11.1 65 | 66 | - Fix bug with unregistered PHP functions in XPath in `Document::has()` and `Document::count()` methods 67 | 68 | ## 1.11 69 | 70 | - Add `Element::isElementNode()` method 71 | - Add ability to retrieve only specific attributes in `Element::attributes()` method 72 | - Add `Element::removeAllAttributes()` method 73 | - Add ability to specify selector and node type in `Element::previousSibling()` and `Element::nextSibling()` methods 74 | - Add `Element::previousSiblings()` and `Element::nextSiblings()` methods 75 | - Many minor fixes and improvements 76 | 77 | ## 1.10.6 78 | 79 | - Fix bug with XML document loading 80 | 81 | ## 1.10.5 82 | 83 | - Fix issue #85 84 | 85 | ## 1.10.4 86 | 87 | - Use `mb_convert_encoding()` in the Encoder if it is available 88 | 89 | ## 1.10.3 90 | 91 | - Add `Element::removeChild()` and `Element::removeChildren()` methods 92 | - Fix bug in `Element::matches()` method 93 | - `Element::matches()` method now returns false if node is not `DOMElement` 94 | - Add `Element::hasChildren()` method 95 | 96 | ## 1.10.2 97 | 98 | - Fix bug in setInnerHtml: can't rewrite existing content 99 | - Throw `InvalidSelectorException` instead of `InvalidArgumentException` when selector is empty 100 | 101 | ## 1.10.1 102 | 103 | - Fix attributes `ends-with` XPath 104 | - Method `Element::matches()` now can check children nodes 105 | 106 | ## 1.10 107 | 108 | - Fix HTML saving mechanism 109 | - Throw `InvalidSelectorException` instead of `RuntimeException` in Query class 110 | 111 | ## 1.9.1 112 | 113 | - Add ability to search in owner document using current node as context 114 | - Bugs fixed 115 | 116 | ## 
1.9.0 117 | 118 | - Methods `Document::appendChild()` and `Element::appendChild()` now return appended node(s) 119 | - Add ability to search elements in context 120 | 121 | ## 1.8.8 122 | 123 | - Bugs fixed 124 | 125 | ## 1.8.7 126 | 127 | - Add `Element::getLineNo()` method 128 | 129 | ## 1.8.6 130 | 131 | - Fix issue #55 132 | 133 | ## 1.8.5 134 | 135 | - Add support of `DOMComment` 136 | 137 | ## 1.8.4 138 | 139 | - Add ability to create an element by selector 140 | - Add closest method 141 | 142 | ## 1.8.3 143 | 144 | - Add method `Element::isTextNode()` 145 | - Many minor fixes 146 | 147 | ## 1.8.2 148 | 149 | - Add ability to check that element matches selector 150 | - Add ability counting nodes by selector 151 | - Many minor fixes 152 | 153 | ## 1.8.1 154 | 155 | - Small fix 156 | 157 | ## 1.8 158 | 159 | - Bug fixes 160 | - Add support of ~ selector 161 | - Add ability to direct search by CSS selector 162 | - Add setInnerHtml method 163 | - Add attributes method 164 | 165 | ## 1.7.4 166 | 167 | - Add support of text nodes 168 | 169 | ## 1.7.3 170 | 171 | - Bug fix 172 | 173 | ## 1.7.2 174 | 175 | - Fixed behavior of nth-child pseudo class 176 | - Add nth-of-type pseudo class 177 | 178 | ## 1.7.1 179 | 180 | - Add pseudo class has and more attribute options 181 | 182 | ## 1.7.0 183 | 184 | - Bug fixes 185 | - Add methods `previousSibling`, `nextSibling`, `child`, `firstChild`, `lastChild`, `children`, `getDocument` to the Element 186 | - Changed behavior of parent method. 
Now it returns parent node instead of owner document 187 | 188 | ## 1.6.8 189 | 190 | - Bug fix 191 | 192 | ## 1.6.5 193 | 194 | - Added ability to get an element attribute by CSS selector 195 | 196 | ## 1.6.4 197 | 198 | - Added handling of `DOMText` and `DOMAttr` in `Document::find()` 199 | 200 | ## 1.6.3 201 | 202 | - Added ability to get inner HTML 203 | 204 | ## 1.6.2 205 | 206 | - Added the ability to pass options when load HTML or XML 207 | 208 | ## 1.6.1 209 | 210 | - Added the ability to pass an array of nodes to appendChild 211 | - Added the ability to pass options when converting to HTML or XML 212 | - Added the ability to add child elements to the element 213 | 214 | ## 1.6 215 | 216 | - Added support for XML 217 | - Added the ability to search element by part of attribute name or value 218 | - Added support for pseudo-class "contains" 219 | - Added the ability to clone a node 220 | 221 | ## 1.5.1 222 | 223 | - Added ability to remove and replace nodes 224 | - Added ability to specify encoding when converting the element into the document 225 | 226 | ## 1.5 227 | 228 | - Fixed problem with incorrect encoding 229 | - Added ability to set the value of the element 230 | - Added ability to specify encoding when creating document 231 | 232 | ## 1.4 233 | 234 | - Added the ability to specify the return type element (`DiDom\Element` or `DOMElement`) 235 | 236 | ## 1.3.2 237 | 238 | - Bug fixed 239 | 240 | ## 1.3.1 241 | 242 | - Bugs fixed 243 | - Added the ability to pass element attributes in the constructor 244 | 245 | ## 1.3 246 | 247 | - Bugs fixed 248 | 249 | ## 1.2 250 | 251 | - Bugs fixed 252 | - Added the ability to compare Element\Document 253 | - Added the ability to format HTML code of the document when outputting 254 | 255 | ## 1.1 256 | 257 | - Added cache control 258 | - Converter from CSS to XPath replaced by faster 259 | 260 | ## 1.0 261 | 262 | - First release -------------------------------------------------------------------------------- 
/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Muhammad Imangazaliev 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /README-RU.md: -------------------------------------------------------------------------------- 1 | # DiDOM 2 | 3 | [![Build Status](https://travis-ci.org/Imangazaliev/DiDOM.svg?branch=master)](https://travis-ci.org/Imangazaliev/DiDOM) 4 | [![Total Downloads](https://poser.pugx.org/imangazaliev/didom/downloads)](https://packagist.org/packages/imangazaliev/didom) 5 | [![Latest Stable Version](https://poser.pugx.org/imangazaliev/didom/v/stable)](https://packagist.org/packages/imangazaliev/didom) 6 | [![License](https://poser.pugx.org/imangazaliev/didom/license)](https://packagist.org/packages/imangazaliev/didom) 7 | 8 | DiDOM - простая и быстрая библиотека для парсинга HTML. 9 | 10 | - [English version](README.md) 11 | - [Документация для версии 1.x](https://github.com/Imangazaliev/DiDOM/blob/98d411741d598b0b74bb38e215d99c1cdb0d532d/README-RU.md). Чтобы обновиться с версии 1.x, пожалуйста, просмотрите [историю изменений](CHANGELOG.md). 
12 | 13 | ## Содержание 14 | 15 | - [Установка](#Установка) 16 | - [Быстрый старт](#Быстрый-старт) 17 | - [Создание нового документа](#Создание-нового-документа) 18 | - [Поиск элементов](#Поиск-элементов) 19 | - [Проверка наличия элемента](#Проверка-наличия-элемента) 20 | - [Подсчет количества элементов](#Подсчет-количества-элементов) 21 | - [Поиск в элементе](#Поиск-в-элементе) 22 | - [Поддерживамые селекторы](#Поддерживамые-селекторы) 23 | - [Изменение содержимого](#Изменение-содержимого) 24 | - [Вывод содержимого](#Вывод-содержимого) 25 | - [Работа с элементами](#Работа-с-элементами) 26 | - [Создание нового элемента](#Создание-нового-элемента) 27 | - [Получение названия элемента](#Получение-названия-элемента) 28 | - [Получение родительского элемента](#Получение-родительского-элемента) 29 | - [Получение соседних элементов](#Получение-соседних-элементов) 30 | - [Получение дочерних элементов](#Получение-дочерних-элементов) 31 | - [Получение документа](#Получение-документа) 32 | - [Работа с атрибутами элемента](#Работа-с-атрибутами-элемента) 33 | - [Сравнение элементов](#Сравнение-элементов) 34 | - [Добавление дочерних элементов](#Добавление-дочерних-элементов) 35 | - [Замена элемента](#Замена-элемента) 36 | - [Удаление элемента](#Удаление-элемента) 37 | - [Работа с кэшем](#Работа-с-кэшем) 38 | - [Прочее](#Прочее) 39 | - [Сравнение с другими парсерами](#Сравнение-с-другими-парсерами) 40 | 41 | ## Установка 42 | 43 | Для установки DiDOM выполните команду: 44 | 45 | composer require imangazaliev/didom 46 | 47 | ## Быстрый старт 48 | 49 | ```php 50 | use DiDom\Document; 51 | 52 | $document = new Document('http://www.news.com/', true); 53 | 54 | $posts = $document->find('.post'); 55 | 56 | foreach($posts as $post) { 57 | echo $post->text(), "\n"; 58 | } 59 | ``` 60 | 61 | ## Создание нового документа 62 | 63 | DiDom позволяет загрузить HTML несколькими способами: 64 | 65 | ##### Через конструктор 66 | 67 | ```php 68 | // в первом параметре передается строка с HTML 69 | 
$document = new Document($html); 70 | 71 | // путь к файлу 72 | $document = new Document('page.html', true); 73 | 74 | // или URL 75 | $document = new Document('http://www.example.com/', true); 76 | 77 | // также можно создать документ из DOMDocument 78 | $domDocument = new DOMDocument(); 79 | $document = new Document($domDocument); 80 | ``` 81 | 82 | Сигнатура: 83 | 84 | ```php 85 | __construct($string = null, $isFile = false, $encoding = 'UTF-8', $type = Document::TYPE_HTML) 86 | ``` 87 | 88 | `$isFile` - указывает, что загружается файл. По умолчанию - `false`. 89 | 90 | `$encoding` - кодировка документа. По умолчанию - UTF-8. 91 | 92 | `$type` - тип документа (HTML - `Document::TYPE_HTML`, XML - `Document::TYPE_XML`). По умолчанию - `Document::TYPE_HTML`. 93 | 94 | ##### Через отдельные методы 95 | 96 | ```php 97 | $document = new Document(); 98 | 99 | $document->loadHtml($html); 100 | 101 | $document->loadHtmlFile('page.html'); 102 | 103 | $document->loadHtmlFile('http://www.example.com/'); 104 | ``` 105 | 106 | Для загрузки XML есть соответствующие методы `loadXml` и `loadXmlFile`. 107 | 108 | При загрузке документа через эти методы, парсеру можно передать дополнительные [опции](http://php.net/manual/ru/libxml.constants.php): 109 | 110 | ```php 111 | $document->loadHtml($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); 112 | $document->loadHtmlFile($url, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); 113 | 114 | $document->loadXml($xml, LIBXML_PARSEHUGE); 115 | $document->loadXmlFile($url, LIBXML_PARSEHUGE); 116 | ``` 117 | 118 | ## Поиск элементов 119 | 120 | В качестве выражения для поиска можно передать CSS-селектор или XPath. Для этого в первом параметре нужно передать само выражение, а во втором - его тип (по умолчанию - `Query::TYPE_CSS`): 121 | 122 | ##### Через метод `find()`: 123 | 124 | ```php 125 | use DiDom\Document; 126 | use DiDom\Query; 127 | 128 | ... 
129 | 130 | // CSS-селектор 131 | $posts = $document->find('.post'); 132 | 133 | // эквивалентно 134 | $posts = $document->find('.post', Query::TYPE_CSS); 135 | 136 | // XPath-выражение 137 | $posts = $document->find("//div[contains(@class, 'post')]", Query::TYPE_XPATH); 138 | ``` 139 | 140 | Метод вернет массив с элементами (экземпляры класса `DiDom\Element`) или пустой массив, если не найден ни один элемент, соответствующий выражению. 141 | 142 | При желании можно получить массив узлов без преобразования в Element или текст (`DOMElement`/`DOMText`/`DOMComment`/`DOMAttr`, в зависимости от выражения), для этого необходимо передать в качестве третьего параметра `false`. 143 | 144 | ##### Через метод `first()`: 145 | 146 | Возвращает первый найденный элемент или `null`, если не найдено ни одного элемента. 147 | 148 | Принимает те же параметры, что и метод `find()`. 149 | 150 | ##### Через магический метод `__invoke()`: 151 | 152 | ```php 153 | $posts = $document('.post'); 154 | ``` 155 | 156 | Принимает те же параметры, что и метод `find()`. 157 | 158 | **Внимание:** использование данного метода нежелательно, т.к. в будущем он может быть удален. 159 | 160 | ##### Через метод `xpath()`: 161 | 162 | ```php 163 | $posts = $document->xpath("//*[contains(concat(' ', normalize-space(@class), ' '), ' post ')]"); 164 | ``` 165 | 166 | ## Проверка наличия элемента 167 | 168 | Проверить наличие элемента можно с помощью метода `has()`: 169 | 170 | ```php 171 | if ($document->has('.post')) { 172 | // код 173 | } 174 | ``` 175 | 176 | Если нужно проверить наличие элемента, а затем получить его, то можно сделать так: 177 | 178 | ```php 179 | if ($document->has('.post')) { 180 | $elements = $document->find('.post'); 181 | 182 | // код 183 | } 184 | ``` 185 | 186 | но быстрее так: 187 | 188 | ```php 189 | $elements = $document->find('.post'); 190 | 191 | if (count($elements) > 0) { 192 | // код 193 | } 194 | ``` 195 | 196 | т.к. в первом случае выполняется два запроса. 
197 | 198 | ## Подсчет количества элементов 199 | 200 | Метод `count()` позволяет подсчитать количество дочерних элементов, соответствующих селектору: 201 | 202 | ```php 203 | // выведет количество ссылок в документе 204 | echo $document->count('a'); 205 | ``` 206 | 207 | ```php 208 | // выведет количество пунктов в списке 209 | echo $document->first('ul')->count('> li'); 210 | ``` 211 | 212 | ## Поиск в элементе 213 | 214 | Методы `find()`, `first()`, `xpath()`, `has()`, `count()` доступны также и для элемента. 215 | 216 | Пример: 217 | 218 | ```php 219 | echo $document->find('nav')[0]->first('ul.menu')->xpath('//li')[0]->text(); 220 | ``` 221 | 222 | #### Метод `findInDocument()` 223 | 224 | При изменении, замене или удалении элемента, найденного в другом элементе, документ не будет изменен. Данное поведение связано с тем, что в методе `find()` класса `Element` (а, соответственно, и в методах `first()` и `xpath()`) создается новый документ, в котором и производится поиск. 225 | 226 | Для поиска элементов в исходном документе необходимо использовать методы `findInDocument()` и `firstInDocument()`: 227 | 228 | ```php 229 | // ничего не выйдет 230 | $document->first('head')->first('title')->remove(); 231 | 232 | // а вот так да 233 | $document->first('head')->firstInDocument('title')->remove(); 234 | ``` 235 | 236 | **Внимание:** методы `findInDocument()` и `firstInDocument()` работают только для элементов, которые принадлежат какому-либо документу, либо созданных через `new Element(...)`. 
Если элемент не принадлежит к какому-либо документу, будет выброшено исключение `LogicException`; 237 | 238 | ## Поддерживамые селекторы 239 | 240 | DiDom поддерживает поиск по: 241 | 242 | - тэгу 243 | - классу, идентификатору, имени и значению атрибута 244 | - псевдоклассам: 245 | - first-, last-, nth-child 246 | - empty и not-empty 247 | - contains 248 | - has 249 | 250 | ```php 251 | // все ссылки 252 | $document->find('a'); 253 | 254 | // любой элемент с id = "foo" и классом "bar" 255 | $document->find('#foo.bar'); 256 | 257 | // любой элемент, у которого есть атрибут "name" 258 | $document->find('[name]'); 259 | 260 | // эквивалентно 261 | $document->find('*[name]'); 262 | 263 | // поле ввода с именем "foo" 264 | $document->find('input[name=foo]'); 265 | $document->find('input[name=\'foo\']'); 266 | $document->find('input[name="foo"]'); 267 | 268 | // поле ввода с именем "foo" и значением "bar" 269 | $document->find('input[name="foo"][value="bar"]'); 270 | 271 | // поле ввода, название которого НЕ равно "foo" 272 | $document->find('input[name!="foo"]'); 273 | 274 | // любой элемент, у которого есть атрибут, 275 | // начинающийся с "data-" и равный "foo" 276 | $document->find('*[^data-=foo]'); 277 | 278 | // все ссылки, у которых адрес начинается с https 279 | $document->find('a[href^=https]'); 280 | 281 | // все изображения с расширением png 282 | $document->find('img[src$=png]'); 283 | 284 | // все ссылки, содержащие в своем адресе строку "example.com" 285 | $document->find('a[href*=example.com]'); 286 | 287 | // все ссылки, содержащие в атрибуте data-foo значение bar отделенное пробелом 288 | $document->find('a[data-foo~=bar]'); 289 | 290 | // текст всех ссылок с классом "foo" (массив строк) 291 | $document->find('a.foo::text'); 292 | 293 | // эквивалентно 294 | $document->find('a.foo::text()'); 295 | 296 | // адрес и текст подсказки всех полей с классом "bar" 297 | $document->find('a.bar::attr(href|title)'); 298 | 299 | // все ссылки, которые являются 
прямыми потомками текущего элемента 300 | $element->find('> a'); 301 | ``` 302 | 303 | ## Изменение содержимого 304 | 305 | ### Изменение HTML 306 | 307 | ```php 308 | $element->setInnerHtml('<a href="#">Foo</a>'); 309 | ``` 310 | 311 | ### Изменение XML 312 | 313 | ```php 314 | $element->setInnerXml(' Foo <span>Bar</span><!-- Baz --><![CDATA[ 315 | <root>Hello world!</root> 316 | ]]>'); 317 | ``` 318 | 319 | ### Изменение значения (как простой текст) 320 | 321 | ```php 322 | $element->setValue('Foo'); 323 | // будет закодирован в HTML-сущности, как при вызове htmlentities() 324 | $element->setValue('<a href="#">Foo</a>'); 325 | ``` 326 | 327 | ## Вывод содержимого 328 | 329 | ### Получение HTML 330 | 331 | ##### Через метод `html()`: 332 | 333 | ```php 334 | // HTML-код документа 335 | echo $document->html(); 336 | 337 | // HTML-код элемента 338 | echo $document->first('.post')->html(); 339 | ``` 340 | 341 | ##### Приведение к строке: 342 | 343 | ```php 344 | // HTML-код документа 345 | $html = (string) $document; 346 | 347 | // HTML-код элемента 348 | $html = (string) $document->first('.post'); 349 | ``` 350 | 351 | **Внимание:** использование данного способа нежелательно, т.к. в будущем он может быть удален. 
352 | 353 | ##### Форматирование HTML при выводе 354 | 355 | ```php 356 | echo $document->format()->html(); 357 | ``` 358 | 359 | Метод `format()` отсутствует у элемента, поэтому, если нужно получить отформатированный HTML-код элемента, необходимо сначала преобразовать его в документ: 360 | 361 | ```php 362 | $html = $element->toDocument()->format()->html(); 363 | ``` 364 | 365 | #### Внутренний HTML 366 | 367 | ```php 368 | $innerHtml = $element->innerHtml(); 369 | ``` 370 | 371 | Метод `innerHtml()` отсутствует у документа, поэтому, если нужно получить внутренний HTML-код документа, необходимо сначала преобразовать его в элемент: 372 | 373 | ```php 374 | $innerHtml = $document->toElement()->innerHtml(); 375 | ``` 376 | 377 | ### Получение XML 378 | 379 | ```php 380 | // XML-код документа 381 | echo $document->xml(); 382 | 383 | // XML-код элемента 384 | echo $document->first('book')->xml(); 385 | ``` 386 | 387 | ### Получение содержимого 388 | 389 | Возвращает текстовое содержимое узла и его потомков: 390 | 391 | ```php 392 | echo $element->text(); 393 | ``` 394 | 395 | ## Создание нового элемента 396 | 397 | ### Создание экземпляра класса 398 | 399 | ```php 400 | use DiDom\Element; 401 | 402 | $element = new Element('span', 'Hello'); 403 | 404 | // выведет "<span>Hello</span>" 405 | echo $element->html(); 406 | ``` 407 | 408 | Первым параметром передается название элемента, вторым - его значение (необязательно), третьим - атрибуты элемента (необязательно). 
409 | 410 | Пример создания элемента с атрибутами: 411 | 412 | ```php 413 | $attributes = ['name' => 'description', 'placeholder' => 'Enter description of item']; 414 | 415 | $element = new Element('textarea', 'Text', $attributes); 416 | ``` 417 | 418 | Элемент можно создать и из экземпляра класса `DOMElement`: 419 | 420 | ```php 421 | use DiDom\Element; 422 | use DOMElement; 423 | 424 | $domElement = new DOMElement('span', 'Hello'); 425 | $element = new Element($domElement); 426 | ``` 427 | 428 | #### Изменение элемента, созданного из `DOMElement` 429 | 430 | Экземпляры класса `DOMElement`, созданные через конструктор (`new DOMElement(...)`), являются неизменяемыми, поэтому и элементы (экземпляры класса `DiDom\Element`), созданные из таких объектов, так же являются неизменяемыми. 431 | 432 | Пример: 433 | 434 | ```php 435 | $element = new Element('span', 'Hello'); 436 | 437 | // добавит атрибут "id" со значением "greeting" 438 | $element->attr('id', 'greeting'); 439 | 440 | $domElement = new DOMElement('span', 'Hello'); 441 | $element = new Element($domElement); 442 | 443 | // будет выброшено исключение 444 | // DOMException with message 'No Modification Allowed Error' 445 | $element->attr('id', 'greeting'); 446 | ``` 447 | 448 | ### С помощью метода `Document::createElement()` 449 | 450 | ```php 451 | $document = new Document($html); 452 | 453 | $element = $document->createElement('span', 'Hello'); 454 | ``` 455 | 456 | ### С помощью CSS-селектора 457 | 458 | Первый параметр - селектор, второй - значение, третий - массив с атрибутами. 459 | 460 | Атрибуты элемента могут быть указаны как в селекторе, так и переданы отдельно в третьем параметре. 461 | 462 | Если название атрибута в массиве совпадает с названием атрибута из селектора, будет использовано значение, указанное в селекторе. 
463 | 464 | ```php 465 | $document = new Document($html); 466 | 467 | $element = $document->createElementBySelector('div.block', 'Foo', [ 468 | 'id' => '#content', 469 | 'class' => '.container', 470 | ]); 471 | ``` 472 | 473 | Можно так же использовать статический метод `createBySelector` класса `Element`: 474 | 475 | ```php 476 | $element = Element::createBySelector('div.block', 'Foo', [ 477 | 'id' => '#content', 478 | 'class' => '.container', 479 | ]); 480 | ``` 481 | 482 | ## Получение названия элемента 483 | 484 | ```php 485 | $element->tagName(); 486 | ``` 487 | 488 | ## Получение родительского элемента 489 | 490 | ```php 491 | $element->parent(); 492 | ``` 493 | 494 | Так же можно получить родительский элемент, соответствующий селектору: 495 | 496 | ```php 497 | $element->closest('.foo'); 498 | ``` 499 | 500 | Вернет родительский элемент, у которого есть класс `foo`. Если подходящий элемент не найден, метод вернет `null`. 501 | 502 | ## Получение соседних элементов 503 | 504 | Первый аргумент - CSS-селектор, второй - тип узла (`DOMElement`, `DOMText` или `DOMComment`). 505 | 506 | Если оба аргумента опущены, будет осуществлен поиск узлов любого типа. 507 | 508 | Если селектор указан, а тип узла нет, будет использован тип `DOMElement`. 509 | 510 | **Внимание:** Селектор можно использовать только с типом `DOMElement`. 
511 | 512 | ```php 513 | // предыдущий элемент 514 | $item->previousSibling(); 515 | 516 | // предыдущий элемент, соответствующий селектору 517 | $item->previousSibling('span'); 518 | 519 | // предыдущий элемент типа DOMElement 520 | $item->previousSibling(null, 'DOMElement'); 521 | 522 | // предыдущий элемент типа DOMComment 523 | $item->previousSibling(null, 'DOMComment'); 524 | ``` 525 | 526 | ```php 527 | // все предыдущие элементы 528 | $item->previousSiblings(); 529 | 530 | // все предыдущие элементы, соответствующие селектору 531 | $item->previousSiblings('span'); 532 | 533 | // все предыдущие элементы типа DOMElement 534 | $item->previousSiblings(null, 'DOMElement'); 535 | 536 | // все предыдущие элементы типа DOMComment 537 | $item->previousSiblings(null, 'DOMComment'); 538 | ``` 539 | 540 | ```php 541 | // следующий элемент 542 | $item->nextSibling(); 543 | 544 | // следующий элемент, соответствующий селектору 545 | $item->nextSibling('span'); 546 | 547 | // следующий элемент типа DOMElement 548 | $item->nextSibling(null, 'DOMElement'); 549 | 550 | // следующий элемент типа DOMComment 551 | $item->nextSibling(null, 'DOMComment'); 552 | ``` 553 | 554 | ```php 555 | // все последующие элементы 556 | $item->nextSiblings(); 557 | 558 | // все последующие элементы, соответствующие селектору 559 | $item->nextSiblings('span'); 560 | 561 | // все последующие элементы типа DOMElement 562 | $item->nextSiblings(null, 'DOMElement'); 563 | 564 | // все последующие элементы типа DOMComment 565 | $item->nextSiblings(null, 'DOMComment'); 566 | ``` 567 | 568 | ## Получение дочерних элементов 569 | 570 | ```php 571 | $html = '
<div>Foo<span>Bar</span><!--Baz--></div>
'; 572 | 573 | $document = new Document($html); 574 | 575 | $div = $document->first('div'); 576 | 577 | // элемент (DOMElement) 578 | // string(3) "Bar" 579 | var_dump($div->child(1)->text()); 580 | 581 | // текстовый узел (DOMText) 582 | // string(3) "Foo" 583 | var_dump($div->firstChild()->text()); 584 | 585 | // комментарий (DOMComment) 586 | // string(3) "Baz" 587 | var_dump($div->lastChild()->text()); 588 | 589 | // array(3) { ... } 590 | var_dump($div->children()); 591 | ``` 592 | 593 | ## Получение документа 594 | 595 | ```php 596 | $document = new Document($html); 597 | 598 | $element = $document->first('input[name=email]'); 599 | 600 | $document2 = $element->ownerDocument(); 601 | 602 | // bool(true) 603 | var_dump($document->is($document2)); 604 | ``` 605 | 606 | ## Работа с атрибутами элемента 607 | 608 | #### Создание/изменение атрибута 609 | 610 | ##### Через метод `setAttribute`: 611 | ```php 612 | $element->setAttribute('name', 'username'); 613 | ``` 614 | 615 | ##### Через метод `attr`: 616 | ```php 617 | $element->attr('name', 'username'); 618 | ``` 619 | 620 | ##### Через магический метод `__set`: 621 | ```php 622 | $element->name = 'username'; 623 | ``` 624 | 625 | #### Получение значения атрибута 626 | 627 | ##### Через метод `getAttribute`: 628 | ```php 629 | $username = $element->getAttribute('value'); 630 | ``` 631 | 632 | ##### Через метод `attr`: 633 | ```php 634 | $username = $element->attr('value'); 635 | ``` 636 | 637 | ##### Через магический метод `__get`: 638 | ```php 639 | $username = $element->name; 640 | ``` 641 | 642 | Если атрибут не найден, вернет `null`. 
643 | 644 | #### Проверка наличия атрибута 645 | 646 | ##### Через метод `hasAttribute`: 647 | ```php 648 | if ($element->hasAttribute('name')) { 649 | // код 650 | } 651 | ``` 652 | 653 | ##### Через магический метод `__isset`: 654 | ```php 655 | if (isset($element->name)) { 656 | // код 657 | } 658 | ``` 659 | 660 | #### Удаление атрибута: 661 | 662 | ##### Через метод `removeAttribute`: 663 | ```php 664 | $element->removeAttribute('name'); 665 | ``` 666 | 667 | ##### Через магический метод `__unset`: 668 | ```php 669 | unset($element->name); 670 | ``` 671 | 672 | #### Получение всех атрибутов: 673 | 674 | ```php 675 | var_dump($element->attributes()); 676 | ``` 677 | 678 | #### Получение определенных атрибутов: 679 | 680 | ```php 681 | var_dump($element->attributes(['name', 'type'])); 682 | ``` 683 | 684 | #### Удаление всех атрибутов: 685 | 686 | ```php 687 | $element->removeAllAttributes(); 688 | ``` 689 | 690 | #### Удаление всех атрибутов, за исключением указанных: 691 | 692 | ```php 693 | $element->removeAllAttributes(['name', 'type']); 694 | ``` 695 | 696 | ## Сравнение элементов 697 | 698 | ```php 699 | $element = new Element('span', 'hello'); 700 | $element2 = new Element('span', 'hello'); 701 | 702 | // bool(true) 703 | var_dump($element->is($element)); 704 | 705 | // bool(false) 706 | var_dump($element->is($element2)); 707 | ``` 708 | 709 | ## Добавление дочерних элементов 710 | 711 | ```php 712 | $list = new Element('ul'); 713 | 714 | $item = new Element('li', 'Item 1'); 715 | 716 | $list->appendChild($item); 717 | 718 | $items = [ 719 | new Element('li', 'Item 2'), 720 | new Element('li', 'Item 3'), 721 | ]; 722 | 723 | $list->appendChild($items); 724 | ``` 725 | 726 | ## Замена элемента 727 | 728 | ```php 729 | $title = new Element('title', 'foo'); 730 | 731 | $document->first('title')->replace($title); 732 | ``` 733 | 734 | **Внимание:** заменить можно только те элементы, которые были найдены непосредственно в документе: 735 | 736 | ```php 737 | // 
ничего не выйдет 738 | $document->first('head')->first('title')->replace($title); 739 | 740 | // а вот так да 741 | $document->first('head title')->replace($title); 742 | ``` 743 | 744 | Подробнее об этом в разделе [Поиск в элементе](#Поиск-в-элементе). 745 | 746 | ## Удаление элемента 747 | 748 | ```php 749 | $document->first('title')->remove(); 750 | ``` 751 | 752 | **Внимание:** удалить можно только те элементы, которые были найдены непосредственно в документе: 753 | 754 | ```php 755 | // ничего не выйдет 756 | $document->first('head')->first('title')->remove(); 757 | 758 | // а вот так да 759 | $document->first('head title')->remove(); 760 | ``` 761 | 762 | Подробнее об этом в разделе [Поиск в элементе](#Поиск-в-элементе). 763 | 764 | ## Работа с кэшем 765 | 766 | Кэш - массив XPath-выражений, полученных из CSS. 767 | 768 | #### Получение кэша 769 | 770 | ```php 771 | use DiDom\Query; 772 | 773 | ... 774 | 775 | $xpath = Query::compile('h2'); 776 | $compiled = Query::getCompiled(); 777 | 778 | // array('h2' => '//h2') 779 | var_dump($compiled); 780 | ``` 781 | 782 | #### Установка кэша 783 | 784 | ```php 785 | Query::setCompiled(['h2' => '//h2']); 786 | ``` 787 | 788 | ## Прочее 789 | 790 | #### `preserveWhiteSpace` 791 | 792 | По умолчанию сохранение пробелов между тегами отключено. 
793 | 794 | Включать опцию `preserveWhiteSpace` следует до загрузки документа: 795 | 796 | ```php 797 | $document = new Document(); 798 | 799 | $document->preserveWhiteSpace(); 800 | 801 | $document->loadXml($xml); 802 | ``` 803 | 804 | #### `matches` 805 | 806 | Возвращает `true`, если элемент соответствует селектору: 807 | 808 | ```php 809 | // вернет true, если элемент это div с идентификатором content 810 | $element->matches('div#content'); 811 | 812 | // строгое соответствие 813 | // вернет true, если элемент это div с идентификатором content и ничего более 814 | // если у элемента будут какие-либо другие атрибуты, метод вернет false 815 | $element->matches('div#content', true); 816 | ``` 817 | 818 | #### `isElementNode` 819 | 820 | Проверяет, является ли элемент узлом типа DOMElement: 821 | 822 | ```php 823 | $element->isElementNode(); 824 | ``` 825 | 826 | #### `isTextNode` 827 | 828 | Проверяет, является ли элемент текстовым узлом (DOMText): 829 | 830 | ```php 831 | $element->isTextNode(); 832 | ``` 833 | 834 | #### `isCommentNode` 835 | 836 | Проверяет, является ли элемент комментарием (DOMComment): 837 | 838 | ```php 839 | $element->isCommentNode(); 840 | ``` 841 | 842 | ## Сравнение с другими парсерами 843 | 844 | [Сравнение с другими парсерами](https://github.com/Imangazaliev/DiDOM/wiki/Сравнение-с-другими-парсерами-(1.6.3)) 845 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DiDOM 2 | 3 | [![Build Status](https://travis-ci.com/Imangazaliev/DiDOM.svg?branch=master)](https://travis-ci.com/Imangazaliev/DiDOM) 4 | [![Total Downloads](https://poser.pugx.org/imangazaliev/didom/downloads)](https://packagist.org/packages/imangazaliev/didom) 5 | [![Latest Stable Version](https://poser.pugx.org/imangazaliev/didom/v/stable)](https://packagist.org/packages/imangazaliev/didom) 6 | 
[![License](https://poser.pugx.org/imangazaliev/didom/license)](https://packagist.org/packages/imangazaliev/didom) 7 | 8 | DiDOM - simple and fast HTML parser. 9 | 10 | - [README на русском](README-RU.md) 11 | - [DiDOM 1.x documentation](https://github.com/Imangazaliev/DiDOM/blob/98d411741d598b0b74bb38e215d99c1cdb0d532d/README.md). To upgrade from 1.x please checkout the [changelog](CHANGELOG.md). 12 | 13 | ## Contents 14 | 15 | - [Installation](#installation) 16 | - [Quick start](#quick-start) 17 | - [Creating new document](#creating-new-document) 18 | - [Search for elements](#search-for-elements) 19 | - [Verify if element exists](#verify-if-element-exists) 20 | - [Search in element](#search-in-element) 21 | - [Supported selectors](#supported-selectors) 22 | - [Changing content](#changing-content) 23 | - [Output](#output) 24 | - [Working with elements](#working-with-elements) 25 | - [Creating a new element](#creating-a-new-element) 26 | - [Getting the name of an element](#getting-the-name-of-an-element) 27 | - [Getting parent element](#getting-parent-element) 28 | - [Getting sibling elements](#getting-sibling-elements) 29 | - [Getting the child elements](#getting-the-child-elements) 30 | - [Getting document](#getting-document) 31 | - [Working with element attributes](#working-with-element-attributes) 32 | - [Comparing elements](#comparing-elements) 33 | - [Adding a child element](#adding-a-child-element) 34 | - [Replacing element](#replacing-element) 35 | - [Removing element](#removing-element) 36 | - [Working with cache](#working-with-cache) 37 | - [Miscellaneous](#miscellaneous) 38 | - [Comparison with other parsers](#comparison-with-other-parsers) 39 | 40 | ## Installation 41 | 42 | To install DiDOM run the command: 43 | 44 | composer require imangazaliev/didom 45 | 46 | ## Quick start 47 | 48 | ```php 49 | use DiDom\Document; 50 | 51 | $document = new Document('http://www.news.com/', true); 52 | 53 | $posts = $document->find('.post'); 54 | 55 | foreach($posts 
as $post) { 56 | echo $post->text(), "\n"; 57 | } 58 | ``` 59 | 60 | ## Creating new document 61 | 62 | DiDom allows to load HTML in several ways: 63 | 64 | ##### With constructor 65 | 66 | ```php 67 | // the first parameter is a string with HTML 68 | $document = new Document($html); 69 | 70 | // file path 71 | $document = new Document('page.html', true); 72 | 73 | // or URL 74 | $document = new Document('http://www.example.com/', true); 75 | ``` 76 | 77 | The second parameter specifies if you need to load file. Default is `false`. 78 | 79 | Signature: 80 | 81 | ```php 82 | __construct($string = null, $isFile = false, $encoding = 'UTF-8', $type = Document::TYPE_HTML) 83 | ``` 84 | 85 | `$string` - an HTML or XML string or a file path. 86 | 87 | `$isFile` - indicates that the first parameter is a path to a file. 88 | 89 | `$encoding` - the document encoding. 90 | 91 | `$type` - the document type (HTML - `Document::TYPE_HTML`, XML - `Document::TYPE_XML`). 92 | 93 | ##### With separate methods 94 | 95 | ```php 96 | $document = new Document(); 97 | 98 | $document->loadHtml($html); 99 | 100 | $document->loadHtmlFile('page.html'); 101 | 102 | $document->loadHtmlFile('http://www.example.com/'); 103 | ``` 104 | 105 | There are two methods available for loading XML: `loadXml` and `loadXmlFile`. 106 | 107 | These methods accept additional [options](http://php.net/manual/en/libxml.constants.php): 108 | 109 | ```php 110 | $document->loadHtml($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); 111 | $document->loadHtmlFile($url, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); 112 | 113 | $document->loadXml($xml, LIBXML_PARSEHUGE); 114 | $document->loadXmlFile($url, LIBXML_PARSEHUGE); 115 | ``` 116 | 117 | ## Search for elements 118 | 119 | DiDOM accepts CSS selector or XPath as an expression for search. 
You need to pass the expression as the first parameter, and specify its type in the second one (default type is `Query::TYPE_CSS`): 120 | 121 | ##### With method `find()`: 122 | 123 | ```php 124 | use DiDom\Document; 125 | use DiDom\Query; 126 | 127 | ... 128 | 129 | // CSS selector 130 | $posts = $document->find('.post'); 131 | 132 | // XPath 133 | $posts = $document->find("//div[contains(@class, 'post')]", Query::TYPE_XPATH); 134 | ``` 135 | 136 | If the elements that match a given expression are found, then the method returns an array of instances of `DiDom\Element`, otherwise - an empty array. You could also get an array of `DOMElement` objects. To get this, pass `false` as the third parameter. 137 | 138 | ##### With magic method `__invoke()`: 139 | 140 | ```php 141 | $posts = $document('.post'); 142 | ``` 143 | 144 | **Warning:** using this method is undesirable because it may be removed in the future. 145 | 146 | ##### With method `xpath()`: 147 | 148 | ```php 149 | $posts = $document->xpath("//*[contains(concat(' ', normalize-space(@class), ' '), ' post ')]"); 150 | ``` 151 | 152 | You can do search inside an element: 153 | 154 | ```php 155 | echo $document->find('nav')[0]->first('ul.menu')->xpath('//li')[0]->text(); 156 | ``` 157 | 158 | ### Verify if element exists 159 | 160 | To verify if an element exists, use the `has()` method: 161 | 162 | ```php 163 | if ($document->has('.post')) { 164 | // code 165 | } 166 | ``` 167 | 168 | If you need to check if an element exists and then get it: 169 | 170 | ```php 171 | if ($document->has('.post')) { 172 | $elements = $document->find('.post'); 173 | // code 174 | } 175 | ``` 176 | 177 | but it would be faster like this: 178 | 179 | ```php 180 | if (count($elements = $document->find('.post')) > 0) { 181 | // code 182 | } 183 | ``` 184 | 185 | because in the first case it makes two queries. 186 | 187 | ## Search in element 188 | 189 | Methods `find()`, `first()`, `xpath()`, `has()`, `count()` are available in Element too. 
190 | 191 | Example: 192 | 193 | ```php 194 | echo $document->find('nav')[0]->first('ul.menu')->xpath('//li')[0]->text(); 195 | ``` 196 | 197 | #### Method `findInDocument()` 198 | 199 | If you change, replace, or remove an element that was found in another element, the document will not be changed. This happens because the `find()` method of the `Element` class (and, respectively, the `first()` and `xpath()` methods) creates a new document to search in. 200 | 201 | To search for elements in the source document, you must use the methods `findInDocument()` and `firstInDocument()`: 202 | 203 | ```php 204 | // nothing will happen 205 | $document->first('head')->first('title')->remove(); 206 | 207 | // but this will do 208 | $document->first('head')->firstInDocument('title')->remove(); 209 | ``` 210 | 211 | **Warning:** methods `findInDocument()` and `firstInDocument()` work only for elements that belong to a document, and for elements created via `new Element(...)`. If an element does not belong to a document, `LogicException` will be thrown. 212 | 213 | ## Supported selectors 214 | 215 | DiDom supports search by: 216 | 217 | - tag 218 | - class, ID, name and value of an attribute 219 | - pseudo-classes: 220 | - first-, last-, nth-child 221 | - empty and not-empty 222 | - contains 223 | - has 224 | 225 | ```php 226 | // all links 227 | $document->find('a'); 228 | 229 | // any element with id = "foo" and "bar" class 230 | $document->find('#foo.bar'); 231 | 232 | // any element with attribute "name" 233 | $document->find('[name]'); 234 | // the same as 235 | $document->find('*[name]'); 236 | 237 | // input field with the name "foo" 238 | $document->find('input[name=foo]'); 239 | $document->find('input[name=\'bar\']'); 240 | $document->find('input[name="baz"]'); 241 | 242 | // any element that has an attribute starting with "data-" and the value "foo" 243 | $document->find('*[^data-=foo]'); 244 | 245 | // all links starting with https 246 | $document->find('a[href^=https]'); 247 | 
248 | // all images with the extension png 249 | $document->find('img[src$=png]'); 250 | 251 | // all links containing the string "example.com" 252 | $document->find('a[href*=example.com]'); 253 | 254 | // text of the links with "foo" class 255 | $document->find('a.foo::text'); 256 | 257 | // address and title of all the fields with "bar" class 258 | $document->find('a.bar::attr(href|title)'); 259 | ``` 260 | 261 | ## Changing content 262 | 263 | ### Change inner HTML 264 | 265 | ```php 266 | $element->setInnerHtml('Foo'); 267 | ``` 268 | 269 | ### Change inner XML 270 | 271 | ```php 272 | $element->setInnerXml(' Foo BarHello world! 274 | ]]>'); 275 | ``` 276 | 277 | ### Change value (as plain text) 278 | 279 | ```php 280 | $element->setValue('Foo'); 281 | // will be encoded like using htmlentities() 282 | $element->setValue('Foo'); 283 | ``` 284 | 285 | ## Output 286 | 287 | ### Getting HTML 288 | 289 | ##### With method `html()`: 290 | 291 | ```php 292 | $posts = $document->find('.post'); 293 | 294 | echo $posts[0]->html(); 295 | ``` 296 | 297 | ##### Casting to string: 298 | 299 | ```php 300 | $html = (string) $posts[0]; 301 | ``` 302 | 303 | ##### Formatting HTML output 304 | 305 | ```php 306 | $html = $document->format()->html(); 307 | ``` 308 | 309 | An element does not have `format()` method, so if you need to output formatted HTML of the element, then first you have to convert it to a document: 310 | 311 | 312 | ```php 313 | $html = $element->toDocument()->format()->html(); 314 | ``` 315 | 316 | #### Inner HTML 317 | 318 | ```php 319 | $innerHtml = $element->innerHtml(); 320 | ``` 321 | 322 | Document does not have the method `innerHtml()`, therefore, if you need to get inner HTML of a document, convert it into an element first: 323 | 324 | ```php 325 | $innerHtml = $document->toElement()->innerHtml(); 326 | ``` 327 | 328 | ### Getting XML 329 | 330 | ```php 331 | echo $document->xml(); 332 | 333 | echo $document->first('book')->xml(); 334 | ``` 335 | 336 | 
### Getting content 337 | 338 | ```php 339 | $posts = $document->find('.post'); 340 | 341 | echo $posts[0]->text(); 342 | ``` 343 | 344 | ## Creating a new element 345 | 346 | ### Creating an instance of the class 347 | 348 | ```php 349 | use DiDom\Element; 350 | 351 | $element = new Element('span', 'Hello'); 352 | 353 | // Outputs "<span>Hello</span>" 354 | echo $element->html(); 355 | ``` 356 | 357 | The first parameter is the name of the element (its tag name), the second one is its value (optional), and the third one is the element's attributes (optional). 358 | 359 | An example of creating an element with attributes: 360 | 361 | ```php 362 | $attributes = ['name' => 'description', 'placeholder' => 'Enter description of item']; 363 | 364 | $element = new Element('textarea', 'Text', $attributes); 365 | ``` 366 | 367 | An element can be created from an instance of the class `DOMElement`: 368 | 369 | ```php 370 | use DiDom\Element; 371 | use DOMElement; 372 | 373 | $domElement = new DOMElement('span', 'Hello'); 374 | 375 | $element = new Element($domElement); 376 | ``` 377 | 378 | ### Using the method `createElement` 379 | 380 | ```php 381 | $document = new Document($html); 382 | 383 | $element = $document->createElement('span', 'Hello'); 384 | ``` 385 | 386 | ## Getting the name of an element 387 | 388 | ```php 389 | $element->tagName(); 390 | ``` 391 | 392 | ## Getting parent element 393 | 394 | ```php 395 | $document = new Document($html); 396 | 397 | $input = $document->find('input[name=email]')[0]; 398 | 399 | var_dump($input->parent()); 400 | ``` 401 | 402 | ## Getting sibling elements 403 | 404 | ```php 405 | $document = new Document($html); 406 | 407 | $item = $document->find('ul.menu > li')[1]; 408 | 409 | var_dump($item->previousSibling()); 410 | 411 | var_dump($item->nextSibling()); 412 | ``` 413 | 414 | ## Getting the child elements 415 | 416 | ```php 417 | $html = '
FooBar
'; 418 | 419 | $document = new Document($html); 420 | 421 | $div = $document->first('div'); 422 | 423 | // element node (DOMElement) 424 | // string(3) "Bar" 425 | var_dump($div->child(1)->text()); 426 | 427 | // text node (DOMText) 428 | // string(3) "Foo" 429 | var_dump($div->firstChild()->text()); 430 | 431 | // comment node (DOMComment) 432 | // string(3) "Baz" 433 | var_dump($div->lastChild()->text()); 434 | 435 | // array(3) { ... } 436 | var_dump($div->children()); 437 | ``` 438 | 439 | ## Getting owner document 440 | 441 | ```php 442 | $document = new Document($html); 443 | 444 | $element = $document->find('input[name=email]')[0]; 445 | 446 | $document2 = $element->ownerDocument(); 447 | 448 | // bool(true) 449 | var_dump($document->is($document2)); 450 | ``` 451 | 452 | ## Working with element attributes 453 | 454 | #### Creating/updating an attribute 455 | 456 | ##### With method `setAttribute`: 457 | ```php 458 | $element->setAttribute('name', 'username'); 459 | ``` 460 | 461 | ##### With method `attr`: 462 | ```php 463 | $element->attr('name', 'username'); 464 | ``` 465 | 466 | ##### With magic method `__set`: 467 | ```php 468 | $element->name = 'username'; 469 | ``` 470 | 471 | #### Getting value of an attribute 472 | 473 | ##### With method `getAttribute`: 474 | 475 | ```php 476 | $username = $element->getAttribute('value'); 477 | ``` 478 | 479 | ##### With method `attr`: 480 | 481 | ```php 482 | $username = $element->attr('value'); 483 | ``` 484 | 485 | ##### With magic method `__get`: 486 | 487 | ```php 488 | $username = $element->name; 489 | ``` 490 | 491 | Returns `null` if attribute is not found. 
492 | 493 | #### Verify if attribute exists 494 | 495 | ##### With method `hasAttribute`: 496 | 497 | ```php 498 | if ($element->hasAttribute('name')) { 499 | // code 500 | } 501 | ``` 502 | 503 | ##### With magic method `__isset`: 504 | 505 | ```php 506 | if (isset($element->name)) { 507 | // code 508 | } 509 | ``` 510 | 511 | #### Removing attribute: 512 | 513 | ##### With method `removeAttribute`: 514 | 515 | ```php 516 | $element->removeAttribute('name'); 517 | ``` 518 | 519 | ##### With magic method `__unset`: 520 | 521 | ```php 522 | unset($element->name); 523 | ``` 524 | 525 | ## Comparing elements 526 | 527 | ```php 528 | $element = new Element('span', 'hello'); 529 | $element2 = new Element('span', 'hello'); 530 | 531 | // bool(true) 532 | var_dump($element->is($element)); 533 | 534 | // bool(false) 535 | var_dump($element->is($element2)); 536 | ``` 537 | 538 | ## Appending child elements 539 | 540 | ```php 541 | $list = new Element('ul'); 542 | 543 | $item = new Element('li', 'Item 1'); 544 | 545 | $list->appendChild($item); 546 | 547 | $items = [ 548 | new Element('li', 'Item 2'), 549 | new Element('li', 'Item 3'), 550 | ]; 551 | 552 | $list->appendChild($items); 553 | ``` 554 | 555 | ## Adding a child element 556 | 557 | ```php 558 | $list = new Element('ul'); 559 | 560 | $item = new Element('li', 'Item 1'); 561 | $items = [ 562 | new Element('li', 'Item 2'), 563 | new Element('li', 'Item 3'), 564 | ]; 565 | 566 | $list->appendChild($item); 567 | $list->appendChild($items); 568 | ``` 569 | 570 | ## Replacing element 571 | 572 | ```php 573 | $element = new Element('span', 'hello'); 574 | 575 | $document->find('.post')[0]->replace($element); 576 | ``` 577 | 578 | **Warning:** you can replace only those elements that were found directly in the document: 579 | 580 | ```php 581 | // nothing will happen 582 | $document->first('head')->first('title')->replace($title); 583 | 584 | // but this will do 585 | $document->first('head title')->replace($title); 586 | 
``` 587 | 588 | More about this in section [Search in element](#search-in-element). 589 | 590 | ## Removing element 591 | 592 | ```php 593 | $document->find('.post')[0]->remove(); 594 | ``` 595 | 596 | **Warning:** you can remove only those elements that were found directly in the document: 597 | 598 | ```php 599 | // nothing will happen 600 | $document->first('head')->first('title')->remove(); 601 | 602 | // but this will do 603 | $document->first('head title')->remove(); 604 | ``` 605 | 606 | More about this in section [Search in element](#search-in-element). 607 | 608 | ## Working with cache 609 | 610 | Cache is an array of XPath expressions, that were converted from CSS. 611 | 612 | #### Getting from cache 613 | 614 | ```php 615 | use DiDom\Query; 616 | 617 | ... 618 | 619 | $xpath = Query::compile('h2'); 620 | $compiled = Query::getCompiled(); 621 | 622 | // array('h2' => '//h2') 623 | var_dump($compiled); 624 | ``` 625 | 626 | #### Cache setting 627 | 628 | ```php 629 | Query::setCompiled(['h2' => '//h2']); 630 | ``` 631 | 632 | ## Miscellaneous 633 | 634 | #### `preserveWhiteSpace` 635 | 636 | By default, whitespace preserving is disabled. 
637 | 638 | You can enable the `preserveWhiteSpace` option before loading the document: 639 | 640 | ```php 641 | $document = new Document(); 642 | 643 | $document->preserveWhiteSpace(); 644 | 645 | $document->loadXml($xml); 646 | ``` 647 | 648 | #### `count` 649 | 650 | The `count ()` method counts children that match the selector: 651 | 652 | ```php 653 | // prints the number of links in the document 654 | echo $document->count('a'); 655 | ``` 656 | 657 | ```php 658 | // prints the number of items in the list 659 | echo $document->first('ul')->count('li'); 660 | ``` 661 | 662 | #### `matches` 663 | 664 | Returns `true` if the node matches the selector: 665 | 666 | ```php 667 | $element->matches('div#content'); 668 | 669 | // strict match 670 | // returns true if the element is a div with id equals content and nothing else 671 | // if the element has any other attributes the method returns false 672 | $element->matches('div#content', true); 673 | ``` 674 | 675 | #### `isElementNode` 676 | 677 | Checks whether an element is an element (DOMElement): 678 | 679 | ```php 680 | $element->isElementNode(); 681 | ``` 682 | 683 | #### `isTextNode` 684 | 685 | Checks whether an element is a text node (DOMText): 686 | 687 | ```php 688 | $element->isTextNode(); 689 | ``` 690 | 691 | #### `isCommentNode` 692 | 693 | Checks whether the element is a comment (DOMComment): 694 | 695 | ```php 696 | $element->isCommentNode(); 697 | ``` 698 | 699 | ## Comparison with other parsers 700 | 701 | [Comparison with other parsers](https://github.com/Imangazaliev/DiDOM/wiki/Comparison-with-other-parsers-(1.0)) 702 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "imangazaliev/didom", 3 | "description": "Simple and fast HTML parser", 4 | "type": "library", 5 | "keywords": ["didom", "parser", "html", "xml"], 6 | "license": "MIT", 7 | "homepage": 
"https://github.com/Imangazaliev/DiDOM", 8 | "authors": [ 9 | { 10 | "name": "Imangazaliev Muhammad", 11 | "email": "imangazalievm@gmail.com" 12 | } 13 | ], 14 | "require": { 15 | "php": ">=7.2", 16 | "ext-dom": "*", 17 | "ext-iconv": "*" 18 | }, 19 | "require-dev": { 20 | "phpunit/phpunit": "^8.5" 21 | }, 22 | "autoload": { 23 | "psr-4": { 24 | "DiDom\\": "src/DiDom/" 25 | } 26 | }, 27 | "autoload-dev": { 28 | "psr-4": { 29 | "DiDom\\Tests\\": "tests/" 30 | } 31 | }, 32 | "config": { 33 | "platform": { 34 | "php": "7.2" 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/DiDom/ClassAttribute.php: -------------------------------------------------------------------------------- 1 | isElementNode()) { 36 | throw new InvalidArgumentException(sprintf('The element must contain DOMElement node.')); 37 | } 38 | 39 | $this->element = $element; 40 | 41 | $this->parseClassAttribute(); 42 | } 43 | 44 | /** 45 | * Parses class attribute of the element. 46 | */ 47 | protected function parseClassAttribute() 48 | { 49 | if ( ! 
$this->element->hasAttribute('class')) { 50 | // possible if class attribute has been removed 51 | if ($this->classesString !== '') { 52 | $this->classesString = ''; 53 | $this->classes = []; 54 | } 55 | 56 | return; 57 | } 58 | 59 | // if class attribute is not changed 60 | if ($this->element->getAttribute('class') === $this->classesString) { 61 | return; 62 | } 63 | 64 | // save class attribute as is (without trimming) 65 | $this->classesString = $this->element->getAttribute('class'); 66 | 67 | $classesString = trim($this->classesString); 68 | 69 | if ($classesString === '') { 70 | $this->classes = []; 71 | 72 | return; 73 | } 74 | 75 | $classes = explode(' ', $classesString); 76 | 77 | $classes = array_map('trim', $classes); 78 | $classes = array_filter($classes); 79 | $classes = array_unique($classes); 80 | 81 | $this->classes = array_values($classes); 82 | } 83 | 84 | /** 85 | * Updates class attribute of the element. 86 | */ 87 | protected function updateClassAttribute() 88 | { 89 | $this->classesString = implode(' ', $this->classes); 90 | 91 | $this->element->setAttribute('class', $this->classesString); 92 | } 93 | 94 | /** 95 | * @param string $className 96 | * 97 | * @return ClassAttribute 98 | * 99 | * @throws InvalidArgumentException if class name is not a string 100 | */ 101 | public function add(string $className): self 102 | { 103 | $this->parseClassAttribute(); 104 | 105 | if (in_array($className, $this->classes, true)) { 106 | return $this; 107 | } 108 | 109 | $this->classes[] = $className; 110 | 111 | $this->updateClassAttribute(); 112 | 113 | return $this; 114 | } 115 | 116 | /** 117 | * @param array $classNames 118 | * 119 | * @return ClassAttribute 120 | * 121 | * @throws InvalidArgumentException if class name is not a string 122 | */ 123 | public function addMultiple(array $classNames): self 124 | { 125 | $this->parseClassAttribute(); 126 | 127 | foreach ($classNames as $className) { 128 | if ( ! 
is_string($className)) { 129 | throw new InvalidArgumentException(sprintf('Class name must be a string, %s given.', (is_object($className) ? get_class($className) : gettype($className)))); 130 | } 131 | 132 | if (in_array($className, $this->classes, true)) { 133 | continue; 134 | } 135 | 136 | $this->classes[] = $className; 137 | } 138 | 139 | $this->updateClassAttribute(); 140 | 141 | return $this; 142 | } 143 | 144 | /** 145 | * @return string[] 146 | */ 147 | public function getAll(): array 148 | { 149 | $this->parseClassAttribute(); 150 | 151 | return $this->classes; 152 | } 153 | 154 | /** 155 | * @param string $className 156 | * 157 | * @return bool 158 | */ 159 | public function contains(string $className): bool 160 | { 161 | $this->parseClassAttribute(); 162 | 163 | return in_array($className, $this->classes, true); 164 | } 165 | 166 | /** 167 | * @param string $className 168 | * 169 | * @return ClassAttribute 170 | * 171 | * @throws InvalidArgumentException if class name is not a string 172 | */ 173 | public function remove(string $className): self 174 | { 175 | $this->parseClassAttribute(); 176 | 177 | $classIndex = array_search($className, $this->classes); 178 | 179 | if ($classIndex === false) { 180 | return $this; 181 | } 182 | 183 | unset($this->classes[$classIndex]); 184 | 185 | $this->updateClassAttribute(); 186 | 187 | return $this; 188 | } 189 | 190 | /** 191 | * @param array $classNames 192 | * 193 | * @return ClassAttribute 194 | * 195 | * @throws InvalidArgumentException if class name is not a string 196 | */ 197 | public function removeMultiple(array $classNames): self 198 | { 199 | $this->parseClassAttribute(); 200 | 201 | foreach ($classNames as $className) { 202 | if ( ! is_string($className)) { 203 | throw new InvalidArgumentException(sprintf('Class name must be a string, %s given.', (is_object($className) ? 
get_class($className) : gettype($className)))); 204 | } 205 | 206 | $classIndex = array_search($className, $this->classes); 207 | 208 | if ($classIndex === false) { 209 | continue; 210 | } 211 | 212 | unset($this->classes[$classIndex]); 213 | } 214 | 215 | $this->updateClassAttribute(); 216 | 217 | return $this; 218 | } 219 | 220 | /** 221 | * @param string[] $preserved 222 | * 223 | * @return ClassAttribute 224 | */ 225 | public function removeAll(array $preserved = []): self 226 | { 227 | $this->parseClassAttribute(); 228 | 229 | $preservedClasses = []; 230 | 231 | foreach ($preserved as $className) { 232 | if ( ! is_string($className)) { 233 | throw new InvalidArgumentException(sprintf('Class name must be a string, %s given.', (is_object($className) ? get_class($className) : gettype($className)))); 234 | } 235 | 236 | if ( ! in_array($className, $this->classes, true)) { 237 | continue; 238 | } 239 | 240 | $preservedClasses[] = $className; 241 | } 242 | 243 | $this->classes = $preservedClasses; 244 | 245 | $this->updateClassAttribute(); 246 | 247 | return $this; 248 | } 249 | 250 | /** 251 | * @return Element 252 | */ 253 | public function getElement(): Element 254 | { 255 | return $this->element; 256 | } 257 | } 258 | -------------------------------------------------------------------------------- /src/DiDom/Document.php: -------------------------------------------------------------------------------- 1 | 'http://php.net/xpath' 50 | ]; 51 | 52 | /** 53 | * @param DOMDocument|string|null $string An HTML or XML string, a file path or a DOMDocument instance 54 | * @param bool $isFile Indicates that the first parameter is a path to a file 55 | * @param string $encoding The document encoding 56 | * @param string $type The document type 57 | * 58 | * @throws InvalidArgumentException if parameter 3 is not a string 59 | */ 60 | public function __construct($string = null, bool $isFile = false, string $encoding = 'UTF-8', string $type = Document::TYPE_HTML) 61 | { 62 | if 
($string instanceof DOMDocument) { 63 | $this->document = $string; 64 | 65 | return; 66 | } 67 | 68 | $this->encoding = $encoding; 69 | 70 | $this->document = new DOMDocument('1.0', $encoding); 71 | 72 | $this->preserveWhiteSpace(false); 73 | 74 | if ($string !== null) { 75 | $this->load($string, $isFile, $type); 76 | } 77 | } 78 | 79 | /** 80 | * Creates a new document. 81 | * 82 | * @param DOMDocument|string|null $string An HTML or XML string, a file path or a DOMDocument instance 83 | * @param bool $isFile Indicates that the first parameter is a path to a file 84 | * @param string $encoding The document encoding 85 | * @param string $type The document type 86 | * 87 | * @return Document 88 | */ 89 | public static function create($string = null, bool $isFile = false, string $encoding = 'UTF-8', string $type = Document::TYPE_HTML) 90 | { 91 | return new Document($string, $isFile, $encoding, $type); 92 | } 93 | 94 | /** 95 | * Creates a new element node. 96 | * 97 | * @param string $name The tag name of the element 98 | * @param string|null $value The value of the element 99 | * @param array $attributes The attributes of the element 100 | * 101 | * @return Element created element 102 | */ 103 | public function createElement(string $name, ?string $value = null, array $attributes = []): Element 104 | { 105 | $node = $this->document->createElement($name); 106 | 107 | return new Element($node, $value, $attributes); 108 | } 109 | 110 | /** 111 | * Creates a new element node by CSS selector. 112 | * 113 | * @param string $selector 114 | * @param string|null $value 115 | * @param array $attributes 116 | * 117 | * @return Element 118 | * 119 | * @throws InvalidSelectorException 120 | */ 121 | public function createElementBySelector(string $selector, ?string $value = null, array $attributes = []): Element 122 | { 123 | $segments = Query::getSegments($selector); 124 | 125 | $name = array_key_exists('tag', $segments) ? 
$segments['tag'] : 'div'; 126 | 127 | if (array_key_exists('attributes', $segments)) { 128 | $attributes = array_merge($attributes, $segments['attributes']); 129 | } 130 | 131 | if (array_key_exists('id', $segments)) { 132 | $attributes['id'] = $segments['id']; 133 | } 134 | 135 | if (array_key_exists('classes', $segments)) { 136 | $attributes['class'] = implode(' ', $segments['classes']); 137 | } 138 | 139 | return $this->createElement($name, $value, $attributes); 140 | } 141 | 142 | /** 143 | * @param string $content 144 | * 145 | * @return Element 146 | */ 147 | public function createTextNode(string $content): Element 148 | { 149 | return new Element(new DOMText($content)); 150 | } 151 | 152 | /** 153 | * @param string $data 154 | * 155 | * @return Element 156 | */ 157 | public function createComment(string $data): Element 158 | { 159 | return new Element(new DOMComment($data)); 160 | } 161 | 162 | /** 163 | * @param string $data 164 | * 165 | * @return Element 166 | */ 167 | public function createCdataSection(string $data): Element 168 | { 169 | return new Element(new DOMCdataSection($data)); 170 | } 171 | 172 | /** 173 | * @return DocumentFragment 174 | */ 175 | public function createDocumentFragment(): DocumentFragment 176 | { 177 | return new DocumentFragment($this->document->createDocumentFragment()); 178 | } 179 | 180 | /** 181 | * Adds a new child at the end of the children. 182 | * 183 | * @param Element|DOMNode|array $nodes The appended child 184 | * 185 | * @return Element|Element[] 186 | * 187 | * @throws InvalidArgumentException if one of elements of parameter 1 is not an instance of DOMNode or Element 188 | */ 189 | public function appendChild($nodes) 190 | { 191 | $returnArray = true; 192 | 193 | if ( ! is_array($nodes)) { 194 | $nodes = [$nodes]; 195 | 196 | $returnArray = false; 197 | } 198 | 199 | $result = []; 200 | 201 | foreach ($nodes as $node) { 202 | if ($node instanceof Element) { 203 | $node = $node->getNode(); 204 | } 205 | 206 | if ( ! 
$node instanceof DOMNode) { 207 | throw new InvalidArgumentException(sprintf('Argument 1 passed to %s must be an instance of %s\Element or DOMNode, %s given.', __METHOD__, __NAMESPACE__, (is_object($node) ? get_class($node) : gettype($node)))); 208 | } 209 | 210 | Errors::disable(); 211 | 212 | $cloned = $node->cloneNode(true); 213 | $newNode = $this->document->importNode($cloned, true); 214 | 215 | $result[] = $this->document->appendChild($newNode); 216 | 217 | Errors::restore(); 218 | } 219 | 220 | $result = array_map(function (DOMNode $node) { 221 | return new Element($node); 222 | }, $result); 223 | 224 | return $returnArray ? $result : $result[0]; 225 | } 226 | 227 | /** 228 | * Set preserveWhiteSpace property. 229 | * 230 | * @param bool $value 231 | * 232 | * @return Document 233 | */ 234 | public function preserveWhiteSpace(bool $value = true): self 235 | { 236 | $this->document->preserveWhiteSpace = $value; 237 | 238 | return $this; 239 | } 240 | 241 | /** 242 | * Load HTML or XML. 243 | * 244 | * @param string $string An HTML or XML string or a file path 245 | * @param bool $isFile Indicates that the first parameter is a file path 246 | * @param string $type The type of a document 247 | * @param int|null $options libxml option constants 248 | * 249 | * @throws InvalidArgumentException if parameter 1 is not a string 250 | * @throws InvalidArgumentException if parameter 3 is not a string 251 | * @throws InvalidArgumentException if parameter 4 is not an integer or null 252 | * @throws RuntimeException if the document type is invalid (not Document::TYPE_HTML or Document::TYPE_XML) 253 | */ 254 | public function load(string $string, bool $isFile = false, string $type = Document::TYPE_HTML, int $options = null): void 255 | { 256 | if ( ! 
in_array(strtolower($type), [Document::TYPE_HTML, Document::TYPE_XML], true)) { 257 | throw new RuntimeException(sprintf('Document type must be "xml" or "html", %s given.', $type)); 258 | } 259 | 260 | if ($options === null) { 261 | // LIBXML_HTML_NODEFDTD - prevents a default doctype being added when one is not found 262 | $options = LIBXML_HTML_NODEFDTD; 263 | } 264 | 265 | $string = trim($string); 266 | 267 | if ($isFile) { 268 | $string = $this->loadFile($string); 269 | } 270 | 271 | if (strtolower($type) === Document::TYPE_HTML) { 272 | $string = Encoder::convertToHtmlEntities($string, $this->encoding); 273 | } 274 | 275 | $this->type = strtolower($type); 276 | 277 | Errors::disable(); 278 | 279 | if ($this->type === Document::TYPE_HTML) { 280 | $this->document->loadHtml($string, $options); 281 | } else { 282 | $this->document->loadXml($string, $options); 283 | } 284 | 285 | Errors::restore(); 286 | } 287 | 288 | /** 289 | * Load HTML from a string. 290 | * 291 | * @param string $html The HTML string 292 | * @param int|null $options Additional parameters 293 | * 294 | * @return Document 295 | * 296 | * @throws InvalidArgumentException if parameter 1 is not a string 297 | */ 298 | public function loadHtml(string $html, ?int $options = null): void 299 | { 300 | $this->load($html, false, Document::TYPE_HTML, $options); 301 | } 302 | 303 | /** 304 | * Load HTML from a file. 305 | * 306 | * @param string $filename The path to the HTML file 307 | * @param int|null $options Additional parameters 308 | * 309 | * @throws InvalidArgumentException if parameter 1 not a string 310 | * @throws RuntimeException if the file doesn't exist 311 | * @throws RuntimeException if you are unable to load the file 312 | */ 313 | public function loadHtmlFile(string $filename, ?int $options = null): void 314 | { 315 | $this->load($filename, true, Document::TYPE_HTML, $options); 316 | } 317 | 318 | /** 319 | * Load XML from a string. 
*
     * @param string   $xml     The XML string
     * @param int|null $options Additional parameters
     */
    public function loadXml(string $xml, ?int $options = null): void
    {
        $this->load($xml, false, Document::TYPE_XML, $options);
    }

    /**
     * Load XML from a file.
     *
     * @param string   $filename The path to the XML file
     * @param int|null $options  Additional parameters
     *
     * @throws RuntimeException if the file could not be loaded
     */
    public function loadXmlFile(string $filename, ?int $options = null): void
    {
        $this->load($filename, true, Document::TYPE_XML, $options);
    }

    /**
     * Reads entire file into a string.
     *
     * @param string $filename The path to the file
     *
     * @return string
     *
     * @throws RuntimeException if the file could not be read
     */
    protected function loadFile(string $filename): string
    {
        try {
            $content = file_get_contents($filename);
        } catch (Exception $exception) {
            // Reached only when an error handler turns the read warning into an exception;
            // chain it so the underlying cause is not lost
            throw new RuntimeException(sprintf('Could not load file %s.', $filename), 0, $exception);
        }

        if ($content === false) {
            throw new RuntimeException(sprintf('Could not load file %s.', $filename));
        }

        return $content;
    }

    /**
     * Checks the existence of the node.
*
     * @param string $expression XPath expression or CSS selector
     * @param string $type       The type of the expression
     *
     * @return bool
     *
     * @throws InvalidSelectorException if the selector is invalid
     */
    public function has(string $expression, string $type = Query::TYPE_CSS): bool
    {
        $expression = Query::compile($expression, $type);
        $expression = sprintf('count(%s) > 0', $expression);

        return $this->createXpath()->evaluate($expression);
    }

    /**
     * Searches for a node in the DOM tree for a given XPath expression or CSS selector.
     *
     * @param string                  $expression  XPath expression or a CSS selector
     * @param string                  $type        The type of the expression
     * @param bool                    $wrapNode    Wraps every match (see wrapNode()) if true, otherwise returns raw DOM nodes
     * @param Element|DOMElement|null $contextNode The node in which the search will be performed
     *
     * @return Element[]|string[]|DOMNode[]
     *
     * @throws InvalidSelectorException if the selector is invalid
     * @throws InvalidArgumentException if context node is not DOMElement
     */
    public function find(string $expression, string $type = Query::TYPE_CSS, bool $wrapNode = true, $contextNode = null): array
    {
        $expression = Query::compile($expression, $type);

        if ($contextNode !== null) {
            if ($contextNode instanceof Element) {
                $contextNode = $contextNode->getNode();
            }

            if ( ! $contextNode instanceof DOMElement) {
                throw new InvalidArgumentException(sprintf('Argument 4 passed to %s must be an instance of %s\Element or DOMElement, %s given.', __METHOD__, __NAMESPACE__, (is_object($contextNode) ? get_class($contextNode) : gettype($contextNode))));
            }

            if ($type === Query::TYPE_CSS) {
                // a leading dot makes the compiled XPath relative to the context node
                $expression = '.' . $expression;
            }
        }

        $nodeList = $this->createXpath()->query($expression, $contextNode);

        $result = [];

        // DOMXPath::query() yields false for a malformed expression; there is nothing to iterate then
        if ($nodeList === false) {
            return $result;
        }

        foreach ($nodeList as $node) {
            $result[] = $wrapNode ? $this->wrapNode($node) : $node;
        }

        return $result;
    }

    /**
     * Searches for a node in the DOM tree and returns first element or null.
     *
     * @param string                  $expression  XPath expression or a CSS selector
     * @param string                  $type        The type of the expression
     * @param bool                    $wrapNode    Wraps the match (see wrapNode()) if true, otherwise returns the raw DOM node
     * @param Element|DOMElement|null $contextNode The node in which the search will be performed
     *
     * @return Element|string|DOMNode|null
     *
     * @throws InvalidSelectorException if the selector is invalid
     * @throws InvalidArgumentException if context node is not DOMElement
     */
    public function first(string $expression, string $type = Query::TYPE_CSS, bool $wrapNode = true, $contextNode = null)
    {
        $expression = Query::compile($expression, $type);

        if ($contextNode !== null && $type === Query::TYPE_CSS) {
            $expression = '.' . $expression;
        }

        // let XPath itself cut the result down to the first match
        $expression = sprintf('(%s)[1]', $expression);

        // the expression is already compiled, so it is handed over as plain XPath
        $nodes = $this->find($expression, Query::TYPE_XPATH, false, $contextNode);

        if (count($nodes) === 0) {
            return null;
        }

        return $wrapNode ? $this->wrapNode($nodes[0]) : $nodes[0];
    }

    /**
     * Converts a raw DOM node into the value exposed to callers:
     * elements, comments and CDATA sections become Element, text nodes and attributes become strings.
     *
     * @param DOMElement|DOMComment|DOMCdataSection|DOMText|DOMAttr $node
     *
     * @return Element|string
     *
     * @throws InvalidArgumentException if parameter 1 is not an instance of DOMElement, DOMText, DOMComment, DOMCdataSection or DOMAttr
     */
    protected function wrapNode($node)
    {
        // Class names are fully qualified so the checks do not depend on this file's import list.
        // instanceof (rather than an exact class-name match) also accepts subclasses of the DOM classes.
        // DOMCdataSection extends DOMText, so it has to be tested before the DOMText branch.
        if ($node instanceof \DOMElement || $node instanceof \DOMComment || $node instanceof \DOMCdataSection) {
            return new Element($node);
        }

        if ($node instanceof \DOMText) {
            return $node->data;
        }

        if ($node instanceof \DOMAttr) {
            return $node->value;
        }

        throw new InvalidArgumentException(sprintf('Unknown node type "%s".', get_class($node)));
    }

    /**
     * Searches for a node in the DOM tree for a given XPath expression.
     *
     * @param string                  $expression  XPath expression
     * @param bool                    $wrapNode    Wraps every match (see wrapNode()) if true, otherwise returns raw DOM nodes
     * @param Element|DOMElement|null $contextNode The node in which the search will be performed
     *
     * @return Element[]|string[]|DOMNode[]
     */
    public function xpath(string $expression, bool $wrapNode = true, $contextNode = null): array
    {
        return $this->find($expression, Query::TYPE_XPATH, $wrapNode, $contextNode);
    }

    /**
     * Counts nodes for a given XPath expression or CSS selector.
*
     * @param string $expression XPath expression or CSS selector
     * @param string $type       The type of the expression
     *
     * @return int
     *
     * @throws InvalidSelectorException
     */
    public function count(string $expression, string $type = Query::TYPE_CSS): int
    {
        $expression = Query::compile($expression, $type);
        $expression = sprintf('count(%s)', $expression);

        return (int) $this->createXpath()->evaluate($expression);
    }

    /**
     * Creates a DOMXPath for the document with every registered namespace
     * and PHP function support enabled.
     *
     * @return DOMXPath
     */
    public function createXpath(): DOMXPath
    {
        $xpath = new DOMXPath($this->document);

        foreach ($this->namespaces as $prefix => $namespace) {
            $xpath->registerNamespace($prefix, $namespace);
        }

        $xpath->registerPhpFunctions();

        return $xpath;
    }

    /**
     * Register a namespace.
     *
     * @param string $prefix
     * @param string $namespace
     */
    public function registerNamespace(string $prefix, string $namespace)
    {
        $this->namespaces[$prefix] = $namespace;
    }

    /**
     * Dumps the internal document into a string using HTML formatting.
     *
     * @return string The document html
     */
    public function html(): string
    {
        return trim($this->document->saveHTML($this->document));
    }

    /**
     * Dumps the internal document into a string using XML formatting.
     *
     * @param int|null $options Additional options; null is treated as 0 (no options)
     *
     * @return string The document xml
     */
    public function xml(?int $options = 0): string
    {
        // saveXML() takes a non-nullable int, so a null must not be forwarded as is
        return trim($this->document->saveXML($this->document, $options ?? 0));
    }

    /**
     * Nicely formats output with indentation and extra space.
*
     * @param bool $format Formats output if true
     *
     * @return Document
     */
    public function format(bool $format = true): self
    {
        $this->document->formatOutput = $format;

        return $this;
    }

    /**
     * Get the text content of this node and its descendants.
     *
     * @return string The text of the root element, or an empty string if the document has no root element
     */
    public function text(): string
    {
        $element = $this->getElement();

        // an empty document has no root element to read the text from
        if ($element === null) {
            return '';
        }

        return $element->textContent;
    }

    /**
     * Indicates if two documents are the same document.
     *
     * The comparison is made on the root elements, so it is false whenever
     * either document has no root element.
     *
     * @param Document|DOMDocument $document The compared document
     *
     * @return bool
     *
     * @throws InvalidArgumentException if parameter 1 is not an instance of DOMDocument or Document
     */
    public function is($document): bool
    {
        if ($document instanceof Document) {
            $element = $document->getElement();
        } else {
            if ( ! $document instanceof DOMDocument) {
                throw new InvalidArgumentException(sprintf('Argument 1 passed to %s must be an instance of %s or DOMDocument, %s given.', __METHOD__, __CLASS__, (is_object($document) ? get_class($document) : gettype($document))));
            }

            $element = $document->documentElement;
        }

        $ownElement = $this->getElement();

        // without a root element on either side there is nothing to compare
        if ($element === null || $ownElement === null) {
            return false;
        }

        return $ownElement->isSameNode($element);
    }

    /**
     * Returns the type of the document (XML or HTML).
     *
     * @return string|null
     */
    public function getType(): ?string
    {
        return $this->type;
    }

    /**
     * Returns the encoding of the document.
636 | * 637 | * @return string|null 638 | */ 639 | public function getEncoding(): ?string 640 | { 641 | return $this->encoding; 642 | } 643 | 644 | /** 645 | * @return DOMDocument 646 | */ 647 | public function getDocument(): DOMDocument 648 | { 649 | return $this->document; 650 | } 651 | 652 | /** 653 | * @return DOMElement|null 654 | */ 655 | public function getElement(): ?DOMElement 656 | { 657 | return $this->document->documentElement; 658 | } 659 | 660 | /** 661 | * @return Element 662 | */ 663 | public function toElement(): Element 664 | { 665 | if ($this->document->documentElement === null) { 666 | throw new RuntimeException('Cannot convert empty document to Element.'); 667 | } 668 | 669 | return new Element($this->document->documentElement); 670 | } 671 | 672 | /** 673 | * Convert the document to its string representation. 674 | * 675 | * @return string 676 | */ 677 | public function __toString(): string 678 | { 679 | return $this->type === Document::TYPE_HTML ? $this->html() : $this->xml(); 680 | } 681 | } 682 | -------------------------------------------------------------------------------- /src/DiDom/DocumentFragment.php: -------------------------------------------------------------------------------- 1 | setNode($documentFragment); 20 | } 21 | 22 | /** 23 | * Append raw XML data. 24 | * 25 | * @param string $data 26 | */ 27 | public function appendXml($data) 28 | { 29 | $this->node->appendXML($data); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/DiDom/Element.php: -------------------------------------------------------------------------------- 1 | createElement($tagName); 41 | 42 | $this->setNode($node); 43 | } else { 44 | $this->setNode($tagName); 45 | } 46 | 47 | if ($value !== null) { 48 | $this->setValue($value); 49 | } 50 | 51 | foreach ($attributes as $attrName => $attrValue) { 52 | $this->setAttribute($attrName, $attrValue); 53 | } 54 | } 55 | 56 | /** 57 | * Creates a new element. 
*
     * @param DOMNode|string            $name       The tag name of an element
     * @param string|integer|float|null $value      The value of an element
     * @param array                     $attributes The attributes of an element
     *
     * @return Element
     */
    public static function create($name, $value = null, array $attributes = []): self
    {
        return new Element($name, $value, $attributes);
    }

    /**
     * Creates a new element node by CSS selector.
     *
     * @param string      $selector
     * @param string|null $value
     * @param array       $attributes
     *
     * @return Element
     *
     * @throws InvalidSelectorException
     */
    public static function createBySelector(string $selector, ?string $value = null, array $attributes = []): self
    {
        return Document::create()->createElementBySelector($selector, $value, $attributes);
    }

    /**
     * Returns the tag name of the underlying node.
     *
     * @return string
     */
    public function tagName(): string
    {
        return $this->node->tagName;
    }

    /**
     * Checks that the node matches selector.
     *
     * In non-strict mode the element is serialized into a temporary document and the selector is
     * evaluated there. In strict mode the tag, id, classes and attributes of the selector must be
     * exactly those of the element.
     *
     * @param string $selector CSS selector
     * @param bool   $strict
     *
     * @return bool
     *
     * @throws InvalidSelectorException if the selector is invalid
     * @throws RuntimeException if the tag name is not specified in strict mode
     */
    public function matches(string $selector, bool $strict = false): bool
    {
        if ( ! $this->node instanceof DOMElement) {
            return false;
        }

        if ($selector === '*') {
            return true;
        }

        if ( ! $strict) {
            // The selector below is anchored to a synthetic <root> parent ("root > ..."),
            // so the serialized element has to be wrapped in that parent first
            $html = '<root>' . $this->html() . '</root>';

            $selector = 'root > ' . trim($selector);

            $document = new Document();

            $document->loadHtml($html, LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED);

            return $document->has($selector);
        }

        $segments = Query::getSegments($selector);

        if ( ! array_key_exists('tag', $segments)) {
            throw new RuntimeException(sprintf('Tag name must be specified in %s', $selector));
        }

        if ($segments['tag'] !== $this->tagName() && $segments['tag'] !== '*') {
            return false;
        }

        $segments['id'] = array_key_exists('id', $segments) ? $segments['id'] : null;

        if ($segments['id'] !== $this->getAttribute('id')) {
            return false;
        }

        // split on any run of whitespace so that repeated spaces or an empty attribute do not yield empty class names
        $classes = $this->hasAttribute('class')
            ? preg_split('/\s+/', $this->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY)
            : [];

        $segments['classes'] = array_key_exists('classes', $segments) ? $segments['classes'] : [];

        // the class sets must be equal in both directions
        $diff1 = array_diff($segments['classes'], $classes);
        $diff2 = array_diff($classes, $segments['classes']);

        if (count($diff1) > 0 || count($diff2) > 0) {
            return false;
        }

        $attributes = $this->attributes();

        // id and class have already been compared above
        unset($attributes['id'], $attributes['class']);

        $segments['attributes'] = array_key_exists('attributes', $segments) ? $segments['attributes'] : [];

        $diff1 = array_diff_assoc($segments['attributes'], $attributes);
        $diff2 = array_diff_assoc($attributes, $segments['attributes']);

        // if the attributes are not equal
        if (count($diff1) > 0 || count($diff2) > 0) {
            return false;
        }

        return true;
    }

    /**
     * Determine if an attribute exists on the element.
     *
     * @param string $name The name of an attribute
     *
     * @return bool
     */
    public function hasAttribute(string $name): bool
    {
        return $this->node->hasAttribute($name);
    }

    /**
     * Set an attribute on the element.
*
     * @param string               $name  The name of an attribute
     * @param string|integer|float $value The value of an attribute
     *
     * @return Element
     *
     * @throws InvalidArgumentException if the value is neither a string nor numeric
     */
    public function setAttribute(string $name, $value): Element
    {
        if (is_numeric($value)) {
            $value = (string) $value;
        }

        if ( ! is_string($value)) {
            // the message lists what is really accepted: null is rejected, numbers are allowed
            throw new InvalidArgumentException(sprintf('%s expects parameter 2 to be string, integer or float, %s given.', __METHOD__, (is_object($value) ? get_class($value) : gettype($value))));
        }

        $this->node->setAttribute($name, $value);

        return $this;
    }

    /**
     * Access to the element's attributes.
     *
     * @param string      $name    The name of an attribute
     * @param string|null $default The value returned if the attribute doesn't exist
     *
     * @return string|null The value of an attribute or $default if attribute doesn't exist
     */
    public function getAttribute(string $name, ?string $default = null): ?string
    {
        if ($this->hasAttribute($name)) {
            return $this->node->getAttribute($name);
        }

        return $default;
    }

    /**
     * Unset an attribute on the element.
     *
     * @param string $name The name of an attribute
     *
     * @return Element
     */
    public function removeAttribute(string $name): self
    {
        $this->node->removeAttribute($name);

        return $this;
    }

    /**
     * Unset all attributes of the element.
     *
     * @param string[] $preserved Names of the attributes that must be kept
     *
     * @return Element
     */
    public function removeAllAttributes(array $preserved = []): self
    {
        if ( ! $this->node instanceof DOMElement) {
            return $this;
        }

        foreach ($this->attributes() as $name => $value) {
            if (in_array($name, $preserved, true)) {
                continue;
            }

            $this->node->removeAttribute($name);
        }

        return $this;
    }

    /**
     * Alias for getAttribute and setAttribute methods.
     *
     * @param string      $name  The name of an attribute
     * @param string|null $value The value to set; when null, the current value of the attribute is returned instead
     *
     * @return string|null|Element The attribute value when reading, the element itself when writing
     */
    public function attr(string $name, ?string $value = null)
    {
        if ($value === null) {
            return $this->getAttribute($name);
        }

        return $this->setAttribute($name, $value);
    }

    /**
     * Returns the node attributes or null, if it is not DOMElement.
     *
     * @param string[]|null $names Names of the attributes to return; null returns all of them
     *
     * @return array|null
     */
    public function attributes(?array $names = null): ?array
    {
        if ( ! $this->node instanceof DOMElement) {
            return null;
        }

        $result = [];

        foreach ($this->node->attributes as $name => $attribute) {
            // without a filter every attribute is returned
            if ($names === null || in_array($name, $names, true)) {
                $result[$name] = $attribute->value;
            }
        }

        return $result;
    }

    /**
     * Returns the helper for the "class" attribute, creating it on first use.
     *
     * @return ClassAttribute
     *
     * @throws LogicException if the node is not an element node
     */
    public function classes(): ClassAttribute
    {
        if ($this->classAttribute !== null) {
            return $this->classAttribute;
        }

        if ( ! $this->isElementNode()) {
            throw new LogicException('Class attribute is available only for element nodes.');
        }

        $this->classAttribute = new ClassAttribute($this);

        return $this->classAttribute;
    }

    /**
     * Returns the helper for the "style" attribute, creating it on first use.
     *
     * @return StyleAttribute
     *
     * @throws LogicException if the node is not an element node
     */
    public function style(): StyleAttribute
    {
        if ($this->styleAttribute !== null) {
            return $this->styleAttribute;
        }

        if ( ! $this->isElementNode()) {
            throw new LogicException('Style attribute is available only for element nodes.');
        }

        $this->styleAttribute = new StyleAttribute($this);

        return $this->styleAttribute;
    }

    /**
     * Dynamically set an attribute on the element.
     *
     * @param string               $name  The name of an attribute
     * @param string|integer|float $value The value of an attribute
     *
     * @return Element
     */
    public function __set(string $name, $value)
    {
        return $this->setAttribute($name, $value);
    }

    /**
     * Dynamically access the element's attributes.
     *
     * @param string $name The name of an attribute
     *
     * @return string|null
     */
    public function __get(string $name): ?string
    {
        return $this->getAttribute($name);
    }

    /**
     * Determine if an attribute exists on the element.
     *
     * @param string $name The attribute name
     *
     * @return bool
     */
    public function __isset(string $name): bool
    {
        return $this->hasAttribute($name);
    }

    /**
     * Unset an attribute on the model.
390 | * 391 | * @param string $name The name of an attribute 392 | */ 393 | public function __unset(string $name) 394 | { 395 | $this->removeAttribute($name); 396 | } 397 | } 398 | -------------------------------------------------------------------------------- /src/DiDom/Encoder.php: -------------------------------------------------------------------------------- 1 | $codes[$characterIndex]) { 43 | $entities .= chr($codes[$characterIndex++]); 44 | 45 | continue; 46 | } 47 | 48 | if (0xF0 <= $codes[$characterIndex]) { 49 | $code = (($codes[$characterIndex++] - 0xF0) << 18) + (($codes[$characterIndex++] - 0x80) << 12) + (($codes[$characterIndex++] - 0x80) << 6) + $codes[$characterIndex++] - 0x80; 50 | } elseif (0xE0 <= $codes[$characterIndex]) { 51 | $code = (($codes[$characterIndex++] - 0xE0) << 12) + (($codes[$characterIndex++] - 0x80) << 6) + $codes[$characterIndex++] - 0x80; 52 | } else { 53 | $code = (($codes[$characterIndex++] - 0xC0) << 6) + $codes[$characterIndex++] - 0x80; 54 | } 55 | 56 | $entities .= '&#' . $code . ';'; 57 | } 58 | 59 | return $entities; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/DiDom/Errors.php: -------------------------------------------------------------------------------- 1 | node->ownerDocument === null) { 44 | throw new LogicException('Can not prepend a child to element without the owner document.'); 45 | } 46 | 47 | $returnArray = true; 48 | 49 | if ( ! is_array($nodes)) { 50 | $nodes = [$nodes]; 51 | 52 | $returnArray = false; 53 | } 54 | 55 | $nodes = array_reverse($nodes); 56 | 57 | $result = []; 58 | 59 | $referenceNode = $this->node->firstChild; 60 | 61 | foreach ($nodes as $node) { 62 | $result[] = $this->insertBefore($node, $referenceNode); 63 | 64 | $referenceNode = $this->node->firstChild; 65 | } 66 | 67 | return $returnArray ? $result : $result[0]; 68 | } 69 | 70 | /** 71 | * Adds a new child at the end of the children. 
*
     * @param Node|DOMNode|array $nodes The appended child (or a list of children)
     *
     * @return Element|Element[] A single Element for a single node, an array when an array was passed
     *
     * @throws LogicException if the current node has no owner document
     * @throws InvalidArgumentException if the provided argument is not an instance of DOMNode or Node
     */
    public function appendChild($nodes)
    {
        if ($this->node->ownerDocument === null) {
            throw new LogicException('Can not append a child to element without the owner document.');
        }

        $returnArray = true;

        if ( ! is_array($nodes)) {
            $nodes = [$nodes];

            $returnArray = false;
        }

        $result = [];

        Errors::disable();

        try {
            foreach ($nodes as $node) {
                if ($node instanceof Node) {
                    $node = $node->getNode();
                }

                if ( ! $node instanceof DOMNode) {
                    throw new InvalidArgumentException(sprintf('Argument 1 passed to %s must be an instance of %s or DOMNode, %s given.', __METHOD__, __CLASS__, (is_object($node) ? get_class($node) : gettype($node))));
                }

                // a copy is imported, so the passed node itself is left where it was
                $clonedNode = $node->cloneNode(true);
                $newNode = $this->node->ownerDocument->importNode($clonedNode, true);

                $result[] = $this->node->appendChild($newNode);
            }
        } finally {
            // must also run when an invalid node aborts the loop
            Errors::restore();
        }

        $result = array_map(function (DOMNode $node) {
            return new Element($node);
        }, $result);

        return $returnArray ? $result : $result[0];
    }

    /**
     * Adds a new child before a reference node.
*
     * @param Node|DOMNode      $node          The new node
     * @param Node|DOMNode|null $referenceNode The reference node; when null the new node is added at the end of the children
     *
     * @return Element
     *
     * @throws LogicException if the current node has no owner document
     * @throws InvalidArgumentException if $node is not an instance of DOMNode or Node
     * @throws InvalidArgumentException if $referenceNode is not an instance of DOMNode or Node
     */
    public function insertBefore($node, $referenceNode = null): self
    {
        if ($this->node->ownerDocument === null) {
            throw new LogicException('Can not insert a child to an element without the owner document.');
        }

        if ($node instanceof Node) {
            $node = $node->getNode();
        }

        if ( ! $node instanceof DOMNode) {
            throw new InvalidArgumentException(sprintf('Argument 1 passed to %s must be an instance of %s or DOMNode, %s given.', __METHOD__, __CLASS__, (is_object($node) ? get_class($node) : gettype($node))));
        }

        if ($referenceNode !== null) {
            // any Node wrapper is unwrapped here, the same way insertAfter() treats its reference node
            if ($referenceNode instanceof Node) {
                $referenceNode = $referenceNode->getNode();
            }

            if ( ! $referenceNode instanceof DOMNode) {
                throw new InvalidArgumentException(sprintf('Argument 2 passed to %s must be an instance of %s or DOMNode, %s given.', __METHOD__, __CLASS__, (is_object($referenceNode) ? get_class($referenceNode) : gettype($referenceNode))));
            }
        }

        Errors::disable();

        try {
            // a copy is imported, so the passed node itself is left where it was
            $clonedNode = $node->cloneNode(true);
            $newNode = $this->node->ownerDocument->importNode($clonedNode, true);

            $insertedNode = $this->node->insertBefore($newNode, $referenceNode);
        } finally {
            // must also run when the DOM operation throws
            Errors::restore();
        }

        return new Element($insertedNode);
    }

    /**
     * Adds a new child after a reference node.
*
     * @param Node|DOMNode      $node          The new node
     * @param Node|DOMNode|null $referenceNode The reference node; when null the call is forwarded to insertBefore() without a reference
     *
     * @return Element
     *
     * @throws LogicException if the current node has no owner document
     * @throws InvalidArgumentException if $node is not an instance of DOMNode or Node
     * @throws InvalidArgumentException if $referenceNode is not an instance of DOMNode or Node
     */
    public function insertAfter($node, $referenceNode = null): self
    {
        if ($referenceNode === null) {
            return $this->insertBefore($node);
        }

        if ($referenceNode instanceof Node) {
            $referenceNode = $referenceNode->getNode();
        }

        if ( ! $referenceNode instanceof DOMNode) {
            throw new InvalidArgumentException(sprintf('Argument 2 passed to %s must be an instance of %s or DOMNode, %s given.', __METHOD__, __CLASS__, (is_object($referenceNode) ? get_class($referenceNode) : gettype($referenceNode))));
        }

        // "after the reference" is the same as "before the node that follows the reference"
        return $this->insertBefore($node, $referenceNode->nextSibling);
    }

    /**
     * Adds a new sibling before the current node.
     *
     * @param Node|DOMNode $node The new node
     *
     * @return Element
     *
     * @throws LogicException if the current node has no owner document
     * @throws LogicException if the current node has no parent
     * @throws InvalidArgumentException if $node is not an instance of DOMNode or Node
     */
    public function insertSiblingBefore($node): self
    {
        if ($this->node->ownerDocument === null) {
            throw new LogicException('Can not insert a child to an element without the owner document.');
        }

        $parent = $this->parent();

        if ($parent === null) {
            throw new LogicException('Can not insert a child to an element without the parent element.');
        }

        if ($node instanceof Node) {
            $node = $node->getNode();
        }

        if ( ! $node instanceof DOMNode) {
            throw new InvalidArgumentException(sprintf('Argument 1 passed to %s must be an instance of %s or DOMNode, %s given.', __METHOD__, __CLASS__, (is_object($node) ? get_class($node) : gettype($node))));
        }

        Errors::disable();

        try {
            // a copy is imported, so the passed node itself is left where it was
            $clonedNode = $node->cloneNode(true);
            $newNode = $this->node->ownerDocument->importNode($clonedNode, true);

            $insertedNode = $parent->getNode()->insertBefore($newNode, $this->node);
        } finally {
            // must also run when the DOM operation throws
            Errors::restore();
        }

        return new Element($insertedNode);
    }

    /**
     * Adds a new sibling after the current node.
     *
     * @param Node|DOMNode $node The new node
     *
     * @return Element
     *
     * @throws LogicException if the current node has no owner document
     * @throws LogicException if the current node has no parent
     * @throws InvalidArgumentException if $node is not an instance of DOMNode or Node
     */
    public function insertSiblingAfter($node): self
    {
        if ($this->node->ownerDocument === null) {
            throw new LogicException('Can not insert a child to an element without the owner document.');
        }

        $parent = $this->parent();

        if ($parent === null) {
            throw new LogicException('Can not insert a child to an element without the parent element.');
        }

        $nextSibling = $this->nextSibling();

        // if the current node is the last child
        if ($nextSibling === null) {
            return $parent->appendChild($node);
        }

        return $nextSibling->insertSiblingBefore($node);
    }

    /**
     * Checks the existence of the node.
*
     * @param string $expression XPath expression or CSS selector
     * @param string $type       The type of the expression
     *
     * @return bool
     */
    public function has(string $expression, string $type = Query::TYPE_CSS): bool
    {
        $document = $this->toDocument();

        return $document->has($expression, $type);
    }

    /**
     * Runs an XPath expression or CSS selector against this node converted to a document
     * and returns every match.
     *
     * @param string $expression  XPath expression or CSS selector
     * @param string $type        The type of the expression
     * @param bool   $wrapElement Returns array of Element if true, otherwise array of DOMElement
     *
     * @return Element[]|DOMElement[]
     *
     * @throws InvalidSelectorException
     */
    public function find(string $expression, string $type = Query::TYPE_CSS, bool $wrapElement = true): array
    {
        $document = $this->toDocument();

        return $document->find($expression, $type, $wrapElement);
    }

    /**
     * Runs an XPath expression or CSS selector in the owner document, with the current node
     * as the search context.
     *
     * @param string $expression XPath expression or CSS selector
     * @param string $type       The type of the expression
     * @param bool   $wrapNode   Returns array of Element if true, otherwise array of DOMElement
     *
     * @return Element[]|DOMElement[]
     *
     * @throws LogicException if the current node has no owner document
     * @throws InvalidSelectorException
     */
    public function findInDocument(string $expression, string $type = Query::TYPE_CSS, bool $wrapNode = true): array
    {
        $document = $this->ownerDocument();

        if ($document !== null) {
            return $document->find($expression, $type, $wrapNode, $this->node);
        }

        throw new LogicException('Can not search in context without the owner document.');
    }

    /**
     * Searches for a node in the DOM tree and returns first element or null.
*
     * @param string $expression XPath expression or CSS selector
     * @param string $type       The type of the expression
     * @param bool   $wrapNode   Returns Element if true, otherwise DOMElement
     *
     * @return Element|DOMElement|null
     *
     * @throws InvalidSelectorException
     */
    public function first(string $expression, string $type = Query::TYPE_CSS, bool $wrapNode = true)
    {
        $document = $this->toDocument();

        return $document->first($expression, $type, $wrapNode);
    }

    /**
     * Returns the first match of an XPath expression or CSS selector in the owner document,
     * searched with the current node as context, or null when nothing matches.
     *
     * @param string $expression XPath expression or CSS selector
     * @param string $type       The type of the expression
     * @param bool   $wrapNode   Returns Element if true, otherwise DOMElement
     *
     * @return Element|DOMElement|null
     *
     * @throws LogicException if the current node has no owner document
     * @throws InvalidSelectorException
     */
    public function firstInDocument(string $expression, string $type = Query::TYPE_CSS, bool $wrapNode = true)
    {
        $document = $this->ownerDocument();

        if ($document !== null) {
            return $document->first($expression, $type, $wrapNode, $this->node);
        }

        throw new LogicException('Can not search in context without the owner document.');
    }

    /**
     * Shortcut for find() with the expression type fixed to XPath.
     *
     * @param string $expression XPath expression
     * @param bool   $wrapNode   Returns array of Element if true, otherwise array of DOMElement
     *
     * @return Element[]|DOMElement[]
     *
     * @throws InvalidSelectorException
     */
    public function xpath(string $expression, bool $wrapNode = true): array
    {
        $type = Query::TYPE_XPATH;

        return $this->find($expression, $type, $wrapNode);
    }

    /**
     * Counts nodes for a given XPath expression or CSS selector.
378 | * 379 | * @param string $expression XPath expression or CSS selector 380 | * @param string $type The type of the expression 381 | * 382 | * @return int 383 | * 384 | * @throws InvalidSelectorException 385 | */ 386 | public function count(string $expression, string $type = Query::TYPE_CSS): int 387 | { 388 | return $this->toDocument()->count($expression, $type); 389 | } 390 | 391 | /** 392 | * Dumps the node into a string using HTML formatting (including child nodes). 393 | * 394 | * @return string 395 | */ 396 | public function html(): string 397 | { 398 | return $this->toDocument()->html(); 399 | } 400 | 401 | /** 402 | * Dumps the node into a string using HTML formatting (without child nodes). 403 | * 404 | * @return string 405 | */ 406 | public function outerHtml(): string 407 | { 408 | $document = new DOMDocument(); 409 | 410 | $importedNode = $document->importNode($this->node); 411 | 412 | return $document->saveHTML($importedNode); 413 | } 414 | 415 | /** 416 | * Dumps the node descendants into a string using HTML formatting. 417 | * 418 | * @param string $delimiter 419 | * 420 | * @return string 421 | */ 422 | public function innerHtml(string $delimiter = ''): string 423 | { 424 | $innerHtml = []; 425 | 426 | foreach ($this->node->childNodes as $childNode) { 427 | $innerHtml[] = $childNode->ownerDocument->saveHTML($childNode); 428 | } 429 | 430 | return implode($delimiter, $innerHtml); 431 | } 432 | 433 | /** 434 | * Dumps the node descendants into a string using XML formatting. 435 | * 436 | * @param string $delimiter 437 | * 438 | * @return string 439 | */ 440 | public function innerXml(string $delimiter = ''): string 441 | { 442 | $innerXml = []; 443 | 444 | foreach ($this->node->childNodes as $childNode) { 445 | $innerXml[] = $childNode->ownerDocument->saveXML($childNode); 446 | } 447 | 448 | return implode($delimiter, $innerXml); 449 | } 450 | 451 | /** 452 | * Sets inner HTML. 
453 | * 454 | * @param string $html 455 | * 456 | * @return static 457 | * 458 | * @throws InvalidArgumentException if passed argument is not a string 459 | * @throws InvalidSelectorException 460 | */ 461 | public function setInnerHtml(string $html): self 462 | { 463 | return $this->setContent($html, Document::TYPE_HTML); 464 | } 465 | 466 | /** 467 | * Sets inner HTML. 468 | * 469 | * @param string $xml 470 | * 471 | * @return static 472 | * 473 | * @throws InvalidArgumentException if passed argument is not a string 474 | * @throws InvalidSelectorException 475 | */ 476 | public function setInnerXml(string $xml): self 477 | { 478 | return $this->setContent($xml, Document::TYPE_XML); 479 | } 480 | 481 | protected function setContent(string $content, string $type): self 482 | { 483 | $this->removeChildren(); 484 | 485 | Errors::disable(); 486 | 487 | $encoding = $this->ownerDocument()->getEncoding() ?? 'UTF-8'; 488 | 489 | $document = new Document("$content", false, $encoding, $type); 490 | 491 | $fragment = $document->first('didom-fragment')->getNode(); 492 | 493 | foreach ($fragment->childNodes as $node) { 494 | $newNode = $this->node->ownerDocument->importNode($node, true); 495 | 496 | $this->node->appendChild($newNode); 497 | } 498 | 499 | Errors::restore(); 500 | 501 | return $this; 502 | } 503 | 504 | /** 505 | * Dumps the node into a string using XML formatting. 506 | * 507 | * @param int $options Additional options 508 | * 509 | * @return string The node XML 510 | */ 511 | public function xml(int $options = 0): string 512 | { 513 | return $this->toDocument()->xml($options); 514 | } 515 | 516 | /** 517 | * Get the text content of this node and its descendants. 518 | * 519 | * @return string The node value 520 | */ 521 | public function text(): string 522 | { 523 | return $this->node->textContent; 524 | } 525 | 526 | /** 527 | * Set the value of this node. 
528 | * 529 | * @param string|integer|float $value The new value of the node 530 | * 531 | * @return static 532 | * 533 | * @throws InvalidArgumentException if parameter 1 is not a string 534 | */ 535 | public function setValue($value): self 536 | { 537 | if (is_numeric($value)) { 538 | $value = (string) $value; 539 | } 540 | 541 | if ( ! is_string($value)) { 542 | throw new InvalidArgumentException(sprintf('%s expects parameter 1 to be string, integer or float, %s given', __METHOD__, (is_object($value) ? get_class($value) : gettype($value)))); 543 | } 544 | 545 | $this->node->nodeValue = $value; 546 | 547 | return $this; 548 | } 549 | 550 | /** 551 | * Returns true if the current node is a DOMElement instance. 552 | * 553 | * @return bool 554 | */ 555 | public function isElementNode(): bool 556 | { 557 | return $this->node instanceof DOMElement; 558 | } 559 | 560 | /** 561 | * Returns true if the current node is a a DOMText instance. 562 | * 563 | * @return bool 564 | */ 565 | public function isTextNode(): bool 566 | { 567 | return $this->node instanceof DOMText; 568 | } 569 | 570 | /** 571 | * Returns true if the current node is a DOMComment instance. 572 | * 573 | * @return bool 574 | */ 575 | public function isCommentNode(): bool 576 | { 577 | return $this->node instanceof DOMComment; 578 | } 579 | 580 | /** 581 | * Returns true if the current node is a DOMCdataSection instance. 582 | * 583 | * @return bool 584 | */ 585 | public function isCdataSectionNode(): bool 586 | { 587 | return $this->node instanceof DOMCdataSection; 588 | } 589 | 590 | /** 591 | * Indicates if two nodes are the same node. 592 | * 593 | * @param Element|DOMNode $node 594 | * 595 | * @return bool 596 | * 597 | * @throws InvalidArgumentException if parameter 1 is not an instance of DOMNode 598 | */ 599 | public function is($node): bool 600 | { 601 | if ($node instanceof Node) { 602 | $node = $node->getNode(); 603 | } 604 | 605 | if ( ! 
$node instanceof DOMNode) { 606 | throw new InvalidArgumentException(sprintf('Argument 1 passed to %s must be an instance of %s or DOMNode, %s given.', __METHOD__, __CLASS__, (is_object($node) ? get_class($node) : gettype($node)))); 607 | } 608 | 609 | return $this->node->isSameNode($node); 610 | } 611 | 612 | /** 613 | * @return Element|Document|null 614 | */ 615 | public function parent() 616 | { 617 | if ($this->node->parentNode === null) { 618 | return null; 619 | } 620 | 621 | if ($this->node->parentNode instanceof DOMDocument) { 622 | return new Document($this->node->parentNode); 623 | } 624 | 625 | return new Element($this->node->parentNode); 626 | } 627 | 628 | /** 629 | * Returns first parent node matches passed selector. 630 | * 631 | * @param string $selector 632 | * @param bool $strict 633 | * 634 | * @return Element|null 635 | * 636 | * @throws InvalidSelectorException if the selector is invalid 637 | */ 638 | public function closest(string $selector, bool $strict = false): ?Element 639 | { 640 | $node = $this; 641 | 642 | while (true) { 643 | $parent = $node->parent(); 644 | 645 | if ($parent === null || $parent instanceof Document) { 646 | return null; 647 | } 648 | 649 | if ($parent->matches($selector, $strict)) { 650 | return $parent; 651 | } 652 | 653 | $node = $parent; 654 | } 655 | } 656 | 657 | /** 658 | * @param string|null $selector 659 | * @param string|null $nodeType 660 | * 661 | * @return Element|null 662 | * 663 | * @throws InvalidArgumentException if parameter 2 is not a string 664 | * @throws RuntimeException if the node type is invalid 665 | * @throws LogicException if the selector used with non DOMElement node type 666 | * @throws InvalidSelectorException if the selector is invalid 667 | */ 668 | public function previousSibling(?string $selector = null, ?string $nodeType = null): ?Element 669 | { 670 | if ($this->node->previousSibling === null) { 671 | return null; 672 | } 673 | 674 | if ($selector === null && $nodeType === null) { 
675 | return new Element($this->node->previousSibling); 676 | } 677 | 678 | if ($selector !== null && $nodeType === null) { 679 | $nodeType = 'DOMElement'; 680 | } 681 | 682 | $allowedTypes = ['DOMElement', 'DOMText', 'DOMComment', 'DOMCdataSection']; 683 | 684 | if ( ! in_array($nodeType, $allowedTypes, true)) { 685 | throw new RuntimeException(sprintf('Unknown node type "%s". Allowed types: %s', $nodeType, implode(', ', $allowedTypes))); 686 | } 687 | 688 | if ($selector !== null && $nodeType !== 'DOMElement') { 689 | throw new LogicException(sprintf('Selector can be used only with DOMElement node type, %s given.', $nodeType)); 690 | } 691 | 692 | $node = $this->node->previousSibling; 693 | 694 | while ($node !== null) { 695 | if (get_class($node) !== $nodeType) { 696 | $node = $node->previousSibling; 697 | 698 | continue; 699 | } 700 | 701 | $element = new Element($node); 702 | 703 | if ($selector === null || $element->matches($selector)) { 704 | return $element; 705 | } 706 | 707 | $node = $node->previousSibling; 708 | } 709 | 710 | return null; 711 | } 712 | 713 | /** 714 | * @param string|null $selector 715 | * @param string|null $nodeType 716 | * 717 | * @return Element[] 718 | * 719 | * @throws InvalidArgumentException if parameter 2 is not a string 720 | * @throws RuntimeException if the node type is invalid 721 | * @throws LogicException if the selector used with non DOMElement node type 722 | * @throws InvalidSelectorException if the selector is invalid 723 | */ 724 | public function previousSiblings(?string $selector = null, ?string $nodeType = null): array 725 | { 726 | if ($this->node->previousSibling === null) { 727 | return []; 728 | } 729 | 730 | if ($selector !== null && $nodeType === null) { 731 | $nodeType = 'DOMElement'; 732 | } 733 | 734 | if ($nodeType !== null) { 735 | $allowedTypes = ['DOMElement', 'DOMText', 'DOMComment', 'DOMCdataSection']; 736 | 737 | if ( ! 
in_array($nodeType, $allowedTypes, true)) { 738 | throw new RuntimeException(sprintf('Unknown node type "%s". Allowed types: %s', $nodeType, implode(', ', $allowedTypes))); 739 | } 740 | } 741 | 742 | if ($selector !== null && $nodeType !== 'DOMElement') { 743 | throw new LogicException(sprintf('Selector can be used only with DOMElement node type, %s given.', $nodeType)); 744 | } 745 | 746 | $result = []; 747 | 748 | $node = $this->node->previousSibling; 749 | 750 | while ($node !== null) { 751 | $element = new Element($node); 752 | 753 | if ($nodeType === null) { 754 | $result[] = $element; 755 | 756 | $node = $node->previousSibling; 757 | 758 | continue; 759 | } 760 | 761 | if (get_class($node) !== $nodeType) { 762 | $node = $node->previousSibling; 763 | 764 | continue; 765 | } 766 | 767 | if ($selector === null) { 768 | $result[] = $element; 769 | 770 | $node = $node->previousSibling; 771 | 772 | continue; 773 | } 774 | 775 | if ($element->matches($selector)) { 776 | $result[] = $element; 777 | } 778 | 779 | $node = $node->previousSibling; 780 | } 781 | 782 | return array_reverse($result); 783 | } 784 | 785 | /** 786 | * @param string|null $selector 787 | * @param string|null $nodeType 788 | * 789 | * @return Element|null 790 | * 791 | * @throws InvalidArgumentException if parameter 2 is not a string 792 | * @throws RuntimeException if the node type is invalid 793 | * @throws LogicException if the selector used with non DOMElement node type 794 | * @throws InvalidSelectorException if the selector is invalid 795 | */ 796 | public function nextSibling(?string $selector = null, ?string $nodeType = null): ?Element 797 | { 798 | if ($this->node->nextSibling === null) { 799 | return null; 800 | } 801 | 802 | if ($selector === null && $nodeType === null) { 803 | return new Element($this->node->nextSibling); 804 | } 805 | 806 | if ($selector !== null && $nodeType === null) { 807 | $nodeType = 'DOMElement'; 808 | } 809 | 810 | $allowedTypes = ['DOMElement', 'DOMText', 
'DOMComment', 'DOMCdataSection']; 811 | 812 | if ( ! in_array($nodeType, $allowedTypes, true)) { 813 | throw new RuntimeException(sprintf('Unknown node type "%s". Allowed types: %s', $nodeType, implode(', ', $allowedTypes))); 814 | } 815 | 816 | if ($selector !== null && $nodeType !== 'DOMElement') { 817 | throw new LogicException(sprintf('Selector can be used only with DOMElement node type, %s given.', $nodeType)); 818 | } 819 | 820 | $node = $this->node->nextSibling; 821 | 822 | while ($node !== null) { 823 | if (get_class($node) !== $nodeType) { 824 | $node = $node->nextSibling; 825 | 826 | continue; 827 | } 828 | 829 | $element = new Element($node); 830 | 831 | if ($selector === null || $element->matches($selector)) { 832 | return $element; 833 | } 834 | 835 | $node = $node->nextSibling; 836 | } 837 | 838 | return null; 839 | } 840 | 841 | /** 842 | * @param string|null $selector 843 | * @param string|null $nodeType 844 | * 845 | * @return Element[] 846 | * 847 | * @throws InvalidArgumentException if parameter 2 is not a string 848 | * @throws RuntimeException if the node type is invalid 849 | * @throws LogicException if the selector used with non DOMElement node type 850 | * @throws InvalidSelectorException if the selector is invalid 851 | */ 852 | public function nextSiblings(?string $selector = null, ?string $nodeType = null): array 853 | { 854 | if ($this->node->nextSibling === null) { 855 | return []; 856 | } 857 | 858 | if ($selector !== null && $nodeType === null) { 859 | $nodeType = 'DOMElement'; 860 | } 861 | 862 | $allowedTypes = ['DOMElement', 'DOMText', 'DOMComment', 'DOMCdataSection']; 863 | 864 | if ($nodeType !== null && ! in_array($nodeType, $allowedTypes, true)) { 865 | throw new RuntimeException(sprintf('Unknown node type "%s". 
Allowed types: %s', $nodeType, implode(', ', $allowedTypes))); 866 | } 867 | 868 | if ($selector !== null && $nodeType !== 'DOMElement') { 869 | throw new LogicException(sprintf('Selector can be used only with DOMElement node type, %s given.', $nodeType)); 870 | } 871 | 872 | $result = []; 873 | 874 | $node = $this->node->nextSibling; 875 | 876 | while ($node !== null) { 877 | $element = new Element($node); 878 | 879 | if ($nodeType === null) { 880 | $result[] = $element; 881 | 882 | $node = $node->nextSibling; 883 | 884 | continue; 885 | } 886 | 887 | if (get_class($node) !== $nodeType) { 888 | $node = $node->nextSibling; 889 | 890 | continue; 891 | } 892 | 893 | if ($selector === null) { 894 | $result[] = $element; 895 | 896 | $node = $node->nextSibling; 897 | 898 | continue; 899 | } 900 | 901 | if ($element->matches($selector)) { 902 | $result[] = $element; 903 | } 904 | 905 | $node = $node->nextSibling; 906 | } 907 | 908 | return $result; 909 | } 910 | 911 | /** 912 | * @param int $index 913 | * 914 | * @return Element|null 915 | */ 916 | public function child(int $index): ?Element 917 | { 918 | $child = $this->node->childNodes->item($index); 919 | 920 | return $child === null ? 
null : new Element($child); 921 | } 922 | 923 | /** 924 | * @return Element|null 925 | */ 926 | public function firstChild(): ?Element 927 | { 928 | if ($this->node->firstChild === null) { 929 | return null; 930 | } 931 | 932 | return new Element($this->node->firstChild); 933 | } 934 | 935 | /** 936 | * @return Element|null 937 | */ 938 | public function lastChild(): ?Element 939 | { 940 | if ($this->node->lastChild === null) { 941 | return null; 942 | } 943 | 944 | return new Element($this->node->lastChild); 945 | } 946 | 947 | /** 948 | * @return bool 949 | */ 950 | public function hasChildren(): bool 951 | { 952 | return $this->node->hasChildNodes(); 953 | } 954 | 955 | /** 956 | * @return Element[] 957 | */ 958 | public function children(): array 959 | { 960 | $children = []; 961 | 962 | foreach ($this->node->childNodes as $node) { 963 | $children[] = new Element($node); 964 | } 965 | 966 | return $children; 967 | } 968 | 969 | /** 970 | * Removes child from list of children. 971 | * 972 | * @param Node|DOMNode $childNode 973 | * 974 | * @return Element the node that has been removed 975 | */ 976 | public function removeChild($childNode): Element 977 | { 978 | if ($childNode instanceof Node) { 979 | $childNode = $childNode->getNode(); 980 | } 981 | 982 | if ( ! $childNode instanceof DOMNode) { 983 | throw new InvalidArgumentException(sprintf('Argument 1 passed to %s must be an instance of %s or DOMNode, %s given.', __METHOD__, __CLASS__, (is_object($childNode) ? get_class($childNode) : gettype($childNode)))); 984 | } 985 | 986 | $removedNode = $this->node->removeChild($childNode); 987 | 988 | return new Element($removedNode); 989 | } 990 | 991 | /** 992 | * Removes all child nodes. 
993 | * 994 | * @return Element[] the nodes that has been removed 995 | */ 996 | public function removeChildren(): array 997 | { 998 | // we need to collect child nodes to array 999 | // because removing nodes from the DOMNodeList on iterating is not working 1000 | $childNodes = []; 1001 | 1002 | foreach ($this->node->childNodes as $childNode) { 1003 | $childNodes[] = $childNode; 1004 | } 1005 | 1006 | $removedNodes = []; 1007 | 1008 | foreach ($childNodes as $childNode) { 1009 | $removedNode = $this->node->removeChild($childNode); 1010 | 1011 | $removedNodes[] = new Element($removedNode); 1012 | } 1013 | 1014 | return $removedNodes; 1015 | } 1016 | 1017 | /** 1018 | * Removes current node from the parent. 1019 | * 1020 | * @return Element the node that has been removed 1021 | * 1022 | * @throws LogicException if the current node has no parent node 1023 | */ 1024 | public function remove(): Element 1025 | { 1026 | if ($this->node->parentNode === null) { 1027 | throw new LogicException('Can not remove an element without the parent node.'); 1028 | } 1029 | 1030 | $removedNode = $this->node->parentNode->removeChild($this->node); 1031 | 1032 | return new Element($removedNode); 1033 | } 1034 | 1035 | /** 1036 | * Replaces a child. 1037 | * 1038 | * @param Node|DOMNode $newNode The new node 1039 | * @param bool $clone Clone the node if true, otherwise move it 1040 | * 1041 | * @return Element The node that has been replaced 1042 | * 1043 | * @throws LogicException if the current node has no parent node 1044 | */ 1045 | public function replace($newNode, bool $clone = true): Element 1046 | { 1047 | if ($this->node->parentNode === null) { 1048 | throw new LogicException('Can not replace an element without the parent node.'); 1049 | } 1050 | 1051 | if ($newNode instanceof Node) { 1052 | $newNode = $newNode->getNode(); 1053 | } 1054 | 1055 | if ( ! 
$newNode instanceof DOMNode) { 1056 | throw new InvalidArgumentException(sprintf('Argument 1 passed to %s must be an instance of %s or DOMNode, %s given.', __METHOD__, __CLASS__, (is_object($newNode) ? get_class($newNode) : gettype($newNode)))); 1057 | } 1058 | 1059 | if ($clone) { 1060 | $newNode = $newNode->cloneNode(true); 1061 | } 1062 | 1063 | if ($newNode->ownerDocument === null || ! $this->ownerDocument()->is($newNode->ownerDocument)) { 1064 | $newNode = $this->node->ownerDocument->importNode($newNode, true); 1065 | } 1066 | 1067 | $node = $this->node->parentNode->replaceChild($newNode, $this->node); 1068 | 1069 | return new Element($node); 1070 | } 1071 | 1072 | /** 1073 | * Get line number for a node. 1074 | * 1075 | * @return int 1076 | */ 1077 | public function getLineNo(): int 1078 | { 1079 | return $this->node->getLineNo(); 1080 | } 1081 | 1082 | /** 1083 | * Clones a node. 1084 | * 1085 | * @param bool $deep Indicates whether to copy all descendant nodes 1086 | * 1087 | * @return Element The cloned node 1088 | */ 1089 | public function cloneNode(bool $deep = true): Element 1090 | { 1091 | return new Element($this->node->cloneNode($deep)); 1092 | } 1093 | 1094 | /** 1095 | * Sets current node instance. 1096 | * 1097 | * @param DOMElement|DOMText|DOMComment|DOMCdataSection|DOMDocumentFragment $node 1098 | * 1099 | * @return static 1100 | */ 1101 | protected function setNode($node): self 1102 | { 1103 | $allowedClasses = ['DOMElement', 'DOMText', 'DOMComment', 'DOMCdataSection', 'DOMDocumentFragment']; 1104 | 1105 | if ( ! is_object($node) || ! in_array(get_class($node), $allowedClasses, true)) { 1106 | throw new InvalidArgumentException(sprintf('Argument 1 passed to %s must be an instance of DOMElement, DOMText, DOMComment, DOMCdataSection or DOMDocumentFragment, %s given.', __METHOD__, (is_object($node) ? 
get_class($node) : gettype($node)))); 1107 | } 1108 | 1109 | $this->node = $node; 1110 | 1111 | return $this; 1112 | } 1113 | 1114 | /** 1115 | * Returns current node instance. 1116 | * 1117 | * @return DOMElement|DOMText|DOMComment|DOMCdataSection|DOMDocumentFragment 1118 | */ 1119 | public function getNode() 1120 | { 1121 | return $this->node; 1122 | } 1123 | 1124 | /** 1125 | * Returns the document associated with this node. 1126 | * 1127 | * @return Document|null 1128 | */ 1129 | public function ownerDocument(): ?Document 1130 | { 1131 | if ($this->node->ownerDocument === null) { 1132 | return null; 1133 | } 1134 | 1135 | return new Document($this->node->ownerDocument); 1136 | } 1137 | 1138 | /** 1139 | * Get the DOM document with the current element. 1140 | * 1141 | * @param string $encoding The document encoding 1142 | * 1143 | * @return Document 1144 | */ 1145 | public function toDocument(string $encoding = 'UTF-8'): Document 1146 | { 1147 | $document = new Document(null, false, $encoding); 1148 | 1149 | $document->appendChild($this->node); 1150 | 1151 | return $document; 1152 | } 1153 | 1154 | /** 1155 | * Convert the element to its string representation. 1156 | * 1157 | * @return string 1158 | */ 1159 | public function __toString(): string 1160 | { 1161 | return $this->html(); 1162 | } 1163 | } 1164 | -------------------------------------------------------------------------------- /src/DiDom/Query.php: -------------------------------------------------------------------------------- 1 | ') { 97 | $prefix = '/'; 98 | 99 | $selector = ltrim($selector, '> '); 100 | } 101 | 102 | $segments = self::getSegments($selector); 103 | $xpath = ''; 104 | 105 | while (count($segments) > 0) { 106 | $xpath .= self::buildXpath($segments, $prefix); 107 | 108 | $selector = trim(substr($selector, strlen($segments['selector']))); 109 | $prefix = isset($segments['rel']) ? 
'/' : '//'; 110 | 111 | if ($selector === '' || substr($selector, 0, 2) === '::' || substr($selector, 0, 1) === ',') { 112 | break; 113 | } 114 | 115 | $segments = self::getSegments($selector); 116 | } 117 | 118 | // if selector has property 119 | if (substr($selector, 0, 2) === '::') { 120 | $property = self::parseProperty($selector); 121 | $propertyXpath = self::convertProperty($property['name'], $property['args']); 122 | 123 | $selector = substr($selector, strlen($property['property'])); 124 | $selector = trim($selector); 125 | 126 | $xpath .= '/' . $propertyXpath; 127 | } 128 | 129 | return [$xpath, $selector]; 130 | } 131 | 132 | /** 133 | * @param string $selector 134 | * 135 | * @return array 136 | * 137 | * @throws InvalidSelectorException 138 | */ 139 | protected static function parseProperty(string $selector): array 140 | { 141 | $name = '(?P[\w\-]+)'; 142 | $args = '(?:\((?P[^\)]+)?\))?'; 143 | 144 | $regexp = '/^::' . $name . $args . '/is'; 145 | 146 | if (preg_match($regexp, $selector, $matches) !== 1) { 147 | throw new InvalidSelectorException(sprintf('Invalid property "%s".', $selector)); 148 | } 149 | 150 | $result = []; 151 | 152 | $result['property'] = $matches[0]; 153 | $result['name'] = $matches['name']; 154 | $result['args'] = isset($matches['args']) ? 
explode(',', $matches['args']) : []; 155 | 156 | $result['args'] = array_map('trim', $result['args']); 157 | 158 | return $result; 159 | } 160 | 161 | /** 162 | * @param string $name 163 | * @param array $parameters 164 | * 165 | * @return string 166 | * 167 | * @throws InvalidSelectorException if the specified property is unknown 168 | */ 169 | protected static function convertProperty(string $name, array $parameters = []): string 170 | { 171 | if ($name === 'text') { 172 | return 'text()'; 173 | } 174 | 175 | if ($name === 'attr') { 176 | if (count($parameters) === 0) { 177 | return '@*'; 178 | } 179 | 180 | $attributes = []; 181 | 182 | foreach ($parameters as $attribute) { 183 | $attributes[] = sprintf('name() = "%s"', $attribute); 184 | } 185 | 186 | return sprintf('@*[%s]', implode(' or ', $attributes)); 187 | } 188 | 189 | throw new InvalidSelectorException(sprintf('Unknown property "%s".', $name)); 190 | } 191 | 192 | /** 193 | * Converts a CSS pseudo-class into an XPath expression. 
194 | * 195 | * @param string $pseudo Pseudo-class 196 | * @param string $tagName 197 | * @param array $parameters 198 | * 199 | * @return string 200 | * 201 | * @throws InvalidSelectorException if the specified pseudo-class is unknown 202 | */ 203 | protected static function convertPseudo(string $pseudo, string &$tagName, array $parameters = []): string 204 | { 205 | switch ($pseudo) { 206 | case 'first-child': 207 | return 'position() = 1'; 208 | case 'last-child': 209 | return 'position() = last()'; 210 | case 'nth-child': 211 | $xpath = sprintf('(name()="%s") and (%s)', $tagName, self::convertNthExpression($parameters[0])); 212 | $tagName = '*'; 213 | 214 | return $xpath; 215 | case 'contains': 216 | $string = trim($parameters[0], '\'"'); 217 | 218 | if (count($parameters) === 1) { 219 | return self::convertContains($string); 220 | } 221 | 222 | if ($parameters[1] !== 'true' && $parameters[1] !== 'false') { 223 | throw new InvalidSelectorException(sprintf('Parameter 2 of "contains" pseudo-class must be equal true or false, "%s" given.', $parameters[1])); 224 | } 225 | 226 | $caseSensitive = $parameters[1] === 'true'; 227 | 228 | if (count($parameters) === 2) { 229 | return self::convertContains($string, $caseSensitive); 230 | } 231 | 232 | if ($parameters[2] !== 'true' && $parameters[2] !== 'false') { 233 | throw new InvalidSelectorException(sprintf('Parameter 3 of "contains" pseudo-class must be equal true or false, "%s" given.', $parameters[2])); 234 | } 235 | 236 | $fullMatch = $parameters[2] === 'true'; 237 | 238 | return self::convertContains($string, $caseSensitive, $fullMatch); 239 | case 'has': 240 | return self::cssToXpath($parameters[0], './/'); 241 | case 'not': 242 | return sprintf('not(self::%s)', self::cssToXpath($parameters[0], '')); 243 | 244 | case 'nth-of-type': 245 | return self::convertNthExpression($parameters[0]); 246 | case 'empty': 247 | return 'count(descendant::*) = 0'; 248 | case 'not-empty': 249 | return 'count(descendant::*) > 0'; 
250 | } 251 | 252 | throw new InvalidSelectorException(sprintf('Unknown pseudo-class "%s".', $pseudo)); 253 | } 254 | 255 | /** 256 | * @param array $segments 257 | * @param string $prefix Specifies the nesting of nodes 258 | * 259 | * @return string XPath expression 260 | * 261 | * @throws InvalidArgumentException if you neither specify tag name nor attributes 262 | */ 263 | public static function buildXpath(array $segments, string $prefix = '//'): string 264 | { 265 | $tagName = isset($segments['tag']) ? $segments['tag'] : '*'; 266 | 267 | $attributes = []; 268 | 269 | // if the id attribute specified 270 | if (isset($segments['id'])) { 271 | $attributes[] = sprintf('@id="%s"', $segments['id']); 272 | } 273 | 274 | // if the class attribute specified 275 | if (isset($segments['classes'])) { 276 | foreach ($segments['classes'] as $class) { 277 | $attributes[] = sprintf('contains(concat(" ", normalize-space(@class), " "), " %s ")', $class); 278 | } 279 | } 280 | 281 | // if the attributes specified 282 | if (isset($segments['attributes'])) { 283 | foreach ($segments['attributes'] as $name => $value) { 284 | $attributes[] = self::convertAttribute($name, $value); 285 | } 286 | } 287 | 288 | // if the pseudo class specified 289 | if (array_key_exists('pseudo', $segments)) { 290 | foreach ($segments['pseudo'] as $pseudo) { 291 | $expression = $pseudo['expression'] !== null ? $pseudo['expression'] : ''; 292 | 293 | $parameters = explode(',', $expression); 294 | $parameters = array_map('trim', $parameters); 295 | 296 | $attributes[] = self::convertPseudo($pseudo['type'], $tagName, $parameters); 297 | } 298 | } 299 | 300 | if (count($attributes) === 0 && ! isset($segments['tag'])) { 301 | throw new InvalidArgumentException('The array of segments must contain the name of the tag or at least one attribute.'); 302 | } 303 | 304 | $xpath = $prefix . $tagName; 305 | 306 | if ($count = count($attributes)) { 307 | $xpath .= ($count > 1) ? 
sprintf('[(%s)]', implode(') and (', $attributes)) : sprintf('[%s]', $attributes[0]); 308 | } 309 | 310 | return $xpath; 311 | } 312 | 313 | /** 314 | * @param string $name The name of an attribute 315 | * @param string|null $value The value of an attribute 316 | * 317 | * @return string 318 | */ 319 | protected static function convertAttribute(string $name, ?string $value): string 320 | { 321 | $isSimpleSelector = ! in_array(substr($name, 0, 1), ['^', '!'], true); 322 | $isSimpleSelector = $isSimpleSelector && ( ! in_array(substr($name, -1), ['^', '$', '*', '!', '~'], true)); 323 | 324 | if ($isSimpleSelector) { 325 | // if specified only the attribute name 326 | $xpath = $value === null ? '@' . $name : sprintf('@%s="%s"', $name, $value); 327 | 328 | return $xpath; 329 | } 330 | 331 | // if the attribute name starts with ^ 332 | // example: *[^data-] 333 | if (substr($name, 0, 1) === '^') { 334 | $xpath = sprintf('@*[starts-with(name(), "%s")]', substr($name, 1)); 335 | 336 | return $value === null ? $xpath : sprintf('%s="%s"', $xpath, $value); 337 | } 338 | 339 | // if the attribute name starts with ! 
// NOTE(review): the statements down to the first closing brace are the tail of a method whose opening lines precede this chunk; they are reproduced unchanged.
        // example: input[!disabled]
        if (substr($name, 0, 1) === '!') {
            $xpath = sprintf('not(@%s)', substr($name, 1));

            return $xpath;
        }

        $symbol = substr($name, -1);
        $name = substr($name, 0, -1);

        switch ($symbol) {
            case '^':
                $xpath = sprintf('starts-with(@%s, "%s")', $name, $value);

                break;
            case '$':
                $xpath = sprintf('substring(@%s, string-length(@%s) - string-length("%s") + 1) = "%s"', $name, $name, $value, $value);

                break;
            case '*':
                $xpath = sprintf('contains(@%s, "%s")', $name, $value);

                break;
            case '!':
                $xpath = sprintf('not(@%s="%s")', $name, $value);

                break;
            case '~':
                $xpath = sprintf('contains(concat(" ", normalize-space(@%s), " "), " %s ")', $name, $value);

                break;
        }

        return $xpath;
    }

    /**
     * Converts nth-expression into an XPath expression.
     *
     * Accepted forms are "odd", "even", a numeric string, and "[digit]n" optionally
     * followed by "+offset" or "-offset" (for example "n", "2n", "3n+1", "2n-1").
     *
     * @param string $expression nth-expression
     *
     * @return string XPath predicate built on position()
     *
     * @throws InvalidSelectorException if the given nth-child expression is empty or invalid
     */
    protected static function convertNthExpression(string $expression): string
    {
        if ($expression === '') {
            throw new InvalidSelectorException('nth-child (or nth-last-child) expression must not be empty.');
        }

        if ($expression === 'odd') {
            return 'position() mod 2 = 1 and position() >= 1';
        }

        if ($expression === 'even') {
            return 'position() mod 2 = 0 and position() >= 0';
        }

        if (is_numeric($expression)) {
            return sprintf('position() = %d', $expression);
        }

        // The named groups "mul", "sign" and "pos" are read below, so they must be present in the pattern.
        if (preg_match('/^(?P<mul>[0-9]?n)(?:(?P<sign>\+|\-)(?P<pos>[0-9]+))?$/is', $expression, $segments)) {
            // The pattern is case-insensitive, so strip both "n" and "N";
            // a bare "n" (or "N") means a multiplier of 1.
            $factor = rtrim($segments['mul'], 'nN');
            $multiplier = $factor === '' ? 1 : (int) $factor;

            // "an+b" selects positions p where (p - b) mod a = 0, hence the inverted sign.
            $sign = (isset($segments['sign']) && $segments['sign'] === '+') ? '-' : '+';
            $position = $segments['pos'] ?? 0;

            return sprintf('(position() %s %d) mod %d = 0 and position() >= %d', $sign, $position, $multiplier, $position);
        }

        throw new InvalidSelectorException(sprintf('Invalid nth-child expression "%s".', $expression));
    }

    /**
     * Builds an XPath condition that tests the node text against the given string.
     *
     * Case-sensitive variants use plain XPath (`text() = "..."` or `contains(text(), "...")`).
     * Case-insensitive variants lower-case both sides through `php:functionString`, using
     * `mb_strtolower` when it exists and `strtolower` otherwise.
     *
     * NOTE(review): the string is inserted between double quotes without escaping, so a value
     * containing a double quote presumably yields an invalid expression — confirm against callers.
     *
     * @param string $string        text to look for
     * @param bool   $caseSensitive whether the comparison is case-sensitive
     * @param bool   $fullMatch     whether the text must be equal to the string (true) or only contain it (false)
     *
     * @return string
     */
    protected static function convertContains(string $string, bool $caseSensitive = true, bool $fullMatch = false): string
    {
        if ($caseSensitive && $fullMatch) {
            return sprintf('text() = "%s"', $string);
        }

        if ($caseSensitive && ! $fullMatch) {
            return sprintf('contains(text(), "%s")', $string);
        }

        $strToLowerFunction = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';

        if ( ! $caseSensitive && $fullMatch) {
            return sprintf("php:functionString(\"{$strToLowerFunction}\", .) = php:functionString(\"{$strToLowerFunction}\", \"%s\")", $string);
        }

        // if ! $caseSensitive and ! $fullMatch
        return sprintf("contains(php:functionString(\"{$strToLowerFunction}\", .), php:functionString(\"{$strToLowerFunction}\", \"%s\"))", $string);
    }

    /**
     * Splits the CSS selector into parts (tag name, ID, classes, attributes, pseudo-class).
     *
     * The result always contains the key "selector" (the matched part of the input) and,
     * when present in the selector, the keys "tag", "id", "attributes" (name => value or null),
     * "classes" (list of names), "pseudo" (list of ['type' => ..., 'expression' => ...]) and "rel".
     *
     * @param string $selector CSS selector
     *
     * @return array
     *
     * @throws InvalidSelectorException if the selector is empty or not valid
     */
    public static function getSegments(string $selector): array
    {
        $selector = trim($selector);

        if ($selector === '') {
            throw new InvalidSelectorException('The selector must not be empty.');
        }

        $pregMatchResult = preg_match(self::getSelectorRegex(), $selector, $segments);

        if ($pregMatchResult === false || $pregMatchResult === 0 || $segments[0] === '') {
            throw new InvalidSelectorException(sprintf('Invalid selector "%s".', $selector));
        }

        $result = ['selector' => $segments[0]];

        if (isset($segments['tag']) && $segments['tag'] !== '') {
            $result['tag'] = $segments['tag'];
        }

        // if the id attribute specified
        if (isset($segments['id']) && $segments['id'] !== '') {
            $result['id'] = $segments['id'];
        }

        // if the attributes specified
        if (isset($segments['attrs'])) {
            $attributes = trim($segments['attrs'], '[]');
            $attributes = explode('][', $attributes);

            foreach ($attributes as $attribute) {
                if ($attribute !== '') {
                    list($name, $value) = array_pad(explode('=', $attribute, 2), 2, null);

                    if ($name === '') {
                        throw new InvalidSelectorException(sprintf('Invalid selector "%s": attribute name must not be empty.', $selector));
                    }

                    // equal null if specified only the attribute name
                    $result['attributes'][$name] = is_string($value) ? trim($value, '\'"') : null;
                }
            }
        }

        // if the class attribute specified
        if (isset($segments['classes'])) {
            $classes = trim($segments['classes'], '.');
            $classes = explode('.', $classes);

            foreach ($classes as $class) {
                if ($class !== '') {
                    $result['classes'][] = $class;
                }
            }
        }

        // if the pseudo class specified
        if (isset($segments['pseudo']) && $segments['pseudo'] !== '') {
            // the named groups "type" and "expr" are read below
            preg_match_all('/:(?P<type>[\w\-]+)(?:\((?P<expr>[^\)]+)\))?/', $segments['pseudo'], $pseudoClasses);

            $result['pseudo'] = [];

            foreach ($pseudoClasses['type'] as $index => $pseudoType) {
                $result['pseudo'][] = [
                    'type' => $pseudoType,
                    'expression' => $pseudoClasses['expr'][$index] !== '' ? $pseudoClasses['expr'][$index] : null,
                ];
            }
        }

        // if it is a direct descendant
        if (isset($segments['rel'])) {
            $result['rel'] = $segments['rel'];
        }

        return $result;
    }

    /**
     * Builds the regular expression used by getSegments() to split one selector step.
     *
     * Each part is captured in a named group that getSegments() reads:
     * "tag", "id", "classes", "attrs", "pseudo" and "rel".
     *
     * @return string
     */
    private static function getSelectorRegex(): string
    {
        $tag = '(?P<tag>[\*|\w|\-]+)?';
        $id = '(?:#(?P<id>[\w|\-]+))?';
        $classes = '(?P<classes>\.[\w|\-|\.]+)*';
        $attrs = '(?P<attrs>(?:\[.+?\])*)?';
        $pseudoType = '[\w\-]+';
        $pseudoExpr = '(?:\([^\)]+\))?';
        $pseudo = '(?P<pseudo>(?::' . $pseudoType . $pseudoExpr . ')+)?';
        $rel = '\s*(?P<rel>>)?';

        return '/' . $tag . $id . $classes . $attrs . $pseudo . $rel . '/is';
    }

    /**
     * Returns the current value of the static $compiled property.
     *
     * @return array
     */
    public static function getCompiled(): array
    {
        return static::$compiled;
    }

    /**
     * Replaces the static $compiled property with the given array.
     *
     * @param array $compiled
     */
    public static function setCompiled(array $compiled): void
    {
        static::$compiled = $compiled;
    }
}

--------------------------------------------------------------------------------
/src/DiDom/StyleAttribute.php:
--------------------------------------------------------------------------------
<?php

declare(strict_types=1);

namespace DiDom;

use InvalidArgumentException;

// NOTE(review): everything from the opening tag through the first condition of the constructor
// was lost in extraction and has been reconstructed from the usages visible below
// ($this->element, $this->styleString, $this->properties) — confirm against the repository.
class StyleAttribute
{
    /**
     * The DOM element instance.
     *
     * @var Element
     */
    protected $element;

    /**
     * The value of the style attribute as it was last read or written.
     *
     * @var string
     */
    protected $styleString = '';

    /**
     * Parsed style properties (name => value).
     *
     * @var array
     */
    protected $properties = [];

    /**
     * @param Element $element
     *
     * @throws InvalidArgumentException if the element does not wrap an element node
     */
    public function __construct(Element $element)
    {
        if ( ! $element->isElementNode()) {
            throw new InvalidArgumentException('The element must contain DOMElement node.');
        }

        $this->element = $element;

        $this->parseStyleAttribute();
    }

    /**
     * Parses style attribute of the element.
     *
     * Does nothing when the attribute is equal to the last seen value. Otherwise the cached
     * properties are rebuilt from scratch, so properties that are no longer present in the
     * attribute do not survive a re-parse.
     */
    protected function parseStyleAttribute()
    {
        if ( ! $this->element->hasAttribute('style')) {
            // possible if style attribute has been removed
            if ($this->styleString !== '') {
                $this->styleString = '';
                $this->properties = [];
            }

            return;
        }

        $currentStyle = $this->element->getAttribute('style');

        // if style attribute is not changed
        if ($currentStyle === $this->styleString) {
            return;
        }

        // save style attribute as is (without trimming)
        $this->styleString = $currentStyle;

        // the attribute has changed: drop everything parsed from the previous value
        $this->properties = [];

        $styleString = trim($this->styleString, ' ;');

        if ($styleString === '') {
            return;
        }

        foreach (explode(';', $styleString) as $declaration) {
            $parts = explode(':', $declaration, 2);

            // skip fragments without a colon (e.g. the empty piece in "a: b;; c: d")
            if (count($parts) < 2) {
                continue;
            }

            list($name, $value) = $parts;

            $this->properties[trim($name)] = trim($value);
        }
    }

    /**
     * Updates style attribute of the element.
     *
     * The built string is also remembered, so the next parse sees the attribute as unchanged.
     */
    protected function updateStyleAttribute(): void
    {
        $this->styleString = $this->buildStyleString();

        $this->element->setAttribute('style', $this->styleString);
    }

    /**
     * Joins the properties as "name: value" pairs separated by "; ".
     *
     * @return string
     */
    protected function buildStyleString(): string
    {
        $properties = [];

        foreach ($this->properties as $propertyName => $value) {
            $properties[] = $propertyName . ': ' . $value;
        }

        return implode('; ', $properties);
    }

    /**
     * Sets a single property and writes the style attribute back to the element.
     *
     * @param string $name
     * @param string $value
     *
     * @return StyleAttribute
     */
    public function setProperty(string $name, string $value): self
    {
        $this->parseStyleAttribute();

        $this->properties[$name] = $value;

        $this->updateStyleAttribute();

        return $this;
    }

    /**
     * Sets several properties at once and writes the style attribute back to the element.
     *
     * @param array $properties property name => value
     *
     * @return StyleAttribute
     *
     * @throws InvalidArgumentException if property name is not a string
     * @throws InvalidArgumentException if property value is not a string
     */
    public function setMultipleProperties(array $properties): self
    {
        $this->parseStyleAttribute();

        foreach ($properties as $propertyName => $value) {
            if ( ! is_string($propertyName)) {
                throw new InvalidArgumentException(sprintf('Property name must be a string, %s given.', (is_object($propertyName) ? get_class($propertyName) : gettype($propertyName))));
            }

            if ( ! is_string($value)) {
                throw new InvalidArgumentException(sprintf('Property value must be a string, %s given.', (is_object($value) ? get_class($value) : gettype($value))));
            }

            $this->properties[$propertyName] = $value;
        }

        $this->updateStyleAttribute();

        return $this;
    }

    /**
     * Returns the value of a property, or the default when the property is not set.
     *
     * @param string $name
     * @param mixed  $default
     *
     * @return mixed
     */
    public function getProperty(string $name, $default = null)
    {
        $this->parseStyleAttribute();

        if ( ! array_key_exists($name, $this->properties)) {
            return $default;
        }

        return $this->properties[$name];
    }

    /**
     * Returns the requested properties that are set; names that are not set are omitted.
     *
     * @param string[] $propertyNames
     *
     * @return array
     *
     * @throws InvalidArgumentException if property name is not a string
     */
    public function getMultipleProperties(array $propertyNames): array
    {
        $this->parseStyleAttribute();

        $result = [];

        foreach ($propertyNames as $propertyName) {
            if ( ! is_string($propertyName)) {
                throw new InvalidArgumentException(sprintf('Property name must be a string, %s given.', (is_object($propertyName) ? get_class($propertyName) : gettype($propertyName))));
            }

            if (array_key_exists($propertyName, $this->properties)) {
                $result[$propertyName] = $this->properties[$propertyName];
            }
        }

        return $result;
    }

    /**
     * Returns all parsed properties (name => value).
     *
     * @return array
     */
    public function getAllProperties(): array
    {
        $this->parseStyleAttribute();

        return $this->properties;
    }

    /**
     * Checks whether the property is set.
     *
     * @param string $name
     *
     * @return bool
     */
    public function hasProperty(string $name): bool
    {
        $this->parseStyleAttribute();

        return array_key_exists($name, $this->properties);
    }

    /**
     * Removes a single property and writes the style attribute back to the element.
     *
     * @param string $name
     *
     * @return StyleAttribute
     */
    public function removeProperty(string $name): self
    {
        $this->parseStyleAttribute();

        unset($this->properties[$name]);

        $this->updateStyleAttribute();

        return $this;
    }

    /**
     * Removes several properties and writes the style attribute back to the element.
     *
     * @param array $propertyNames
     *
     * @return StyleAttribute
     *
     * @throws InvalidArgumentException if property name is not a string
     */
    public function removeMultipleProperties(array $propertyNames): self
    {
        $this->parseStyleAttribute();

        foreach ($propertyNames as $propertyName) {
            if ( ! is_string($propertyName)) {
                throw new InvalidArgumentException(sprintf('Property name must be a string, %s given.', (is_object($propertyName) ? get_class($propertyName) : gettype($propertyName))));
            }

            unset($this->properties[$propertyName]);
        }

        $this->updateStyleAttribute();

        return $this;
    }

    /**
     * Removes all properties except the listed ones and writes the style attribute back.
     *
     * @param string[] $preserved names of the properties to keep
     *
     * @return StyleAttribute
     *
     * @throws InvalidArgumentException if a preserved property name is not a string
     */
    public function removeAllProperties(array $preserved = []): self
    {
        $this->parseStyleAttribute();

        $preservedProperties = [];

        foreach ($preserved as $propertyName) {
            if ( ! is_string($propertyName)) {
                throw new InvalidArgumentException(sprintf('Property name must be a string, %s given.', (is_object($propertyName) ? get_class($propertyName) : gettype($propertyName))));
            }

            if ( ! array_key_exists($propertyName, $this->properties)) {
                continue;
            }

            $preservedProperties[$propertyName] = $this->properties[$propertyName];
        }

        $this->properties = $preservedProperties;

        $this->updateStyleAttribute();

        return $this;
    }

    /**
     * Returns the element whose style attribute is managed.
     *
     * @return Element
     */
    public function getElement(): Element
    {
        return $this->element;
    }
}

--------------------------------------------------------------------------------