├── .github ├── CONTRIBUTING.md ├── FUNDING.yml ├── ISSUE_TEMPLATE.md └── workflows │ └── ci.yml ├── CHANGELOG ├── LICENSE ├── README.md ├── README_API.md ├── build ├── composer.json ├── docs │ └── api.md └── generate_docs.php ├── composer.json └── src └── voku └── helper ├── AbstractDomParser.php ├── AbstractSimpleHtmlDom.php ├── AbstractSimpleHtmlDomNode.php ├── AbstractSimpleXmlDom.php ├── AbstractSimpleXmlDomNode.php ├── DomParserInterface.php ├── HtmlDomHelper.php ├── HtmlDomParser.php ├── SelectorConverter.php ├── SimpleHtmlAttributes.php ├── SimpleHtmlAttributesInterface.php ├── SimpleHtmlDom.php ├── SimpleHtmlDomBlank.php ├── SimpleHtmlDomInterface.php ├── SimpleHtmlDomNode.php ├── SimpleHtmlDomNodeBlank.php ├── SimpleHtmlDomNodeInterface.php ├── SimpleXmlDom.php ├── SimpleXmlDomBlank.php ├── SimpleXmlDomInterface.php ├── SimpleXmlDomNode.php ├── SimpleXmlDomNodeBlank.php ├── SimpleXmlDomNodeInterface.php └── XmlDomParser.php /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | ## Pull Requests 4 | 5 | 1. Create your own [fork](https://help.github.com/articles/fork-a-repo) of this repo 6 | 2. Create a new branch for each feature or improvement 7 | 3. Send a pull request from each feature branch to the **master** branch 8 | 9 | It is very important to separate new features or improvements into separate 10 | feature branches, and to send a pull request for each branch. This allows me to 11 | review and pull in new features or improvements individually. 12 | 13 | ## Style Guide 14 | 15 | All pull requests must adhere to the [PSR-2 standard](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-2-coding-style-guide.md). 16 | 17 | ## Unit Testing 18 | 19 | All pull requests must be accompanied by passing PHPUnit unit tests and 20 | complete code coverage. 
21 | 22 | [Learn about PHPUnit](https://github.com/sebastianbergmann/phpunit/) -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [voku] 2 | patreon: voku 3 | tidelift: "packagist/voku/simple_html_dom" 4 | custom: https://www.paypal.me/moelleken 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | #### What is this feature about (expected vs actual behaviour)? 2 | 3 | #### How can I reproduce it? 4 | 5 | #### Does it take minutes, hours or days to fix? 6 | 7 | #### Any additional information? -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | pull_request: 6 | branches: 7 | - master 8 | 9 | defaults: 10 | run: 11 | shell: bash 12 | 13 | jobs: 14 | tests: 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | php: [ 20 | 7.0, 21 | 7.1, 22 | 7.2, 23 | 7.3, 24 | 7.4, 25 | 8.0, 26 | 8.1 27 | ] 28 | composer: [basic] 29 | timeout-minutes: 10 30 | steps: 31 | - name: Checkout code 32 | uses: actions/checkout@v2 33 | 34 | - name: Setup PHP 35 | uses: shivammathur/setup-php@2.9.0 36 | with: 37 | php-version: ${{ matrix.php }} 38 | coverage: xdebug 39 | extensions: zip 40 | tools: composer 41 | 42 | - name: Determine composer cache directory 43 | id: composer-cache 44 | run: echo "::set-output name=directory::$(composer config cache-dir)" 45 | 46 | - name: Cache composer dependencies 47 | uses: actions/cache@v2.1.3 48 | with: 49 | path: ${{ steps.composer-cache.outputs.directory }} 50 | key: ${{ matrix.php }}-composer-${{ hashFiles('**/composer.lock') }} 51 | restore-keys: ${{ 
matrix.php }}-composer- 52 | 53 | - name: Install dependencies 54 | run: | 55 | if [[ "${{ matrix.php }}" == "7.4" ]]; then 56 | composer require phpstan/phpstan --no-update 57 | fi; 58 | 59 | if [[ "${{ matrix.composer }}" == "lowest" ]]; then 60 | composer update --prefer-dist --no-interaction --prefer-lowest --prefer-stable 61 | fi; 62 | 63 | if [[ "${{ matrix.composer }}" == "basic" ]]; then 64 | composer update --prefer-dist --no-interaction 65 | fi; 66 | 67 | composer dump-autoload -o 68 | 69 | - name: Run tests 70 | run: | 71 | mkdir -p build/logs 72 | php vendor/bin/phpunit -c phpunit.xml --coverage-clover=build/logs/clover.xml 73 | 74 | - name: Run phpstan 75 | continue-on-error: true 76 | if: ${{ matrix.php == '7.4' }} 77 | run: | 78 | php vendor/bin/phpstan analyse 79 | 80 | - name: Upload coverage results to Coveralls 81 | env: 82 | COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} 83 | run: | 84 | composer global require php-coveralls/php-coveralls 85 | php-coveralls --coverage_clover=build/logs/clover.xml -v 86 | 87 | - name: Upload coverage results to Codecov 88 | uses: codecov/codecov-action@v1 89 | with: 90 | files: build/logs/clover.xml 91 | 92 | - name: Archive logs artifacts 93 | if: ${{ failure() }} 94 | uses: actions/upload-artifact@v2 95 | with: 96 | name: logs_composer-${{ matrix.composer }}_php-${{ matrix.php }} 97 | path: | 98 | build/logs 99 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | [PHP Simple HTML Dom v4.8.x] 2 | 0: refactor -> findOne() -> will now always return a "Blank" object if no element was found 3 | 1: "SimpleXmlDomNodeInterface" -> fix phpdocs only 4 | 2: "*NodeBlank" -> fix return type from "findOne()" 5 | 3: "innerhtmlKeep" -> added for modifying html without losing html-hacks for e.g. 
svg elements 6 | 4: "HtmlDomHelper" -> added "mergeHtmlAttributes()" 7 | 5: "HtmlDomParser" -> hack for multiple root elements 8 | 6: "AbstractSimpleHtmlDom" -> PHP 8.1 -> try to fix php type errors 9 | 7: "XmlDomParser" -> added workaround for DTD requests 10 | 8: "Replace deprecated string interpolation usage" thanks @flavioheleno [Flávio Heleno] 11 | 9: "Adding 'id' to DOMNodes with type string." thanks @devteam-emroc [Dev Team - emroc GmbH] 12 | "Add symfony 7 support" thanks @GerB 13 | "Fix error in parent() method if no parent node" thanks @DieterHolvoet [Dieter Holvoet] 14 | "Add text/template type" thanks @all9lives [Jeremy Brennan] 15 | "fix: Cannot assign null to property DOMNode::* of type string" thanks @frugan-dev [Frugan] 16 | 10: "fix: HtmlDomHelper::mergeHtmlAttributes() with zero values" 17 | 18 | [PHP Simple HTML Dom v4.7.x] 19 | 1: add "findMultiOrFalse()" + "findOneOrFalse()" 20 | 2: fix -> usage of e.g. "textContent" 21 | 3: fix -> usage of special js template tags in the dom 22 | 4: merge improvements from "ivopetkov/html5-dom-document-php" -> length attribute 23 | 5: merge improvements from "ivopetkov/html5-dom-document-php" -> classList support 24 | 6: add "nextNonWhitespaceSibling()" 25 | 7: fix -> usage of "outerhtml" 26 | 8: add support for "symfony/css-selector": ~5.0 27 | 9: fix -> "save()" -> will use html() instead of innerHtml() now 28 | 13: fix -> "val()" -> will now support hidden fields 29 | 14: fix -> keep html comments, also at the beginning of the html input 30 | 15: add "HtmlDomParser->overwriteTemplateLogicSyntaxInSpecialScriptTags()" 31 | 16: add support for "text/x-handlebars-template" 32 | 17: fix -> problem with auto-completion in e.g. PhpStorm 33 | 18: small optimizations + fix phpstan reported errors 34 | 19: add support for different special script-tags 35 | 20: fix -> invalid html (move html that is after "" before "") 36 | 21: fix -> internal invalid self-closing tags (e.g. 
) 37 | 22: fix -> invalid html (remove content before "") 38 | 23: fix -> invalid html (remove content before "") + try to repair broken html 39 | 24: fix -> normalize the html after replacing the node 40 | 25: add support for PHP 8 41 | 26: fix -> fix "setAttribute()" -> for e.g. urls 42 | 27: fix -> "XmlDomParser" -> add option for "auto-remove-xpath-namespace" 43 | 28: fix -> allow CSS and xPath syntax for XmlDomParser 44 | 29: use github actions 45 | 30: add "previousNonWhitespaceSibling()" 46 | 31: add "SimpleHtmlDom->delete()" & "SimpleHtmlDom->getTag()" thanks @marioquartz 47 | add support for "symfony/css-selector": ~6.0 thanks @dora38 48 | 49 | [PHP Simple HTML Dom v4.6.x] 50 | 1: add an XmlDomParser Class + simple tests 51 | 2: add support for text/x-custom-template type 52 | 3: fix -> check result of "html5FallbackForScriptTags()" 53 | 54 | 55 | [PHP Simple HTML Dom v4.5.x] 56 | 1: fix -> return types 57 | 2: add abstract class and interface for "Dom Elements" (SimpleHtmlDom*) 58 | 3: and abstract class and interface for "Dom Nodes" (SimpleHtmlDomNode*) 59 | 4: fix -> errors reported by phpstan (level 7) 60 | 5: fix -> error with Google AMP () & Php DomDocument 61 | 62 | 63 | [PHP Simple HTML Dom v4.4.x] 64 | 1: add "findMulti()" method for "SimpleDomParser" 65 | 2: fix -> phpdoc improvements via phpstan 66 | 67 | 68 | [PHP Simple HTML Dom v4.3.x] 69 | 1: add "isRemoved()" method for "SimpleHtmlDom" 70 | 2: fix -> do not remove newlines from the output 71 | 3: fix -> keep HTML closing tags in '; 422 | }, 423 | $html 424 | ); 425 | 426 | if ($htmlTmp !== null) { 427 | $html = $htmlTmp; 428 | } 429 | } 430 | 431 | /** 432 | * @param string $html 433 | * 434 | * @return string 435 | */ 436 | public static function putReplacedBackToPreserveHtmlEntities(string $html, bool $putBrokenReplacedBack = true): string 437 | { 438 | static $DOM_REPLACE__HELPER_CACHE = null; 439 | 440 | if ($DOM_REPLACE__HELPER_CACHE === null) { 441 | $DOM_REPLACE__HELPER_CACHE['tmp'] 
= \array_merge( 442 | self::$domLinkReplaceHelper['tmp'], 443 | self::$domReplaceHelper['tmp'] 444 | ); 445 | $DOM_REPLACE__HELPER_CACHE['orig'] = \array_merge( 446 | self::$domLinkReplaceHelper['orig'], 447 | self::$domReplaceHelper['orig'] 448 | ); 449 | 450 | $DOM_REPLACE__HELPER_CACHE['tmp']['html_wrapper__start'] = '<' . self::$domHtmlWrapperHelper . '>'; 451 | $DOM_REPLACE__HELPER_CACHE['tmp']['html_wrapper__end'] = ''; 452 | 453 | $DOM_REPLACE__HELPER_CACHE['orig']['html_wrapper__start'] = ''; 454 | $DOM_REPLACE__HELPER_CACHE['orig']['html_wrapper__end'] = ''; 455 | 456 | $DOM_REPLACE__HELPER_CACHE['tmp']['html_wrapper__start_broken'] = self::$domHtmlWrapperHelper . '>'; 457 | $DOM_REPLACE__HELPER_CACHE['tmp']['html_wrapper__end_broken'] = ''; 464 | 465 | $DOM_REPLACE__HELPER_CACHE['orig']['html_special_script__start'] = ' 0 481 | ) { 482 | $html = \str_ireplace(self::$domBrokenReplaceHelper['tmp'], self::$domBrokenReplaceHelper['orig'], $html); 483 | } 484 | 485 | return \str_ireplace($DOM_REPLACE__HELPER_CACHE['tmp'], $DOM_REPLACE__HELPER_CACHE['orig'], $html); 486 | } 487 | 488 | /** 489 | * @param string $html 490 | * 491 | * @return string 492 | */ 493 | public static function replaceToPreserveHtmlEntities(string $html): string 494 | { 495 | // init 496 | $linksNew = []; 497 | $linksOld = []; 498 | 499 | if (\strpos($html, 'http') !== false) { 500 | // regEx for e.g.: [https://www.domain.de/foo.php?foobar=1&email=lars%40moelleken.org&guid=test1233312&{{foo}}#foo] 501 | $regExUrl = '/(\[?\bhttps?:\/\/[^\s<>]+(?:\(\w+\)|[^[:punct:]\s]|\/|}|]))/i'; 502 | \preg_match_all($regExUrl, $html, $linksOld); 503 | 504 | if (!empty($linksOld[1])) { 505 | $linksOld = $linksOld[1]; 506 | foreach ((array) $linksOld as $linkKey => $linkOld) { 507 | $linksNew[$linkKey] = \str_replace( 508 | self::$domLinkReplaceHelper['orig'], 509 | self::$domLinkReplaceHelper['tmp'], 510 | $linkOld 511 | ); 512 | } 513 | } 514 | } 515 | 516 | $linksNewCount = \count($linksNew); 517 | if 
($linksNewCount > 0 && \count($linksOld) === $linksNewCount) { 518 | $search = \array_merge($linksOld, self::$domReplaceHelper['orig']); 519 | $replace = \array_merge($linksNew, self::$domReplaceHelper['tmp']); 520 | } else { 521 | $search = self::$domReplaceHelper['orig']; 522 | $replace = self::$domReplaceHelper['tmp']; 523 | } 524 | 525 | return \str_replace($search, $replace, $html); 526 | } 527 | } 528 | -------------------------------------------------------------------------------- /src/voku/helper/AbstractSimpleHtmlDom.php: -------------------------------------------------------------------------------- 1 | 'childNodes', 14 | 'first_child' => 'firstChild', 15 | 'last_child' => 'lastChild', 16 | 'next_sibling' => 'nextSibling', 17 | 'prev_sibling' => 'previousSibling', 18 | 'parent' => 'parentNode', 19 | 'outertext' => 'html', 20 | 'outerhtml' => 'html', 21 | 'innertext' => 'innerHtml', 22 | 'innerhtml' => 'innerHtml', 23 | 'innerhtmlkeep' => 'innerHtmlKeep', 24 | ]; 25 | 26 | /** 27 | * @var string[] 28 | */ 29 | protected static $stringDomNodes = [ 30 | 'id', 31 | 'prefix', 32 | 'content' 33 | ]; 34 | 35 | /** 36 | * @var \DOMElement|\DOMNode|null 37 | */ 38 | protected $node; 39 | 40 | /** 41 | * @var SimpleHtmlAttributes|null 42 | */ 43 | private $classListCache; 44 | 45 | /** 46 | * @param string $name 47 | * @param array $arguments 48 | * 49 | * @throws \BadMethodCallException 50 | * 51 | * @return SimpleHtmlDomInterface|string|null 52 | */ 53 | public function __call($name, $arguments) 54 | { 55 | $name = \strtolower($name); 56 | 57 | if (isset(self::$functionAliases[$name])) { 58 | return \call_user_func_array([$this, self::$functionAliases[$name]], $arguments); 59 | } 60 | 61 | throw new \BadMethodCallException('Method does not exist'); 62 | } 63 | 64 | /** 65 | * @param string $name 66 | * 67 | * @return SimpleHtmlAttributes|string|string[]|null 68 | */ 69 | public function __get($name) 70 | { 71 | $nameOrig = $name; 72 | $name = 
\strtolower($name); 73 | 74 | switch ($name) { 75 | case 'outerhtml': 76 | case 'outertext': 77 | case 'html': 78 | return $this->html(); 79 | case 'innerhtml': 80 | case 'innertext': 81 | return $this->innerHtml(); 82 | case 'innerhtmlkeep': 83 | return $this->innerHtml(false, false); 84 | case 'text': 85 | case 'plaintext': 86 | return $this->text(); 87 | case 'tag': 88 | return $this->node->nodeName ?? ''; 89 | case 'attr': 90 | return $this->getAllAttributes(); 91 | case 'classlist': 92 | if ($this->classListCache === null) { 93 | $this->classListCache = new SimpleHtmlAttributes($this->node ?? null, 'class'); 94 | } 95 | 96 | return $this->classListCache; 97 | default: 98 | if ($this->node && \property_exists($this->node, $nameOrig)) { 99 | if (\is_string($this->node->{$nameOrig})) { 100 | return HtmlDomParser::putReplacedBackToPreserveHtmlEntities($this->node->{$nameOrig}); 101 | } 102 | 103 | return $this->node->{$nameOrig}; 104 | } 105 | 106 | return $this->getAttribute($name); 107 | } 108 | } 109 | 110 | /** 111 | * @param string $selector 112 | * @param int $idx 113 | * 114 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 115 | */ 116 | public function __invoke($selector, $idx = null) 117 | { 118 | return $this->find($selector, $idx); 119 | } 120 | 121 | /** 122 | * @param string $name 123 | * 124 | * @return bool 125 | */ 126 | public function __isset($name) 127 | { 128 | $nameOrig = $name; 129 | $name = \strtolower($name); 130 | 131 | switch ($name) { 132 | case 'outertext': 133 | case 'outerhtml': 134 | case 'innertext': 135 | case 'innerhtml': 136 | case 'innerhtmlkeep': 137 | case 'plaintext': 138 | case 'text': 139 | case 'tag': 140 | return true; 141 | default: 142 | if ($this->node && \property_exists($this->node, $nameOrig)) { 143 | return isset($this->node->{$nameOrig}); 144 | } 145 | 146 | return $this->hasAttribute($name); 147 | } 148 | } 149 | 150 | /** 151 | * @param string $name 152 | * @param mixed 
$value 153 | * 154 | * @return SimpleHtmlDomInterface|null 155 | */ 156 | public function __set($name, $value) 157 | { 158 | $nameOrig = $name; 159 | $name = \strtolower($name); 160 | 161 | switch ($name) { 162 | case 'outerhtml': 163 | case 'outertext': 164 | return $this->replaceNodeWithString($value); 165 | case 'innertext': 166 | case 'innerhtml': 167 | return $this->replaceChildWithString($value); 168 | case 'innerhtmlkeep': 169 | return $this->replaceChildWithString($value, false); 170 | case 'plaintext': 171 | return $this->replaceTextWithString($value); 172 | case 'classlist': 173 | $name = 'class'; 174 | $nameOrig = 'class'; 175 | // no break 176 | default: 177 | if ($this->node && \property_exists($this->node, $nameOrig)) { 178 | // INFO: Cannot assign null to property DOMNode::* of type string 179 | if (in_array($nameOrig, self::$stringDomNodes)) { 180 | $value = (string)$value; 181 | } 182 | 183 | if (!is_null($value)) { 184 | return $this->node->{$nameOrig} = $value; 185 | } 186 | } 187 | 188 | return $this->setAttribute($name, $value); 189 | } 190 | } 191 | 192 | /** 193 | * @return string 194 | */ 195 | public function __toString() 196 | { 197 | return $this->html(); 198 | } 199 | 200 | /** 201 | * @param string $name 202 | * 203 | * @return void 204 | */ 205 | public function __unset($name) 206 | { 207 | /** @noinspection UnusedFunctionResultInspection */ 208 | $this->removeAttribute($name); 209 | } 210 | 211 | /** 212 | * @param string $selector 213 | * @param int|null $idx 214 | * 215 | * @return mixed 216 | */ 217 | abstract public function find(string $selector, $idx = null); 218 | 219 | /** 220 | * @return string[]|null 221 | */ 222 | abstract public function getAllAttributes(); 223 | 224 | abstract public function getAttribute(string $name): string; 225 | 226 | abstract public function hasAttribute(string $name): bool; 227 | 228 | abstract public function html(bool $multiDecodeNewHtmlEntity = false): string; 229 | 230 | abstract public 
function innerHtml(bool $multiDecodeNewHtmlEntity = false, bool $putBrokenReplacedBack = true): string; 231 | 232 | abstract public function removeAttribute(string $name): SimpleHtmlDomInterface; 233 | 234 | abstract protected function replaceChildWithString(string $string, bool $putBrokenReplacedBack = true): SimpleHtmlDomInterface; 235 | 236 | abstract protected function replaceNodeWithString(string $string): SimpleHtmlDomInterface; 237 | 238 | /** 239 | * @param string $string 240 | * 241 | * @return SimpleHtmlDomInterface 242 | */ 243 | abstract protected function replaceTextWithString($string): SimpleHtmlDomInterface; 244 | 245 | /** 246 | * @param string $name 247 | * @param string|null $value 248 | * @param bool $strictEmptyValueCheck 249 | * 250 | * @return SimpleHtmlDomInterface 251 | */ 252 | abstract public function setAttribute(string $name, $value = null, bool $strictEmptyValueCheck = false): SimpleHtmlDomInterface; 253 | 254 | abstract public function text(): string; 255 | } 256 | -------------------------------------------------------------------------------- /src/voku/helper/AbstractSimpleHtmlDomNode.php: -------------------------------------------------------------------------------- 1 | count(); 26 | } 27 | 28 | if ($this->count() > 0) { 29 | $return = []; 30 | 31 | foreach ($this as $node) { 32 | if ($node instanceof SimpleHtmlDomInterface) { 33 | $return[] = $node->{$name}; 34 | } 35 | } 36 | 37 | return $return; 38 | } 39 | 40 | if ($name === 'plaintext' || $name === 'outertext') { 41 | return []; 42 | } 43 | 44 | return null; 45 | } 46 | 47 | /** 48 | * @param string $selector 49 | * @param int|null $idx 50 | * 51 | * @return SimpleHtmlDomNodeInterface|SimpleHtmlDomNodeInterface[]|null 52 | */ 53 | public function __invoke($selector, $idx = null) 54 | { 55 | return $this->find($selector, $idx); 56 | } 57 | 58 | /** 59 | * @return string 60 | */ 61 | public function __toString() 62 | { 63 | // init 64 | $html = ''; 65 | 66 | foreach ($this as 
$node) { 67 | $html .= $node->outertext; 68 | } 69 | 70 | return $html; 71 | } 72 | 73 | /** 74 | * @param string $selector 75 | * @param int|null $idx 76 | * 77 | * @return SimpleHtmlDomNodeInterface|SimpleHtmlDomNodeInterface[]|null 78 | */ 79 | abstract public function find(string $selector, $idx = null); 80 | } 81 | -------------------------------------------------------------------------------- /src/voku/helper/AbstractSimpleXmlDom.php: -------------------------------------------------------------------------------- 1 | 'childNodes', 14 | 'first_child' => 'firstChild', 15 | 'last_child' => 'lastChild', 16 | 'next_sibling' => 'nextSibling', 17 | 'prev_sibling' => 'previousSibling', 18 | 'parent' => 'parentNode', 19 | ]; 20 | 21 | /** 22 | * @var \DOMElement|\DOMNode|null 23 | */ 24 | protected $node; 25 | 26 | /** 27 | * @param string $name 28 | * @param array $arguments 29 | * 30 | * @throws \BadMethodCallException 31 | * 32 | * @return SimpleXmlDomInterface|string|null 33 | */ 34 | public function __call($name, $arguments) 35 | { 36 | $name = \strtolower($name); 37 | 38 | if (isset(self::$functionAliases[$name])) { 39 | return \call_user_func_array([$this, self::$functionAliases[$name]], $arguments); 40 | } 41 | 42 | throw new \BadMethodCallException('Method does not exist'); 43 | } 44 | 45 | /** 46 | * @param string $name 47 | * 48 | * @return array|string|null 49 | */ 50 | public function __get($name) 51 | { 52 | $nameOrig = $name; 53 | $name = \strtolower($name); 54 | 55 | switch ($name) { 56 | case 'xml': 57 | return $this->xml(); 58 | case 'plaintext': 59 | return $this->text(); 60 | case 'tag': 61 | return $this->node->nodeName ?? 
''; 62 | case 'attr': 63 | return $this->getAllAttributes(); 64 | default: 65 | if ($this->node && \property_exists($this->node, $nameOrig)) { 66 | return $this->node->{$nameOrig}; 67 | } 68 | 69 | return $this->getAttribute($name); 70 | } 71 | } 72 | 73 | /** 74 | * @param string $selector 75 | * @param int|null $idx 76 | * 77 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 78 | */ 79 | public function __invoke($selector, $idx = null) 80 | { 81 | return $this->find($selector, $idx); 82 | } 83 | 84 | /** 85 | * @param string $name 86 | * 87 | * @return bool 88 | */ 89 | public function __isset($name) 90 | { 91 | $nameOrig = $name; 92 | $name = \strtolower($name); 93 | 94 | switch ($name) { 95 | case 'outertext': 96 | case 'outerhtml': 97 | case 'innertext': 98 | case 'innerhtml': 99 | case 'innerhtmlkeep': 100 | case 'plaintext': 101 | case 'text': 102 | case 'tag': 103 | return true; 104 | default: 105 | if ($this->node && \property_exists($this->node, $nameOrig)) { 106 | return isset($this->node->{$nameOrig}); 107 | } 108 | 109 | return $this->hasAttribute($name); 110 | } 111 | } 112 | 113 | /** 114 | * @param string $name 115 | * @param mixed $value 116 | * 117 | * @return SimpleXmlDomInterface|null 118 | */ 119 | public function __set($name, $value) 120 | { 121 | $nameOrig = $name; 122 | $name = \strtolower($name); 123 | 124 | switch ($name) { 125 | case 'outerhtml': 126 | case 'outertext': 127 | return $this->replaceNodeWithString($value); 128 | case 'innertext': 129 | case 'innerhtml': 130 | return $this->replaceChildWithString($value); 131 | case 'innerhtmlkeep': 132 | return $this->replaceChildWithString($value, false); 133 | case 'plaintext': 134 | return $this->replaceTextWithString($value); 135 | default: 136 | if ($this->node && \property_exists($this->node, $nameOrig)) { 137 | return $this->node->{$nameOrig} = $value; 138 | } 139 | 140 | return $this->setAttribute($name, $value); 141 | } 142 | } 143 | 144 | /** 145 | 
* @return string 146 | */ 147 | public function __toString() 148 | { 149 | return $this->xml(); 150 | } 151 | 152 | /** 153 | * @param string $name 154 | * 155 | * @return void 156 | */ 157 | public function __unset($name) 158 | { 159 | /** @noinspection UnusedFunctionResultInspection */ 160 | $this->removeAttribute($name); 161 | } 162 | 163 | /** 164 | * @param string $selector 165 | * @param int|null $idx 166 | * 167 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 168 | */ 169 | abstract public function find(string $selector, $idx = null); 170 | 171 | /** 172 | * @return string[]|null 173 | */ 174 | abstract public function getAllAttributes(); 175 | 176 | /** 177 | * @param string $name 178 | * 179 | * @return string 180 | */ 181 | abstract public function getAttribute(string $name): string; 182 | 183 | /** 184 | * @param string $name 185 | * 186 | * @return bool 187 | */ 188 | abstract public function hasAttribute(string $name): bool; 189 | 190 | abstract public function innerXml(bool $multiDecodeNewHtmlEntity = false): string; 191 | 192 | abstract public function removeAttribute(string $name): SimpleXmlDomInterface; 193 | 194 | abstract protected function replaceChildWithString(string $string, bool $putBrokenReplacedBack = true): SimpleXmlDomInterface; 195 | 196 | abstract protected function replaceNodeWithString(string $string): SimpleXmlDomInterface; 197 | 198 | /** 199 | * @param string $string 200 | * 201 | * @return SimpleXmlDomInterface 202 | */ 203 | abstract protected function replaceTextWithString($string): SimpleXmlDomInterface; 204 | 205 | /** 206 | * @param string $name 207 | * @param string|null $value 208 | * @param bool $strictEmptyValueCheck 209 | * 210 | * @return SimpleXmlDomInterface 211 | */ 212 | abstract public function setAttribute(string $name, $value = null, bool $strictEmptyValueCheck = false): SimpleXmlDomInterface; 213 | 214 | abstract public function text(): string; 215 | 216 | abstract public 
function xml(bool $multiDecodeNewHtmlEntity = false): string; 217 | } 218 | -------------------------------------------------------------------------------- /src/voku/helper/AbstractSimpleXmlDomNode.php: -------------------------------------------------------------------------------- 1 | count(); 26 | } 27 | 28 | if ($this->count() > 0) { 29 | $return = []; 30 | 31 | foreach ($this as $node) { 32 | if ($node instanceof SimpleXmlDomInterface) { 33 | $return[] = $node->{$name}; 34 | } 35 | } 36 | 37 | return $return; 38 | } 39 | 40 | if ($name === 'plaintext' || $name === 'outertext') { 41 | return []; 42 | } 43 | 44 | return null; 45 | } 46 | 47 | /** 48 | * @param string $selector 49 | * @param int|null $idx 50 | * 51 | * @return SimpleXmlDomNodeInterface|SimpleXmlDomNodeInterface[]|null 52 | */ 53 | public function __invoke($selector, $idx = null) 54 | { 55 | return $this->find($selector, $idx); 56 | } 57 | 58 | /** 59 | * @return string 60 | */ 61 | public function __toString() 62 | { 63 | // init 64 | $html = ''; 65 | 66 | foreach ($this as $node) { 67 | $html .= $node->outertext; 68 | } 69 | 70 | return $html; 71 | } 72 | 73 | /** 74 | * @param string $selector 75 | * @param int|null $idx 76 | * 77 | * @return SimpleXmlDomNodeInterface|SimpleXmlDomNodeInterface[]|null 78 | */ 79 | abstract public function find(string $selector, $idx = null); 80 | } 81 | -------------------------------------------------------------------------------- /src/voku/helper/DomParserInterface.php: -------------------------------------------------------------------------------- 1 | '); 27 | 28 | $domElement = $dom->findOneOrFalse($htmlCssSelector); 29 | if ($domElement === false) { 30 | return $html; 31 | } 32 | $attributes = $domElement->getAllAttributes(); 33 | if (!$attributes) { 34 | return $html; 35 | } 36 | 37 | $domElementNew = $domNew->findOneOrFalse('textarea'); 38 | if ($domElementNew === false) { 39 | return $html; 40 | } 41 | $attributesNew = 
$domElementNew->getAllAttributes(); 42 | if (!$attributesNew) { 43 | return $html; 44 | } 45 | 46 | foreach ($attributesNew as $attributeNameNew => $attributeValueNew) { 47 | $attributeNameNew = \strtolower($attributeNameNew); 48 | 49 | if ( 50 | $attributeNameNew === 'class' 51 | || 52 | $attributeNameNew === 'style' 53 | || 54 | \strpos($attributeNameNew, 'on') === 0 // e.g. onClick, ... 55 | ) { 56 | if (isset($attributes[$attributeNameNew])) { 57 | $attributes[$attributeNameNew] .= ' ' . $attributeValueNew; 58 | } else { 59 | $attributes[$attributeNameNew] = $attributeValueNew; 60 | } 61 | } else { 62 | $attributes[$attributeNameNew] = $attributeValueNew; 63 | } 64 | } 65 | 66 | foreach ($attributes as $attributeName => $attributeValue) { 67 | $domElement->setAttribute($attributeName, $attributeValue, true); 68 | } 69 | 70 | return $domElement->html(); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/voku/helper/SelectorConverter.php: -------------------------------------------------------------------------------- 1 | 15 | */ 16 | protected static $compiled = []; 17 | 18 | /** 19 | * @param string $selector 20 | * @param bool $ignoreCssSelectorErrors 21 | *

22 | * Ignore css selector errors and use the $selector as it is on error, 23 | * so that you can also use xPath selectors. 24 | *

25 | * @param bool $isForHtml 26 | * 27 | * @return string 28 | */ 29 | public static function toXPath(string $selector, bool $ignoreCssSelectorErrors = false, bool $isForHtml = true) 30 | { 31 | if (isset(self::$compiled[$selector])) { 32 | return self::$compiled[$selector]; 33 | } 34 | 35 | // Select DOMText 36 | if ($selector === 'text') { 37 | return '//text()'; 38 | } 39 | 40 | // Select DOMComment 41 | if ($selector === 'comment') { 42 | return '//comment()'; 43 | } 44 | 45 | if (\strpos($selector, '//') === 0) { 46 | return $selector; 47 | } 48 | 49 | if (!\class_exists(CssSelectorConverter::class)) { 50 | throw new \RuntimeException('Unable to filter with a CSS selector as the Symfony CssSelector 2.8+ is not installed (you can use filterXPath instead).'); 51 | } 52 | 53 | $converterKey = '-' . $isForHtml . '-' . $ignoreCssSelectorErrors . '-'; 54 | static $converterArray = []; 55 | if (!isset($converterArray[$converterKey])) { 56 | $converterArray[$converterKey] = new CssSelectorConverter($isForHtml); 57 | } 58 | $converter = $converterArray[$converterKey]; 59 | assert($converter instanceof CssSelectorConverter); 60 | 61 | if ($ignoreCssSelectorErrors) { 62 | try { 63 | $xPathQuery = $converter->toXPath($selector); 64 | } catch (\Exception $e) { 65 | $xPathQuery = $selector; 66 | } 67 | } else { 68 | $xPathQuery = $converter->toXPath($selector); 69 | } 70 | 71 | self::$compiled[$selector] = $xPathQuery; 72 | 73 | return $xPathQuery; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleHtmlAttributes.php: -------------------------------------------------------------------------------- 1 | 26 | */ 27 | private $tokens = []; 28 | 29 | /** 30 | * @var string|null 31 | */ 32 | private $previousValue; 33 | 34 | /** 35 | * Creates a list of space-separated tokens based on the attribute value of an element. 36 | * 37 | * @param \DOMElement|null $element 38 | *

The DOM element.

39 | * @param string $attributeName 40 | *

The name of the attribute.

41 | */ 42 | public function __construct($element, string $attributeName) 43 | { 44 | $this->element = $element; 45 | $this->attributeName = $attributeName; 46 | 47 | $this->tokenize(); 48 | } 49 | 50 | /** @noinspection MagicMethodsValidityInspection */ 51 | 52 | /** 53 | * Returns the value for the property specified. 54 | * 55 | * @param string $name The name of the property 56 | * 57 | * @return int|string The value of the property specified 58 | */ 59 | public function __get(string $name) 60 | { 61 | if ($name === 'length') { 62 | $this->tokenize(); 63 | 64 | return \count($this->tokens); 65 | } 66 | 67 | if ($name === 'value') { 68 | return (string) $this; 69 | } 70 | 71 | throw new \InvalidArgumentException('Undefined property: $' . $name); 72 | } 73 | 74 | /** 75 | * @return string 76 | */ 77 | public function __toString(): string 78 | { 79 | $this->tokenize(); 80 | 81 | return \implode(' ', $this->tokens); 82 | } 83 | 84 | /** 85 | * {@inheritdoc} 86 | */ 87 | public function add(string ...$tokens) 88 | { 89 | if (\count($tokens) === 0) { 90 | return null; 91 | } 92 | 93 | foreach ($tokens as $t) { 94 | if (\in_array($t, $this->tokens, true)) { 95 | continue; 96 | } 97 | 98 | $this->tokens[] = $t; 99 | } 100 | 101 | return $this->setAttributeValue(); 102 | } 103 | 104 | /** 105 | * {@inheritdoc} 106 | */ 107 | public function contains(string $token): bool 108 | { 109 | $this->tokenize(); 110 | 111 | return \in_array($token, $this->tokens, true); 112 | } 113 | 114 | /** 115 | * {@inheritdoc} 116 | */ 117 | public function entries(): \ArrayIterator 118 | { 119 | $this->tokenize(); 120 | 121 | return new \ArrayIterator($this->tokens); 122 | } 123 | 124 | public function item(int $index) 125 | { 126 | $this->tokenize(); 127 | if ($index >= \count($this->tokens)) { 128 | return null; 129 | } 130 | 131 | return $this->tokens[$index]; 132 | } 133 | 134 | /** 135 | * {@inheritdoc} 136 | */ 137 | public function remove(string ...$tokens) 138 | { 139 | if 
(\count($tokens) === 0) { 140 | return null; 141 | } 142 | 143 | if (\count($this->tokens) === 0) { 144 | return null; 145 | } 146 | 147 | foreach ($tokens as $t) { 148 | $i = \array_search($t, $this->tokens, true); 149 | if ($i === false) { 150 | continue; 151 | } 152 | 153 | \array_splice($this->tokens, $i, 1); 154 | } 155 | 156 | return $this->setAttributeValue(); 157 | } 158 | 159 | /** 160 | * {@inheritdoc} 161 | */ 162 | public function replace(string $old, string $new) 163 | { 164 | if ($old === $new) { 165 | return null; 166 | } 167 | 168 | $this->tokenize(); 169 | $i = \array_search($old, $this->tokens, true); 170 | if ($i !== false) { 171 | $j = \array_search($new, $this->tokens, true); 172 | if ($j === false) { 173 | $this->tokens[$i] = $new; 174 | } else { 175 | \array_splice($this->tokens, $i, 1); 176 | } 177 | 178 | return $this->setAttributeValue(); 179 | } 180 | 181 | return null; 182 | } 183 | 184 | /** 185 | * {@inheritdoc} 186 | */ 187 | public function toggle(string $token, bool $force = null): bool 188 | { 189 | // init 190 | $this->tokenize(); 191 | $isThereAfter = false; 192 | 193 | $i = \array_search($token, $this->tokens, true); 194 | if ($force === null) { 195 | if ($i === false) { 196 | $this->tokens[] = $token; 197 | $isThereAfter = true; 198 | } else { 199 | \array_splice($this->tokens, $i, 1); 200 | } 201 | } elseif ($force) { 202 | if ($i === false) { 203 | $this->tokens[] = $token; 204 | } 205 | $isThereAfter = true; 206 | } else { 207 | /** @noinspection NestedPositiveIfStatementsInspection */ 208 | if ($i !== false) { 209 | \array_splice($this->tokens, $i, 1); 210 | } 211 | } 212 | 213 | /** @noinspection UnusedFunctionResultInspection */ 214 | $this->setAttributeValue(); 215 | 216 | return $isThereAfter; 217 | } 218 | 219 | /** 220 | * @return \DOMAttr|false|null 221 | */ 222 | private function setAttributeValue() 223 | { 224 | if ($this->element === null) { 225 | return false; 226 | } 227 | 228 | $value = \implode(' ', 
$this->tokens); 229 | if ($this->previousValue === $value) { 230 | return null; 231 | } 232 | 233 | $this->previousValue = $value; 234 | 235 | return $this->element->setAttribute($this->attributeName, $value); 236 | } 237 | 238 | /** 239 | * @return void 240 | */ 241 | private function tokenize() 242 | { 243 | if ($this->element === null) { 244 | return; 245 | } 246 | 247 | $current = $this->element->getAttribute($this->attributeName); 248 | if ($this->previousValue === $current) { 249 | return; 250 | } 251 | 252 | $this->previousValue = $current; 253 | $tokens = \explode(' ', $current); 254 | $finals = []; 255 | foreach ($tokens as $token) { 256 | if ($token === '') { 257 | continue; 258 | } 259 | 260 | if (\in_array($token, $finals, true)) { 261 | continue; 262 | } 263 | 264 | $finals[] = $token; 265 | } 266 | 267 | $this->tokens = $finals; 268 | } 269 | } 270 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleHtmlAttributesInterface.php: -------------------------------------------------------------------------------- 1 | The tokens you want to add to the list.

18 | * 19 | * @return \DOMAttr|false|null 20 | */ 21 | public function add(string ...$tokens); 22 | 23 | /** 24 | * Returns true if the list contains the given token, otherwise false. 25 | * 26 | * @param string $token the token you want to check for the existence of in the list 27 | * 28 | * @return bool true if the list contains the given token, otherwise false 29 | */ 30 | public function contains(string $token): bool; 31 | 32 | /** 33 | * Returns an iterator allowing you to go through all tokens contained in the list. 34 | * 35 | * @return \ArrayIterator 36 | */ 37 | public function entries(): \ArrayIterator; 38 | 39 | /** 40 | * Returns an item in the list by its index (returns null if the number is greater than or equal to the length of 41 | * the list). 42 | * 43 | * @param int $index the zero-based index of the item you want to return 44 | * 45 | * @return string|null 46 | */ 47 | public function item(int $index); 48 | 49 | /** 50 | * Removes the specified tokens from the list. If the string does not exist in the list, no error is thrown. 51 | * 52 | * @param string ...$tokens 53 | *

The token you want to remove from the list. 54 | * 55 | * @return \DOMAttr|false|null 56 | */ 57 | public function remove(string ...$tokens); 58 | 59 | /** 60 | * Replaces an existing token with a new token. 61 | * 62 | * @param string $old the token you want to replace 63 | * @param string $new the token you want to replace $old with 64 | * 65 | * @return \DOMAttr|false|null 66 | */ 67 | public function replace(string $old, string $new); 68 | 69 | /** 70 | * Removes a given token from the list and returns false. If token doesn't exist it's added and the function 71 | * returns true. 72 | * 73 | * @param string $token the token you want to toggle 74 | * @param bool $force A Boolean that, if included, turns the toggle into a one way-only operation. If set to 75 | * false, the token will only be removed but not added again. If set to true, the token will 76 | * only be added but not removed again. 77 | * 78 | * @return bool false if the token is not in the list after the call, or true if the token is in the list after the 79 | * call 80 | */ 81 | public function toggle(string $token, bool $force = null): bool; 82 | } 83 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleHtmlDomBlank.php: -------------------------------------------------------------------------------- 1 | 13 | */ 14 | class SimpleHtmlDomBlank extends AbstractSimpleHtmlDom implements \IteratorAggregate, SimpleHtmlDomInterface 15 | { 16 | /** 17 | * @param string $name 18 | * @param array $arguments 19 | * 20 | * @throws \BadMethodCallException 21 | * 22 | * @return SimpleHtmlDomInterface|string|null 23 | */ 24 | public function __call($name, $arguments) 25 | { 26 | $name = \strtolower($name); 27 | 28 | if (isset(self::$functionAliases[$name])) { 29 | return \call_user_func_array([$this, self::$functionAliases[$name]], $arguments); 30 | } 31 | 32 | throw new \BadMethodCallException('Method does not exist'); 33 | } 34 | 35 | /** 36 | * Find list of 
nodes with a CSS selector. 37 | * 38 | * @param string $selector 39 | * @param int|null $idx 40 | * 41 | * @return SimpleHtmlDomNodeInterface 42 | */ 43 | public function find(string $selector, $idx = null) 44 | { 45 | return new SimpleHtmlDomNodeBlank(); 46 | } 47 | 48 | public function getTag(): string 49 | { 50 | return ''; 51 | } 52 | 53 | /** 54 | * Returns an array of attributes. 55 | * 56 | * @return null 57 | */ 58 | public function getAllAttributes() 59 | { 60 | return null; 61 | } 62 | 63 | /** 64 | * @return bool 65 | */ 66 | public function hasAttributes(): bool 67 | { 68 | return false; 69 | } 70 | 71 | /** 72 | * Return attribute value. 73 | * 74 | * @param string $name 75 | * 76 | * @return string 77 | */ 78 | public function getAttribute(string $name): string 79 | { 80 | return ''; 81 | } 82 | 83 | /** 84 | * Determine if an attribute exists on the element. 85 | * 86 | * @param string $name 87 | * 88 | * @return bool 89 | */ 90 | public function hasAttribute(string $name): bool 91 | { 92 | return false; 93 | } 94 | 95 | /** 96 | * Get dom node's outer html. 97 | * 98 | * @param bool $multiDecodeNewHtmlEntity 99 | * 100 | * @return string 101 | */ 102 | public function html(bool $multiDecodeNewHtmlEntity = false): string 103 | { 104 | return ''; 105 | } 106 | 107 | /** 108 | * Get dom node's inner html. 109 | * 110 | * @param bool $multiDecodeNewHtmlEntity 111 | * @param bool $putBrokenReplacedBack 112 | * 113 | * @return string 114 | */ 115 | public function innerHtml(bool $multiDecodeNewHtmlEntity = false, bool $putBrokenReplacedBack = true): string 116 | { 117 | return ''; 118 | } 119 | 120 | /** 121 | * Remove attribute. 122 | * 123 | * @param string $name

The name of the html-attribute.

124 | * 125 | * @return SimpleHtmlDomInterface 126 | */ 127 | public function removeAttribute(string $name): SimpleHtmlDomInterface 128 | { 129 | return $this; 130 | } 131 | 132 | /** 133 | * Remove all attributes 134 | * 135 | * @return SimpleHtmlDomBlank 136 | */ 137 | public function removeAttributes(): SimpleHtmlDomInterface 138 | { 139 | return $this; 140 | } 141 | 142 | /** 143 | * @param string $string 144 | * @param bool $putBrokenReplacedBack 145 | * 146 | * @return SimpleHtmlDomInterface 147 | */ 148 | protected function replaceChildWithString(string $string, bool $putBrokenReplacedBack = true): SimpleHtmlDomInterface 149 | { 150 | return new static(); 151 | } 152 | 153 | /** 154 | * @param string $string 155 | * 156 | * @return SimpleHtmlDomInterface 157 | */ 158 | protected function replaceNodeWithString(string $string): SimpleHtmlDomInterface 159 | { 160 | return new static(); 161 | } 162 | 163 | /** 164 | * @param string $string 165 | * 166 | * @return SimpleHtmlDomInterface 167 | */ 168 | protected function replaceTextWithString($string): SimpleHtmlDomInterface 169 | { 170 | return new static(); 171 | } 172 | 173 | /** 174 | * Set attribute value. 175 | * 176 | * @param string $name

The name of the html-attribute.

177 | * @param string|null $value

Set to NULL or an empty string to remove the attribute.

178 | * @param bool $strictEmptyValueCheck

179 | * $value must be NULL, to remove the attribute, 180 | * so that you can set an empty string as attribute-value e.g. autofocus="" 181 | *

182 | * 183 | * @return SimpleHtmlDomInterface 184 | */ 185 | public function setAttribute(string $name, $value = null, bool $strictEmptyValueCheck = false): SimpleHtmlDomInterface 186 | { 187 | return $this; 188 | } 189 | 190 | /** 191 | * Get dom node's plain text. 192 | * 193 | * @return string 194 | */ 195 | public function text(): string 196 | { 197 | return ''; 198 | } 199 | 200 | /** 201 | * Returns children of node. 202 | * 203 | * @param int $idx 204 | * 205 | * @return null 206 | */ 207 | public function childNodes(int $idx = -1) 208 | { 209 | return null; 210 | } 211 | 212 | /** 213 | * Find nodes with a CSS selector. 214 | * 215 | * @param string $selector 216 | * 217 | * @return SimpleHtmlDomNodeInterface 218 | */ 219 | public function findMulti(string $selector): SimpleHtmlDomNodeInterface 220 | { 221 | return new SimpleHtmlDomNodeBlank(); 222 | } 223 | 224 | /** 225 | * Find nodes with a CSS selector or false, if no element is found. 226 | * 227 | * @param string $selector 228 | * 229 | * @return false 230 | */ 231 | public function findMultiOrFalse(string $selector) 232 | { 233 | return false; 234 | } 235 | 236 | /** 237 | * Find one node with a CSS selector. 238 | * 239 | * @param string $selector 240 | * 241 | * @return SimpleHtmlDomInterface 242 | */ 243 | public function findOne(string $selector): SimpleHtmlDomInterface 244 | { 245 | return new static(); 246 | } 247 | 248 | /** 249 | * Find one node with a CSS selector or false, if no element is found. 250 | * 251 | * @param string $selector 252 | * 253 | * @return false 254 | */ 255 | public function findOneOrFalse(string $selector) 256 | { 257 | return false; 258 | } 259 | 260 | /** 261 | * Returns the first child of node. 262 | * 263 | * @return null 264 | */ 265 | public function firstChild() 266 | { 267 | return null; 268 | } 269 | 270 | /** 271 | * Return elements by ".class". 
272 | * 273 | * @param string $class 274 | * 275 | * @return SimpleHtmlDomNodeInterface 276 | */ 277 | public function getElementByClass(string $class): SimpleHtmlDomNodeInterface 278 | { 279 | return new SimpleHtmlDomNodeBlank(); 280 | } 281 | 282 | /** 283 | * Return element by #id. 284 | * 285 | * @param string $id 286 | * 287 | * @return SimpleHtmlDomInterface 288 | */ 289 | public function getElementById(string $id): SimpleHtmlDomInterface 290 | { 291 | return new static(); 292 | } 293 | 294 | /** 295 | * Return element by tag name. 296 | * 297 | * @param string $name 298 | * 299 | * @return SimpleHtmlDomInterface 300 | */ 301 | public function getElementByTagName(string $name): SimpleHtmlDomInterface 302 | { 303 | return new static(); 304 | } 305 | 306 | /** 307 | * Returns elements by "#id". 308 | * 309 | * @param string $id 310 | * @param int|null $idx 311 | * 312 | * @return SimpleHtmlDomNodeInterface 313 | */ 314 | public function getElementsById(string $id, $idx = null) 315 | { 316 | return new SimpleHtmlDomNodeBlank(); 317 | } 318 | 319 | /** 320 | * Returns elements by tag name. 321 | * 322 | * @param string $name 323 | * @param int|null $idx 324 | * 325 | * @return SimpleHtmlDomNodeInterface 326 | */ 327 | public function getElementsByTagName(string $name, $idx = null) 328 | { 329 | return new SimpleHtmlDomNodeBlank(); 330 | } 331 | 332 | /** 333 | * Create a new "HtmlDomParser"-object from the current context. 334 | * 335 | * @return HtmlDomParser 336 | */ 337 | public function getHtmlDomParser(): HtmlDomParser 338 | { 339 | return new HtmlDomParser($this); 340 | } 341 | 342 | /** 343 | * @return \DOMNode 344 | */ 345 | public function getNode(): \DOMNode 346 | { 347 | return new \DOMNode(); 348 | } 349 | 350 | /** 351 | * Nodes can get partially destroyed in which they're still an 352 | * actual DOM node (such as \DOMElement) but almost their entire 353 | * body is gone, including the `nodeType` attribute. 
354 | * 355 | * @return bool true if node has been destroyed 356 | */ 357 | public function isRemoved(): bool 358 | { 359 | return true; 360 | } 361 | 362 | /** 363 | * Returns the last child of node. 364 | * 365 | * @return null 366 | */ 367 | public function lastChild() 368 | { 369 | return null; 370 | } 371 | 372 | /** 373 | * Returns the next sibling of node. 374 | * 375 | * @return null 376 | */ 377 | public function nextSibling() 378 | { 379 | return null; 380 | } 381 | 382 | /** 383 | * Returns the next sibling of node. 384 | * 385 | * @return null 386 | */ 387 | public function nextNonWhitespaceSibling() 388 | { 389 | return null; 390 | } 391 | 392 | /** 393 | * Returns the previous sibling of node. 394 | * 395 | * @return null 396 | */ 397 | public function previousNonWhitespaceSibling() 398 | { 399 | return null; 400 | } 401 | 402 | /** 403 | * Returns the parent of node. 404 | * 405 | * @return SimpleHtmlDomInterface|null 406 | */ 407 | public function parentNode(): ?SimpleHtmlDomInterface 408 | { 409 | return new static(); 410 | } 411 | 412 | /** 413 | * Returns the previous sibling of node. 414 | * 415 | * @return null 416 | */ 417 | public function previousSibling() 418 | { 419 | return null; 420 | } 421 | 422 | /** 423 | * @param string|string[]|null $value

424 | * null === get the current input value 425 | * text === set a new input value 426 | *

427 | * 428 | * @return string|string[]|null 429 | */ 430 | public function val($value = null) 431 | { 432 | return null; 433 | } 434 | 435 | /** 436 | * Retrieve an external iterator. 437 | * 438 | * @see http://php.net/manual/en/iteratoraggregate.getiterator.php 439 | * 440 | * @return SimpleHtmlDomNodeInterface 441 | *

442 | * An instance of an object implementing Iterator or 443 | * Traversable 444 | *

445 | */ 446 | public function getIterator(): SimpleHtmlDomNodeInterface 447 | { 448 | return new SimpleHtmlDomNodeBlank(); 449 | } 450 | 451 | /** 452 | * Get dom node's inner xml. 453 | * 454 | * @param bool $multiDecodeNewHtmlEntity 455 | * 456 | * @return string 457 | */ 458 | public function innerXml(bool $multiDecodeNewHtmlEntity = false): string 459 | { 460 | return ''; 461 | } 462 | 463 | /** 464 | * Delete 465 | * 466 | * @return void 467 | */ 468 | public function delete() 469 | { 470 | $this->outertext=''; 471 | } 472 | } 473 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleHtmlDomInterface.php: -------------------------------------------------------------------------------- 1 | Get dom node's outer html (alias for "outerHtml").

8 | * @property string $outerhtml 9 | *

Get dom node's outer html.

10 | * @property string $innertext 11 | *

Get dom node's inner html (alias for "innerHtml").

12 | * @property string $innerhtml 13 | *

Get dom node's inner html.

14 | * @property string $innerhtmlKeep 15 | *

Get dom node's inner html + keep fix for broken html.

16 | * @property string $plaintext 17 | *

Get dom node's plain text.

18 | * @property string $class 19 | *

Get dom node's class attribute.

20 | * @property string $id 21 | *

Get dom node's id attribute.

22 | * @property SimpleHtmlAttributes $classList 23 | *

Get dom node's class attribute as a list of space-separated tokens.

24 | * @property-read string $tag 25 | *

Get dom node name.

26 | * @property-read string $attr 27 | *

Get dom node attributes.

28 | * @property-read string $text 29 | *

Get dom node's plain text.

30 | * @property-read string $html 31 | *

Get dom node's outer html.

32 | * 33 | * @method SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface|null children() children($idx = -1) 34 | *

Returns children of node.

35 | * @method SimpleHtmlDomInterface|null first_child() 36 | *

Returns the first child of node.

37 | * @method SimpleHtmlDomInterface|null last_child() 38 | *

Returns the last child of node.

39 | * @method SimpleHtmlDomInterface|null next_sibling() 40 | *

Returns the next sibling of node.

41 | * @method SimpleHtmlDomInterface|null prev_sibling() 42 | *

Returns the previous sibling of node.

43 | * @method SimpleHtmlDomInterface|null parent() 44 | *

Returns the parent of node.

45 | * @method string outerText() 46 | *

Get dom node's outer html (alias for "outerHtml()").

47 | * @method string outerHtml() 48 | *

Get dom node's outer html.

49 | * @method string innerText() 50 | *

Get dom node's inner html (alias for "innerHtml()").

51 | * 52 | * @extends \IteratorAggregate 53 | */ 54 | interface SimpleHtmlDomInterface extends \IteratorAggregate 55 | { 56 | /** 57 | * @param string $name 58 | * @param array $arguments 59 | * 60 | * @throws \BadMethodCallException 61 | * 62 | * @return SimpleHtmlDomInterface|string|null 63 | */ 64 | public function __call($name, $arguments); 65 | 66 | /** 67 | * @param string $name 68 | * 69 | * @return array|string|null 70 | */ 71 | public function __get($name); 72 | 73 | /** 74 | * @param string $selector 75 | * @param int $idx 76 | * 77 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 78 | */ 79 | public function __invoke($selector, $idx = null); 80 | 81 | /** 82 | * @param string $name 83 | * 84 | * @return bool 85 | */ 86 | public function __isset($name); 87 | 88 | /** 89 | * @return string 90 | */ 91 | public function __toString(); 92 | 93 | /** 94 | * Return the tag of node 95 | * 96 | * @return string 97 | */ 98 | public function getTag():string; 99 | 100 | /** 101 | * Returns children of node. 102 | * 103 | * @param int $idx 104 | * 105 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface|null 106 | */ 107 | public function childNodes(int $idx = -1); 108 | 109 | /** 110 | * Find list of nodes with a CSS selector. 111 | * 112 | * @param string $selector 113 | * @param int|null $idx 114 | * 115 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 116 | */ 117 | public function find(string $selector, $idx = null); 118 | 119 | /** 120 | * Find nodes with a CSS selector. 121 | * 122 | * @param string $selector 123 | * 124 | * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 125 | */ 126 | public function findMulti(string $selector): SimpleHtmlDomNodeInterface; 127 | 128 | /** 129 | * Find nodes with a CSS selector or false, if no element is found. 
130 | * 131 | * @param string $selector 132 | * 133 | * @return false|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 134 | */ 135 | public function findMultiOrFalse(string $selector); 136 | 137 | /** 138 | * Find one node with a CSS selector. 139 | * 140 | * @param string $selector 141 | * 142 | * @return SimpleHtmlDomInterface 143 | */ 144 | public function findOne(string $selector): self; 145 | 146 | /** 147 | * Find one node with a CSS selector or false, if no element is found. 148 | * 149 | * @param string $selector 150 | * 151 | * @return false|SimpleHtmlDomInterface 152 | */ 153 | public function findOneOrFalse(string $selector); 154 | 155 | /** 156 | * Returns the first child of node. 157 | * 158 | * @return SimpleHtmlDomInterface|null 159 | */ 160 | public function firstChild(); 161 | 162 | /** 163 | * Returns an array of attributes. 164 | * 165 | * @return string[]|null 166 | */ 167 | public function getAllAttributes(); 168 | 169 | /** 170 | * Return attribute value. 171 | * 172 | * @param string $name 173 | * 174 | * @return string 175 | */ 176 | public function getAttribute(string $name): string; 177 | 178 | /** 179 | * Return elements by ".class". 180 | * 181 | * @param string $class 182 | * 183 | * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 184 | */ 185 | public function getElementByClass(string $class); 186 | 187 | /** 188 | * Return element by "#id". 189 | * 190 | * @param string $id 191 | * 192 | * @return SimpleHtmlDomInterface 193 | */ 194 | public function getElementById(string $id): self; 195 | 196 | /** 197 | * Return element by tag name. 198 | * 199 | * @param string $name 200 | * 201 | * @return SimpleHtmlDomInterface 202 | */ 203 | public function getElementByTagName(string $name): self; 204 | 205 | /** 206 | * Returns elements by "#id". 
207 | * 208 | * @param string $id 209 | * @param int|null $idx 210 | * 211 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 212 | */ 213 | public function getElementsById(string $id, $idx = null); 214 | 215 | /** 216 | * Returns elements by tag name. 217 | * 218 | * @param string $name 219 | * @param int|null $idx 220 | * 221 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 222 | */ 223 | public function getElementsByTagName(string $name, $idx = null); 224 | 225 | /** 226 | * Create a new "HtmlDomParser"-object from the current context. 227 | * 228 | * @return HtmlDomParser 229 | */ 230 | public function getHtmlDomParser(): HtmlDomParser; 231 | 232 | /** 233 | * Retrieve an external iterator. 234 | * 235 | * @see http://php.net/manual/en/iteratoraggregate.getiterator.php 236 | * 237 | * @return SimpleHtmlDomNodeInterface 238 | *

239 | * An instance of an object implementing Iterator or 240 | * Traversable 241 | *

242 | */ 243 | public function getIterator(): SimpleHtmlDomNodeInterface; 244 | 245 | /** 246 | * @return \DOMNode 247 | */ 248 | public function getNode(): \DOMNode; 249 | 250 | /** 251 | * Determine if an attribute exists on the element. 252 | * 253 | * @param string $name 254 | * 255 | * @return bool 256 | */ 257 | public function hasAttribute(string $name): bool; 258 | 259 | /** 260 | * Get dom node's outer html. 261 | * 262 | * @param bool $multiDecodeNewHtmlEntity 263 | * 264 | * @return string 265 | */ 266 | public function html(bool $multiDecodeNewHtmlEntity = false): string; 267 | 268 | /** 269 | * Get dom node's inner html. 270 | * 271 | * @param bool $multiDecodeNewHtmlEntity 272 | * @param bool $putBrokenReplacedBack 273 | * 274 | * @return string 275 | */ 276 | public function innerHtml(bool $multiDecodeNewHtmlEntity = false, bool $putBrokenReplacedBack = true): string; 277 | 278 | /** 279 | * Get dom node's inner html. 280 | * 281 | * @param bool $multiDecodeNewHtmlEntity 282 | * 283 | * @return string 284 | */ 285 | public function innerXml(bool $multiDecodeNewHtmlEntity = false): string; 286 | 287 | /** 288 | * Nodes can get partially destroyed in which they're still an 289 | * actual DOM node (such as \DOMElement) but almost their entire 290 | * body is gone, including the `nodeType` attribute. 291 | * 292 | * @return bool true if node has been destroyed 293 | */ 294 | public function isRemoved(): bool; 295 | 296 | /** 297 | * Returns the last child of node. 298 | * 299 | * @return SimpleHtmlDomInterface|null 300 | */ 301 | public function lastChild(); 302 | 303 | /** 304 | * Returns the next sibling of node. 305 | * 306 | * @return SimpleHtmlDomInterface|null 307 | */ 308 | public function nextSibling(); 309 | 310 | /** 311 | * Returns the next sibling of node, and it will ignore whitespace elements. 
312 | * 313 | * @return SimpleHtmlDomInterface|null 314 | */ 315 | public function nextNonWhitespaceSibling(); 316 | 317 | /** 318 | * Returns the previous sibling of node, and it will ignore whitespace elements. 319 | * 320 | * @return SimpleHtmlDomInterface|null 321 | */ 322 | public function previousNonWhitespaceSibling(); 323 | 324 | /** 325 | * Returns the parent of node. 326 | * 327 | * @return SimpleHtmlDomInterface|null 328 | */ 329 | public function parentNode(): ?self; 330 | 331 | /** 332 | * Returns the previous sibling of node. 333 | * 334 | * @return SimpleHtmlDomInterface|null 335 | */ 336 | public function previousSibling(); 337 | 338 | /** 339 | * Remove attribute. 340 | * 341 | * @param string $name

The name of the html-attribute.

342 | * 343 | * @return SimpleHtmlDomInterface 344 | */ 345 | public function removeAttribute(string $name): self; 346 | 347 | /** 348 | * Set attribute value. 349 | * 350 | * @param string $name

The name of the html-attribute.

351 | * @param string|null $value

Set to NULL or empty string, to remove the attribute.

352 | * @param bool $strictEmptyValueCheck

353 | * $value must be NULL, to remove the attribute, 354 | * so that you can set an empty string as attribute-value e.g. autofocus="" 355 | *

356 | * 357 | * @return SimpleHtmlDomInterface 358 | */ 359 | public function setAttribute(string $name, $value = null, bool $strictEmptyValueCheck = false): self; 360 | 361 | /** 362 | * Remove all attributes 363 | * 364 | * @return SimpleHtmlDomInterface 365 | */ 366 | public function removeAttributes(): self; 367 | 368 | /** 369 | * Get dom node's plain text. 370 | * 371 | * @return string 372 | */ 373 | public function text(): string; 374 | 375 | /** 376 | * @param string|string[]|null $value

377 | * null === get the current input value 378 | * text === set a new input value 379 | *

380 | * 381 | * @return string|string[]|null 382 | */ 383 | public function val($value = null); 384 | 385 | /** 386 | * Delete 387 | * 388 | * @return mixed 389 | */ 390 | public function delete(); 391 | } 392 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleHtmlDomNode.php: -------------------------------------------------------------------------------- 1 | |SimpleHtmlDomNodeInterface[]|null 19 | */ 20 | public function find(string $selector, $idx = null) 21 | { 22 | // init 23 | $elements = new static(); 24 | 25 | foreach ($this as $node) { 26 | \assert($node instanceof SimpleHtmlDomInterface); 27 | foreach ($node->find($selector) as $res) { 28 | $elements[] = $res; 29 | } 30 | } 31 | 32 | // return all elements 33 | if ($idx === null) { 34 | if (\count($elements) === 0) { 35 | return new SimpleHtmlDomNodeBlank(); 36 | } 37 | 38 | return $elements; 39 | } 40 | 41 | // handle negative values 42 | if ($idx < 0) { 43 | $idx = \count($elements) + $idx; 44 | } 45 | 46 | // return one element 47 | return $elements[$idx] ?? null; 48 | } 49 | 50 | /** 51 | * Find nodes with a CSS selector. 52 | * 53 | * @param string $selector 54 | * 55 | * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 56 | */ 57 | public function findMulti(string $selector): SimpleHtmlDomNodeInterface 58 | { 59 | return $this->find($selector, null); 60 | } 61 | 62 | /** 63 | * Find nodes with a CSS selector. 64 | * 65 | * @param string $selector 66 | * 67 | * @return false|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 68 | */ 69 | public function findMultiOrFalse(string $selector) 70 | { 71 | $return = $this->find($selector, null); 72 | 73 | if ($return instanceof SimpleHtmlDomNodeBlank) { 74 | return false; 75 | } 76 | 77 | return $return; 78 | } 79 | 80 | /** 81 | * Find one node with a CSS selector. 
82 | * 83 | * @param string $selector 84 | * 85 | * @return SimpleHtmlDomNodeInterface 86 | */ 87 | public function findOne(string $selector) 88 | { 89 | $return = $this->find($selector, 0); 90 | 91 | return $return ?? new SimpleHtmlDomNodeBlank(); 92 | } 93 | 94 | /** 95 | * Find one node with a CSS selector. 96 | * 97 | * @param string $selector 98 | * 99 | * @return false|SimpleHtmlDomNodeInterface 100 | */ 101 | public function findOneOrFalse(string $selector) 102 | { 103 | $return = $this->find($selector, 0); 104 | 105 | return $return ?? false; 106 | } 107 | 108 | /** 109 | * Get html of elements. 110 | * 111 | * @return string[] 112 | */ 113 | public function innerHtml(): array 114 | { 115 | // init 116 | $html = []; 117 | 118 | foreach ($this as $node) { 119 | $html[] = $node->outertext; 120 | } 121 | 122 | return $html; 123 | } 124 | 125 | /** 126 | * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x) 127 | * 128 | * @return string[] 129 | */ 130 | public function innertext() 131 | { 132 | return $this->innerHtml(); 133 | } 134 | 135 | /** 136 | * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x) 137 | * 138 | * @return string[] 139 | */ 140 | public function outertext() 141 | { 142 | return $this->innerHtml(); 143 | } 144 | 145 | /** 146 | * Get plain text. 147 | * 148 | * @return string[] 149 | */ 150 | public function text(): array 151 | { 152 | // init 153 | $text = []; 154 | 155 | foreach ($this as $node) { 156 | $text[] = $node->plaintext; 157 | } 158 | 159 | return $text; 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleHtmlDomNodeBlank.php: -------------------------------------------------------------------------------- 1 | 29 | */ 30 | public function findMulti(string $selector): SimpleHtmlDomNodeInterface 31 | { 32 | return new self(); 33 | } 34 | 35 | /** 36 | * Find nodes with a CSS selector. 
37 | * 38 | * @param string $selector 39 | * 40 | * @return false 41 | */ 42 | public function findMultiOrFalse(string $selector) 43 | { 44 | return false; 45 | } 46 | 47 | /** 48 | * Find one node with a CSS selector. 49 | * 50 | * @param string $selector 51 | * 52 | * @return SimpleHtmlDomInterface 53 | */ 54 | public function findOne(string $selector) 55 | { 56 | return new SimpleHtmlDomBlank(); 57 | } 58 | 59 | /** 60 | * Find one node with a CSS selector or false, if no element is found. 61 | * 62 | * @param string $selector 63 | * 64 | * @return false 65 | */ 66 | public function findOneOrFalse(string $selector) 67 | { 68 | return false; 69 | } 70 | 71 | /** 72 | * @return string[] 73 | */ 74 | public function innerHtml(): array 75 | { 76 | return []; 77 | } 78 | 79 | /** 80 | * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x) 81 | * 82 | * @return string[] 83 | */ 84 | public function innertext() 85 | { 86 | return []; 87 | } 88 | 89 | /** 90 | * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x) 91 | * 92 | * @return string[] 93 | */ 94 | public function outertext() 95 | { 96 | return []; 97 | } 98 | 99 | /** 100 | * @return string[] 101 | */ 102 | public function text(): array 103 | { 104 | return []; 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleHtmlDomNodeInterface.php: -------------------------------------------------------------------------------- 1 | The list items count.

8 | * @property-read string[] $outertext 9 | *

Get dom node's outer html.

10 | * @property-read string[] $plaintext 11 | *

Get dom node's plain text.

12 | * 13 | * @extends \IteratorAggregate 14 | */ 15 | interface SimpleHtmlDomNodeInterface extends \IteratorAggregate 16 | { 17 | /** 18 | * @param string $name 19 | * 20 | * @return array|null 21 | */ 22 | public function __get($name); 23 | 24 | /** 25 | * @param string $selector 26 | * @param int $idx 27 | * 28 | * @return SimpleHtmlDomNodeInterface|SimpleHtmlDomNodeInterface[]|null 29 | */ 30 | public function __invoke($selector, $idx = null); 31 | 32 | /** 33 | * @return string 34 | */ 35 | public function __toString(); 36 | 37 | /** 38 | * Get the number of items in this dom node. 39 | * 40 | * @return int 41 | */ 42 | public function count(); 43 | 44 | /** 45 | * Find list of nodes with a CSS selector. 46 | * 47 | * @param string $selector 48 | * @param int $idx 49 | * 50 | * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|null 51 | */ 52 | public function find(string $selector, $idx = null); 53 | 54 | /** 55 | * Find nodes with a CSS selector. 56 | * 57 | * @param string $selector 58 | * 59 | * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 60 | */ 61 | public function findMulti(string $selector): self; 62 | 63 | /** 64 | * Find nodes with a CSS selector or false, if no element is found. 65 | * 66 | * @param string $selector 67 | * 68 | * @return false|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface 69 | */ 70 | public function findMultiOrFalse(string $selector); 71 | 72 | /** 73 | * Find one node with a CSS selector. 74 | * 75 | * @param string $selector 76 | * 77 | * @return SimpleHtmlDomNodeInterface 78 | */ 79 | public function findOne(string $selector); 80 | 81 | /** 82 | * Find one node with a CSS selector or false, if no element is found. 83 | * 84 | * @param string $selector 85 | * 86 | * @return false|SimpleHtmlDomNodeInterface 87 | */ 88 | public function findOneOrFalse(string $selector); 89 | 90 | /** 91 | * Get html of elements. 
92 | * 93 | * @return string[] 94 | */ 95 | public function innerHtml(): array; 96 | 97 | /** 98 | * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x) 99 | * 100 | * @return string[] 101 | */ 102 | public function innertext(); 103 | 104 | /** 105 | * alias for "$this->innerHtml()" (added for compatibly-reasons with v1.x) 106 | * 107 | * @return string[] 108 | */ 109 | public function outertext(); 110 | 111 | /** 112 | * Get plain text. 113 | * 114 | * @return string[] 115 | */ 116 | public function text(): array; 117 | } 118 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleXmlDom.php: -------------------------------------------------------------------------------- 1 | 13 | */ 14 | class SimpleXmlDom extends AbstractSimpleXmlDom implements \IteratorAggregate, SimpleXmlDomInterface 15 | { 16 | /** 17 | * @param \DOMElement|\DOMNode $node 18 | */ 19 | public function __construct(\DOMNode $node) 20 | { 21 | $this->node = $node; 22 | } 23 | 24 | /** 25 | * @param string $name 26 | * @param array $arguments 27 | * 28 | * @throws \BadMethodCallException 29 | * 30 | * @return SimpleXmlDomInterface|string|null 31 | */ 32 | public function __call($name, $arguments) 33 | { 34 | $name = \strtolower($name); 35 | 36 | if (isset(self::$functionAliases[$name])) { 37 | return \call_user_func_array([$this, self::$functionAliases[$name]], $arguments); 38 | } 39 | 40 | throw new \BadMethodCallException('Method does not exist'); 41 | } 42 | 43 | /** 44 | * Find list of nodes with a CSS or xPath selector. 45 | * 46 | * @param string $selector 47 | * @param int|null $idx 48 | * 49 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 50 | */ 51 | public function find(string $selector, $idx = null) 52 | { 53 | return $this->getXmlDomParser()->find($selector, $idx); 54 | } 55 | 56 | /** 57 | * Returns an array of attributes. 
58 | * 59 | * @return string[]|null 60 | */ 61 | public function getAllAttributes() 62 | { 63 | if ( 64 | $this->node 65 | && 66 | $this->node->hasAttributes() 67 | ) { 68 | $attributes = []; 69 | foreach ($this->node->attributes ?? [] as $attr) { 70 | $attributes[$attr->name] = XmlDomParser::putReplacedBackToPreserveHtmlEntities($attr->value); 71 | } 72 | 73 | return $attributes; 74 | } 75 | 76 | return null; 77 | } 78 | 79 | /** 80 | * @return bool 81 | */ 82 | public function hasAttributes(): bool 83 | { 84 | return $this->node->hasAttributes(); 85 | } 86 | 87 | /** 88 | * Return attribute value. 89 | * 90 | * @param string $name 91 | * 92 | * @return string 93 | */ 94 | public function getAttribute(string $name): string 95 | { 96 | if ($this->node instanceof \DOMElement) { 97 | return XmlDomParser::putReplacedBackToPreserveHtmlEntities( 98 | $this->node->getAttribute($name) 99 | ); 100 | } 101 | 102 | return ''; 103 | } 104 | 105 | /** 106 | * Determine if an attribute exists on the element. 107 | * 108 | * @param string $name 109 | * 110 | * @return bool 111 | */ 112 | public function hasAttribute(string $name): bool 113 | { 114 | if (!$this->node instanceof \DOMElement) { 115 | return false; 116 | } 117 | 118 | return $this->node->hasAttribute($name); 119 | } 120 | 121 | /** 122 | * Get dom node's inner html. 123 | * 124 | * @param bool $multiDecodeNewHtmlEntity 125 | * 126 | * @return string 127 | */ 128 | public function innerXml(bool $multiDecodeNewHtmlEntity = false): string 129 | { 130 | return $this->getXmlDomParser()->innerXml($multiDecodeNewHtmlEntity); 131 | } 132 | 133 | /** 134 | * Remove attribute. 135 | * 136 | * @param string $name

The name of the html-attribute.

137 | * 138 | * @return SimpleXmlDomInterface 139 | */ 140 | public function removeAttribute(string $name): SimpleXmlDomInterface 141 | { 142 | if (\method_exists($this->node, 'removeAttribute')) { 143 | $this->node->removeAttribute($name); 144 | } 145 | 146 | return $this; 147 | } 148 | 149 | /** 150 | * Replace child node. 151 | * 152 | * @param string $string 153 | * @param bool $putBrokenReplacedBack 154 | * 155 | * @return SimpleXmlDomInterface 156 | */ 157 | protected function replaceChildWithString(string $string, bool $putBrokenReplacedBack = true): SimpleXmlDomInterface 158 | { 159 | if (!empty($string)) { 160 | $newDocument = new XmlDomParser($string); 161 | 162 | $tmpDomString = $this->normalizeStringForComparision($newDocument); 163 | $tmpStr = $this->normalizeStringForComparision($string); 164 | 165 | if ($tmpDomString !== $tmpStr) { 166 | throw new \RuntimeException( 167 | 'Not valid XML fragment!' . "\n" . 168 | $tmpDomString . "\n" . 169 | $tmpStr 170 | ); 171 | } 172 | } 173 | 174 | /** @var \DOMNode[] $remove_nodes */ 175 | $remove_nodes = []; 176 | if ($this->node->childNodes->length > 0) { 177 | // INFO: We need to fetch the nodes first, before we can delete them, because of missing references in the dom, 178 | // if we delete the elements on the fly. 179 | foreach ($this->node->childNodes as $node) { 180 | $remove_nodes[] = $node; 181 | } 182 | } 183 | foreach ($remove_nodes as $remove_node) { 184 | $this->node->removeChild($remove_node); 185 | } 186 | 187 | if (!empty($newDocument)) { 188 | $ownerDocument = $this->node->ownerDocument; 189 | if ( 190 | $ownerDocument 191 | && 192 | $newDocument->getDocument()->documentElement 193 | ) { 194 | $newNode = $ownerDocument->importNode($newDocument->getDocument()->documentElement, true); 195 | /** @noinspection UnusedFunctionResultInspection */ 196 | $this->node->appendChild($newNode); 197 | } 198 | } 199 | 200 | return $this; 201 | } 202 | 203 | /** 204 | * Replace this node. 
*
 * @param string $string
 *
 * @return SimpleXmlDomInterface
 */
protected function replaceNodeWithString(string $string): SimpleXmlDomInterface
{
    // An empty replacement means "remove this node from its parent".
    if (empty($string)) {
        if ($this->node->parentNode) {
            $this->node->parentNode->removeChild($this->node);
        }

        return $this;
    }

    $newDocument = new XmlDomParser($string);

    // The parsed fragment must round-trip to the (normalized) input,
    // otherwise the input was not a well-formed XML fragment.
    $tmpDomOuterTextString = $this->normalizeStringForComparision($newDocument);
    $tmpStr = $this->normalizeStringForComparision($string);

    if ($tmpDomOuterTextString !== $tmpStr) {
        throw new \RuntimeException(
            'Not valid XML fragment!' . "\n"
            . $tmpDomOuterTextString . "\n" .
            $tmpStr
        );
    }

    $ownerDocument = $this->node->ownerDocument;
    $parentNode = $this->node->parentNode;
    $newRoot = $newDocument->getDocument()->documentElement;

    // Nothing can be swapped without a document to import into, a parent
    // to swap under (detached nodes have none), or a parsed root element.
    if (
        $ownerDocument === null
        ||
        $parentNode === null
        ||
        $newRoot === null
    ) {
        return $this;
    }

    $newNode = $ownerDocument->importNode($newRoot, true);

    $parentNode->replaceChild($newNode, $this->node);
    $this->node = $newNode;

    return $this;
}

/**
 * Replace this node with a text node.
 *
 * An empty string (or NULL) removes the node from its parent; any other
 * value, including "0", becomes the content of the new text node.
 *
 * @param string $string
 *
 * @return SimpleXmlDomInterface
 */
protected function replaceTextWithString($string): SimpleXmlDomInterface
{
    // Compare against '' instead of using empty(), so the text "0" is kept.
    $text = (string) $string;

    if ($text === '') {
        if ($this->node->parentNode) {
            $this->node->parentNode->removeChild($this->node);
        }

        return $this;
    }

    $ownerDocument = $this->node->ownerDocument;
    $parentNode = $this->node->parentNode;

    // A detached node has no parent to perform the swap on.
    if ($ownerDocument && $parentNode) {
        $newElement = $ownerDocument->createTextNode($text);
        $newNode = $ownerDocument->importNode($newElement, true);
        $parentNode->replaceChild($newNode, $this->node);
        $this->node = $newNode;
    }

    return $this;
}

/**
 * Set attribute value.
280 | * 281 | * @param string $name

The name of the html-attribute.

282 | * @param string|null $value

Set to NULL or empty string, to remove the attribute.

283 | * @param bool $strictEmptyValueCheck

284 | * $value must be NULL, to remove the attribute, 285 | * so that you can set an empty string as attribute-value e.g. autofocus="" 286 | *

*
 * @return SimpleXmlDomInterface
 */
public function setAttribute(string $name, $value = null, bool $strictEmptyValueCheck = false): SimpleXmlDomInterface
{
    // Strict mode: only NULL removes the attribute.
    // Default mode: NULL / '' / false / [] remove it. The string "0" is a
    // real value and must be written (empty() would wrongly drop it).
    if ($strictEmptyValueCheck) {
        $removeAttribute = $value === null;
    } else {
        $removeAttribute = $value === null
            || $value === ''
            || $value === false
            || $value === [];
    }

    if ($removeAttribute) {
        /** @noinspection UnusedFunctionResultInspection */
        $this->removeAttribute($name);
    } elseif (\method_exists($this->node, 'setAttribute')) {
        /** @noinspection UnusedFunctionResultInspection */
        $this->node->setAttribute($name, HtmlDomParser::replaceToPreserveHtmlEntities((string) $value));
    }

    return $this;
}

/**
 * Get dom node's plain text.
 *
 * @return string
 */
public function text(): string
{
    return $this->getXmlDomParser()->fixHtmlOutput($this->node->textContent);
}

/**
 * Get dom node's outer xml.
 *
 * @param bool $multiDecodeNewHtmlEntity
 *
 * @return string
 */
public function xml(bool $multiDecodeNewHtmlEntity = false): string
{
    return $this->getXmlDomParser()->xml($multiDecodeNewHtmlEntity, false);
}

/**
 * Change the name of a tag in a "DOMNode".
 *
 * @param \DOMNode $node
 * @param string $name
 *
 * @return \DOMElement|false
 *

DOMElement a new instance of class DOMElement or false 337 | * if an error occurred.

*/
protected function changeElementName(\DOMNode $node, string $name)
{
    $ownerDocument = $node->ownerDocument;
    if (!$ownerDocument) {
        return false;
    }

    $newNode = $ownerDocument->createElement($name);

    // Deep-copy every child of the old node into the renamed element.
    foreach ($node->childNodes as $child) {
        $child = $ownerDocument->importNode($child, true);
        $newNode->appendChild($child);
    }

    // Copy the attributes by value: setAttribute() expects a string,
    // not the DOMAttr object yielded by the attribute map.
    foreach ($node->attributes ?? [] as $attrName => $attrNode) {
        /** @noinspection UnusedFunctionResultInspection */
        $newNode->setAttribute($attrName, $attrNode->value);
    }

    // Swap the nodes on the actual parent. For the root element the parent
    // is the document itself; for nested elements it is the enclosing node.
    $parentNode = $node->parentNode;
    if ($parentNode) {
        /** @noinspection UnusedFunctionResultInspection */
        $parentNode->replaceChild($newNode, $node);
    }

    return $newNode;
}

/**
 * Returns children of node.
 *
 * @param int $idx -1 returns the whole child list, any other value the child at that index
 *
 * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface|null
 */
public function childNodes(int $idx = -1)
{
    $nodeList = $this->getIterator();

    if ($idx === -1) {
        return $nodeList;
    }

    return $nodeList[$idx] ?? null;
}

/**
 * Find nodes with a CSS or xPath selector.
 *
 * @param string $selector
 *
 * @return SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface
 */
public function findMulti(string $selector): SimpleXmlDomNodeInterface
{
    return $this->getXmlDomParser()->findMulti($selector);
}

/**
 * Find nodes with a CSS or xPath selector or false, if no element is found.
 *
 * @param string $selector
 *
 * @return false|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface
 */
public function findMultiOrFalse(string $selector)
{
    return $this->getXmlDomParser()->findMultiOrFalse($selector);
}

/**
 * Find one node with a CSS or xPath selector.
410 | * 411 | * @param string $selector 412 | * 413 | * @return SimpleXmlDomInterface 414 | */ 415 | public function findOne(string $selector): SimpleXmlDomInterface 416 | { 417 | return $this->getXmlDomParser()->findOne($selector); 418 | } 419 | 420 | /** 421 | * Find one node with a CSS or xPath selector or false, if no element is found. 422 | * 423 | * @param string $selector 424 | * 425 | * @return false|SimpleXmlDomInterface 426 | */ 427 | public function findOneOrFalse(string $selector) 428 | { 429 | return $this->getXmlDomParser()->findOneOrFalse($selector); 430 | } 431 | 432 | /** 433 | * Returns the first child of node. 434 | * 435 | * @return SimpleXmlDomInterface|null 436 | */ 437 | public function firstChild() 438 | { 439 | /** @var \DOMNode|null $node */ 440 | $node = $this->node->firstChild; 441 | 442 | if ($node === null) { 443 | return null; 444 | } 445 | 446 | return new static($node); 447 | } 448 | 449 | /** 450 | * Return elements by ".class". 451 | * 452 | * @param string $class 453 | * 454 | * @return SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 455 | */ 456 | public function getElementByClass(string $class): SimpleXmlDomNodeInterface 457 | { 458 | return $this->findMulti(".{$class}"); 459 | } 460 | 461 | /** 462 | * Return element by #id. 463 | * 464 | * @param string $id 465 | * 466 | * @return SimpleXmlDomInterface 467 | */ 468 | public function getElementById(string $id): SimpleXmlDomInterface 469 | { 470 | return $this->findOne("#{$id}"); 471 | } 472 | 473 | /** 474 | * Return element by tag name. 
475 | * 476 | * @param string $name 477 | * 478 | * @return SimpleXmlDomInterface 479 | */ 480 | public function getElementByTagName(string $name): SimpleXmlDomInterface 481 | { 482 | if ($this->node instanceof \DOMElement) { 483 | $node = $this->node->getElementsByTagName($name)->item(0); 484 | } else { 485 | $node = null; 486 | } 487 | 488 | if ($node === null) { 489 | return new SimpleXmlDomBlank(); 490 | } 491 | 492 | return new static($node); 493 | } 494 | 495 | /** 496 | * Returns elements by "#id". 497 | * 498 | * @param string $id 499 | * @param int|null $idx 500 | * 501 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 502 | */ 503 | public function getElementsById(string $id, $idx = null) 504 | { 505 | return $this->find("#{$id}", $idx); 506 | } 507 | 508 | /** 509 | * Returns elements by tag name. 510 | * 511 | * @param string $name 512 | * @param int|null $idx 513 | * 514 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 515 | */ 516 | public function getElementsByTagName(string $name, $idx = null) 517 | { 518 | if ($this->node instanceof \DOMElement) { 519 | $nodesList = $this->node->getElementsByTagName($name); 520 | } else { 521 | $nodesList = []; 522 | } 523 | 524 | $elements = new SimpleXmlDomNode(); 525 | 526 | foreach ($nodesList as $node) { 527 | $elements[] = new static($node); 528 | } 529 | 530 | // return all elements 531 | if ($idx === null) { 532 | if (\count($elements) === 0) { 533 | return new SimpleXmlDomNodeBlank(); 534 | } 535 | 536 | return $elements; 537 | } 538 | 539 | // handle negative values 540 | if ($idx < 0) { 541 | $idx = \count($elements) + $idx; 542 | } 543 | 544 | // return one element 545 | return $elements[$idx] ?? 
new SimpleXmlDomBlank(); 546 | } 547 | 548 | /** 549 | * @return \DOMNode 550 | */ 551 | public function getNode(): \DOMNode 552 | { 553 | return $this->node; 554 | } 555 | 556 | /** 557 | * Create a new "XmlDomParser"-object from the current context. 558 | * 559 | * @return XmlDomParser 560 | */ 561 | public function getXmlDomParser(): XmlDomParser 562 | { 563 | return new XmlDomParser($this); 564 | } 565 | 566 | /** 567 | * Get dom node's inner html. 568 | * 569 | * @param bool $multiDecodeNewHtmlEntity 570 | * @param bool $putBrokenReplacedBack 571 | * 572 | * @return string 573 | */ 574 | public function innerHtml(bool $multiDecodeNewHtmlEntity = false, bool $putBrokenReplacedBack = true): string 575 | { 576 | return $this->getXmlDomParser()->innerHtml($multiDecodeNewHtmlEntity, $putBrokenReplacedBack); 577 | } 578 | 579 | /** 580 | * Nodes can get partially destroyed in which they're still an 581 | * actual DOM node (such as \DOMElement) but almost their entire 582 | * body is gone, including the `nodeType` attribute. 583 | * 584 | * @return bool true if node has been destroyed 585 | */ 586 | public function isRemoved(): bool 587 | { 588 | return !isset($this->node->nodeType); 589 | } 590 | 591 | /** 592 | * Returns the last child of node. 593 | * 594 | * @return SimpleXmlDomInterface|null 595 | */ 596 | public function lastChild() 597 | { 598 | /** @var \DOMNode|null $node */ 599 | $node = $this->node->lastChild; 600 | 601 | if ($node === null) { 602 | return null; 603 | } 604 | 605 | return new static($node); 606 | } 607 | 608 | /** 609 | * Returns the next sibling of node. 610 | * 611 | * @return SimpleXmlDomInterface|null 612 | */ 613 | public function nextSibling() 614 | { 615 | /** @var \DOMNode|null $node */ 616 | $node = $this->node->nextSibling; 617 | 618 | if ($node === null) { 619 | return null; 620 | } 621 | 622 | return new static($node); 623 | } 624 | 625 | /** 626 | * Returns the next sibling of node. 
*
 * @return SimpleXmlDomInterface|null
 */
public function nextNonWhitespaceSibling()
{
    /** @var \DOMNode|null $node */
    $node = $this->node->nextSibling;

    // Skip siblings whose text content is empty or whitespace-only.
    // Strict comparison keeps nodes whose text is "0".
    while ($node && \trim($node->textContent) === '') {
        /** @var \DOMNode|null $node */
        $node = $node->nextSibling;
    }

    // The loop may run off the end of the sibling list.
    if ($node === null) {
        return null;
    }

    return new static($node);
}

/**
 * Returns the parent of node.
 *
 * @return SimpleXmlDomInterface the parent, or a blank element if this node has no parent
 */
public function parentNode(): SimpleXmlDomInterface
{
    $node = $this->node->parentNode;

    // Detached nodes and the document node itself have no parent.
    if ($node === null) {
        return new SimpleXmlDomBlank();
    }

    return new static($node);
}

/**
 * Returns the previous sibling of node.
 *
 * @return SimpleXmlDomInterface|null
 */
public function previousSibling()
{
    /** @var \DOMNode|null $node */
    $node = $this->node->previousSibling;

    if ($node === null) {
        return null;
    }

    return new static($node);
}

/**
 * Returns the previous sibling of node, and it will ignore whitespace elements.
 *
 * @return SimpleXmlDomInterface|null
 */
public function previousNonWhitespaceSibling()
{
    /** @var \DOMNode|null $node */
    $node = $this->node->previousSibling;

    // Skip siblings whose text content is empty or whitespace-only.
    // Strict comparison keeps nodes whose text is "0".
    while ($node && \trim($node->textContent) === '') {
        /** @var \DOMNode|null $node */
        $node = $node->previousSibling;
    }

    if ($node === null) {
        return null;
    }

    return new static($node);
}

/**
 * @param string|string[]|null $value

698 | * null === get the current input value 699 | * text === set a new input value 700 | *

*
 * @return string|string[]|null
 */
public function val($value = null)
{
    if ($value === null) {
        // Getter: plain inputs (hidden / text / no type) expose their "value" attribute.
        if (
            $this->tag === 'input'
            &&
            (
                $this->getAttribute('type') === 'hidden'
                ||
                $this->getAttribute('type') === 'text'
                ||
                !$this->hasAttribute('type')
            )
        ) {
            return $this->getAttribute('value');
        }

        // Getter: checkboxes / radios only have a value while they are checked.
        if (
            $this->hasAttribute('checked')
            &&
            \in_array($this->getAttribute('type'), ['checkbox', 'radio'], true)
        ) {
            return $this->getAttribute('value');
        }

        // Getter: a select reports the values of its selected options.
        if ($this->node->nodeName === 'select') {
            $valuesFromDom = [];
            $options = $this->getElementsByTagName('option');
            if ($options instanceof SimpleXmlDomNode) {
                foreach ($options as $option) {
                    // Mirror the setter below, which marks options with "selected".
                    if ($option->hasAttribute('selected')) {
                        $valuesFromDom[] = (string) $option->getAttribute('value');
                    }
                }
            }

            if (\count($valuesFromDom) === 0) {
                return null;
            }

            return $valuesFromDom;
        }

        if ($this->node->nodeName === 'textarea') {
            return $this->node->nodeValue;
        }
    } else {
        /** @noinspection NestedPositiveIfStatementsInspection */
        if (\in_array($this->getAttribute('type'), ['checkbox', 'radio'], true)) {
            // Setter: check the box only when the new value matches its own value.
            if ($value === $this->getAttribute('value')) {
                /** @noinspection UnusedFunctionResultInspection */
                $this->setAttribute('checked', 'checked');
            } else {
                /** @noinspection UnusedFunctionResultInspection */
                $this->removeAttribute('checked');
            }
        } elseif ($this->node instanceof \DOMElement && $this->node->nodeName === 'select') {
            // Setter: select the matching option and deselect all others.
            foreach ($this->node->getElementsByTagName('option') as $option) {
                /** @var \DOMElement $option */
                if ($value === $option->getAttribute('value')) {
                    /** @noinspection UnusedFunctionResultInspection */
                    $option->setAttribute('selected', 'selected');
                }
else { 767 | $option->removeAttribute('selected'); 768 | } 769 | } 770 | } elseif ($this->node->nodeName === 'input' && \is_string($value)) { 771 | // Set value for input elements 772 | /** @noinspection UnusedFunctionResultInspection */ 773 | $this->setAttribute('value', $value); 774 | } elseif ($this->node->nodeName === 'textarea' && \is_string($value)) { 775 | $this->node->nodeValue = $value; 776 | } 777 | } 778 | 779 | return null; 780 | } 781 | 782 | /** 783 | * Retrieve an external iterator. 784 | * 785 | * @see http://php.net/manual/en/iteratoraggregate.getiterator.php 786 | * 787 | * @return SimpleXmlDomNode 788 | *

789 | * An instance of an object implementing Iterator or 790 | * Traversable 791 | *

792 | */ 793 | public function getIterator(): SimpleXmlDomNodeInterface 794 | { 795 | $elements = new SimpleXmlDomNode(); 796 | if ($this->node->hasChildNodes()) { 797 | foreach ($this->node->childNodes as $node) { 798 | $elements[] = new static($node); 799 | } 800 | } 801 | 802 | return $elements; 803 | } 804 | 805 | /** 806 | * Normalize the given input for comparision. 807 | * 808 | * @param string|XmlDomParser $input 809 | * 810 | * @return string 811 | */ 812 | private function normalizeStringForComparision($input): string 813 | { 814 | if ($input instanceof XmlDomParser) { 815 | $string = $input->html(false, false); 816 | } else { 817 | $string = (string) $input; 818 | } 819 | 820 | return 821 | \urlencode( 822 | \urldecode( 823 | \trim( 824 | \str_replace( 825 | [ 826 | ' ', 827 | "\n", 828 | "\r", 829 | '/>', 830 | ], 831 | [ 832 | '', 833 | '', 834 | '', 835 | '>', 836 | ], 837 | \strtolower($string) 838 | ) 839 | ) 840 | ) 841 | ); 842 | } 843 | } 844 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleXmlDomBlank.php: -------------------------------------------------------------------------------- 1 | 13 | */ 14 | class SimpleXmlDomBlank extends AbstractSimpleXmlDom implements \IteratorAggregate, SimpleXmlDomInterface 15 | { 16 | /** 17 | * @param string $name 18 | * @param array $arguments 19 | * 20 | * @throws \BadMethodCallException 21 | * 22 | * @return SimpleXmlDomInterface|string|null 23 | */ 24 | public function __call($name, $arguments) 25 | { 26 | $name = \strtolower($name); 27 | 28 | if (isset(self::$functionAliases[$name])) { 29 | return \call_user_func_array([$this, self::$functionAliases[$name]], $arguments); 30 | } 31 | 32 | throw new \BadMethodCallException('Method does not exist'); 33 | } 34 | 35 | /** 36 | * Find list of nodes with a CSS or xPath selector. 
37 | * 38 | * @param string $selector 39 | * @param int|null $idx 40 | * 41 | * @return SimpleXmlDomNodeInterface 42 | */ 43 | public function find(string $selector, $idx = null) 44 | { 45 | return new SimpleXmlDomNodeBlank(); 46 | } 47 | 48 | /** 49 | * Returns an array of attributes. 50 | * 51 | * @return null 52 | */ 53 | public function getAllAttributes() 54 | { 55 | return null; 56 | } 57 | 58 | /** 59 | * @return bool 60 | */ 61 | public function hasAttributes(): bool 62 | { 63 | return false; 64 | } 65 | 66 | /** 67 | * Return attribute value. 68 | * 69 | * @param string $name 70 | * 71 | * @return string 72 | */ 73 | public function getAttribute(string $name): string 74 | { 75 | return ''; 76 | } 77 | 78 | /** 79 | * Determine if an attribute exists on the element. 80 | * 81 | * @param string $name 82 | * 83 | * @return bool 84 | */ 85 | public function hasAttribute(string $name): bool 86 | { 87 | return false; 88 | } 89 | 90 | /** 91 | * Get dom node's inner xml. 92 | * 93 | * @param bool $multiDecodeNewHtmlEntity 94 | * 95 | * @return string 96 | */ 97 | public function innerXml(bool $multiDecodeNewHtmlEntity = false): string 98 | { 99 | return ''; 100 | } 101 | 102 | /** 103 | * Remove attribute. 104 | * 105 | * @param string $name

The name of the XML attribute.

106 | * 107 | * @return SimpleXmlDomInterface 108 | */ 109 | public function removeAttribute(string $name): SimpleXmlDomInterface 110 | { 111 | return $this; 112 | } 113 | 114 | /** 115 | * @param string $string 116 | * @param bool $putBrokenReplacedBack 117 | * 118 | * @return SimpleXmlDomInterface 119 | */ 120 | protected function replaceChildWithString(string $string, bool $putBrokenReplacedBack = true): SimpleXmlDomInterface 121 | { 122 | return new static(); 123 | } 124 | 125 | /** 126 | * @param string $string 127 | * 128 | * @return SimpleXmlDomInterface 129 | */ 130 | protected function replaceNodeWithString(string $string): SimpleXmlDomInterface 131 | { 132 | return new static(); 133 | } 134 | 135 | /** 136 | * @param string $string 137 | * 138 | * @return SimpleXmlDomInterface 139 | */ 140 | protected function replaceTextWithString($string): SimpleXmlDomInterface 141 | { 142 | return new static(); 143 | } 144 | 145 | /** 146 | * Set attribute value. 147 | * 148 | * @param string $name

The name of the XML attribute.

149 | * @param string|null $value

Set to NULL or empty string, to remove the attribute.

150 | * @param bool $strictEmptyValueCheck

151 | * $value must be NULL, to remove the attribute, 152 | * so that you can set an empty string as attribute-value e.g. autofocus="" 153 | *

154 | * 155 | * @return SimpleXmlDomInterface 156 | */ 157 | public function setAttribute(string $name, $value = null, bool $strictEmptyValueCheck = false): SimpleXmlDomInterface 158 | { 159 | return $this; 160 | } 161 | 162 | /** 163 | * Get dom node's plain text. 164 | * 165 | * @return string 166 | */ 167 | public function text(): string 168 | { 169 | return ''; 170 | } 171 | 172 | /** 173 | * Get dom node's outer html. 174 | * 175 | * @param bool $multiDecodeNewHtmlEntity 176 | * 177 | * @return string 178 | */ 179 | public function xml(bool $multiDecodeNewHtmlEntity = false): string 180 | { 181 | return ''; 182 | } 183 | 184 | /** 185 | * Returns children of node. 186 | * 187 | * @param int $idx 188 | * 189 | * @return null 190 | */ 191 | public function childNodes(int $idx = -1) 192 | { 193 | return null; 194 | } 195 | 196 | /** 197 | * Find nodes with a CSS or xPath selector. 198 | * 199 | * @param string $selector 200 | * 201 | * @return SimpleXmlDomNodeInterface 202 | */ 203 | public function findMulti(string $selector): SimpleXmlDomNodeInterface 204 | { 205 | return new SimpleXmlDomNodeBlank(); 206 | } 207 | 208 | /** 209 | * Find nodes with a CSS or xPath selector or false, if no element is found. 210 | * 211 | * @param string $selector 212 | * 213 | * @return false 214 | */ 215 | public function findMultiOrFalse(string $selector) 216 | { 217 | return false; 218 | } 219 | 220 | /** 221 | * Find one node with a CSS or xPath selector. 222 | * 223 | * @param string $selector 224 | * 225 | * @return SimpleXmlDomInterface 226 | */ 227 | public function findOne(string $selector): SimpleXmlDomInterface 228 | { 229 | return new static(); 230 | } 231 | 232 | /** 233 | * Find one node with a CSS or xPath selector or false, if no element is found. 
234 | * 235 | * @param string $selector 236 | * 237 | * @return false 238 | */ 239 | public function findOneOrFalse(string $selector) 240 | { 241 | return false; 242 | } 243 | 244 | /** 245 | * Returns the first child of node. 246 | * 247 | * @return null 248 | */ 249 | public function firstChild() 250 | { 251 | return null; 252 | } 253 | 254 | /** 255 | * Return elements by ".class". 256 | * 257 | * @param string $class 258 | * 259 | * @return SimpleXmlDomNodeInterface 260 | */ 261 | public function getElementByClass(string $class): SimpleXmlDomNodeInterface 262 | { 263 | return new SimpleXmlDomNodeBlank(); 264 | } 265 | 266 | /** 267 | * Return element by #id. 268 | * 269 | * @param string $id 270 | * 271 | * @return SimpleXmlDomInterface 272 | */ 273 | public function getElementById(string $id): SimpleXmlDomInterface 274 | { 275 | return new static(); 276 | } 277 | 278 | /** 279 | * Return element by tag name. 280 | * 281 | * @param string $name 282 | * 283 | * @return SimpleXmlDomInterface 284 | */ 285 | public function getElementByTagName(string $name): SimpleXmlDomInterface 286 | { 287 | return new static(); 288 | } 289 | 290 | /** 291 | * Returns elements by "#id". 292 | * 293 | * @param string $id 294 | * @param int|null $idx 295 | * 296 | * @return SimpleXmlDomNodeInterface 297 | */ 298 | public function getElementsById(string $id, $idx = null) 299 | { 300 | return new SimpleXmlDomNodeBlank(); 301 | } 302 | 303 | /** 304 | * Returns elements by tag name. 305 | * 306 | * @param string $name 307 | * @param int|null $idx 308 | * 309 | * @return SimpleXmlDomNodeInterface 310 | */ 311 | public function getElementsByTagName(string $name, $idx = null) 312 | { 313 | return new SimpleXmlDomNodeBlank(); 314 | } 315 | 316 | /** 317 | * @return \DOMNode 318 | */ 319 | public function getNode(): \DOMNode 320 | { 321 | return new \DOMNode(); 322 | } 323 | 324 | /** 325 | * Create a new "XmlDomParser"-object from the current context. 
326 | * 327 | * @return XmlDomParser 328 | */ 329 | public function getXmlDomParser(): XmlDomParser 330 | { 331 | return new XmlDomParser($this); 332 | } 333 | 334 | /** 335 | * Get dom node's inner html. 336 | * 337 | * @param bool $multiDecodeNewHtmlEntity 338 | * @param bool $putBrokenReplacedBack 339 | * 340 | * @return string 341 | */ 342 | public function innerHtml(bool $multiDecodeNewHtmlEntity = false, bool $putBrokenReplacedBack = true): string 343 | { 344 | return ''; 345 | } 346 | 347 | /** 348 | * Nodes can get partially destroyed in which they're still an 349 | * actual DOM node (such as \DOMElement) but almost their entire 350 | * body is gone, including the `nodeType` attribute. 351 | * 352 | * @return bool true if node has been destroyed 353 | */ 354 | public function isRemoved(): bool 355 | { 356 | return true; 357 | } 358 | 359 | /** 360 | * Returns the last child of node. 361 | * 362 | * @return null 363 | */ 364 | public function lastChild() 365 | { 366 | return null; 367 | } 368 | 369 | /** 370 | * Returns the next sibling of node. 371 | * 372 | * @return null 373 | */ 374 | public function nextSibling() 375 | { 376 | return null; 377 | } 378 | 379 | /** 380 | * Returns the next sibling of node. 381 | * 382 | * @return null 383 | */ 384 | public function nextNonWhitespaceSibling() 385 | { 386 | return null; 387 | } 388 | 389 | /** 390 | * Returns the parent of node. 391 | * 392 | * @return SimpleXmlDomInterface 393 | */ 394 | public function parentNode(): SimpleXmlDomInterface 395 | { 396 | return new static(); 397 | } 398 | 399 | /** 400 | * Returns the previous sibling of node. 401 | * 402 | * @return null 403 | */ 404 | public function previousSibling() 405 | { 406 | return null; 407 | } 408 | 409 | /** 410 | * Returns the previous sibling of node. 411 | * 412 | * @return null 413 | */ 414 | public function previousNonWhitespaceSibling() 415 | { 416 | return null; 417 | } 418 | 419 | /** 420 | * @param string|string[]|null $value

421 | * null === get the current input value 422 | * text === set a new input value 423 | *

424 | * 425 | * @return string|string[]|null 426 | */ 427 | public function val($value = null) 428 | { 429 | return null; 430 | } 431 | 432 | /** 433 | * Retrieve an external iterator. 434 | * 435 | * @see http://php.net/manual/en/iteratoraggregate.getiterator.php 436 | * 437 | * @return SimpleXmlDomNodeInterface 438 | *

439 | * An instance of an object implementing Iterator or 440 | * Traversable 441 | *

442 | */ 443 | public function getIterator(): SimpleXmlDomNodeInterface 444 | { 445 | return new SimpleXmlDomNodeBlank(); 446 | } 447 | } 448 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleXmlDomInterface.php: -------------------------------------------------------------------------------- 1 | Get dom node's outer html (alias for "outerHtml").

8 | * @property string $outerhtml 9 | *

Get dom node's outer html.

10 | * @property string $innertext 11 | *

Get dom node's inner html (alias for "innerHtml").

12 | * @property string $innerhtml 13 | *

Get dom node's inner html.

14 | * @property string $plaintext 15 | *

Get dom node's plain text.

16 | * @property-read string $tag 17 | *

Get dom node name.

18 | * @property-read string $attr 19 | *

Get dom node attributes.

20 | * @property-read string $text 21 | *

Get dom node's plain text.

22 | * @property-read string $html 23 | *

Get dom node's outer html.

24 | * 25 | * @method SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface|null children() children($idx = -1) 26 | *

Returns children of node.

27 | * @method SimpleXmlDomInterface|null first_child() 28 | *

Returns the first child of node.

29 | * @method SimpleXmlDomInterface|null last_child() 30 | *

Returns the last child of node.

31 | * @method SimpleXmlDomInterface|null next_sibling() 32 | *

Returns the next sibling of node.

33 | * @method SimpleXmlDomInterface|null prev_sibling() 34 | *

Returns the previous sibling of node.

35 | * @method SimpleXmlDomInterface|null parent() 36 | *

Returns the parent of node.

37 | * @method string outerText() 38 | *

Get dom node's outer html (alias for "outerHtml()").

39 | * @method string outerHtml() 40 | *

Get dom node's outer html.

41 | * @method string innerText() 42 | *

Get dom node's inner html (alias for "innerHtml()").

43 | * 44 | * @extends \IteratorAggregate 45 | */ 46 | interface SimpleXmlDomInterface extends \IteratorAggregate 47 | { 48 | /** 49 | * @param string $name 50 | * @param array $arguments 51 | * 52 | * @throws \BadMethodCallException 53 | * 54 | * @return SimpleXmlDomInterface|string|null 55 | */ 56 | public function __call($name, $arguments); 57 | 58 | /** 59 | * @param string $name 60 | * 61 | * @return array|string|null 62 | */ 63 | public function __get($name); 64 | 65 | /** 66 | * @param string $selector 67 | * @param int $idx 68 | * 69 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 70 | */ 71 | public function __invoke($selector, $idx = null); 72 | 73 | /** 74 | * @param string $name 75 | * 76 | * @return bool 77 | */ 78 | public function __isset($name); 79 | 80 | /** 81 | * @return string 82 | */ 83 | public function __toString(); 84 | 85 | /** 86 | * Returns children of node. 87 | * 88 | * @param int $idx 89 | * 90 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface|null 91 | */ 92 | public function childNodes(int $idx = -1); 93 | 94 | /** 95 | * Find list of nodes with a CSS or xPath selector. 96 | * 97 | * @param string $selector 98 | * @param int|null $idx 99 | * 100 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 101 | */ 102 | public function find(string $selector, $idx = null); 103 | 104 | /** 105 | * Find nodes with a CSS or xPath selector. 106 | * 107 | * @param string $selector 108 | * 109 | * @return SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 110 | */ 111 | public function findMulti(string $selector): SimpleXmlDomNodeInterface; 112 | 113 | /** 114 | * Find nodes with a CSS or xPath selector or false, if no element is found. 
115 | * 116 | * @param string $selector 117 | * 118 | * @return false|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 119 | */ 120 | public function findMultiOrFalse(string $selector); 121 | 122 | /** 123 | * Find one node with a CSS or xPath selector. 124 | * 125 | * @param string $selector 126 | * 127 | * @return SimpleXmlDomInterface 128 | */ 129 | public function findOne(string $selector): self; 130 | 131 | /** 132 | * Find one node with a CSS or xPath selector or false, if no element is found. 133 | * 134 | * @param string $selector 135 | * 136 | * @return false|SimpleXmlDomInterface 137 | */ 138 | public function findOneOrFalse(string $selector); 139 | 140 | /** 141 | * Returns the first child of node. 142 | * 143 | * @return SimpleXmlDomInterface|null 144 | */ 145 | public function firstChild(); 146 | 147 | /** 148 | * Returns an array of attributes. 149 | * 150 | * @return string[]|null 151 | */ 152 | public function getAllAttributes(); 153 | 154 | /** 155 | * @return bool 156 | */ 157 | public function hasAttributes(): bool; 158 | 159 | /** 160 | * Return attribute value. 161 | * 162 | * @param string $name 163 | * 164 | * @return string 165 | */ 166 | public function getAttribute(string $name): string; 167 | 168 | /** 169 | * Return elements by ".class". 170 | * 171 | * @param string $class 172 | * 173 | * @return SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 174 | */ 175 | public function getElementByClass(string $class); 176 | 177 | /** 178 | * Return element by "#id". 179 | * 180 | * @param string $id 181 | * 182 | * @return SimpleXmlDomInterface 183 | */ 184 | public function getElementById(string $id): self; 185 | 186 | /** 187 | * Return element by tag name. 188 | * 189 | * @param string $name 190 | * 191 | * @return SimpleXmlDomInterface 192 | */ 193 | public function getElementByTagName(string $name): self; 194 | 195 | /** 196 | * Returns elements by "#id". 
197 | * 198 | * @param string $id 199 | * @param int|null $idx 200 | * 201 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 202 | */ 203 | public function getElementsById(string $id, $idx = null); 204 | 205 | /** 206 | * Returns elements by tag name. 207 | * 208 | * @param string $name 209 | * @param int|null $idx 210 | * 211 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 212 | */ 213 | public function getElementsByTagName(string $name, $idx = null); 214 | 215 | /** 216 | * Retrieve an external iterator. 217 | * 218 | * @see http://php.net/manual/en/iteratoraggregate.getiterator.php 219 | * 220 | * @return SimpleXmlDomNodeInterface 221 | *

222 | * An instance of an object implementing Iterator or 223 | * Traversable 224 | *

225 | */ 226 | public function getIterator(): SimpleXmlDomNodeInterface; 227 | 228 | /** 229 | * @return \DOMNode 230 | */ 231 | public function getNode(): \DOMNode; 232 | 233 | /** 234 | * Create a new "XmlDomParser"-object from the current context. 235 | * 236 | * @return XmlDomParser 237 | */ 238 | public function getXmlDomParser(): XmlDomParser; 239 | 240 | /** 241 | * Determine if an attribute exists on the element. 242 | * 243 | * @param string $name 244 | * 245 | * @return bool 246 | */ 247 | public function hasAttribute(string $name): bool; 248 | 249 | /** 250 | * Get dom node's inner html. 251 | * 252 | * @param bool $multiDecodeNewHtmlEntity 253 | * @param bool $putBrokenReplacedBack 254 | * 255 | * @return string 256 | */ 257 | public function innerHtml(bool $multiDecodeNewHtmlEntity = false, bool $putBrokenReplacedBack = true): string; 258 | 259 | /** 260 | * Get dom node's inner xml. 261 | * 262 | * @param bool $multiDecodeNewHtmlEntity 263 | * 264 | * @return string 265 | */ 266 | public function innerXml(bool $multiDecodeNewHtmlEntity = false): string; 267 | 268 | /** 269 | * Nodes can get partially destroyed in which they're still an 270 | * actual DOM node (such as \DOMElement) but almost their entire 271 | * body is gone, including the `nodeType` attribute. 272 | * 273 | * @return bool true if node has been destroyed 274 | */ 275 | public function isRemoved(): bool; 276 | 277 | /** 278 | * Returns the last child of node. 279 | * 280 | * @return SimpleXmlDomInterface|null 281 | */ 282 | public function lastChild(); 283 | 284 | /** 285 | * Returns the next sibling of node. 286 | * 287 | * @return SimpleXmlDomInterface|null 288 | */ 289 | public function nextSibling(); 290 | 291 | /** 292 | * Returns the next sibling of node. 293 | * 294 | * @return SimpleXmlDomInterface|null 295 | */ 296 | public function nextNonWhitespaceSibling(); 297 | 298 | /** 299 | * Returns the parent of node.
300 | * 301 | * @return SimpleXmlDomInterface 302 | */ 303 | public function parentNode(): self; 304 | 305 | /** 306 | * Returns the previous sibling of node. 307 | * 308 | * @return SimpleXmlDomInterface|null 309 | */ 310 | public function previousSibling(); 311 | 312 | /** 313 | * Returns the previous sibling of node. 314 | * 315 | * @return SimpleXmlDomInterface|null 316 | */ 317 | public function previousNonWhitespaceSibling(); 318 | 319 | /** 320 | * Remove attribute. 321 | * 322 | * @param string $name

The name of the XML attribute.

323 | * 324 | * @return SimpleXmlDomInterface 325 | */ 326 | public function removeAttribute(string $name): self; 327 | 328 | /** 329 | * Set attribute value. 330 | * 331 | * @param string $name

The name of the XML attribute.

332 | * @param string|null $value

Set to NULL or empty string, to remove the attribute.

333 | * @param bool $strictEmptyValueCheck

334 | * $value must be NULL, to remove the attribute, 335 | * so that you can set an empty string as attribute-value e.g. autofocus="" 336 | *

337 | * 338 | * @return SimpleXmlDomInterface 339 | */ 340 | public function setAttribute(string $name, $value = null, bool $strictEmptyValueCheck = false): self; 341 | 342 | /** 343 | * Get dom node's plain text. 344 | * 345 | * @return string 346 | */ 347 | public function text(): string; 348 | 349 | /** 350 | * @param string|string[]|null $value

351 | * null === get the current input value 352 | * text === set a new input value 353 | *

354 | * 355 | * @return string|string[]|null 356 | */ 357 | public function val($value = null); 358 | 359 | /** 360 | * Get dom node's outer html. 361 | * 362 | * @param bool $multiDecodeNewHtmlEntity 363 | * 364 | * @return string 365 | */ 366 | public function xml(bool $multiDecodeNewHtmlEntity = false): string; 367 | } 368 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleXmlDomNode.php: -------------------------------------------------------------------------------- 1 | |SimpleXmlDomNodeInterface[]|null 19 | */ 20 | public function find(string $selector, $idx = null) 21 | { 22 | // init 23 | $elements = new static(); 24 | 25 | foreach ($this as $node) { 26 | \assert($node instanceof SimpleXmlDomInterface); 27 | foreach ($node->find($selector) as $res) { 28 | $elements->append($res); 29 | } 30 | } 31 | 32 | // return all elements 33 | if ($idx === null) { 34 | if (\count($elements) === 0) { 35 | return new SimpleXmlDomNodeBlank(); 36 | } 37 | 38 | return $elements; 39 | } 40 | 41 | // handle negative values 42 | if ($idx < 0) { 43 | $idx = \count($elements) + $idx; 44 | } 45 | 46 | // return one element 47 | return $elements[$idx] ?? null; 48 | } 49 | 50 | /** 51 | * Find nodes with a CSS or xPath selector. 52 | * 53 | * @param string $selector 54 | * 55 | * @return SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 56 | */ 57 | public function findMulti(string $selector): SimpleXmlDomNodeInterface 58 | { 59 | return $this->find($selector, null); 60 | } 61 | 62 | /** 63 | * Find nodes with a CSS or xPath selector. 
64 | * 65 | * @param string $selector 66 | * 67 | * @return false|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 68 | */ 69 | public function findMultiOrFalse(string $selector) 70 | { 71 | $return = $this->find($selector, null); 72 | 73 | if ($return instanceof SimpleXmlDomNodeBlank) { 74 | return false; 75 | } 76 | 77 | return $return; 78 | } 79 | 80 | /** 81 | * Find one node with a CSS or xPath selector. 82 | * 83 | * @param string $selector 84 | * 85 | * @return SimpleXmlDomNodeInterface 86 | */ 87 | public function findOne(string $selector) 88 | { 89 | $return = $this->find($selector, 0); 90 | 91 | return $return ?? new SimpleXmlDomNodeBlank(); 92 | } 93 | 94 | /** 95 | * Find one node with a CSS or xPath selector or false, if no element is found. 96 | * 97 | * @param string $selector 98 | * 99 | * @return false|SimpleXmlDomNodeInterface 100 | */ 101 | public function findOneOrFalse(string $selector) 102 | { 103 | $return = $this->find($selector, 0); 104 | 105 | return $return ?? false; 106 | } 107 | 108 | /** 109 | * Get html of elements. 110 | * 111 | * @return string[] 112 | */ 113 | public function innerHtml(): array 114 | { 115 | // init 116 | $html = []; 117 | 118 | foreach ($this as $node) { 119 | $html[] = $node->outertext; 120 | } 121 | 122 | return $html; 123 | } 124 | 125 | /** 126 | * alias for "$this->innerHtml()" (added for compatibility reasons with v1.x) 127 | * 128 | * @return string[] 129 | */ 130 | public function innertext() 131 | { 132 | return $this->innerHtml(); 133 | } 134 | 135 | /** 136 | * alias for "$this->innerHtml()" (added for compatibility reasons with v1.x) 137 | * 138 | * @return string[] 139 | */ 140 | public function outertext() 141 | { 142 | return $this->innerHtml(); 143 | } 144 | 145 | /** 146 | * Get plain text.
147 | * 148 | * @return string[] 149 | */ 150 | public function text(): array 151 | { 152 | // init 153 | $text = []; 154 | 155 | foreach ($this as $node) { 156 | $text[] = $node->plaintext; 157 | } 158 | 159 | return $text; 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleXmlDomNodeBlank.php: -------------------------------------------------------------------------------- 1 | 29 | */ 30 | public function findMulti(string $selector): SimpleXmlDomNodeInterface 31 | { 32 | return new self(); 33 | } 34 | 35 | /** 36 | * Find nodes with a CSS or xPath selector. 37 | * 38 | * @param string $selector 39 | * 40 | * @return false 41 | */ 42 | public function findMultiOrFalse(string $selector) 43 | { 44 | return false; 45 | } 46 | 47 | /** 48 | * Find one node with a CSS or xPath selector. 49 | * 50 | * @param string $selector 51 | * 52 | * @return SimpleXmlDomInterface 53 | */ 54 | public function findOne(string $selector) 55 | { 56 | return new SimpleXmlDomBlank(); 57 | } 58 | 59 | /** 60 | * @param string $selector 61 | * 62 | * @return false 63 | */ 64 | public function findOneOrFalse(string $selector) 65 | { 66 | return false; 67 | } 68 | 69 | /** 70 | * @return string[] 71 | */ 72 | public function innerHtml(): array 73 | { 74 | return []; 75 | } 76 | 77 | /** 78 | * alias for "$this->innerHtml()" (added for compatibility reasons with v1.x) 79 | * 80 | * @return string[] 81 | */ 82 | public function innertext() 83 | { 84 | return []; 85 | } 86 | 87 | /** 88 | * alias for "$this->innerHtml()" (added for compatibility reasons with v1.x) 89 | * 90 | * @return string[] 91 | */ 92 | public function outertext() 93 | { 94 | return []; 95 | } 96 | 97 | /** 98 | * @return string[] 99 | */ 100 | public function text(): array 101 | { 102 | return []; 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/voku/helper/SimpleXmlDomNodeInterface.php:
-------------------------------------------------------------------------------- 1 | The list items count.

8 | * @property-read string[] $outertext 9 | *

Get dom node's outer html.

10 | * @property-read string[] $plaintext 11 | *

Get dom node's plain text.

12 | * 13 | * @extends \IteratorAggregate 14 | */ 15 | interface SimpleXmlDomNodeInterface extends \IteratorAggregate 16 | { 17 | /** 18 | * @param string $name 19 | * 20 | * @return array|null 21 | */ 22 | public function __get($name); 23 | 24 | /** 25 | * @param string $selector 26 | * @param int $idx 27 | * 28 | * @return SimpleXmlDomNodeInterface|SimpleXmlDomNodeInterface[]|null 29 | */ 30 | public function __invoke($selector, $idx = null); 31 | 32 | /** 33 | * @return string 34 | */ 35 | public function __toString(); 36 | 37 | /** 38 | * Get the number of items in this dom node. 39 | * 40 | * @return int 41 | */ 42 | public function count(); 43 | 44 | /** 45 | * Find list of nodes with a CSS or xPath selector. 46 | * 47 | * @param string $selector 48 | * @param int $idx 49 | * 50 | * @return SimpleXmlDomNode|SimpleXmlDomNode[]|null 51 | */ 52 | public function find(string $selector, $idx = null); 53 | 54 | /** 55 | * Find nodes with a CSS or xPath selector. 56 | * 57 | * @param string $selector 58 | * 59 | * @return SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 60 | */ 61 | public function findMulti(string $selector): self; 62 | 63 | /** 64 | * Find nodes with a CSS or xPath selector. 65 | * 66 | * @param string $selector 67 | * 68 | * @return false|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 69 | */ 70 | public function findMultiOrFalse(string $selector); 71 | 72 | /** 73 | * Find one node with a CSS or xPath selector. 74 | * 75 | * @param string $selector 76 | * 77 | * @return SimpleXmlDomInterface 78 | */ 79 | public function findOne(string $selector); 80 | 81 | /** 82 | * Find one node with a CSS or xPath selector or false, if no element is found. 83 | * 84 | * @param string $selector 85 | * 86 | * @return false|SimpleXmlDomInterface 87 | */ 88 | public function findOneOrFalse(string $selector); 89 | 90 | /** 91 | * Get html of elements. 
92 | * 93 | * @return string[] 94 | */ 95 | public function innerHtml(): array; 96 | 97 | /** 98 | * alias for "$this->innerHtml()" (added for compatibility reasons with v1.x) 99 | * 100 | * @return string[] 101 | */ 102 | public function innertext(); 103 | 104 | /** 105 | * alias for "$this->innerHtml()" (added for compatibility reasons with v1.x) 106 | * 107 | * @return string[] 108 | */ 109 | public function outertext(); 110 | 111 | /** 112 | * Get plain text. 113 | * 114 | * @return string[] 115 | */ 116 | public function text(): array; 117 | } 118 | -------------------------------------------------------------------------------- /src/voku/helper/XmlDomParser.php: -------------------------------------------------------------------------------- 1 | Get dom node's plain text.

10 | * 11 | * @method static XmlDomParser file_get_xml($xml, $libXMLExtraOptions = null) 12 | *

Load XML from file.

13 | * @method static XmlDomParser str_get_xml($xml, $libXMLExtraOptions = null) 14 | *

Load XML from string.

15 | */ 16 | class XmlDomParser extends AbstractDomParser 17 | { 18 | /** 19 | * @var callable|null 20 | * 21 | * @phpstan-var null|callable(string $cssSelectorString, string $xPathString, \DOMXPath, \voku\helper\XmlDomParser): string 22 | */ 23 | private $callbackXPathBeforeQuery; 24 | 25 | /** 26 | * @var callable|null 27 | * 28 | * @phpstan-var null|callable(string $xmlString, \voku\helper\XmlDomParser): string 29 | */ 30 | private $callbackBeforeCreateDom; 31 | 32 | /** 33 | * @var bool 34 | */ 35 | private $autoRemoveXPathNamespaces = false; 36 | 37 | /** 38 | * @var bool 39 | */ 40 | private $autoRegisterXPathNamespaces = false; 41 | 42 | /** 43 | * @var bool 44 | */ 45 | private $reportXmlErrorsAsException = false; 46 | 47 | /** 48 | * @var string[] 49 | * 50 | * @phpstan-var array 51 | */ 52 | private $xPathNamespaces = []; 53 | 54 | /** 55 | * @param \DOMNode|SimpleXmlDomInterface|string $element HTML code or SimpleXmlDomInterface, \DOMNode 56 | */ 57 | public function __construct($element = null) 58 | { 59 | $this->document = new \DOMDocument('1.0', $this->getEncoding()); 60 | 61 | // DOMDocument settings 62 | $this->document->preserveWhiteSpace = true; 63 | $this->document->formatOutput = true; 64 | 65 | if ($element instanceof SimpleXmlDomInterface) { 66 | $element = $element->getNode(); 67 | } 68 | 69 | if ($element instanceof \DOMNode) { 70 | $domNode = $this->document->importNode($element, true); 71 | 72 | if ($domNode instanceof \DOMNode) { 73 | /** @noinspection UnusedFunctionResultInspection */ 74 | $this->document->appendChild($domNode); 75 | } 76 | 77 | return; 78 | } 79 | 80 | if ($element !== null) { 81 | $this->loadXml($element); 82 | } 83 | } 84 | 85 | /** 86 | * @param string $name 87 | * @param array $arguments 88 | * 89 | * @throws \BadMethodCallException 90 | * @throws \RuntimeException 91 | * 92 | * @return static 93 | */ 94 | public static function __callStatic($name, $arguments) 95 | { 96 | $arguments0 = $arguments[0] ?? 
''; 97 | 98 | $arguments1 = $arguments[1] ?? null; 99 | 100 | if ($name === 'str_get_xml') { 101 | $parser = new static(); 102 | 103 | return $parser->loadXml($arguments0, $arguments1); 104 | } 105 | 106 | if ($name === 'file_get_xml') { 107 | $parser = new static(); 108 | 109 | return $parser->loadXmlFile($arguments0, $arguments1); 110 | } 111 | 112 | throw new \BadMethodCallException('Method does not exist'); 113 | } 114 | 115 | /** @noinspection MagicMethodsValidityInspection */ 116 | 117 | /** 118 | * @param string $name 119 | * 120 | * @return string|null 121 | */ 122 | public function __get($name) 123 | { 124 | $name = \strtolower($name); 125 | 126 | if ($name === 'plaintext') { 127 | return $this->text(); 128 | } 129 | 130 | return null; 131 | } 132 | 133 | /** 134 | * @return string 135 | */ 136 | public function __toString() 137 | { 138 | return $this->xml(false, false, true, 0); 139 | } 140 | 141 | /** 142 | * Create DOMDocument from XML. 143 | * 144 | * @param string $xml 145 | * @param int|null $libXMLExtraOptions 146 | * @param bool $useDefaultLibXMLOptions 147 | * 148 | * @return \DOMDocument 149 | */ 150 | protected function createDOMDocument(string $xml, $libXMLExtraOptions = null, $useDefaultLibXMLOptions = true): \DOMDocument 151 | { 152 | if ($this->callbackBeforeCreateDom) { 153 | $xml = \call_user_func($this->callbackBeforeCreateDom, $xml, $this); 154 | } 155 | 156 | // set error level 157 | $internalErrors = \libxml_use_internal_errors(true); 158 | if (\PHP_VERSION_ID < 80000) { 159 | $disableEntityLoader = \libxml_disable_entity_loader(true); 160 | } 161 | \libxml_clear_errors(); 162 | 163 | $optionsXml = 0; 164 | if ($useDefaultLibXMLOptions) { 165 | $optionsXml = \LIBXML_DTDLOAD | \LIBXML_DTDATTR | \LIBXML_NONET; 166 | 167 | if (\defined('LIBXML_BIGLINES')) { 168 | $optionsXml |= \LIBXML_BIGLINES; 169 | } 170 | 171 | if (\defined('LIBXML_COMPACT')) { 172 | $optionsXml |= \LIBXML_COMPACT; 173 | } 174 | } 175 | 176 | if ($libXMLExtraOptions 
!== null) { 177 | $optionsXml |= $libXMLExtraOptions; 178 | } 179 | 180 | $this->xPathNamespaces = []; // reset 181 | $matches = []; 182 | \preg_match_all('#xmlns:(?<namespaceKey>.*)=(["\'])(?<namespaceValue>.*)\\2#Ui', $xml, $matches); 183 | foreach ($matches['namespaceKey'] ?? [] as $index => $key) { 184 | if ($key) { 185 | $this->xPathNamespaces[\trim($key, ':')] = $matches['namespaceValue'][$index]; 186 | } 187 | } 188 | 189 | if ($this->autoRemoveXPathNamespaces) { 190 | $xml = $this->removeXPathNamespaces($xml); 191 | } 192 | 193 | $xml = self::replaceToPreserveHtmlEntities($xml); 194 | 195 | $documentFound = false; 196 | $sxe = \simplexml_load_string($xml, \SimpleXMLElement::class, $optionsXml); 197 | $xmlErrors = \libxml_get_errors(); 198 | if ($sxe !== false && \count($xmlErrors) === 0) { 199 | $domElementTmp = \dom_import_simplexml($sxe); 200 | if ($domElementTmp->ownerDocument instanceof \DOMDocument) { 201 | $documentFound = true; 202 | $this->document = $domElementTmp->ownerDocument; 203 | } 204 | } 205 | 206 | if ($documentFound === false) { 207 | // UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251 208 | $xmlHackUsed = false; 209 | /** @noinspection StringFragmentMisplacedInspection */ 210 | if (\stripos($xml, '<?xml') !== 0) { 211 | $xmlHackUsed = true; 212 | $xml = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $xml; 213 | } 214 | 215 | $documentFound = $this->document->loadXML($xml, $optionsXml); 216 | 217 | // remove the "xml-encoding" hack 218 | if ($xmlHackUsed) { 219 | foreach ($this->document->childNodes as $child) { 220 | if ($child->nodeType === \XML_PI_NODE) { 221 | /** @noinspection UnusedFunctionResultInspection */ 222 | $this->document->removeChild($child); 223 | 224 | break; 225 | } 226 | } 227 | } 228 | } 229 | 230 | if ( 231 | $documentFound === false 232 | && 233 | \count($xmlErrors) > 0 234 | ) { 235 | $errorStr = 'XML-Errors: ' . \print_r($xmlErrors, true) . ' in ' .
\print_r($xml, true); 236 | 237 | if (!$this->reportXmlErrorsAsException) { 238 | \trigger_error($errorStr, \E_USER_WARNING); 239 | } else { 240 | throw new \InvalidArgumentException($errorStr); 241 | } 242 | } 243 | 244 | // set encoding 245 | $this->document->encoding = $this->getEncoding(); 246 | 247 | // restore lib-xml settings 248 | \libxml_clear_errors(); 249 | \libxml_use_internal_errors($internalErrors); 250 | if (\PHP_VERSION_ID < 80000 && isset($disableEntityLoader)) { 251 | \libxml_disable_entity_loader($disableEntityLoader); 252 | } 253 | 254 | return $this->document; 255 | } 256 | 257 | /** 258 | * Find list of nodes with a CSS or xPath selector. 259 | * 260 | * @param string $selector 261 | * @param int|null $idx 262 | * 263 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 264 | */ 265 | public function find(string $selector, $idx = null) 266 | { 267 | $xPathQuery = SelectorConverter::toXPath($selector, true, false); 268 | 269 | $xPath = new \DOMXPath($this->document); 270 | 271 | if ($this->autoRegisterXPathNamespaces) { 272 | foreach ($this->xPathNamespaces as $key => $value) { 273 | $xPath->registerNamespace($key, $value); 274 | } 275 | } 276 | 277 | if ($this->callbackXPathBeforeQuery) { 278 | $xPathQuery = \call_user_func($this->callbackXPathBeforeQuery, $selector, $xPathQuery, $xPath, $this); 279 | } 280 | 281 | $nodesList = $xPath->query($xPathQuery); 282 | 283 | $elements = new SimpleXmlDomNode(); 284 | 285 | if ($nodesList) { 286 | foreach ($nodesList as $node) { 287 | $elements[] = new SimpleXmlDom($node); 288 | } 289 | } 290 | 291 | // return all elements 292 | if ($idx === null) { 293 | if (\count($elements) === 0) { 294 | return new SimpleXmlDomNodeBlank(); 295 | } 296 | 297 | return $elements; 298 | } 299 | 300 | // handle negative values 301 | if ($idx < 0) { 302 | $idx = \count($elements) + $idx; 303 | } 304 | 305 | // return one element 306 | return $elements[$idx] ?? 
new SimpleXmlDomBlank(); 307 | } 308 | 309 | /** 310 | * Find nodes with a CSS or xPath selector. 311 | * 312 | * @param string $selector 313 | * 314 | * @return SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 315 | */ 316 | public function findMulti(string $selector): SimpleXmlDomNodeInterface 317 | { 318 | return $this->find($selector, null); 319 | } 320 | 321 | /** 322 | * Find nodes with a CSS or xPath selector or false, if no element is found. 323 | * 324 | * @param string $selector 325 | * 326 | * @return false|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 327 | */ 328 | public function findMultiOrFalse(string $selector) 329 | { 330 | $return = $this->find($selector, null); 331 | 332 | if ($return instanceof SimpleXmlDomNodeBlank) { 333 | return false; 334 | } 335 | 336 | return $return; 337 | } 338 | 339 | /** 340 | * Find one node with a CSS or xPath selector. 341 | * 342 | * @param string $selector 343 | * 344 | * @return SimpleXmlDomInterface 345 | */ 346 | public function findOne(string $selector): SimpleXmlDomInterface 347 | { 348 | return $this->find($selector, 0); 349 | } 350 | 351 | /** 352 | * Find one node with a CSS or xPath selector or false, if no element is found. 
353 | * 354 | * @param string $selector 355 | * 356 | * @return false|SimpleXmlDomInterface 357 | */ 358 | public function findOneOrFalse(string $selector) 359 | { 360 | $return = $this->find($selector, 0); 361 | 362 | if ($return instanceof SimpleXmlDomBlank) { 363 | return false; 364 | } 365 | 366 | return $return; 367 | } 368 | 369 | /** 370 | * @param string $content 371 | * @param bool $multiDecodeNewHtmlEntity 372 | * @param bool $putBrokenReplacedBack 373 | * 374 | * @return string 375 | */ 376 | public function fixHtmlOutput( 377 | string $content, 378 | bool $multiDecodeNewHtmlEntity = false, 379 | bool $putBrokenReplacedBack = true 380 | ): string { 381 | $content = $this->decodeHtmlEntity($content, $multiDecodeNewHtmlEntity); 382 | 383 | return self::putReplacedBackToPreserveHtmlEntities($content, $putBrokenReplacedBack); 384 | } 385 | 386 | /** 387 | * Return elements by ".class". 388 | * 389 | * @param string $class 390 | * 391 | * @return SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 392 | */ 393 | public function getElementByClass(string $class): SimpleXmlDomNodeInterface 394 | { 395 | return $this->findMulti(".{$class}"); 396 | } 397 | 398 | /** 399 | * Return element by #id. 400 | * 401 | * @param string $id 402 | * 403 | * @return SimpleXmlDomInterface 404 | */ 405 | public function getElementById(string $id): SimpleXmlDomInterface 406 | { 407 | return $this->findOne("#{$id}"); 408 | } 409 | 410 | /** 411 | * Return element by tag name. 412 | * 413 | * @param string $name 414 | * 415 | * @return SimpleXmlDomInterface 416 | */ 417 | public function getElementByTagName(string $name): SimpleXmlDomInterface 418 | { 419 | $node = $this->document->getElementsByTagName($name)->item(0); 420 | 421 | if ($node === null) { 422 | return new SimpleXmlDomBlank(); 423 | } 424 | 425 | return new SimpleXmlDom($node); 426 | } 427 | 428 | /** 429 | * Returns elements by "#id". 
430 | * 431 | * @param string $id 432 | * @param int|null $idx 433 | * 434 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 435 | */ 436 | public function getElementsById(string $id, $idx = null) 437 | { 438 | return $this->find("#{$id}", $idx); 439 | } 440 | 441 | /** 442 | * Returns elements by tag name. 443 | * 444 | * @param string $name 445 | * @param int|null $idx 446 | * 447 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 448 | */ 449 | public function getElementsByTagName(string $name, $idx = null) 450 | { 451 | $nodesList = $this->document->getElementsByTagName($name); 452 | 453 | $elements = new SimpleXmlDomNode(); 454 | 455 | foreach ($nodesList as $node) { 456 | $elements[] = new SimpleXmlDom($node); 457 | } 458 | 459 | // return all elements 460 | if ($idx === null) { 461 | if (\count($elements) === 0) { 462 | return new SimpleXmlDomNodeBlank(); 463 | } 464 | 465 | return $elements; 466 | } 467 | 468 | // handle negative values 469 | if ($idx < 0) { 470 | $idx = \count($elements) + $idx; 471 | } 472 | 473 | // return one element 474 | return $elements[$idx] ?? new SimpleXmlDomNodeBlank(); 475 | } 476 | 477 | /** 478 | * Get dom node's outer html. 479 | * 480 | * @param bool $multiDecodeNewHtmlEntity 481 | * @param bool $putBrokenReplacedBack 482 | * 483 | * @return string 484 | */ 485 | public function html(bool $multiDecodeNewHtmlEntity = false, bool $putBrokenReplacedBack = true): string 486 | { 487 | if (static::$callback !== null) { 488 | \call_user_func(static::$callback, [$this]); 489 | } 490 | 491 | $content = $this->document->saveHTML(); 492 | 493 | if ($content === false) { 494 | return ''; 495 | } 496 | 497 | return $this->fixHtmlOutput($content, $multiDecodeNewHtmlEntity, $putBrokenReplacedBack); 498 | } 499 | 500 | /** 501 | * Load HTML from string. 
502 | * 503 | * @param string $html 504 | * @param int|null $libXMLExtraOptions 505 | * 506 | * @return $this 507 | */ 508 | public function loadHtml(string $html, $libXMLExtraOptions = null): DomParserInterface 509 | { 510 | $this->document = $this->createDOMDocument($html, $libXMLExtraOptions); 511 | 512 | return $this; 513 | } 514 | 515 | /** 516 | * Load HTML from file. 517 | * 518 | * @param string $filePath 519 | * @param int|null $libXMLExtraOptions 520 | * 521 | * @throws \RuntimeException 522 | * 523 | * @return $this 524 | */ 525 | public function loadHtmlFile(string $filePath, $libXMLExtraOptions = null): DomParserInterface 526 | { 527 | if ( 528 | !\preg_match("/^https?:\/\//i", $filePath) 529 | && 530 | !\file_exists($filePath) 531 | ) { 532 | throw new \RuntimeException("File {$filePath} not found"); 533 | } 534 | 535 | try { 536 | if (\class_exists('\voku\helper\UTF8')) { 537 | $html = \voku\helper\UTF8::file_get_contents($filePath); 538 | } else { 539 | $html = \file_get_contents($filePath); 540 | } 541 | } catch (\Exception $e) { 542 | throw new \RuntimeException("Could not load file {$filePath}"); 543 | } 544 | 545 | if ($html === false) { 546 | throw new \RuntimeException("Could not load file {$filePath}"); 547 | } 548 | 549 | return $this->loadHtml($html, $libXMLExtraOptions); 550 | } 551 | 552 | /** 553 | * @param string $selector 554 | * @param int $idx 555 | * 556 | * @return SimpleXmlDomInterface|SimpleXmlDomInterface[]|SimpleXmlDomNodeInterface 557 | */ 558 | public function __invoke($selector, $idx = null) 559 | { 560 | return $this->find($selector, $idx); 561 | } 562 | 563 | /** 564 | * @param string $xml 565 | * 566 | * @return string 567 | */ 568 | private function removeXPathNamespaces(string $xml): string 569 | { 570 | foreach ($this->xPathNamespaces as $key => $value) { 571 | $xml = \str_replace($key . 
':', '', $xml); 572 | } 573 | 574 | return (string) \preg_replace('#xmlns:?.*=(["\'])(?:.*)\\1#Ui', '', $xml); 575 | } 576 | 577 | /** 578 | * Load XML from string. 579 | * 580 | * @param string $xml 581 | * @param int|null $libXMLExtraOptions 582 | * @param bool $useDefaultLibXMLOptions 583 | * 584 | * @return $this 585 | */ 586 | public function loadXml(string $xml, $libXMLExtraOptions = null, $useDefaultLibXMLOptions = true): self 587 | { 588 | $this->document = $this->createDOMDocument($xml, $libXMLExtraOptions, $useDefaultLibXMLOptions); 589 | 590 | return $this; 591 | } 592 | 593 | /** 594 | * Load XML from file. 595 | * 596 | * @param string $filePath 597 | * @param int|null $libXMLExtraOptions 598 | * @param bool $useDefaultLibXMLOptions 599 | * 600 | * @throws \RuntimeException 601 | * 602 | * @return $this 603 | */ 604 | public function loadXmlFile(string $filePath, $libXMLExtraOptions = null, $useDefaultLibXMLOptions = true): self 605 | { 606 | if ( 607 | !\preg_match("/^https?:\/\//i", $filePath) 608 | && 609 | !\file_exists($filePath) 610 | ) { 611 | throw new \RuntimeException("File {$filePath} not found"); 612 | } 613 | 614 | try { 615 | if (\class_exists('\voku\helper\UTF8')) { 616 | $xml = \voku\helper\UTF8::file_get_contents($filePath); 617 | } else { 618 | $xml = \file_get_contents($filePath); 619 | } 620 | } catch (\Exception $e) { 621 | throw new \RuntimeException("Could not load file {$filePath}"); 622 | } 623 | 624 | if ($xml === false) { 625 | throw new \RuntimeException("Could not load file {$filePath}"); 626 | } 627 | 628 | return $this->loadXml($xml, $libXMLExtraOptions, $useDefaultLibXMLOptions); 629 | } 630 | 631 | /** 632 | * @param callable $callback 633 | * @param \DOMNode|null $domNode 634 | * 635 | * @return void 636 | */ 637 | public function replaceTextWithCallback($callback, \DOMNode $domNode = null) 638 | { 639 | if ($domNode === null) { 640 | $domNode = $this->document; 641 | } 642 | 643 | if ($domNode->hasChildNodes()) { 644 
| $children = []; 645 | 646 | // since looping through a DOM being modified is a bad idea we prepare an array: 647 | foreach ($domNode->childNodes as $child) { 648 | $children[] = $child; 649 | } 650 | 651 | foreach ($children as $child) { 652 | if ($child->nodeType === \XML_TEXT_NODE) { 653 | /** @noinspection PhpSillyAssignmentInspection */ 654 | /** @var \DOMText $child */ 655 | $child = $child; 656 | 657 | $oldText = self::putReplacedBackToPreserveHtmlEntities($child->wholeText); 658 | $newText = $callback($oldText); 659 | if ($domNode->ownerDocument) { 660 | $newTextNode = $domNode->ownerDocument->createTextNode(self::replaceToPreserveHtmlEntities($newText)); 661 | $domNode->replaceChild($newTextNode, $child); 662 | } 663 | } else { 664 | $this->replaceTextWithCallback($callback, $child); 665 | } 666 | } 667 | } 668 | } 669 | 670 | /** 671 | * @param bool $autoRemoveXPathNamespaces 672 | * 673 | * @return $this 674 | */ 675 | public function autoRemoveXPathNamespaces(bool $autoRemoveXPathNamespaces = true): self 676 | { 677 | $this->autoRemoveXPathNamespaces = $autoRemoveXPathNamespaces; 678 | 679 | return $this; 680 | } 681 | 682 | /** 683 | * @param bool $autoRegisterXPathNamespaces 684 | * 685 | * @return $this 686 | */ 687 | public function autoRegisterXPathNamespaces(bool $autoRegisterXPathNamespaces = true): self 688 | { 689 | $this->autoRegisterXPathNamespaces = $autoRegisterXPathNamespaces; 690 | 691 | return $this; 692 | } 693 | 694 | /** 695 | * @param callable $callbackXPathBeforeQuery 696 | * 697 | * @phpstan-param callable(string $cssSelectorString, string $xPathString, \DOMXPath, \voku\helper\XmlDomParser): string $callbackXPathBeforeQuery 698 | * 699 | * @return $this 700 | */ 701 | public function setCallbackXPathBeforeQuery(callable $callbackXPathBeforeQuery): self 702 | { 703 | $this->callbackXPathBeforeQuery = $callbackXPathBeforeQuery; 704 | 705 | return $this; 706 | } 707 | 708 | /** 709 | * @param callable $callbackBeforeCreateDom 710 | * 
711 | * @phpstan-param callable(string $xmlString, \voku\helper\XmlDomParser): string $callbackBeforeCreateDom 712 | * 713 | * @return $this 714 | */ 715 | public function setCallbackBeforeCreateDom(callable $callbackBeforeCreateDom): self 716 | { 717 | $this->callbackBeforeCreateDom = $callbackBeforeCreateDom; 718 | 719 | return $this; 720 | } 721 | 722 | /** 723 | * @param bool $reportXmlErrorsAsException 724 | * 725 | * @return $this 726 | */ 727 | public function reportXmlErrorsAsException(bool $reportXmlErrorsAsException = true): self 728 | { 729 | $this->reportXmlErrorsAsException = $reportXmlErrorsAsException; 730 | 731 | return $this; 732 | } 733 | } 734 | --------------------------------------------------------------------------------