├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── composer.json └── src ├── Css2Xpath.php ├── DOMXPath.php ├── Document.php ├── Document ├── NodeList.php └── Query.php ├── Exception ├── BadMethodCallException.php ├── ExceptionInterface.php └── RuntimeException.php ├── NodeList.php └── Query.php /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file, in reverse chronological order by release. 4 | 5 | ## 2.7.3 - TBD 6 | 7 | ### Added 8 | 9 | - Nothing. 10 | 11 | ### Changed 12 | 13 | - Nothing. 14 | 15 | ### Deprecated 16 | 17 | - Nothing. 18 | 19 | ### Removed 20 | 21 | - Nothing. 22 | 23 | ### Fixed 24 | 25 | - Nothing. 26 | 27 | ## 2.7.2 - 2019-06-18 28 | 29 | ### Added 30 | 31 | - [#26](https://github.com/zendframework/zend-dom/pull/26) adds support for PHP 7.3. 32 | 33 | ### Changed 34 | 35 | - Nothing. 36 | 37 | ### Deprecated 38 | 39 | - Nothing. 40 | 41 | ### Removed 42 | 43 | - Nothing. 44 | 45 | ### Fixed 46 | 47 | - Nothing. 48 | 49 | ## 2.7.1 - 2018-04-09 50 | 51 | ### Added 52 | 53 | - Nothing. 54 | 55 | ### Changed 56 | 57 | - Nothing. 58 | 59 | ### Deprecated 60 | 61 | - Nothing. 62 | 63 | ### Removed 64 | 65 | - Nothing. 66 | 67 | ### Fixed 68 | 69 | - [#21](https://github.com/zendframework/zend-dom/pull/21) fixes an issue with 70 | matching against nested attribute selectors (e.g., `div[class="foo"] div 71 | [class="bar"]`), ensuring such syntax will transform to expected XPath. 72 | 73 | - [#22](https://github.com/zendframework/zend-dom/pull/22) adds a missing import 74 | statement for the `DOMNode` class to the (deprecated) `Zend\Dom\Query` class 75 | definition. 76 | 77 | - [#24](https://github.com/zendframework/zend-dom/pull/24) updates how the 78 | tokenizer marks multiple words within attribute values in order to be 79 | more robust. 
80 | 81 | - [#23](https://github.com/zendframework/zend-dom/pull/23) fixes an issue with 82 | how descendant selectors work, ensuring spaces may be used around the `>` 83 | operator. 84 | 85 | ## 2.7.0 - 2018-03-27 86 | 87 | ### Added 88 | 89 | - [#20](https://github.com/zendframework/zend-dom/pull/20) adds support for 90 | attribute selectors that contain spaces, such as `input[value="Marty McFly"]`. 91 | Previously, spaces within the selector value would result in a query per 92 | space-separated word; they now, correctly, result in a single query for the 93 | exact value. 94 | 95 | - [#19](https://github.com/zendframework/zend-dom/pull/19) adds support for PHP 96 | versions 7.1 and 7.2. 97 | 98 | - Adds documentation and publishes it to https://docs.zendframework.com/zend-dom/ 99 | 100 | ### Deprecated 101 | 102 | - Nothing. 103 | 104 | ### Removed 105 | 106 | - [#13](https://github.com/zendframework/zend-dom/pull/13) and 107 | [#19](https://github.com/zendframework/zend-dom/pull/19) remove support for PHP 108 | versions prior to 5.6. 109 | 110 | - [#13](https://github.com/zendframework/zend-dom/pull/13) and 111 | [#19](https://github.com/zendframework/zend-dom/pull/19) remove support for HHVM. 112 | 113 | ### Fixed 114 | 115 | - Nothing. 116 | 117 | ## 2.6.0 - 2015-10-13 118 | 119 | ### Added 120 | 121 | - [#2](https://github.com/zendframework/zend-dom/pull/2) adds context node 122 | support for DOMXPath->query that supports querying in the context of a 123 | specific node. 124 | 125 | ### Deprecated 126 | 127 | - Nothing. 128 | 129 | ### Removed 130 | 131 | - Nothing. 132 | 133 | ### Fixed 134 | 135 | - [#5](https://github.com/zendframework/zend-dom/pull/5) - Increase test coverage and improve tests. 136 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005-2017, Zend Technologies USA, Inc. 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | - Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | - Redistributions in binary form must reproduce the above copyright notice, this 11 | list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | - Neither the name of Zend Technologies USA, Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from this 16 | software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # zend-dom 2 | 3 | > ## Repository abandoned 2019-12-31 4 | > 5 | > This repository has moved to [laminas/laminas-dom](https://github.com/laminas/laminas-dom). 
6 | 7 | [![Build Status](https://secure.travis-ci.org/zendframework/zend-dom.svg?branch=master)](https://secure.travis-ci.org/zendframework/zend-dom) 8 | [![Coverage Status](https://coveralls.io/repos/github/zendframework/zend-dom/badge.svg?branch=master)](https://coveralls.io/github/zendframework/zend-dom?branch=master) 9 | 10 | The `Zend\Dom` component provides tools for working with DOM documents and 11 | structures. Currently, we offer `Zend\Dom\Query`, which provides a unified 12 | interface for querying DOM documents utilizing both XPath and CSS selectors. 13 | 14 | 15 | - File issues at https://github.com/zendframework/zend-dom/issues 16 | - Documentation is at https://docs.zendframework.com/zend-dom 17 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zendframework/zend-dom", 3 | "description": "provides tools for working with DOM documents and structures", 4 | "license": "BSD-3-Clause", 5 | "keywords": [ 6 | "zf", 7 | "zendframework", 8 | "dom" 9 | ], 10 | "support": { 11 | "docs": "https://docs.zendframework.com/zend-dom/", 12 | "issues": "https://github.com/zendframework/zend-dom/issues", 13 | "source": "https://github.com/zendframework/zend-dom", 14 | "rss": "https://github.com/zendframework/zend-dom/releases.atom", 15 | "slack": "https://zendframework-slack.herokuapp.com", 16 | "forum": "https://discourse.zendframework.com/c/questions/components" 17 | }, 18 | "require": { 19 | "php": "^5.6 || ^7.0" 20 | }, 21 | "require-dev": { 22 | "phpunit/phpunit": "^5.7.23 || ^6.4.3", 23 | "zendframework/zend-coding-standard": "~1.0.0" 24 | }, 25 | "autoload": { 26 | "psr-4": { 27 | "Zend\\Dom\\": "src/" 28 | } 29 | }, 30 | "autoload-dev": { 31 | "psr-4": { 32 | "ZendTest\\Dom\\": "test/" 33 | } 34 | }, 35 | "config": { 36 | "sort-packages": true 37 | }, 38 | "extra": { 39 | "branch-alias": { 40 | "dev-master": 
"2.7.x-dev", 41 | "dev-develop": "2.8.x-dev" 42 | } 43 | }, 44 | "scripts": { 45 | "check": [ 46 | "@cs-check", 47 | "@test" 48 | ], 49 | "cs-check": "phpcs", 50 | "cs-fix": "phpcbf", 51 | "test": "phpunit --colors=always", 52 | "test-coverage": "phpunit --colors=always --coverage-clover clover.xml" 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/Css2Xpath.php: -------------------------------------------------------------------------------- 1 | errors = [null]; 37 | 38 | if ($contextNode === null) { 39 | $contextNode = $this->document->documentElement; 40 | } 41 | 42 | set_error_handler([$this, 'addError'], \E_WARNING); 43 | $nodeList = $this->query($expression, $contextNode); 44 | restore_error_handler(); 45 | 46 | $exception = array_pop($this->errors); 47 | if ($exception) { 48 | throw $exception; 49 | } 50 | 51 | return $nodeList; 52 | } 53 | 54 | /** 55 | * Adds an error to the stack of errors 56 | * 57 | * @param int $errno 58 | * @param string $errstr 59 | * @param string $errfile 60 | * @param int $errline 61 | * @return void 62 | */ 63 | public function addError($errno, $errstr = '', $errfile = '', $errline = 0) 64 | { 65 | $last_error = end($this->errors); 66 | $this->errors[] = new ErrorException( 67 | $errstr, 68 | 0, 69 | $errno, 70 | $errfile, 71 | $errline, 72 | $last_error 73 | ); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/Document.php: -------------------------------------------------------------------------------- 1 | setStringDocument($document, $type, $encoding); 77 | } 78 | 79 | /** 80 | * Get raw set document 81 | * 82 | * @return string|null 83 | */ 84 | public function getStringDocument() 85 | { 86 | return $this->stringDocument; 87 | } 88 | 89 | /** 90 | * Set raw document 91 | * 92 | * @param string|null $document 93 | * @param string|null $forcedType Type for the provided document (see constants) 94 | * @param string|null 
$forcedEncoding Encoding for the provided document
     * @return self
     */
    protected function setStringDocument($document, $forcedType = null, $forcedEncoding = null)
    {
        // NOTE: empty() also treats the string "0" as "no document"; this matches
        // the long-standing behaviour and is kept on purpose.
        $hasDocument = ! empty($document);

        // HTML is the fallback type; it is refined below by sniffing the content.
        $type = static::DOC_HTML;

        // Only sniff when there is content: an empty/null document can never
        // match, and skipping it avoids handing null to the string functions.
        if ($hasDocument) {
            if (false !== strpos($document, 'DTD XHTML')) {
                $type = static::DOC_XHTML;
            }

            // Breaking XML declaration to make syntax highlighting work
            if ('<' . '?xml' === substr(trim($document), 0, 5)) {
                $type = static::DOC_XML;

                // An XML document whose root <html> element declares a default
                // namespace is XHTML; remember that namespace for XPath queries.
                // The pattern must be anchored on the <html ...> tag, otherwise
                // any namespaced XML (feeds, SVG, ...) would be taken for XHTML.
                if (preg_match('/<html[^>]*xmlns="([^"]+)"[^>]*>/i', $document, $matches)) {
                    $this->xpathNamespaces[] = $matches[1];
                    $type = static::DOC_XHTML;
                }
            }
        }

        // Unsetting previously registered DOMDocument so that it is rebuilt
        // lazily from the new string on the next getDomDocument() call.
        $this->domDocument    = null;
        $this->stringDocument = $hasDocument ? $document : null;

        // An explicitly forced type always wins over the sniffed one; without a
        // document and without a forced type the type is reset to null.
        $this->setType($forcedType ?: ($hasDocument ? $type : null));
        $this->setEncoding($forcedEncoding);
        $this->setErrors([]);

        return $this;
    }

    /**
     * Get raw document type
     *
     * @return string|null The type passed to setType(), or null when none is set
     */
    public function getType()
    {
        return $this->type;
    }

    /**
     * Set raw document type
     *
     * The value is stored as given; no validation against the DOC_* constants
     * is performed here.
     *
     * @param  string|null $type
     * @return self
     */
    protected function setType($type)
    {
        $this->type = $type;

        return $this;
    }

    /**
     * Get DOMDocument generated from set raw document
     *
     * The DOMDocument is built on first access and cached until a new string
     * document is set.
     *
     * @return DOMDocument
     * @throws Exception\RuntimeException If cannot get DOMDocument; no document registered
     */
    public function getDomDocument()
    {
        $stringDocument = $this->getStringDocument();
        if (null === $stringDocument) {
            throw new Exception\RuntimeException('Cannot get DOMDocument; no document registered');
        }

        if (null === $this->domDocument) {
            $this->domDocument = $this->getDomDocumentFromString($stringDocument);
        }

        return $this->domDocument;
    }

    /**
     * Set
DOMDocument
     *
     * @param  DOMDocument $domDocument
     * @return self
     * @deprecated
     */
    protected function setDomDocument(DOMDocument $domDocument)
    {
        $this->domDocument = $domDocument;

        return $this;
    }

    /**
     * Get set document encoding
     *
     * @return string|null
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Set raw document encoding for DOMDocument generation
     *
     * @param  string|null $encoding
     * @return self
     */
    public function setEncoding($encoding)
    {
        $this->encoding = $encoding;

        return $this;
    }

    /**
     * Get DOMDocument generation errors
     *
     * @return array Errors collected by libxml during the last parse (may be empty)
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Set document errors from DOMDocument generation
     *
     * @param  array $errors
     * @return self
     */
    protected function setErrors($errors)
    {
        $this->errors = $errors;

        return $this;
    }

    /**
     * Get DOMDocument from set raw document
     *
     * Parses the string with libxml in "internal errors" mode and with the
     * external entity loader disabled. Collected libxml errors are stored via
     * setErrors(). The libxml settings that were active before the call are
     * always restored, including when an exception is thrown.
     *
     * @param  string $stringDocument Raw document to parse
     * @return DOMDocument
     * @throws Exception\RuntimeException When an XML document carries a DOCTYPE
     *                                    or when the document cannot be parsed
     */
    protected function getDomDocumentFromString($stringDocument)
    {
        // Both calls return the previous setting, which is restored in the
        // finally block instead of being overwritten with a hard-coded false.
        // NOTE(review): libxml_disable_entity_loader() is deprecated as of
        // PHP 8.0; this package declares support for PHP 5.6/7.x only.
        $previousInternalErrors = libxml_use_internal_errors(true);
        $previousEntityLoader   = libxml_disable_entity_loader(true);

        try {
            $encoding = $this->getEncoding();
            $domDoc   = null === $encoding ? new DOMDocument('1.0') : new DOMDocument('1.0', $encoding);
            $type     = $this->getType();

            switch ($type) {
                case static::DOC_XML:
                    $success = $domDoc->loadXML($stringDocument);
                    // A DOCTYPE in plain XML is rejected outright.
                    foreach ($domDoc->childNodes as $child) {
                        if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
                            throw new Exception\RuntimeException(
                                'Invalid XML: Detected use of illegal DOCTYPE'
                            );
                        }
                    }
                    break;
                case static::DOC_HTML:
                case static::DOC_XHTML:
                default:
                    $success = $domDoc->loadHTML($stringDocument);
                    break;
            }
        } finally {
            // Runs on success and on the DOCTYPE exception alike, so the error
            // buffer is always flushed and global libxml state never leaks.
            $errors = libxml_get_errors();
            if (! empty($errors)) {
                $this->setErrors($errors);
                libxml_clear_errors();
            }

            libxml_disable_entity_loader($previousEntityLoader);
            libxml_use_internal_errors($previousInternalErrors);
        }

        if (! $success) {
            throw new Exception\RuntimeException(sprintf('Error parsing document (type == %s)', $type));
        }

        return $domDoc;
    }

    /**
     * Get Document's registered XPath namespaces
     *
     * @return array
     */
    public function getXpathNamespaces()
    {
        return $this->xpathNamespaces;
    }

    /**
     * Register XPath namespaces
     *
     * Replaces the whole set of registered namespaces with the given array.
     *
     * @param  array $xpathNamespaces
     * @return void
     */
    public function registerXpathNamespaces($xpathNamespaces)
    {
        $this->xpathNamespaces = $xpathNamespaces;
    }

    /**
     * Get Document's registered XPath PHP Functions
     *
     * Returns whatever was passed to registerXpathPhpFunctions().
     *
     * @return string|null
     */
    public function getXpathPhpFunctions()
    {
        return $this->xpathPhpFunctions;
    }

    /**
     * Register PHP Functions to use in internal DOMXPath
     *
     * NOTE(review): the value is stored as given; presumably `true` means
     * "all functions" and anything else is a restriction list - confirm
     * against the code that consumes getXpathPhpFunctions().
     *
     * @param  bool $xpathPhpFunctions
     * @return void
     */
    public function registerXpathPhpFunctions($xpathPhpFunctions = true)
    {
        $this->xpathPhpFunctions = $xpathPhpFunctions;
    }
}
-------------------------------------------------------------------------------- /src/Document/NodeList.php: -------------------------------------------------------------------------------- 1 | list = $list; 43 | } 44 | 45 | /** 46 | * Iterator: rewind to first element 47 | * 48 | * @return DOMNode 49 | */ 50 | public function rewind() 51 | { 52 | $this->position = 0; 53 | 54 | return $this->list->item(0); 55 | } 56 | 57 | /** 58 | * Iterator: is current position valid? 59 | * 60 | * @return bool 61 | */ 62 | public function valid() 63 | { 64 | return $this->offsetExists($this->position); 65 | } 66 | 67 | /** 68 | * Iterator: return current element 69 | * 70 | * @return DOMNode 71 | */ 72 | public function current() 73 | { 74 | return $this->list->item($this->position); 75 | } 76 | 77 | /** 78 | * Iterator: return key of current element 79 | * 80 | * @return int 81 | */ 82 | public function key() 83 | { 84 | return $this->position; 85 | } 86 | 87 | /** 88 | * Iterator: move to next element 89 | * 90 | * @return DOMNode 91 | */ 92 | public function next() 93 | { 94 | ++$this->position; 95 | 96 | return $this->list->item($this->position); 97 | } 98 | 99 | /** 100 | * Countable: get count 101 | * 102 | * @return int 103 | */ 104 | public function count() 105 | { 106 | return $this->list->length; 107 | } 108 | 109 | /** 110 | * ArrayAccess: offset exists 111 | * 112 | * @param int $key 113 | * @return bool 114 | */ 115 | public function offsetExists($key) 116 | { 117 | // DOMNodeList return `null` if item not exists. 
118 | return (null !== $this->list->item($key)); 119 | } 120 | 121 | /** 122 | * ArrayAccess: get offset 123 | * 124 | * @param int $key 125 | * @return mixed 126 | */ 127 | public function offsetGet($key) 128 | { 129 | return $this->list->item($key); 130 | } 131 | 132 | /** 133 | * ArrayAccess: set offset 134 | * 135 | * @param mixed $key 136 | * @param mixed $value 137 | * @throws Exception\BadMethodCallException when attempting to write to a read-only item 138 | */ 139 | public function offsetSet($key, $value) 140 | { 141 | throw new Exception\BadMethodCallException('Attempting to write to a read-only list'); 142 | } 143 | 144 | /** 145 | * ArrayAccess: unset offset 146 | * 147 | * @param mixed $key 148 | * @throws Exception\BadMethodCallException when attempting to unset a read-only item 149 | */ 150 | public function offsetUnset($key) 151 | { 152 | throw new Exception\BadMethodCallException('Attempting to unset on a read-only list'); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/Document/Query.php: -------------------------------------------------------------------------------- 1 | getDomDocument()); 47 | 48 | $xpathNamespaces = $document->getXpathNamespaces(); 49 | foreach ($xpathNamespaces as $prefix => $namespaceUri) { 50 | $xpath->registerNamespace($prefix, $namespaceUri); 51 | } 52 | 53 | if ($xpathPhpfunctions = $document->getXpathPhpFunctions()) { 54 | $xpath->registerNamespace('php', 'http://php.net/xpath'); 55 | if ($xpathPhpfunctions === true) { 56 | $xpath->registerPhpFunctions(); 57 | } else { 58 | $xpath->registerPhpFunctions($xpathPhpfunctions); 59 | } 60 | } 61 | 62 | $nodeList = $xpath->queryWithErrorException($expression, $contextNode); 63 | return new NodeList($nodeList); 64 | } 65 | 66 | /** 67 | * Transform CSS expression to XPath 68 | * 69 | * @param string $path 70 | * @return string 71 | */ 72 | public static function cssToXpath($path) 73 | { 74 | $path = (string) $path; 75 | 
if (strstr($path, ',')) { 76 | $paths = explode(',', $path); 77 | $expressions = []; 78 | foreach ($paths as $path) { 79 | $xpath = static::cssToXpath(trim($path)); 80 | if (is_string($xpath)) { 81 | $expressions[] = $xpath; 82 | } elseif (is_array($xpath)) { 83 | $expressions = array_merge($expressions, $xpath); 84 | } 85 | } 86 | return implode('|', $expressions); 87 | } 88 | 89 | do { 90 | $placeholder = '{' . uniqid(mt_rand(), true) . '}'; 91 | } while (strpos($path, $placeholder) !== false); 92 | 93 | // Arbitrary attribute value contains whitespace 94 | $path = preg_replace_callback( 95 | '/\[\S+?([\'"])((?!\1|\\\1).*?)\1\]/', 96 | function ($matches) use ($placeholder) { 97 | return str_replace($matches[2], preg_replace('/\s+/', $placeholder, $matches[2]), $matches[0]); 98 | }, 99 | $path 100 | ); 101 | 102 | $paths = ['//']; 103 | $path = preg_replace('|\s*>\s*|', '>', $path); 104 | $segments = preg_split('/\s+/', $path); 105 | $segments = str_replace($placeholder, ' ', $segments); 106 | 107 | foreach ($segments as $key => $segment) { 108 | $pathSegment = static::_tokenize($segment); 109 | if (0 == $key) { 110 | if (0 === strpos($pathSegment, '[contains(')) { 111 | $paths[0] .= '*' . ltrim($pathSegment, '*'); 112 | } else { 113 | $paths[0] .= $pathSegment; 114 | } 115 | continue; 116 | } 117 | if (0 === strpos($pathSegment, '[contains(')) { 118 | foreach ($paths as $pathKey => $xpath) { 119 | $paths[$pathKey] .= '//*' . ltrim($pathSegment, '*'); 120 | $paths[] = $xpath . $pathSegment; 121 | } 122 | } else { 123 | foreach ($paths as $pathKey => $xpath) { 124 | $paths[$pathKey] .= '//' . 
$pathSegment; 125 | } 126 | } 127 | } 128 | 129 | if (1 == count($paths)) { 130 | return $paths[0]; 131 | } 132 | return implode('|', $paths); 133 | } 134 | 135 | // @codingStandardsIgnoreStart 136 | /** 137 | * Tokenize CSS expressions to XPath 138 | * 139 | * @param string $expression 140 | * @return string 141 | */ 142 | protected static function _tokenize($expression) 143 | { 144 | // @codingStandardsIgnoreEnd 145 | // Child selectors 146 | $expression = str_replace('>', '/', $expression); 147 | 148 | // IDs 149 | $expression = preg_replace('|#([a-z][a-z0-9_-]*)|i', '[@id=\'$1\']', $expression); 150 | $expression = preg_replace('|(?cssQuery = $cssQuery; 76 | $this->xpathQuery = $xpathQuery; 77 | $this->document = $document; 78 | $this->nodeList = $nodeList; 79 | $this->contextNode = $contextNode; 80 | } 81 | 82 | /** 83 | * Retrieve CSS Query 84 | * 85 | * @return string 86 | */ 87 | public function getCssQuery() 88 | { 89 | return $this->cssQuery; 90 | } 91 | 92 | /** 93 | * Retrieve XPath query 94 | * 95 | * @return string 96 | */ 97 | public function getXpathQuery() 98 | { 99 | return $this->xpathQuery; 100 | } 101 | 102 | /** 103 | * Retrieve DOMDocument 104 | * 105 | * @return DOMDocument 106 | */ 107 | public function getDocument() 108 | { 109 | return $this->document; 110 | } 111 | 112 | /** 113 | * Retrieve context node 114 | * 115 | * @return DOMNode 116 | */ 117 | public function getContextNode() 118 | { 119 | return $this->contextNode; 120 | } 121 | 122 | /** 123 | * Iterator: rewind to first element 124 | * 125 | * @return DOMNode 126 | */ 127 | public function rewind() 128 | { 129 | $this->position = 0; 130 | 131 | return $this->nodeList->item(0); 132 | } 133 | 134 | /** 135 | * Iterator: is current position valid? 
*
     * @return bool
     */
    public function valid()
    {
        // Same rule as array access: the cursor is valid while it points at an
        // existing item of the wrapped list.
        return $this->offsetExists($this->position);
    }

    /**
     * Iterator: return current element
     *
     * @return DOMNode|null The item at the cursor, as returned by the wrapped list
     */
    public function current()
    {
        return $this->nodeList->item($this->position);
    }

    /**
     * Iterator: return key of current element
     *
     * @return int
     */
    public function key()
    {
        return $this->position;
    }

    /**
     * Iterator: move to next element
     *
     * @return DOMNode|null The item at the new cursor position
     */
    public function next()
    {
        ++$this->position;

        return $this->nodeList->item($this->position);
    }

    /**
     * Countable: get count
     *
     * @return int
     */
    public function count()
    {
        return $this->nodeList->length;
    }

    /**
     * ArrayAccess: offset exists
     *
     * Whole-number keys (integers, integral floats, numeric strings) inside
     * 0 .. length-1 exist; everything else does not. This is a constant-time
     * bounds check, so iterating the list no longer builds a range() array on
     * every step, and non-numeric keys are no longer reported as existing.
     *
     * @param  int $key
     * @return bool
     */
    public function offsetExists($key)
    {
        // The loose != is deliberate: it lets "2" and 2.0 through while
        // rejecting fractional values such as 1.5.
        if (! is_numeric($key) || (int) $key != $key) {
            return false;
        }

        $index = (int) $key;

        return $index >= 0 && $index < $this->nodeList->length;
    }

    /**
     * ArrayAccess: get offset
     *
     * @param  int $key
     * @return mixed
     */
    public function offsetGet($key)
    {
        return $this->nodeList->item($key);
    }

    /**
     * ArrayAccess: set offset
     *
     * The list is read-only, so this always throws.
     *
     * @param  mixed $key
     * @param  mixed $value
     * @throws Exception\BadMethodCallException when attempting to write to a read-only item
     */
    public function offsetSet($key, $value)
    {
        throw new Exception\BadMethodCallException('Attempting to write to a read-only list');
    }

    /**
     * ArrayAccess: unset offset
     *
     * @param mixed $key
     *
@throws Exception\BadMethodCallException when attempting to unset a read-only item 232 | */ 233 | public function offsetUnset($key) 234 | { 235 | throw new Exception\BadMethodCallException('Attempting to unset on a read-only list'); 236 | } 237 | } 238 | -------------------------------------------------------------------------------- /src/Query.php: -------------------------------------------------------------------------------- 1 | setEncoding($encoding); 74 | $this->setDocument($document); 75 | } 76 | 77 | /** 78 | * Set document encoding 79 | * 80 | * @param string $encoding 81 | * @return Query 82 | */ 83 | public function setEncoding($encoding) 84 | { 85 | $this->encoding = (null === $encoding) ? null : (string) $encoding; 86 | return $this; 87 | } 88 | 89 | /** 90 | * Get document encoding 91 | * 92 | * @return null|string 93 | */ 94 | public function getEncoding() 95 | { 96 | return $this->encoding; 97 | } 98 | 99 | /** 100 | * Set document to query 101 | * 102 | * @param string $document 103 | * @param null|string $encoding Document encoding 104 | * @return Query 105 | */ 106 | public function setDocument($document, $encoding = null) 107 | { 108 | if (0 === strlen($document)) { 109 | return $this; 110 | } 111 | // breaking XML declaration to make syntax highlighting work 112 | if ('<' . 
'?xml' == substr(trim($document), 0, 5)) { 113 | if (preg_match('/]*xmlns="([^"]+)"[^>]*>/i', $document, $matches)) { 114 | $this->xpathNamespaces[] = $matches[1]; 115 | return $this->setDocumentXhtml($document, $encoding); 116 | } 117 | return $this->setDocumentXml($document, $encoding); 118 | } 119 | if (strstr($document, 'DTD XHTML')) { 120 | return $this->setDocumentXhtml($document, $encoding); 121 | } 122 | return $this->setDocumentHtml($document, $encoding); 123 | } 124 | 125 | /** 126 | * Register HTML document 127 | * 128 | * @param string $document 129 | * @param null|string $encoding Document encoding 130 | * @return Query 131 | */ 132 | public function setDocumentHtml($document, $encoding = null) 133 | { 134 | $this->document = (string) $document; 135 | $this->docType = self::DOC_HTML; 136 | if (null !== $encoding) { 137 | $this->setEncoding($encoding); 138 | } 139 | return $this; 140 | } 141 | 142 | /** 143 | * Register XHTML document 144 | * 145 | * @param string $document 146 | * @param null|string $encoding Document encoding 147 | * @return Query 148 | */ 149 | public function setDocumentXhtml($document, $encoding = null) 150 | { 151 | $this->document = (string) $document; 152 | $this->docType = self::DOC_XHTML; 153 | if (null !== $encoding) { 154 | $this->setEncoding($encoding); 155 | } 156 | return $this; 157 | } 158 | 159 | /** 160 | * Register XML document 161 | * 162 | * @param string $document 163 | * @param null|string $encoding Document encoding 164 | * @return Query 165 | */ 166 | public function setDocumentXml($document, $encoding = null) 167 | { 168 | $this->document = (string) $document; 169 | $this->docType = self::DOC_XML; 170 | if (null !== $encoding) { 171 | $this->setEncoding($encoding); 172 | } 173 | return $this; 174 | } 175 | 176 | /** 177 | * Retrieve current document 178 | * 179 | * @return string 180 | */ 181 | public function getDocument() 182 | { 183 | return $this->document; 184 | } 185 | 186 | /** 187 | * Get document type 
*
     * @return string
     */
    public function getDocumentType()
    {
        return $this->docType;
    }

    /**
     * Get any DOMDocument errors found
     *
     * Holds the libxml errors recorded by the most recent queryXpath() call
     * that produced any.
     *
     * @return false|array
     */
    public function getDocumentErrors()
    {
        return $this->documentErrors;
    }

    /**
     * Perform a CSS selector query
     *
     * The selector is translated to XPath and then run through queryXpath().
     *
     * @param  string       $query       CSS selector
     * @param  DOMNode|null $contextNode Node to query relative to, if any
     * @return NodeList
     */
    public function execute($query, DOMNode $contextNode = null)
    {
        $xpathQuery = Document\Query::cssToXpath($query);

        return $this->queryXpath($xpathQuery, $query, $contextNode);
    }

    /**
     * Perform an XPath query
     *
     * The registered document is parsed on every call, with libxml in
     * "internal errors" mode and the external entity loader disabled. The
     * libxml settings that were active before the call are always restored,
     * including when an exception is thrown.
     *
     * @param  string|array $xpathQuery
     * @param  string|null  $query       CSS selector query
     * @param  DOMNode|null $contextNode Node to query relative to, if any
     * @throws Exception\RuntimeException When no document is registered, when
     *                                    an XML document carries a DOCTYPE, or
     *                                    when the document cannot be parsed
     * @return NodeList
     */
    public function queryXpath($xpathQuery, $query = null, DOMNode $contextNode = null)
    {
        $document = $this->getDocument();
        if (null === $document) {
            throw new Exception\RuntimeException('Cannot query; no document registered');
        }

        $encoding = $this->getEncoding();

        // Both calls return the previous setting, which is restored in the
        // finally block instead of being overwritten with a hard-coded false.
        // NOTE(review): libxml_disable_entity_loader() is deprecated as of
        // PHP 8.0; this package declares support for PHP 5.6/7.x only.
        $previousInternalErrors = libxml_use_internal_errors(true);
        $previousEntityLoader   = libxml_disable_entity_loader(true);

        try {
            $domDoc = null === $encoding
                ? new DOMDocument('1.0')
                : new DOMDocument('1.0', $encoding);

            $type = $this->getDocumentType();
            switch ($type) {
                case self::DOC_XML:
                    $success = $domDoc->loadXML($document);
                    // A DOCTYPE in plain XML is rejected outright.
                    foreach ($domDoc->childNodes as $child) {
                        if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
                            throw new Exception\RuntimeException(
                                'Invalid XML: Detected use of illegal DOCTYPE'
                            );
                        }
                    }
                    break;
                case self::DOC_HTML:
                case self::DOC_XHTML:
                default:
                    $success = $domDoc->loadHTML($document);
                    break;
            }
        } finally {
            // Runs on success and on the DOCTYPE exception alike, so the error
            // buffer is always flushed and global libxml state never leaks.
            $errors = libxml_get_errors();
            if (! empty($errors)) {
                $this->documentErrors = $errors;
                libxml_clear_errors();
            }

            libxml_disable_entity_loader($previousEntityLoader);
            libxml_use_internal_errors($previousInternalErrors);
        }

        if (! $success) {
            throw new Exception\RuntimeException(sprintf('Error parsing document (type == %s)', $type));
        }

        $nodeList = $this->getNodeList($domDoc, $xpathQuery, $contextNode);

        return new NodeList($query, $xpathQuery, $domDoc, $nodeList, $contextNode);
    }

    /**
     * Register XPath namespaces
     *
     * Replaces the whole set of registered namespaces with the given array.
     *
     * @param  array $xpathNamespaces
     * @return void
     */
    public function registerXpathNamespaces($xpathNamespaces)
    {
        $this->xpathNamespaces = $xpathNamespaces;
    }

    /**
     * Register PHP Functions to use in internal DOMXPath
     *
     * Pass true to expose every PHP function to XPath; any other truthy value
     * is handed to DOMXPath::registerPhpFunctions() as the restriction.
     *
     * @param  bool $xpathPhpFunctions
     * @return void
     */
    public function registerXpathPhpFunctions($xpathPhpFunctions = true)
    {
        $this->xpathPhpFunctions = $xpathPhpFunctions;
    }

    /**
     * Prepare node list
     *
     * Builds an XPath evaluator for the document, registers the configured
     * namespaces and PHP functions, and runs the query.
     *
     * @param  DOMDocument  $document
     * @param  string|array $xpathQuery  Cast to string before being executed
     * @param  DOMNode|null $contextNode Node to query relative to, if any
     * @return \DOMNodeList
     * @throws \ErrorException If query cannot be executed
     */
    protected function getNodeList($document, $xpathQuery, DOMNode $contextNode = null)
    {
        $xpath = new DOMXPath($document);

        foreach ($this->xpathNamespaces as $prefix => $namespaceUri) {
            $xpath->registerNamespace($prefix, $namespaceUri);
        }

        if ($this->xpathPhpFunctions) {
            $xpath->registerNamespace("php", "http://php.net/xpath");

            if ($this->xpathPhpFunctions === true) {
                // true means "no restriction": expose every PHP function.
                $xpath->registerPhpFunctions();
            } else {
                $xpath->registerPhpFunctions($this->xpathPhpFunctions);
            }
        }

        return $xpath->queryWithErrorException((string) $xpathQuery, $contextNode);
    }
}
--------------------------------------------------------------------------------