├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── composer.json └── src ├── Exception ├── ExceptionInterface.php ├── InvalidArgumentException.php └── RuntimeException.php └── Security.php /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file, in reverse chronological order by release. 4 | 5 | ## 1.2.1 - TBD 6 | 7 | ### Added 8 | 9 | - Nothing. 10 | 11 | ### Changed 12 | 13 | - Nothing. 14 | 15 | ### Deprecated 16 | 17 | - Nothing. 18 | 19 | ### Removed 20 | 21 | - Nothing. 22 | 23 | ### Fixed 24 | 25 | - Nothing. 26 | 27 | ## 1.2.0 - 2019-01-22 28 | 29 | ### Added 30 | 31 | - [#6](https://github.com/zendframework/zendxml/pull/6) adds the following method: 32 | 33 | ```php 34 | Security::scanHtml( 35 | string $html, 36 | DOMDocument $dom = null, 37 | int $libXmlConstants = 0 38 | ) : SimpleXMLElement|DOMDocument|bool 39 | ``` 40 | 41 | This method allows scanning markup known to be HTML, versus assuming the 42 | markup is generic XML. 43 | 44 | ### Changed 45 | 46 | - Nothing. 47 | 48 | ### Deprecated 49 | 50 | - Nothing. 51 | 52 | ### Removed 53 | 54 | - Nothing. 55 | 56 | ### Fixed 57 | 58 | - Nothing. 59 | 60 | ## 1.1.1 - 2019-01-22 61 | 62 | ### Added 63 | 64 | - [#16](https://github.com/zendframework/ZendXml/pull/16) adds support for PHP 7.3. 65 | 66 | ### Changed 67 | 68 | - Nothing. 69 | 70 | ### Deprecated 71 | 72 | - Nothing. 73 | 74 | ### Removed 75 | 76 | - Nothing. 77 | 78 | ### Fixed 79 | 80 | - [#17](https://github.com/zendframework/ZendXml/pull/17) properly enables heuristic security checks for PHP 5.6.0 - 5.6.5 when PHP 81 | is running as PHP-FPM. 82 | 83 | ## 1.1.0 - 2018-04-30 84 | 85 | ### Added 86 | 87 | - [#13](https://github.com/zendframework/ZendXml/pull/13) adds support for PHP 7.1 and 7.2. 88 | 89 | ### Changed 90 | 91 | - Nothing. 92 | 93 | ### Deprecated 94 | 95 | - Nothing. 
96 | 97 | ### Removed 98 | 99 | - [#13](https://github.com/zendframework/ZendXml/pull/13) removes support for PHP 5.3, 5.4, and 5.5. 100 | 101 | - [#13](https://github.com/zendframework/ZendXml/pull/13) removes support for HHVM. 102 | 103 | ### Fixed 104 | 105 | - Nothing. 106 | 107 | ## 1.0.2 - 2016-02-04 108 | 109 | ### Added 110 | 111 | - Nothing. 112 | 113 | ### Deprecated 114 | 115 | - Nothing. 116 | 117 | ### Removed 118 | 119 | - Nothing. 120 | 121 | ### Fixed 122 | 123 | - [#11](https://github.com/zendframework/ZendXml/pull/11) updates the 124 | dependencies to PHP `^5.3.3 || ^7.0` and PHPUnit `^3.7 || ^4.0`, ensuring 125 | better compatibility with other components, and with PHP 7. The test matrix 126 | was also expanded to add PHP 7 as a required platform. 127 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2018, Zend Technologies USA, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | - Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | - Redistributions in binary form must reproduce the above copyright notice, this 11 | list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | - Neither the name of Zend Technologies USA, Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from this 16 | software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ZendXml 2 | 3 | > ## Repository abandoned 2019-12-31 4 | > 5 | > This repository has moved to [laminas/laminas-xml](https://github.com/laminas/laminas-xml). 6 | 7 | [![Build Status](https://secure.travis-ci.org/zendframework/ZendXml.svg?branch=master)](https://secure.travis-ci.org/zendframework/ZendXml) 8 | [![Coverage Status](https://coveralls.io/repos/github/zendframework/ZendXml/badge.svg?branch=master)](https://coveralls.io/github/zendframework/ZendXml?branch=master) 9 | 10 | A utility component for XML usage and best practices in PHP. 11 | 12 | ## Installation 13 | 14 | You can install using: 15 | 16 | ``` 17 | curl -s https://getcomposer.org/installer | php 18 | php composer.phar install 19 | ``` 20 | 21 | Note that this library doesn't have any external dependencies; Composer is used only for autoloading and standard tooling purposes. 
22 | 23 | 24 | ## ZendXml\Security 25 | 26 | This is a security component to prevent [XML eXternal Entity](https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Processing) (XXE) and [XML Entity Expansion](http://projects.webappsec.org/w/page/13247002/XML%20Entity%20Expansion) (XEE) attacks on XML documents. 27 | 28 | The XXE attack is prevented by disabling the loading of external entities in the libxml library used by PHP, using the function [libxml_disable_entity_loader](http://www.php.net/manual/en/function.libxml-disable-entity-loader.php). 29 | 30 | The XEE attack is prevented by looking inside the XML document for ENTITY usage. If the XML document uses ENTITY, the library throws an exception. 31 | 32 | There are two static methods to scan and load an XML document: from a string (scan) and from a file (scanFile). You can choose to get a SimpleXMLElement or a DOMDocument as the result, as shown in the following use cases: 33 | 34 | ```php 35 | use ZendXml\Security as XmlSecurity; 36 | 37 | $xml = <<<XML 38 | <?xml version="1.0"?> 39 | <results> 40 | <result>test</result> 41 | </results> 42 | XML; 43 | 44 | // SimpleXML use case 45 | $simplexml = XmlSecurity::scan($xml); 46 | printf ("SimpleXMLElement: %s\n", ($simplexml instanceof \SimpleXMLElement) ? 'yes' : 'no'); 47 | 48 | // DOMDocument use case 49 | $dom = new \DOMDocument('1.0'); 50 | $dom = XmlSecurity::scan($xml, $dom); 51 | printf ("DOMDocument: %s\n", ($dom instanceof \DOMDocument) ? 
'yes' : 'no'); 52 | ``` 53 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zendframework/zendxml", 3 | "description": "Utility library for XML usage, best practices, and security in PHP", 4 | "license": "BSD-3-Clause", 5 | "keywords": [ 6 | "zf", 7 | "zendframework", 8 | "xml", 9 | "security" 10 | ], 11 | "support": { 12 | "issues": "https://github.com/zendframework/ZendXml/issues", 13 | "source": "https://github.com/zendframework/ZendXml", 14 | "rss": "https://github.com/zendframework/ZendXml/releases.atom", 15 | "chat": "https://zendframework-slack.herokuapp.com", 16 | "forum": "https://discourse.zendframework.com/c/questions/components" 17 | }, 18 | "require": { 19 | "php": "^5.6 || ^7.0" 20 | }, 21 | "require-dev": { 22 | "zendframework/zend-coding-standard": "~1.0.0", 23 | "phpunit/phpunit": "^5.7.27 || ^6.5.8 || ^7.1.4" 24 | }, 25 | "autoload": { 26 | "psr-4": { 27 | "ZendXml\\": "src/" 28 | } 29 | }, 30 | "autoload-dev": { 31 | "psr-4": { 32 | "ZendXmlTest\\": "test/" 33 | } 34 | }, 35 | "config": { 36 | "sort-packages": true 37 | }, 38 | "extra": { 39 | "branch-alias": { 40 | "dev-master": "1.2.x-dev", 41 | "dev-develop": "1.3.x-dev" 42 | } 43 | }, 44 | "scripts": { 45 | "check": [ 46 | "@cs-check", 47 | "@test" 48 | ], 49 | "cs-check": "phpcs", 50 | "cs-fix": "phpcbf", 51 | "test": "phpunit --colors=always", 52 | "test-coverage": "phpunit --colors=always --coverage-clover clover.xml" 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/Exception/ExceptionInterface.php: -------------------------------------------------------------------------------- 1 | 0) { 65 | return true; 66 | } 67 | return false; 68 | }, E_WARNING); 69 | 70 | $result = $callback($xml, $dom, LIBXML_NONET | $libXmlConstants); 71 | 72 | restore_error_handler(); 73 | 74 | if (! 
$result) { 75 | // Entity load to previous setting 76 | if (! self::isPhpFpm()) { 77 | libxml_disable_entity_loader($loadEntities); 78 | libxml_use_internal_errors($useInternalXmlErrors); 79 | } 80 | return false; 81 | } 82 | 83 | // Scan for potential XEE attacks using ENTITY, if not PHP-FPM. NOTE(review): the throw below skips the restore of the libxml settings that follows this loop. 84 | if (! self::isPhpFpm()) { 85 | foreach ($dom->childNodes as $child) { 86 | if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) { 87 | if ($child->entities->length > 0) { 88 | throw new Exception\RuntimeException(self::ENTITY_DETECT); 89 | } 90 | } 91 | } 92 | } 93 | 94 | // Entity load to previous setting 95 | if (! self::isPhpFpm()) { 96 | libxml_disable_entity_loader($loadEntities); 97 | libxml_use_internal_errors($useInternalXmlErrors); 98 | } 99 | 100 | if (isset($simpleXml)) { 101 | $result = simplexml_import_dom($dom); 102 | if (! $result instanceof SimpleXMLElement) { 103 | return false; 104 | } 105 | return $result; 106 | } 107 | return $dom; 108 | } 109 | 110 | /** 111 | * Scan XML string for potential XXE and XEE attacks; the markup is loaded with the document's loadXml() method via scanString() 112 | * 113 | * @param string $xml XML markup to scan 114 | * @param DomDocument $dom optional document to load the markup into 115 | * @param int $libXmlConstants additional libxml constants to pass in 116 | * @throws Exception\RuntimeException 117 | * @return SimpleXMLElement|DomDocument|boolean 118 | */ 119 | public static function scan($xml, DOMDocument $dom = null, $libXmlConstants = 0) 120 | { 121 | $callback = function ($xml, $dom, $constants) { 122 | return $dom->loadXml($xml, $constants); 123 | }; 124 | return self::scanString($xml, $dom, $libXmlConstants, $callback); 125 | } 126 | 127 | /** 128 | * Scan HTML string for potential XXE and XEE attacks; the markup is loaded with the document's loadHtml() method via scanString() 129 | * 130 | * @param string $html HTML markup to scan 131 | * @param DomDocument $dom optional document to load the markup into 132 | * @param int $libXmlConstants additional libxml constants to pass in 133 | * @throws Exception\RuntimeException 134 | * @return SimpleXMLElement|DomDocument|boolean 135 | */ 136 | public static function scanHtml($html, DOMDocument $dom = null, $libXmlConstants = 0) 137 | { 138 | 
$callback = function ($html, $dom, $constants) { 139 | return $dom->loadHtml($html, $constants); 140 | }; 141 | return self::scanString($html, $dom, $libXmlConstants, $callback); 142 | } 143 | 144 | /** 145 | * Scan XML file for potential XXE/XEE attacks; the file contents are read and passed to scan() with the default libxml constants 146 | * 147 | * @param string $file 148 | * @param DOMDocument $dom 149 | * @throws Exception\InvalidArgumentException if file_exists() reports that $file does not exist 150 | * @return SimpleXMLElement|DomDocument|boolean whatever scan() returns for the file contents 151 | */ 152 | public static function scanFile($file, DOMDocument $dom = null) 153 | { 154 | if (! file_exists($file)) { 155 | throw new Exception\InvalidArgumentException( 156 | "The file $file specified doesn't exist" 157 | ); 158 | } 159 | return self::scan(file_get_contents($file), $dom); 160 | } 161 | 162 | /** 163 | * Return true if the SAPI name starts with "fpm" and the PHP version is at least 5.6 and lower than 5.6.6 164 | * 165 | * This method is mainly used to determine whether or not heuristic checks 166 | * (vs libxml checks) should be made, due to threading issues in libxml; 167 | * under php-fpm, threading becomes a concern. 168 | * 169 | * However, PHP versions 5.6.6+ contain a patch to the 170 | * libxml support in PHP that makes the libxml checks viable; in such 171 | * versions, this method will return false to enforce those checks, which 172 | * are more strict and accurate than the heuristic checks. 173 | * 174 | * @return boolean 175 | */ 176 | public static function isPhpFpm() 177 | { 178 | $isVulnerableVersion = version_compare(PHP_VERSION, '5.6', 'ge') 179 | && version_compare(PHP_VERSION, '5.6.6', 'lt'); 180 | 181 | if (0 === strpos(php_sapi_name(), 'fpm') && $isVulnerableVersion) { 182 | return true; 183 | } 184 | return false; 185 | } 186 | 187 | /** 188 | * Determine and return the string(s) to use for the $generator) { 245 | $prefix = $generator('<' . '?xml'); 246 | if (0 === strncmp($xml, $prefix, strlen($prefix))) { 247 | return $encoding; 248 | } 249 | } 250 | 251 | // Fallback 252 | return 'UTF-8'; 253 | } 254 | 255 | /** 256 | * Attempt to detect the specified XML encoding. 
257 | * 258 | * Using the file's encoding, determines if an "encoding" attribute is 259 | * present and well-formed in the XML declaration; if so, it returns a 260 | * list with both the ASCII representation of that declaration and the 261 | * original file encoding. 262 | * 263 | * If not, a list containing only the provided file encoding is returned. 264 | * 265 | * @param string $xml Raw bytes of the document, searched up to the first ">" 266 | * @param string $fileEncoding Encoding of those bytes; must be a key of getAsciiEncodingMap() 267 | * @return string[] Potential XML encodings 268 | */ 269 | protected static function detectXmlEncoding($xml, $fileEncoding) 270 | { 271 | $encodingMap = self::getAsciiEncodingMap(); 272 | $generator = $encodingMap[$fileEncoding]; 273 | $encAttr = $generator('encoding="'); 274 | $quote = $generator('"'); 275 | $close = $generator('>'); 276 | 277 | $closePos = strpos($xml, $close); 278 | if (false === $closePos) { 279 | return [$fileEncoding]; 280 | } 281 | 282 | $encPos = strpos($xml, $encAttr); 283 | if (false === $encPos 284 | || $encPos > $closePos 285 | ) { 286 | return [$fileEncoding]; 287 | } 288 | 289 | $encPos += strlen($encAttr); 290 | $quotePos = strpos($xml, $quote, $encPos); 291 | if (false === $quotePos) { 292 | return [$fileEncoding]; 293 | } 294 | 295 | $encoding = self::substr($xml, $encPos, $quotePos); 296 | return [ 297 | // Strip the NUL padding bytes that the UTF-16/UTF-32 generators place around each ASCII character. The escape must be double-quoted: '\0' in single quotes is a literal backslash followed by a zero and never matches a NUL byte. 298 | str_replace("\0", '', $encoding), // detected encoding 299 | $fileEncoding, // file encoding 300 | ]; 301 | } 302 | 303 | /** 304 | * Return a list of BOM maps. 305 | * 306 | * Returns a list of common encoding -> BOM maps, along with the character 307 | * length to compare against. 
308 | * The 4-byte BOMs are listed before the 2-byte ones. NOTE(review): presumably so that a consumer testing entries in order matches UTF-32LE (ff fe 00 00) before UTF-16LE (ff fe) - confirm against the caller. 309 | * @link https://en.wikipedia.org/wiki/Byte_order_mark 310 | * @return array List of entries with keys 'encoding' (name), 'bom' (packed byte string) and 'length' (size of the BOM in bytes) 311 | */ 312 | protected static function getBomMap() 313 | { 314 | return [ 315 | [ 316 | 'encoding' => 'UTF-32BE', 317 | 'bom' => pack('CCCC', 0x00, 0x00, 0xfe, 0xff), 318 | 'length' => 4, 319 | ], 320 | [ 321 | 'encoding' => 'UTF-32LE', 322 | 'bom' => pack('CCCC', 0xff, 0xfe, 0x00, 0x00), 323 | 'length' => 4, 324 | ], 325 | [ 326 | 'encoding' => 'GB-18030', 327 | 'bom' => pack('CCCC', 0x84, 0x31, 0x95, 0x33), 328 | 'length' => 4, 329 | ], 330 | [ 331 | 'encoding' => 'UTF-16BE', 332 | 'bom' => pack('CC', 0xfe, 0xff), 333 | 'length' => 2, 334 | ], 335 | [ 336 | 'encoding' => 'UTF-16LE', 337 | 'bom' => pack('CC', 0xff, 0xfe), 338 | 'length' => 2, 339 | ], 340 | [ 341 | 'encoding' => 'UTF-8', 342 | 'bom' => pack('CCC', 0xef, 0xbb, 0xbf), 343 | 'length' => 3, 344 | ], 345 | ]; 346 | } 347 | 348 | /** 349 | * Return a map of encoding => generator pairs. 350 | * 351 | * Returns a map of encoding => generator pairs, where the generator is a 352 | * callable that accepts a string and returns the appropriate byte order 353 | * sequence of that string for the encoding. 
354 | * The UTF-16/UTF-32 generators pad every character matched by "." with NUL bytes in the position pattern of the named encoding; the UTF-8 and GB-18030 generators return the input unchanged. 355 | * @return array Map of encoding name => callable that takes a string and returns a string 356 | */ 357 | protected static function getAsciiEncodingMap() 358 | { 359 | return [ 360 | 'UTF-32BE' => function ($ascii) { 361 | return preg_replace('/(.)/', "\0\0\0\\1", $ascii); 362 | }, 363 | 'UTF-32LE' => function ($ascii) { 364 | return preg_replace('/(.)/', "\\1\0\0\0", $ascii); 365 | }, 366 | 'UTF-32odd1' => function ($ascii) { 367 | return preg_replace('/(.)/', "\0\\1\0\0", $ascii); 368 | }, 369 | 'UTF-32odd2' => function ($ascii) { 370 | return preg_replace('/(.)/', "\0\0\\1\0", $ascii); 371 | }, 372 | 'UTF-16BE' => function ($ascii) { 373 | return preg_replace('/(.)/', "\0\\1", $ascii); 374 | }, 375 | 'UTF-16LE' => function ($ascii) { 376 | return preg_replace('/(.)/', "\\1\0", $ascii); 377 | }, 378 | 'UTF-8' => function ($ascii) { 379 | return $ascii; 380 | }, 381 | 'GB-18030' => function ($ascii) { 382 | return $ascii; 383 | }, 384 | ]; 385 | } 386 | 387 | /** 388 | * Binary-safe substr. 389 | * 390 | * Copies the bytes at offsets $start through $end - 1 one at a time and returns them concatenated. 391 | * Unlike native substr(), the third argument is an exclusive end offset, not a length. NOTE(review): the stated rationale was that substr() is not binary-safe - confirm this is still needed. 392 | * 393 | * @param string $string 394 | * @param int $start Offset of the first byte to copy 395 | * @param int $end Offset at which copying stops (exclusive) 396 | * @return string 397 | */ 398 | protected static function substr($string, $start, $end) 399 | { 400 | $substr = ''; 401 | for ($i = $start; $i < $end; $i += 1) { 402 | $substr .= $string[$i]; 403 | } 404 | return $substr; 405 | } 406 | } 407 | --------------------------------------------------------------------------------