├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── composer.json ├── phpunit.xml ├── src └── FastXml │ ├── CallbackHandler │ ├── CallbackHandlerInterface.php │ └── GenericHandler.php │ └── Parser.php └── tests ├── FastXmlTest ├── ParserTest.php ├── sample.xml ├── sample2.xml └── sample3.xml ├── bootstrap.php └── phpunit.xml /.gitignore: -------------------------------------------------------------------------------- 1 | nbproject 2 | vendor 3 | composer.lock -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: php 2 | php: 3 | - 7.0 4 | - 7.1 5 | - 7.2 6 | - 7.3 7 | - 7.4 8 | 9 | script: 10 | - php vendor/bin/phpunit 11 | 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Alex Oleshkevich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PHP Fast XML Parser 2 | ========= 3 | 4 | PHP Fast XML Parser is a library for parsing large XML files in PHP. 5 | Key features: 6 | 7 | - Lightweight; 8 | - Flexible (results can be easily handled via callback handlers); 9 | - Good for memory-critical projects (~10 MB on average while parsing a 500 MB XML file) 10 | 11 | [![Build Status](https://travis-ci.org/alex-oleshkevich/php-fast-xml-parser.svg)](https://travis-ci.org/alex-oleshkevich/php-fast-xml-parser) 12 | 13 | ## Installation 14 | 15 | ``` 16 | composer require alex.oleshkevich/fast-xml-parser 17 | ``` 18 | 19 | Example & Tutorial 20 | -------------- 21 | 22 | ```php 23 | setOnItemParsedCallback(function ($item) use ($self) { 37 | // do something with the parsed item 38 | }); 39 | 40 | // set "on progress" callback 41 | $handler->setOnProgressCallback(function ($bytesProcessed, $bytesTotal) use ($self) { 42 | // e.g. draw a progress bar 43 | }); 44 | 45 | // instantiate 46 | $parser = new Parser($handler); 47 | 48 | // define tags which you don't want to include in the resulting array (optional) 49 | $parser->setIgnoreTags(['root']); 50 | 51 | // define the end tag for every item 52 | // (this is used as a marker to determine when an XML 53 | // item has been processed). 54 | // For example, if you want to extract "value" from this XML source 55 | // <root> 56 | // <value>VALUE</value> 57 | // <value>VALUE</value> 58 | // <value>VALUE</value> 59 | // </root> 60 | // you must call $parser->setEndTag('value') so the library can 61 | // emit the content of every such tag in the "onItemParsed" event. 
62 | $parser->setEndTag('value'); 63 | 64 | // run 65 | $parser->parse('bigfile.xml'); 66 | ``` 67 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "alex.oleshkevich/fast-xml-parser", 3 | "description": "Fast SAX XML parser for PHP", 4 | "homepage": "https://github.com/alex-oleshkevich/php-fast-xml-parser", 5 | "authors": [ 6 | { 7 | "name": "Alex Oleshkevich", 8 | "email": "alex.oleshkevich@gmail.com", 9 | "homepage": "http://github.com/alex-oleshkevich" 10 | } 11 | ], 12 | "type": "library", 13 | "license": "MIT", 14 | "require": { 15 | "php": ">=7.0.0" 16 | }, 17 | "autoload": { 18 | "psr-0": { 19 | "FastXml": "src", 20 | "FastXmlTest": "tests" 21 | } 22 | }, 23 | "require-dev": { 24 | "phpunit/phpunit": "^6.5" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /phpunit.xml: -------------------------------------------------------------------------------- 1 | 2 | 12 | 13 | 14 | 15 | tests/FastXmlTest 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/FastXml/CallbackHandler/CallbackHandlerInterface.php: -------------------------------------------------------------------------------- 1 | onItemParsedCallback)) { 24 | $callback = $this->onItemParsedCallback; 25 | $callback($item); 26 | } 27 | } 28 | 29 | public function onProgress($bytesProcessed, $bytesTotal) 30 | { 31 | if (is_callable($this->onProgressCallback)) { 32 | $callback = $this->onProgressCallback; 33 | $callback($bytesProcessed, $bytesTotal); 34 | } 35 | } 36 | 37 | public function setOnProgressCallback(callable $callback) 38 | { 39 | $this->onProgressCallback = $callback; 40 | } 41 | 42 | public function setOnItemParsedCallback(callable $callback) 43 | { 44 | $this->onItemParsedCallback = $callback; 45 | } 46 | 47 | } 48 | 
-------------------------------------------------------------------------------- /src/FastXml/Parser.php: -------------------------------------------------------------------------------- 1 | callbackHandler = $callbackHandler; 61 | 62 | $this->parser = xml_parser_create('UTF-8'); 63 | xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, 0); 64 | xml_set_object($this->parser, $this); 65 | xml_set_element_handler($this->parser, 'startTag', 'endTag'); 66 | xml_set_character_data_handler($this->parser, 'tagData'); 67 | xml_set_external_entity_ref_handler($this->parser, 'convertEntities'); 68 | } 69 | 70 | /** 71 | * Set option to XML parser. 72 | * 73 | * @param int $name 74 | * @param mixed $value 75 | * @see XML_OPTION_* constants 76 | * @link http://php.net/manual/en/function.xml-parser-set-option.php 77 | * @return Parser 78 | */ 79 | public function setOption($name, $value) 80 | { 81 | xml_parser_set_option($this->parser, $name, $value); 82 | return $this; 83 | } 84 | 85 | /** 86 | * Get option from XML parser. 87 | * 88 | * @param int $name 89 | * @see XML_OPTION_* constants 90 | * @link http://php.net/manual/en/function.xml-parser-set-option.php 91 | * @return mixed 92 | */ 93 | public function getParserOption($name) 94 | { 95 | return xml_parser_get_option($this->parser, $name); 96 | } 97 | 98 | /** 99 | * @return int 100 | */ 101 | public function getReadBuffer() 102 | { 103 | return $this->readBuffer; 104 | } 105 | 106 | /** 107 | * @param int $readBuffer 108 | * @return Parser 109 | */ 110 | public function setReadBuffer($readBuffer) 111 | { 112 | $this->readBuffer = $readBuffer; 113 | return $this; 114 | } 115 | 116 | /** 117 | * Do not include these tags into result. 118 | * 119 | * @param array $tags 120 | */ 121 | public function setIgnoreTags(array $tags) 122 | { 123 | $this->ignoreTags = $tags; 124 | } 125 | 126 | /** 127 | * Sets end tag. 128 | * 129 | * End tag is a tag which is used to determine separate blocks. 
130 | * @param string $tag 131 | */ 132 | public function setEndTag($tag) 133 | { 134 | $this->endTag = $tag; 135 | } 136 | 137 | /** 138 | * Handles start tag. 139 | * 140 | * @param resource $parser 141 | * @param string $name 142 | * @return null 143 | */ 144 | public function startTag($parser, $name) 145 | { 146 | if (in_array($name, $this->ignoreTags)) { 147 | $this->currentTag = null; 148 | return; 149 | } 150 | $this->currentTag = $name; 151 | } 152 | 153 | /** 154 | * Handles tag content. 155 | * 156 | * @param resource $parser 157 | * @param string $data 158 | */ 159 | public function tagData($parser, $data) 160 | { 161 | if ($this->currentTag) { 162 | if (!isset($this->currentData[$this->currentTag])) { 163 | $this->currentData[$this->currentTag] = ''; 164 | } 165 | $this->currentData[$this->currentTag] .= trim($data); 166 | } 167 | } 168 | 169 | /** 170 | * Handles close tag. 171 | * 172 | * @param resource $parser 173 | * @param string $name 174 | */ 175 | public function endTag($parser, $name) 176 | { 177 | if ($name == $this->endTag) { 178 | $this->callbackHandler->onItemParsed($this->currentData); 179 | $this->currentData = array(); 180 | } 181 | } 182 | 183 | /** 184 | * Replaces all html entities into its original symbols. 185 | * 186 | * @param string $content 187 | * @return string 188 | */ 189 | public function convertEntities($content) 190 | { 191 | $table = array_map('utf8_encode', array_flip( 192 | array_diff( 193 | get_html_translation_table(HTML_ENTITIES), 194 | get_html_translation_table(HTML_SPECIALCHARS) 195 | ) 196 | )); 197 | return preg_replace('/&#[\d\w]+;/', '', strtr($content, $table)); 198 | } 199 | 200 | /** 201 | * Do parsing. 
202 | * 203 | * @throws Exception 204 | */ 205 | public function parse($file) 206 | { 207 | $handle = fopen($file, 'r'); 208 | if (!$handle) { 209 | throw new Exception('Unable to open file.'); 210 | } 211 | 212 | while (!feof($handle)) { 213 | $data = fread($handle, $this->readBuffer); 214 | xml_parse($this->parser, $data, feof($handle)); 215 | $this->callbackHandler->onProgress(ftell($handle), filesize($file)); 216 | } 217 | } 218 | 219 | } 220 | -------------------------------------------------------------------------------- /tests/FastXmlTest/ParserTest.php: -------------------------------------------------------------------------------- 1 | setOnItemParsedCallback(function ($item) use ($self, &$iteration) { 20 | $self->assertEquals('VALUE ' . $iteration, $item['value']); 21 | $iteration++; 22 | }); 23 | $handler->setOnProgressCallback(function ($bytesProcessed, $bytesTotal) use ($self, $file) { 24 | $self->assertEquals(filesize($file), $bytesProcessed); 25 | $self->assertEquals(filesize($file), $bytesTotal); 26 | }); 27 | $parser = new Parser($handler); 28 | $parser->setIgnoreTags(['root']); 29 | $parser->setEndTag('value'); 30 | $parser->parse($file); 31 | } 32 | 33 | public function testParserSkipsTags() 34 | { 35 | $file = __DIR__ . '/sample2.xml'; 36 | 37 | $self = $this; 38 | $iteration = 1; 39 | $handler = new GenericHandler; 40 | $handler->setOnItemParsedCallback(function ($item) use ($self, &$iteration) { 41 | $this->assertArrayNotHasKey('invalid', $item); 42 | $iteration++; 43 | }); 44 | $parser = new Parser($handler); 45 | $parser->setIgnoreTags(['root', 'invalid']); 46 | $parser->setEndTag('content'); 47 | $parser->parse($file); 48 | } 49 | 50 | public function testParserReportsOnProgress() 51 | { 52 | $file = __DIR__ . 
'/sample2.xml'; 53 | 54 | $self = $this; 55 | $handler = new GenericHandler; 56 | $handler->setOnProgressCallback(function ($bytesProcessed, $bytesTotal) use ($self) { 57 | $this->assertContains($bytesProcessed, array(100, 200, 300, 363)); 58 | }); 59 | $parser = new Parser($handler); 60 | $parser->setReadBuffer(100); 61 | $parser->setIgnoreTags(['root']); 62 | $parser->setEndTag('content'); 63 | $parser->parse($file); 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /tests/FastXmlTest/sample.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | VALUE 1 4 | VALUE 2 5 | VALUE 3 6 | 7 | -------------------------------------------------------------------------------- /tests/FastXmlTest/sample2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | VALUE 1 5 | INVALID VALUE 1 6 | 7 | 8 | VALUE 2 9 | INVALID VALUE 2 10 | 11 | 12 | VALUE 3 13 | INVALID VALUE 3 14 | 15 | 16 | -------------------------------------------------------------------------------- /tests/FastXmlTest/sample3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | VALUE 1 5 | INVALID VALUE 1 6 | 7 | 8 | VALUE 2 9 | INVALID VALUE 2 10 | 11 | 12 | VALUE 3 13 | INVALID VALUE 3 14 | 15 | 16 | -------------------------------------------------------------------------------- /tests/bootstrap.php: -------------------------------------------------------------------------------- 1 | 2 | 12 | 13 | 14 | 15 | FastXmlTest 16 | 17 | 18 | 19 | --------------------------------------------------------------------------------