├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── composer.json
├── phpunit.xml
├── src
└── FastXml
│ ├── CallbackHandler
│ ├── CallbackHandlerInterface.php
│ └── GenericHandler.php
│ └── Parser.php
└── tests
├── FastXmlTest
├── ParserTest.php
├── sample.xml
├── sample2.xml
└── sample3.xml
├── bootstrap.php
└── phpunit.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | nbproject
2 | vendor
3 | composer.lock
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: php
2 | php:
3 | - 7.0
4 | - 7.1
5 | - 7.2
6 | - 7.3
7 | - 7.4
8 |
9 | script:
10 | - php vendor/bin/phpunit
11 |
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Alex Oleshkevich
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | PHP Fast XML Parser
2 | =========
3 |
4 | PHP Fast XML Parser is a library for parsing large XML files in PHP.
5 | Key features:
6 |
7 | - Lightweight;
8 | - Flexible (result can be easily managed via callback handlers);
9 | - Good for memory-critical projects (~10 MB on average while parsing a 500 MB XML file).
10 |
11 | [![Build Status](https://travis-ci.org/alex-oleshkevich/php-fast-xml-parser.svg?branch=master)](https://travis-ci.org/alex-oleshkevich/php-fast-xml-parser)
12 |
13 | ## Installation
14 |
15 | ```
16 | composer require alex.oleshkevich/fast-xml-parser
17 | ```
18 |
19 | Example & Tutorial
20 | --------------
21 |
22 | ```php
23 | setOnItemParsedCallback(function ($item) use ($self) {
37 | // do something with the parsed item
38 | });
39 |
40 | // set "on progress" callback
41 | $handler->setOnProgressCallback(function ($bytesProcessed, $bytesTotal) use ($self) {
42 | // eg. draw a progress bar
43 | });
44 |
45 | // instantiate
46 | $parser = new Parser($handler);
47 |
48 | // define tags which you don't want to include in resulting array (optional)
49 | $parser->setIgnoreTags(['root']);
50 |
51 | // define the end tag for every item
52 | // (this is used as a marker to determine when an XML
53 | // item has been fully processed).
54 | // For example, if you want to extract "value" from this XML source
55 | // <root>
56 | //     <value>VALUE</value>
57 | //     <value>VALUE</value>
58 | //     <value>VALUE</value>
59 | // </root>
60 | // you must call $parser->setEndTag('value') so the library can
61 | // emit the content of every tag in the "onItemParsed" event.
62 | $parser->setEndTag('value');
63 |
64 | // run
65 | $parser->parse('bigfile.xml');
66 | ```
67 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "alex.oleshkevich/fast-xml-parser",
3 | "description": "Fast SAX XML parser for PHP",
4 | "homepage": "https://github.com/alex-oleshkevich/php-fast-xml-parser",
5 | "authors": [
6 | {
7 | "name": "Alex Oleshkevich",
8 | "email": "alex.oleshkevich@gmail.com",
9 | "homepage": "http://github.com/alex-oleshkevich"
10 | }
11 | ],
12 | "type": "library",
13 | "license": "MIT",
14 | "require": {
15 | "php": ">=7.0.0"
16 | },
17 | "autoload": {
18 | "psr-0": {
19 | "FastXml": "src",
20 | "FastXmlTest": "tests"
21 | }
22 | },
23 | "require-dev": {
24 | "phpunit/phpunit": "^6.5"
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/phpunit.xml:
--------------------------------------------------------------------------------
1 |
2 |
12 |
13 |
14 |
15 | tests/FastXmlTest
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/src/FastXml/CallbackHandler/CallbackHandlerInterface.php:
--------------------------------------------------------------------------------
1 | onItemParsedCallback)) {
24 | $callback = $this->onItemParsedCallback;
25 | $callback($item);
26 | }
27 | }
28 |
/**
 * Forwards a progress notification to the registered progress callback.
 *
 * Does nothing when no callable has been registered.
 *
 * @param int $bytesProcessed
 * @param int $bytesTotal
 */
public function onProgress($bytesProcessed, $bytesTotal)
{
    $listener = $this->onProgressCallback;
    if (!is_callable($listener)) {
        return;
    }

    $listener($bytesProcessed, $bytesTotal);
}
36 |
/**
 * Registers the callable that onProgress() invokes.
 *
 * @param callable $callback Receives ($bytesProcessed, $bytesTotal).
 */
public function setOnProgressCallback(callable $callback)
{
    $progressListener = $callback;
    $this->onProgressCallback = $progressListener;
}
41 |
/**
 * Registers the callable stored as the "item parsed" callback.
 *
 * @param callable $callback
 */
public function setOnItemParsedCallback(callable $callback)
{
    $itemListener = $callback;
    $this->onItemParsedCallback = $itemListener;
}
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/FastXml/Parser.php:
--------------------------------------------------------------------------------
1 | callbackHandler = $callbackHandler;
61 |
62 | $this->parser = xml_parser_create('UTF-8');
63 | xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, 0);
64 | xml_set_object($this->parser, $this);
65 | xml_set_element_handler($this->parser, 'startTag', 'endTag');
66 | xml_set_character_data_handler($this->parser, 'tagData');
67 | xml_set_external_entity_ref_handler($this->parser, 'convertEntities');
68 | }
69 |
/**
 * Forwards an option to the underlying XML parser.
 *
 * @param int $name One of the XML_OPTION_* constants.
 * @param mixed $value Value to assign to the option.
 * @link http://php.net/manual/en/function.xml-parser-set-option.php
 * @return Parser This instance, so calls can be chained.
 */
public function setOption($name, $value)
{
    $xmlParser = $this->parser;
    xml_parser_set_option($xmlParser, $name, $value);

    return $this;
}
84 |
/**
 * Reads an option from the underlying XML parser.
 *
 * @param int $name One of the XML_OPTION_* constants.
 * @link http://php.net/manual/en/function.xml-parser-get-option.php
 * @return mixed Whatever xml_parser_get_option() reports for the option.
 */
public function getParserOption($name)
{
    $optionValue = xml_parser_get_option($this->parser, $name);

    return $optionValue;
}
97 |
/**
 * Returns the chunk length that parse() passes to fread().
 *
 * @return int
 */
public function getReadBuffer()
{
    $chunkLength = $this->readBuffer;

    return $chunkLength;
}
105 |
/**
 * Sets the chunk length that parse() passes to fread().
 *
 * @param int $readBuffer Number of bytes to read per chunk; must be at least 1.
 * @throws \InvalidArgumentException When the size is smaller than 1.
 * @return Parser This instance, so calls can be chained.
 */
public function setReadBuffer($readBuffer)
{
    $size = (int) $readBuffer;
    // A zero or negative length can never advance the read loop in parse(),
    // so reject it here instead of failing later while reading.
    if ($size < 1) {
        throw new \InvalidArgumentException('Read buffer size must be a positive integer.');
    }

    $this->readBuffer = $size;
    return $this;
}
115 |
/**
 * Do not include these tags into result.
 *
 * Tag names listed here are skipped by startTag(), so their character
 * data is not collected.
 *
 * @param array $tags List of tag names to ignore.
 * @return Parser This instance, so calls can be chained like the other setters.
 */
public function setIgnoreTags(array $tags)
{
    $this->ignoreTags = $tags;
    return $this;
}
125 |
/**
 * Sets end tag.
 *
 * End tag is a tag which is used to determine separate blocks: when its
 * closing tag is reached, endTag() hands the collected data to the
 * callback handler.
 *
 * @param string $tag
 * @return Parser This instance, so calls can be chained like the other setters.
 */
public function setEndTag($tag)
{
    $this->endTag = $tag;
    return $this;
}
136 |
/**
 * Handles start tag.
 *
 * Records the tag as the current one, or clears the current tag when the
 * name is on the ignore list so that tagData() skips its content.
 *
 * @param resource $parser
 * @param string $name
 */
public function startTag($parser, $name)
{
    // Strict comparison: with a loose in_array() an ignore list that contains
    // 0 or true would match every tag name.
    $this->currentTag = in_array($name, $this->ignoreTags, true) ? null : $name;
}
152 |
/**
 * Handles tag content.
 *
 * Appends the trimmed character data to the entry of the current tag.
 * Nothing is collected while no current tag is set.
 *
 * @param resource $parser
 * @param string $data
 */
public function tagData($parser, $data)
{
    $tag = $this->currentTag;
    if (!$tag) {
        return;
    }

    // Each chunk is trimmed on its own before being appended.
    $collected = isset($this->currentData[$tag]) ? $this->currentData[$tag] : '';
    $this->currentData[$tag] = $collected . trim($data);
}
168 |
/**
 * Handles close tag.
 *
 * When the closing tag matches the configured end tag, the data collected
 * so far is passed to the callback handler and the buffer is emptied.
 *
 * @param resource $parser
 * @param string $name
 */
public function endTag($parser, $name)
{
    if ($name != $this->endTag) {
        return;
    }

    $this->callbackHandler->onItemParsed($this->currentData);
    $this->currentData = [];
}
182 |
/**
 * Replaces HTML entities that are not XML special characters with their
 * symbols, then strips what the trailing regular expression matches.
 *
 * NOTE(review): the constructor registers this method as the external
 * entity reference handler, which is invoked with a different argument
 * list than a single content string - confirm the intended usage.
 *
 * @param string $content
 * @return string
 */
public function convertEntities($content)
{
    $allEntities = get_html_translation_table(HTML_ENTITIES);
    $specialChars = get_html_translation_table(HTML_SPECIALCHARS);

    // Keep only the entities that are not XML special characters, keyed by
    // entity so strtr() maps entity => symbol.
    $entityToSymbol = array_flip(array_diff($allEntities, $specialChars));
    $table = array_map('utf8_encode', $entityToSymbol);

    $decoded = strtr($content, $table);

    return preg_replace('/[\d\w]+;/', '', $decoded);
}
199 |
/**
 * Do parsing.
 *
 * Reads the file in chunks of the configured read buffer size, feeds each
 * chunk to the XML parser and reports progress to the callback handler
 * after every chunk.
 *
 * @param string $file Path of the XML file to parse.
 * @throws Exception When the file cannot be opened or read, or the XML is malformed.
 */
public function parse($file)
{
    $handle = fopen($file, 'r');
    if (!$handle) {
        throw new Exception('Unable to open file.');
    }

    try {
        // The total size does not change per chunk; look it up once.
        $bytesTotal = filesize($file);

        while (!feof($handle)) {
            $data = fread($handle, $this->readBuffer);
            if ($data === false) {
                // Without this check a failing read would never reach EOF.
                throw new Exception('Unable to read file.');
            }

            // xml_parse() returns 0 on failure; surface it instead of
            // silently continuing with a broken parser state.
            if (!xml_parse($this->parser, $data, feof($handle))) {
                throw new Exception(sprintf(
                    'XML error: %s at line %d.',
                    xml_error_string(xml_get_error_code($this->parser)),
                    xml_get_current_line_number($this->parser)
                ));
            }

            $this->callbackHandler->onProgress(ftell($handle), $bytesTotal);
        }
    } finally {
        // Always release the file handle, including when a callback throws.
        fclose($handle);
    }
}
218 |
219 | }
220 |
--------------------------------------------------------------------------------
/tests/FastXmlTest/ParserTest.php:
--------------------------------------------------------------------------------
1 | setOnItemParsedCallback(function ($item) use ($self, &$iteration) {
20 | $self->assertEquals('VALUE ' . $iteration, $item['value']);
21 | $iteration++;
22 | });
23 | $handler->setOnProgressCallback(function ($bytesProcessed, $bytesTotal) use ($self, $file) {
24 | $self->assertEquals(filesize($file), $bytesProcessed);
25 | $self->assertEquals(filesize($file), $bytesTotal);
26 | });
27 | $parser = new Parser($handler);
28 | $parser->setIgnoreTags(['root']);
29 | $parser->setEndTag('value');
30 | $parser->parse($file);
31 | }
32 |
/**
 * Items emitted by the parser must not contain tags listed in setIgnoreTags().
 */
public function testParserSkipsTags()
{
    $file = __DIR__ . '/sample2.xml';

    $itemsSeen = 0;
    $handler = new GenericHandler;
    $handler->setOnItemParsedCallback(function ($item) use (&$itemsSeen) {
        $this->assertArrayNotHasKey('invalid', $item);
        $itemsSeen++;
    });
    $parser = new Parser($handler);
    $parser->setIgnoreTags(['root', 'invalid']);
    $parser->setEndTag('content');
    $parser->parse($file);

    // Guard against a vacuous pass: the assertion above only runs when the
    // callback is actually invoked.
    $this->assertGreaterThan(0, $itemsSeen);
}
49 |
/**
 * With a 100-byte read buffer, every reported progress offset must be one
 * of the expected values for sample2.xml.
 */
public function testParserReportsOnProgress()
{
    $file = __DIR__ . '/sample2.xml';
    $expectedOffsets = [100, 200, 300, 363];

    $handler = new GenericHandler;
    $handler->setOnProgressCallback(function ($bytesProcessed, $bytesTotal) use ($expectedOffsets) {
        $this->assertContains($bytesProcessed, $expectedOffsets);
    });

    $parser = new Parser($handler);
    $parser->setReadBuffer(100);
    $parser->setIgnoreTags(['root']);
    $parser->setEndTag('content');
    $parser->parse($file);
}
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/tests/FastXmlTest/sample.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | VALUE 1
4 | VALUE 2
5 | VALUE 3
6 |
7 |
--------------------------------------------------------------------------------
/tests/FastXmlTest/sample2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | VALUE 1
5 | INVALID VALUE 1
6 |
7 |
8 | VALUE 2
9 | INVALID VALUE 2
10 |
11 |
12 | VALUE 3
13 | INVALID VALUE 3
14 |
15 |
16 |
--------------------------------------------------------------------------------
/tests/FastXmlTest/sample3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | VALUE 1
5 | INVALID VALUE 1
6 |
7 |
8 | VALUE 2
9 | INVALID VALUE 2
10 |
11 |
12 | VALUE 3
13 | INVALID VALUE 3
14 |
15 |
16 |
--------------------------------------------------------------------------------
/tests/bootstrap.php:
--------------------------------------------------------------------------------
1 |
2 |
12 |
13 |
14 |
15 | FastXmlTest
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------