├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── composer.json
├── phpunit.xml
├── src
├── XmlStringStreamer.php
└── XmlStringStreamer
│ ├── Parser
│ ├── StringWalker.php
│ └── UniqueNode.php
│ ├── ParserInterface.php
│ ├── Stream
│ ├── File.php
│ └── Stdin.php
│ └── StreamInterface.php
└── tests
├── integration
└── XmlStringStreamer
│ ├── Stream
│ └── FileIntegrationTest.php
│ └── XmlStringStreamerIntegrationTest.php
├── unit
└── XmlStringStreamer
│ ├── Parser
│ ├── StringWalkerTest.php
│ └── UniqueNodeTest.php
│ └── XmlStringStreamerTest.php
└── xml
├── incomplete.xml
├── orphanet-xml-example.xml
├── pubmed-example.xml
├── rewind_working_blob.xml
├── short.xml
├── short_last_chunk.xml
└── stream_seeking.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | vendor/
3 | composer.lock
4 | .phpunit.result.cache
5 |
6 | docker-compose.yml
7 | Dockerfile
8 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: php
2 | dist: xenial
3 |
4 | matrix:
5 | include:
6 | - php: 7.2
7 | - php: 7.3
8 | - php: 7.4
9 | - php: 8.0
10 |
11 | before_script: composer install
12 | script: vendor/bin/phpunit
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2014 Oskar Thornblad
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | xml-string-streamer [Build status](https://travis-ci.org/prewk/xml-string-streamer)
2 | ===================
3 |
4 | Purpose
5 | -------
6 | To stream XML files too big to fit into memory, with very low memory consumption. This library is a successor to [XmlStreamer](https://github.com/prewk/XmlStreamer).
7 |
8 | Installation
9 | ------------
10 |
11 | ### Legacy support
12 |
13 | * All versions below 1 support PHP 5.3 - 7.2
14 | * Version 1 and above support PHP 7.2+
15 |
16 | ### With composer
17 |
18 | Run `composer require prewk/xml-string-streamer` to install this package.
19 |
20 | Usage
21 | -----
22 |
23 | Let's say you have a 2 GB XML file gigantic.xml containing customer items that look like this:
24 |
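25 | ````xml
26 | <?xml version="1.0" encoding="UTF-8"?>
27 | <gigantic>
28 |     <customer>
29 |         <firstName>Jane</firstName>
30 |         <lastName>Doe</lastName>
31 |     </customer>
32 |     ...
33 | </gigantic>
34 | ````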
35 |
36 | Create a streamer and parse it:
37 |
38 | ````php
39 | // Convenience method for creating a file streamer with the default parser
40 | $streamer = Prewk\XmlStringStreamer::createStringWalkerParser("gigantic.xml");
41 |
42 | while ($node = $streamer->getNode()) {
43 | // $node will be a string like this: "<customer><firstName>Jane</firstName><lastName>Doe</lastName></customer>"
44 | $simpleXmlNode = simplexml_load_string($node);
45 | echo (string)$simpleXmlNode->firstName;
46 | }
47 | ````
48 |
49 | Without the convenience method (functionally equivalent):
50 |
51 | ````php
52 | use Prewk\XmlStringStreamer;
53 | use Prewk\XmlStringStreamer\Stream;
54 | use Prewk\XmlStringStreamer\Parser;
55 |
56 | // Prepare our stream to be read with a 1kb buffer
57 | $stream = new Stream\File("gigantic.xml", 1024);
58 |
59 | // Construct the default parser (StringWalker)
60 | $parser = new Parser\StringWalker();
61 |
62 | // Create the streamer
63 | $streamer = new XmlStringStreamer($parser, $stream);
64 |
65 | // Iterate through the `<customer>` nodes
66 | while ($node = $streamer->getNode()) {
67 | // $node will be a string like this: "<customer><firstName>Jane</firstName><lastName>Doe</lastName></customer>"
68 | $simpleXmlNode = simplexml_load_string($node);
69 | echo (string)$simpleXmlNode->firstName;
70 | }
71 | ````
72 |
73 | Convenience method for the UniqueNode parser:
74 |
75 | ````php
76 | $streamer = Prewk\XmlStringStreamer::createUniqueNodeParser("file.xml", array("uniqueNode" => "customer"));
77 | ````
78 |
79 | Parsers
80 | -------
81 |
82 | ### Parser\StringWalker
83 |
84 | Works like an XmlReader, and walks the XML tree node by node. Captures by node depth setting.
85 |
86 | ### Parser\UniqueNode
87 |
88 | A much faster parser that captures everything between a provided element's opening and closing tags. Special prerequisites apply.
89 |
90 | Stream providers
91 | ----------------
92 |
93 | ### Stream\File
94 |
95 | Use this provider to parse large XML files on disk. Pick a chunk size, for example: 1024 bytes.
96 |
97 | ````php
98 | $CHUNK_SIZE = 1024;
99 | $provider = new Prewk\XmlStringStreamer\Stream\File("large-xml-file.xml", $CHUNK_SIZE);
100 | ````
101 |
102 | ### Stream\Stdin
103 |
104 | Use this provider if you want to create a CLI application that streams large XML files through STDIN.
105 |
106 | ````php
107 | $CHUNK_SIZE = 1024;
108 | $fsp = new Prewk\XmlStringStreamer\Stream\Stdin($CHUNK_SIZE);
109 | ````
110 |
111 | ### Stream\Guzzle
112 |
113 | Use this provider if you want to stream over HTTP with [Guzzle](https://github.com/guzzle/guzzle). Resides in its own repo due to its higher PHP version requirements (5.5): [https://github.com/prewk/xml-string-streamer-guzzle](https://github.com/prewk/xml-string-streamer-guzzle)
114 |
115 | StringWalker Options
116 | --------------------
117 |
118 | ### Usage
119 |
120 | ````php
121 | use Prewk\XmlStringStreamer;
122 | use Prewk\XmlStringStreamer\Parser;
123 | use Prewk\XmlStringStreamer\Stream;
124 |
125 | $options = array(
126 | "captureDepth" => 3
127 | );
128 |
129 | $parser = new Parser\StringWalker($options);
130 | ````
131 |
132 | ### Available options for the StringWalker parser
133 |
134 | | Option | Default | Description |
135 | | ------ | ------- | ----------- |
136 | | (int) captureDepth | `2` | Depth we start collecting nodes at |
137 | | (array) tags | See example | Supported tags |
138 | | (bool) expectGT | `false` | Whether to support `>` in XML comments/CDATA or not |
139 | | (array) tagsWithAllowedGT | See example | If _expectGT_ is `true`, this option lists the tags with allowed `>` characters in them |
140 |
141 | ### Examples
142 |
143 | #### captureDepth
144 |
145 | Default behavior with a capture depth of `2`:
146 |
147 | ````xml
148 |
149 |
150 |
151 | ...
152 |
153 |
154 | ...
155 |
156 |
157 | ````
158 |
159 | ..will capture the `` nodes.
160 |
161 | But say your XML looks like this:
162 |
163 | ````xml
164 |
165 |
166 |
167 |
168 | ...
169 |
170 |
171 | ...
172 |
173 |
174 |
175 | ````
176 | Then you'll need to set the capture depth to `3` to capture the `` nodes.
177 |
178 | Node depth visualized:
179 |
180 | ````xml
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 | ````
189 |
190 | #### tags
191 |
192 | Default value:
193 |
194 | ````php
195 | array(
196 | array("<?", "?>", 0),
197 | array("<!--", "-->", 0),
198 | array("<![CDATA[", "]]>", 0),
199 | array("<!", ">", 0),
200 | array("</", ">", -1),
201 | array("<", "/>", 0),
202 | array("<", ">", 1)
203 | ),
204 | ````
205 |
206 | First parameter: opening tag, second parameter: closing tag, third parameter: depth.
207 |
208 | If you know that your XML doesn't have any XML comments, CDATA or self-closing tags, you can tune your performance by setting the _tags_ option and omitting them:
209 |
210 | ````php
211 | array(
212 | array("<?", "?>", 0),
213 | array("<!", ">", 0),
214 | array("</", ">", -1),
215 | array("<", ">", 1)
216 | ),
217 | ````
218 |
219 | #### expectGT & tagsWithAllowedGT
220 |
221 | You can allow the `>` character within XML comments and CDATA sections if you want. This is pretty uncommon, and therefore turned off by default for performance reasons.
222 |
223 | Default value for tagsWithAllowedGT:
224 |
225 | ````php
226 | array(
227 | array("<!--", "-->"),
228 | array("<![CDATA[", "]]>")
229 | ),
230 | ````
231 |
232 | UniqueNode Options
233 | ------------------
234 |
235 | ### Usage
236 |
237 | ````php
238 | use Prewk\XmlStringStreamer;
239 | use Prewk\XmlStringStreamer\Parser;
240 | use Prewk\XmlStringStreamer\Stream;
241 |
242 | $options = array(
243 | "uniqueNode" => "TheNodeToCapture"
244 | );
245 |
246 | $parser = new Parser\UniqueNode($options);
247 | ````
248 |
249 | ### Available options for the UniqueNode parser
250 |
251 | | Option | Description |
252 | | ------ | ----------- |
253 | | (string) uniqueNode | Required option: Specify the node name to capture |
254 | | (bool) checkShortClosing | Whether to check short closing tag or not |
255 |
256 | ### Examples
257 |
258 | #### uniqueNode
259 |
260 | Say you have an XML file like this:
261 | ````xml
262 |
263 |
264 |
265 | ...
266 |
267 |
268 | ...
269 |
270 |
271 | ...
272 |
273 |
274 | ````
275 | You want to capture the stuff nodes, therefore set _uniqueNode_ to `"stuff"`.
276 |
277 | If you have an XML file with short closing tags like this:
278 | ````xml
279 |
280 |
281 |
282 |
283 | ...
284 |
285 |
286 |
287 | ````
288 | You want to capture the stuff nodes, therefore set _uniqueNode_ to `"stuff"` and _checkShortClosing_ to `true`.
289 |
290 | But if your XML file looks like this:
291 | ````xml
292 |
293 |
294 |
295 | Lorem ipsum
296 |
297 | Oops, another stuff node
298 |
299 |
300 | ...
301 |
302 | ````
303 |
304 | ..you won't be able to use the UniqueNode parser, because `<stuff>` exists inside of another `<stuff>` node.
305 |
306 | Advanced Usage
307 | ------------------------
308 |
309 | ### Progress bar
310 |
311 | You can track progress using a closure as the third argument when constructing the stream class. Example with the `File` stream using the `StringWalker` parser:
312 |
313 | ````php
314 | use Prewk\XmlStringStreamer;
315 | use Prewk\XmlStringStreamer\Stream\File;
316 | use Prewk\XmlStringStreamer\Parser\StringWalker;
317 |
318 | $file = "path/to/file.xml";
319 |
320 | // Save the total file size
321 | $totalSize = filesize($file);
322 |
323 | // Construct the file stream
324 | $stream = new File($file, 16384, function($chunk, $readBytes) use ($totalSize) {
325 | // This closure will be called every time the streamer requests a new chunk of data from the XML file
326 | echo "Progress: $readBytes / $totalSize\n";
327 | });
328 | // Construct the parser
329 | $parser = new StringWalker;
330 |
331 | // Construct the streamer
332 | $streamer = new XmlStringStreamer($parser, $stream);
333 |
334 | // Start parsing
335 | while ($node = $streamer->getNode()) {
336 | // ....
337 | }
338 | ````
339 |
340 | _You could of course do something more intelligent than spamming with `echo`._
341 |
342 | ### Accessing the root element (version 0.7.0+)
343 |
344 | Setting the parser option `extractContainer` tells the parser to gather everything before and after your intended child element capture. The results are available via the parser's `getExtractedContainer()` method.
345 |
346 | _Note:_ `getExtractedContainer()` will return different things depending on if you've streamed the whole file or not. If you need the containing XML data prematurely you can get it inside of the while loop, but it will just be the opening elements and therefore considered invalid XML by parsers such as SimpleXML.
347 |
348 | ````php
349 | use Prewk\XmlStringStreamer;
350 | use Prewk\XmlStringStreamer\Stream\File;
351 | use Prewk\XmlStringStreamer\Parser\StringWalker;
352 |
353 | $file = "path/to/file.xml";
354 |
355 | // Construct the file stream
356 | $stream = new File($file, 16384);
357 | // Construct the parser
358 | $parser = new StringWalker(array(
359 | "extractContainer" => true, // Required option
360 | ));
361 |
362 | // Construct the streamer
363 | $streamer = new XmlStringStreamer($parser, $stream);
364 |
365 | // Start parsing
366 | while ($node = $streamer->getNode()) {
367 | // ....
368 | }
369 |
370 | // Get the containing XML
371 | $containingXml = $parser->getExtractedContainer();
372 |
373 | $xmlObj = simplexml_load_string($containingXml);
374 | $rootElementName = $xmlObj->getName();
375 | $rootElementFooAttribute = $xmlObj->attributes()->foo;
376 | ````
377 |
378 | _This method should be considered experimental, and may extract weird stuff in edge cases_
379 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "prewk/xml-string-streamer",
3 | "description": "Stream large XML files with low memory consumption",
4 | "homepage": "https://github.com/prewk/xml-string-streamer",
5 | "license": "MIT",
6 | "authors": [
7 | {
8 | "name": "prewk",
9 | "email": "oskar.thornblad@gmail.com"
10 | }
11 | ],
12 | "autoload": {
13 | "psr-4": {
14 | "Prewk\\": "src/"
15 | }
16 | },
17 | "scripts": {
18 | "test": "phpunit"
19 | },
20 | "require": {
21 | "php": "^7.2 || ^8.0"
22 | },
23 | "require-dev": {
24 | "prewk/xml-faker": "^0.0.2",
25 | "mockery/mockery": "^1.3.0",
26 | "phpunit/phpunit": "^8.5.8 || ^9.5.0"
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/phpunit.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
13 |
14 |
15 |
16 | ./tests/*
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/src/XmlStringStreamer.php:
--------------------------------------------------------------------------------
1 |
7 | */
8 |
9 | namespace Prewk;
10 |
11 | use Prewk\XmlStringStreamer\ParserInterface;
12 | use Prewk\XmlStringStreamer\StreamInterface;
13 | use Prewk\XmlStringStreamer\Parser;
14 | use Prewk\XmlStringStreamer\Stream;
15 |
/**
 * Entry point of the library: pairs a parser with a stream and hands out XML nodes one at a time
 */
class XmlStringStreamer
{
    /**
     * Parser used to extract nodes from the stream
     * @var ParserInterface
     */
    protected $parser;

    /**
     * Stream the parser pulls its XML data from
     * @var StreamInterface
     */
    protected $stream;

    /**
     * Wires a configured parser to the stream it should read
     * @param ParserInterface $parser Parser with its options already set
     * @param StreamInterface $stream Stream that is passed to the parser on every getNode() call
     */
    public function __construct(ParserInterface $parser, StreamInterface $stream)
    {
        $this->stream = $stream;
        $this->parser = $parser;
    }

    /**
     * Shortcut: a File stream (16384 byte chunks) read by a StringWalker parser
     * @param  string|resource   $file    File path or handle
     * @param  array             $options Options forwarded to the StringWalker parser
     * @return XmlStringStreamer          A streamer ready for use
     */
    public static function createStringWalkerParser($file, $options = array())
    {
        // The stream is created before the parser, so a stream construction failure surfaces first
        $fileStream = new Stream\File($file, 16384);

        return new self(new Parser\StringWalker($options), $fileStream);
    }

    /**
     * Shortcut: a File stream (16384 byte chunks) read by a UniqueNode parser
     * @param  string|resource   $file    File path or handle
     * @param  array             $options Options forwarded to the UniqueNode parser
     * @return XmlStringStreamer          A streamer ready for use
     */
    public static function createUniqueNodeParser($file, $options = array())
    {
        // The stream is created before the parser, so a stream construction failure surfaces first
        $fileStream = new Stream\File($file, 16384);

        return new self(new Parser\UniqueNode($options), $fileStream);
    }

    /**
     * Asks the parser for the next node of the stream
     * @return bool|string The XML string of the node, or false when the parser has none left
     */
    public function getNode()
    {
        $node = $this->parser->getNodeFrom($this->stream);

        return $node;
    }
}
--------------------------------------------------------------------------------
/src/XmlStringStreamer/Parser/StringWalker.php:
--------------------------------------------------------------------------------
1 |
7 | */
8 |
9 | namespace Prewk\XmlStringStreamer\Parser;
10 |
11 | use Exception;
12 | use Prewk\XmlStringStreamer\ParserInterface;
13 | use Prewk\XmlStringStreamer\StreamInterface;
14 |
/**
 * The string walker parser builds the XML nodes by shaving one element at a time off the buffered
 * stream data while tracking the nesting depth; a node is flushed every time the capture depth is left
 */
class StringWalker implements ParserInterface
{
    /**
     * Holds the parser configuration (defaults merged with the constructor options)
     * @var array
     */
    protected $options;

    /**
     * Is this the first run? Set to false as soon as getNodeFrom() has prepared its first chunk
     * @var boolean
     */
    protected $firstRun;

    /**
     * What depth are we currently at?
     * @var integer
     */
    protected $depth;

    /**
     * Buffered stream data that hasn't been shaved yet
     * @var string
     */
    protected $chunk;

    /**
     * Copy of the buffer taken when the stream ran dry, used for anti-freeze detection
     * @var null|string
     */
    protected $lastChunk;

    /**
     * XML node in the making, null right after a flush
     * @var null|string
     */
    protected $shaved;

    /**
     * Whether to capture or not
     * @var boolean
     */
    protected $capture;

    /**
     * If extractContainer is true, this will grow with the elements found outside of the specified capture depth
     * @var string
     */
    protected $containerXml;

    /**
     * Parser constructor
     * @param array $options An options array, merged over the defaults (a given key replaces the whole default value)
     */
    public function __construct(array $options = array())
    {
        $this->reset();

        $this->options = array_merge(array(
            "captureDepth" => 2,
            "expectGT" => false,
            // Each entry is array(opening, closing, depth change). getEdges() returns the first
            // entry that matches, so the generic "<" ... ">" entry has to stay last.
            "tags" => array(
                array("<?", "?>", 0),
                array("<!--", "-->", 0),
                array("<![CDATA[", "]]>", 0),
                array("<!", ">", 0),
                array("</", ">", -1),
                array("<", "/>", 0),
                array("<", ">", 1),
            ),
            // Each entry is array(opening, closing) of a construct that may contain ">"
            "tagsWithAllowedGT" => array(
                array("<!--", "-->"),
                array("<![CDATA[", "]]>"),
            ),
            "extractContainer" => false,
        ), $options);
    }

    /**
     * Shaves off the next element from the chunk
     * @return string[]|bool Either a shaved off element array(0 => Captured element, 1 => Data from last shaving point up to and including captured element) or false if one could not be obtained
     */
    protected function shave()
    {
        preg_match("/<[^>]+>/", $this->chunk, $matches, PREG_OFFSET_CAPTURE);

        if (!isset($matches[0], $matches[0][0], $matches[0][1])) {
            // No complete element in the buffer
            return false;
        }

        list($captured, $offset) = $matches[0];

        if ($this->options["expectGT"]) {
            // Some elements support > inside, in which case the regex may have stopped too early
            foreach ($this->options["tagsWithAllowedGT"] as $tag) {
                list($opening, $closing) = $tag;

                if (substr($captured, 0, strlen($opening)) !== $opening
                    || substr($captured, -1 * strlen($closing)) === $closing) {
                    // Either not this kind of element, or it is already complete (the common case)
                    continue;
                }

                // Find the real end. The search starts at the captured element so that the same
                // delimiter occurring in the text before it can't be mistaken for its end.
                $position = strpos($this->chunk, $closing, $offset);
                if ($position === false) {
                    // We need more XML!
                    return false;
                }

                // We found the end, extend $captured up to and including it
                $captured = substr($this->chunk, $offset, $position + strlen($closing) - $offset);
            }
        }

        // Data in between the last shaving point and the captured element
        $data = substr($this->chunk, 0, $offset);

        // Shave from chunk
        $this->chunk = substr($this->chunk, $offset + strlen($captured));

        return array($captured, $data . $captured);
    }

    /**
     * Looks up the "tags" option entry whose opening and closing strings match the given element
     * @param  string $element XML element
     * @return array|null 0 => Opening tag, 1 => Closing tag, 2 => Depth change, or null if no configured tag matches
     */
    protected function getEdges($element)
    {
        // TODO: Performance tuning possible here by not looping

        foreach ($this->options["tags"] as $tag) {
            list($opening, $closing) = $tag;

            if (substr($element, 0, strlen($opening)) === $opening
                && substr($element, -1 * strlen($closing)) === $closing) {

                return $tag;
            }
        }

        // No match (only possible with a custom "tags" option); callers destructure this into nulls
        return null;
    }

    /**
     * The shave method must be able to request more data even though there isn't any more to fetch from the stream, this method wraps the getChunk call so that it returns true as long as there is XML data left
     * @param  StreamInterface $stream The stream to read from
     * @return bool Returns whether there is more XML data or not
     */
    protected function prepareChunk(StreamInterface $stream)
    {
        if (is_null($this->shaved)) {
            $this->shaved = "";

            if (!$this->firstRun) {
                // We're starting again after a flush, keep working on the data already buffered
                return true;
            }
        }

        $newChunk = $stream->getChunk();

        if ($newChunk !== false) {
            $this->chunk .= $newChunk;

            return true;
        }

        // The stream is exhausted: allow another pass over the leftover buffer, but only if
        // it changed since the previous such pass (otherwise we would loop forever)
        if (trim($this->chunk) !== "" && $this->chunk !== $this->lastChunk) {
            // Update anti-freeze protection chunk
            $this->lastChunk = $this->chunk;

            return true;
        }

        return false;
    }

    /**
     * Get the extracted container XML, if called before the whole stream is parsed, the XML returned will most likely be invalid due to missing closing tags
     * @return string XML string
     * @throws Exception if the extractContainer option isn't true
     */
    public function getExtractedContainer()
    {
        if (!$this->options["extractContainer"]) {
            throw new Exception("This method requires the 'extractContainer' option to be true");
        }

        return $this->containerXml;
    }

    /**
     * Tries to retrieve the next node or returns false
     * @param  StreamInterface $stream The stream to use
     * @return string|bool The next xml node or false if one could not be retrieved
     */
    public function getNodeFrom(StreamInterface $stream)
    {
        // Iterate and append to $this->chunk
        while ($this->prepareChunk($stream)) {
            $this->firstRun = false;

            // Shave off elements
            while ($shaved = $this->shave()) {
                list($element, $data) = $shaved;

                // Analyze element, only the depth change is of interest here
                list(, , $depth) = $this->getEdges($element);

                // Update depth
                $this->depth += $depth;

                $flush = false;
                $captureOnce = false;

                // Capture or don't?
                if ($this->depth === $this->options["captureDepth"] && $depth > 0) {
                    // Yes, we've just entered capture depth, start capturing
                    $this->capture = true;
                } else if ($this->depth === $this->options["captureDepth"] - 1 && $depth < 0) {
                    // No, we've just exited capture depth, stop capturing and prepare for flush
                    $flush = true;
                    $this->capture = false;

                    // ..but include this last node
                    $this->shaved .= $data;
                } else if ($this->options["extractContainer"] && $this->depth < $this->options["captureDepth"]) {
                    // We're outside of our capture scope, save to the special buffer if extractContainer is true
                    $this->containerXml .= $element;
                } else if ($depth === 0 && $this->depth + 1 === $this->options["captureDepth"]) {
                    // Self-closing element - capture this element and flush but don't start capturing everything yet
                    $captureOnce = true;
                    $flush = true;
                }

                // Capture the last retrieved node
                if ($this->capture || $captureOnce) {
                    $this->shaved .= $data;
                }

                if ($flush) {
                    // Flush the whole node and start over on the next
                    $node = $this->shaved;
                    $this->shaved = null;

                    return $node;
                }
            }
        }

        return false;
    }

    /**
     * Puts the parser back into its initial state: empty buffers, depth 0, not capturing
     */
    public function reset()
    {
        $this->firstRun = true;
        $this->depth = 0;
        $this->chunk = '';
        $this->lastChunk = null;
        $this->shaved = null;
        $this->capture = false;
        $this->containerXml = "";
    }
}
283 |
--------------------------------------------------------------------------------
/src/XmlStringStreamer/Parser/UniqueNode.php:
--------------------------------------------------------------------------------
1 |
7 | * @author Roman Voloboev
8 | */
9 |
10 | namespace Prewk\XmlStringStreamer\Parser;
11 |
12 | use Exception;
13 | use Prewk\XmlStringStreamer\ParserInterface;
14 | use Prewk\XmlStringStreamer\StreamInterface;
15 |
16 | /**
17 | * The unique node parser starts at a given element name and flushes when its corresponding closing tag is found
18 | */
19 | class UniqueNode implements ParserInterface
20 | {
21 | const FIND_OPENING_TAG_ACTION = 0;
22 | const FIND_CLOSING_TAG_ACTION = 1;
23 |
24 | /**
25 | * Current working XML blob
26 | * @var string
27 | */
28 | private $workingBlob;
29 |
30 | /**
31 | * The flushed node
32 | * @var string
33 | */
34 | private $flushed;
35 |
36 | /**
37 | * Start position of the given element in the workingBlob
38 | * @var integer
39 | */
40 | private $startPos;
41 |
42 | /**
43 | * Records how far we've searched in the XML blob so far
44 | * @var integer
45 | */
46 | private $hasSearchedUntilPos;
47 |
48 | /**
49 | * Next action to perform
50 | * @var integer
51 | */
52 | private $nextAction;
53 |
54 | /**
55 | * Indicates short closing tag
56 | * @var bool
57 | */
58 |
59 | private $shortClosedTagNow;
60 |
61 | /**
62 | * If extractContainer is true, this will grow with the XML captured before and after the specified capture depth
63 | * @var string
64 | */
65 | protected $containerXml;
66 |
67 | /**
68 | * Whether we're still looking for our first capture target or not
69 | * @var bool
70 | */
71 | protected $preCapture;
72 |
73 | /**
74 | * @var array
75 | */
76 | private $options;
77 |
/**
 * Parser constructor
 *
 * Resets the parser state, then stores the given options on top of the defaults.
 *
 * @param  array $options An options array, "uniqueNode" is mandatory
 * @throws Exception if the required option uniqueNode isn't set
 */
public function __construct(array $options = array())
{
    $this->reset();

    $defaults = array(
        "extractContainer" => false,
    );
    $this->options = array_merge($defaults, $options);

    $hasUniqueNode = isset($this->options["uniqueNode"]);
    if (!$hasUniqueNode) {
        throw new Exception("Required option 'uniqueNode' not set");
    }
}
95 |
/**
 * Search the blob for our unique node's opening tag
 *
 * The node name must be directly followed by ">" or by whitespace, so that "<node>",
 * "<node attr=...>" and "<node" followed by a line break or tab are found while a longer
 * name such as "<nodes>" is not. When nothing is found the whole blob is marked as searched.
 *
 * @return bool|int Either returns the char position of the opening tag or false
 */
protected function getOpeningTagPos()
{
    // Quote for the "/" delimiter as well, and accept any XML whitespace after the name
    $pattern = "/<" . preg_quote($this->options["uniqueNode"], "/") . "(>|\\s)/";

    if (preg_match($pattern, $this->workingBlob, $matches, PREG_OFFSET_CAPTURE) === 1) {
        // Offset of the whole match, i.e. of the "<"
        return $matches[0][1];
    }

    $this->hasSearchedUntilPos = strlen($this->workingBlob) - 1;

    return false;
}
114 |
/**
 * Looks for a short closing tag ("/>") that ends the node starting at $this->startPos
 *
 * Every "/>" found after $len is tried in turn; it is accepted when the text from the
 * start position up to and including it holds exactly one "<" and exactly one "/>".
 *
 * @param  string $workingBlob Blob to search in
 * @param  int    $len         Position to search after
 * @return bool|int Either returns the char position right after the short closing tag or false
 */
private function checkShortClosingTag($workingBlob, $len)
{
    $shortClosing = "/>";

    while (true) {
        $len = strpos($workingBlob, $shortClosing, $len + 1);
        if (!$len) {
            // No further candidate
            return false;
        }

        $candidateEnd = $len + strlen($shortClosing);
        $subBlob = substr($workingBlob, $this->startPos, $candidateEnd - $this->startPos);

        $openCount = substr_count($subBlob, "<");
        $shortCloseCount = substr_count($subBlob, $shortClosing);

        if ($openCount === 1 && $shortCloseCount === 1) {
            return $candidateEnd;
        }
    }
}
135 |
/**
 * Search the blob for the end of the node that starts at $this->startPos
 *
 * Looks for the full closing tag ("</uniqueNode>") first. If the checkShortClosing option is
 * true, a short closing tag ("/>") found before that closing tag, or anywhere after the start
 * position when there is no closing tag yet, takes precedence and $shortClosedTagNow is set.
 * When no end is found the whole blob is marked as searched.
 *
 * @return bool|int Either the char position of the closing tag, the position right after a short closing tag, or false
 */
protected function getClosingTagPos()
{
    $checkShortClosing = isset($this->options["checkShortClosing"]) && $this->options["checkShortClosing"] === true;
    $closingTag = "</" . $this->options["uniqueNode"] . ">";

    $endPositionInBlob = strpos($this->workingBlob, $closingTag, $this->startPos);

    if ($endPositionInBlob === false) {
        if ($checkShortClosing) {
            $endPositionInBlob = $this->checkShortClosingTag($this->workingBlob, $this->startPos);
        }

        if ($endPositionInBlob === false) {
            $this->hasSearchedUntilPos = strlen($this->workingBlob) - 1;
        } else {
            $this->shortClosedTagNow = true;
        }

        return $endPositionInBlob;
    }

    if ($checkShortClosing) {
        // Only look in front of the closing tag we found: a short closed node may precede it
        $blobBeforeClosingTag = substr($this->workingBlob, 0, $endPositionInBlob);
        $shortEndPosition = $this->checkShortClosingTag($blobBeforeClosingTag, $this->startPos);

        if ($shortEndPosition !== false) {
            $this->shortClosedTagNow = true;
            $endPositionInBlob = $shortEndPosition;
        }
    }

    return $endPositionInBlob;
}
166 |
/**
 * Remember where in the workingBlob the current node begins; flush() cuts from this position once the end of the node is known
 * @param int $startPositionInBlob Position of the opening tag
 */
protected function startSalvaging($startPositionInBlob)
{
    $this->startPos = $startPositionInBlob;
}
175 |
/**
 * Cut everything from the start position to the end position in the workingBlob (+ closing tag length) and flush it out for later return in getNodeFrom
 *
 * The remainder of the workingBlob after the node is kept, the search position is rewound
 * and the short closing tag flag is cleared.
 *
 * @param int $endPositionInBlob Position of the closing tag, or the position right after a short closing tag when $shortClosedTagNow is set
 */
protected function flush($endPositionInBlob)
{
    // A short closing tag position already points past the "/>", a full closing tag still has to be included
    $endTagLen = $this->shortClosedTagNow ? 0 : strlen("</" . $this->options["uniqueNode"] . ">");
    $realEndPosition = $endPositionInBlob + $endTagLen;

    $this->flushed = substr($this->workingBlob, $this->startPos, $realEndPosition - $this->startPos);
    $this->workingBlob = substr($this->workingBlob, $realEndPosition);
    $this->hasSearchedUntilPos = 0;
    $this->shortClosedTagNow = false;
}
188 |
/**
 * Makes sure there is data to work on: either reports that the current workingBlob hasn't been fully searched yet, or pulls the next chunk from the stream
 * @param  StreamInterface $stream The stream provider
 * @return bool Keep working?
 */
protected function prepareChunk(StreamInterface $stream)
{
    $lastBlobIndex = strlen($this->workingBlob) - 1;
    if ($this->hasSearchedUntilPos > -1 && $this->hasSearchedUntilPos < $lastBlobIndex) {
        // Unsearched data left in the blob, no need to touch the stream
        return true;
    }

    $chunk = $stream->getChunk();

    if ($chunk !== false) {
        // New chunk fetched
        $stillLookingForOpeningTag = $this->nextAction === self::FIND_OPENING_TAG_ACTION;

        if ($stillLookingForOpeningTag && !$this->options["extractContainer"]) {
            // Prevent a memory leak if we never find our first node: drop the old data,
            // except for a tail as long as an opening tag so that one split across chunks survives
            $tailLength = strlen("<" . $this->options["uniqueNode"] . ">");
            $this->workingBlob = substr($this->workingBlob, -1 * $tailLength) . $chunk;
        } else {
            $this->workingBlob .= $chunk;
        }

        return true;
    }

    // EOF. If we haven't even started searching while there is data in the blob, we're probably
    // dealing with a file smaller than the stream buffer, so keep looping in that case
    return $this->hasSearchedUntilPos === -1 && strlen($this->workingBlob) > 0;
}
225 |
/**
 * Tries to retrieve the next node or returns false
 *
 * Alternates between two actions: finding the opening tag of the unique node and finding
 * its closing tag. Once both are found the node is cut out of the workingBlob and returned.
 *
 * @param  StreamInterface $stream The stream to use
 * @return string|bool The next xml node or false if one could not be retrieved
 */
public function getNodeFrom(StreamInterface $stream)
{
    while ($this->prepareChunk($stream)) {
        if ($this->nextAction === self::FIND_OPENING_TAG_ACTION) {
            $openingPos = $this->getOpeningTagPos();

            if ($openingPos !== false) {
                if ($this->options["extractContainer"] && $this->preCapture) {
                    // Everything in front of the very first node belongs to the container
                    $this->containerXml .= substr($this->workingBlob, 0, $openingPos);
                    $this->preCapture = false;
                }

                $this->startSalvaging($openingPos);

                // Opening tag located, go on with its closing tag
                $this->nextAction = self::FIND_CLOSING_TAG_ACTION;
            }
        }

        if ($this->nextAction !== self::FIND_CLOSING_TAG_ACTION) {
            // Still no opening tag, get more data
            continue;
        }

        $closingPos = $this->getClosingTagPos();
        if ($closingPos === false) {
            // The node isn't complete yet, get more data
            continue;
        }

        // We have a full node to flush out
        $this->flush($closingPos);

        // Next time around we look for an opening tag again
        $this->nextAction = self::FIND_OPENING_TAG_ACTION;

        // Hand over the flushed node and make way for the next one
        $node = $this->flushed;
        $this->flushed = "";

        return $node;
    }

    if ($this->options["extractContainer"]) {
        // Whatever is left over once no more work can be done is added to the container
        $this->containerXml .= $this->workingBlob;
    }

    return false;
}
279 |
/**
 * Returns the container XML collected so far. If called before the whole stream has been
 * parsed, the returned XML can be invalid due to missing closing tags
 * @return string XML string
 * @throws Exception if the extractContainer option isn't true
 */
public function getExtractedContainer()
{
    if ($this->options["extractContainer"]) {
        return $this->containerXml;
    }

    throw new Exception("This method requires the 'extractContainer' option to be true");
}
293 |
/**
 * Exposes the not yet consumed data currently buffered by the parser
 * @internal
 * @return string
 */
public function getCurrentWorkingBlob()
{
    $blob = $this->workingBlob;

    return $blob;
}
302 |
/**
 * Puts the parser back into a pristine state: buffers emptied, positions and flags reset
 * @return void
 */
public function reset()
{
    // Buffers
    $this->workingBlob = '';
    $this->flushed = '';
    $this->containerXml = '';

    // Search bookkeeping; -1 is the "searching has not started yet" marker
    $this->startPos = 0;
    $this->hasSearchedUntilPos = -1;

    // State flags
    // NOTE(review): 0 is presumably the value of self::FIND_OPENING_TAG_ACTION - confirm
    $this->nextAction = 0;
    $this->shortClosedTagNow = false;
    $this->preCapture = true;
}
314 | }
315 |
--------------------------------------------------------------------------------
/src/XmlStringStreamer/ParserInterface.php:
--------------------------------------------------------------------------------
1 |
7 | */
8 |
9 | namespace Prewk\XmlStringStreamer;
10 |
/**
 * Contract every parser has to fulfil
 */
interface ParserInterface
{
    /**
     * Parser constructor
     * @param array $options Options array, its keys are decided by the parser implementation
     */
    public function __construct(array $options = []);

    /**
     * Tries to retrieve the next node from the given stream
     * @param StreamInterface $stream The stream to use
     * @return string|bool The next xml node, or false if one could not be retrieved
     */
    public function getNodeFrom(StreamInterface $stream);

    /**
     * Get the extracted container XML. If called before the whole stream is parsed,
     * the XML returned can be invalid due to missing closing tags
     * @return string XML string
     */
    public function getExtractedContainer();

    /**
     * Reset all internal parser caches, working blobs, working chunks etc.
     *
     * The parser state has to be reset whenever the stream is rewound
     * (or manipulated in another way, like seeking)
     *
     * @return void
     */
    public function reset();
}
44 |
--------------------------------------------------------------------------------
/src/XmlStringStreamer/Stream/File.php:
--------------------------------------------------------------------------------
1 | handle = fopen($mixed, 'rb');
27 | } elseif (is_resource($mixed) && get_resource_type($mixed) === "stream") {
28 | $this->handle = $mixed;
29 | } else {
30 | throw new \Exception("First argument must be either a filename or a file handle");
31 | }
32 |
33 | if ($this->handle === false) {
34 | throw new \Exception("Couldn't create file handle");
35 | }
36 |
37 | $this->chunkSize = $chunkSize;
38 | $this->chunkCallback = $chunkCallback;
39 | }
40 |
/**
 * Closes the underlying handle, provided it is still an open resource
 */
public function __destruct()
{
    if (!is_resource($this->handle)) {
        return;
    }

    fclose($this->handle);
}
46 |
/**
 * Reads the next chunk of at most chunkSize bytes from the handle
 *
 * If a chunk callback is configured it is invoked with the chunk and the total number
 * of bytes read so far.
 *
 * @return bool|string The chunk, or false when the handle is closed, at EOF, or the read failed
 */
public function getChunk()
{
    if (!is_resource($this->handle) || feof($this->handle)) {
        return false;
    }

    $buffer = fread($this->handle, $this->chunkSize);

    if ($buffer === false) {
        // fread() failed: report "no chunk" instead of counting and forwarding a non-string
        return false;
    }

    $this->readBytes += strlen($buffer);

    if (is_callable($this->chunkCallback)) {
        ($this->chunkCallback)($buffer, $this->readBytes);
    }

    return $buffer;
}
62 |
/**
 * Is the stream seekable?
 * @return bool False as well when the handle is no longer an open resource
 */
public function isSeekable()
{
    // Same guard as getChunk()/__destruct(): stream_get_meta_data() must not be
    // called on a handle that is closed
    if (!is_resource($this->handle)) {
        return false;
    }

    $meta = stream_get_meta_data($this->handle);

    return $meta["seekable"];
}
69 |
/**
 * Rewinds the handle to its beginning and resets the read byte counter
 * @return void
 * @throws \Exception if the stream isn't seekable
 */
public function rewind()
{
    if (!$this->isSeekable()) {
        // Fully qualified like the constructor's exceptions, so the name resolves to the
        // global class regardless of the imports of this namespaced file
        throw new \Exception("Attempted to rewind an unseekable stream");
    }

    $this->readBytes = 0;
    rewind($this->handle);
}
79 | }
80 |
--------------------------------------------------------------------------------
/src/XmlStringStreamer/Stream/Stdin.php:
--------------------------------------------------------------------------------
1 |
7 | */
8 |
9 | namespace Prewk\XmlStringStreamer;
10 |
11 | use Exception;
12 |
/**
 * Contract every stream provider has to fulfil
 */
interface StreamInterface
{
    /**
     * Gets the next chunk from the stream if one is available
     * @return bool|string The next chunk if available, or false if not available
     */
    public function getChunk();

    /**
     * Tells whether the stream can be seeked
     * @return bool
     */
    public function isSeekable();

    /**
     * Rewinds the stream
     * @return void
     * @throws Exception if the stream isn't seekable
     */
    public function rewind();
}
--------------------------------------------------------------------------------
/tests/integration/XmlStringStreamer/Stream/FileIntegrationTest.php:
--------------------------------------------------------------------------------
1 | assertEquals($stream->getChunk(), $chunk1, "First chunk received from the stream should be as expected");
22 | $this->assertEquals($stream->getChunk(), $chunk2, "Second chunk received from the stream should be as expected");
23 | $this->assertEquals($stream->getChunk(), false, "Third chunk received from the stream should be false");
24 | }
25 |
/**
 * A File stream built from an already opened handle yields the content in buffer sized chunks
 */
public function test_stream_a_file_by_handle()
{
    $chunk1 = "1234567890";
    $chunk2 = "abcdefghij";
    $bufferSize = 10;

    $tmpFile = tempnam(sys_get_temp_dir(), "xmlss-phpunit");
    file_put_contents($tmpFile, $chunk1 . $chunk2);

    try {
        $stream = new File(fopen($tmpFile, "r"), $bufferSize);

        // PHPUnit expects ($expected, $actual). assertEquals (not assertSame) is kept on
        // purpose so the pass/fail semantics of the original loose comparison do not change
        $this->assertEquals($chunk1, $stream->getChunk(), "First chunk received from the stream should be as expected");
        $this->assertEquals($chunk2, $stream->getChunk(), "Second chunk received from the stream should be as expected");
        $this->assertEquals(false, $stream->getChunk(), "Third chunk received from the stream should be false");
    } finally {
        // Destroy the stream first (its destructor closes the handle), then remove the fixture
        unset($stream);
        unlink($tmpFile);
    }
}
42 |
/**
 * The chunk callback has to fire exactly once per chunk handed out by getChunk
 */
public function test_chunk_callback()
{
    $file = __DIR__ . "/../../../xml/pubmed-example.xml";
    $chunkSize = 100;

    $callbackCount = 0;
    $stream = new File($file, $chunkSize, function ($buffer, $readBytes) use (&$callbackCount) {
        $callbackCount++;
    });

    // Only the documented `false` sentinel ends the stream; a falsy chunk such as "0" or ""
    // is still a chunk and must be counted
    $chunkCount = 0;
    while ($stream->getChunk() !== false) {
        $chunkCount++;
    }

    $this->assertEquals($chunkCount, $callbackCount, "Chunk callback count should be the same as getChunk count");
}
60 |
/**
 * A file written through the compress.zlib:// wrapper is transparently decompressed when streamed
 */
public function test_compressed_file()
{
    if (!extension_loaded("zlib")) {
        $this->markTestSkipped("zlib extension is not installed");
    }

    $chunk1 = "1234567890";
    $chunk2 = "abcdefghij";
    $bufferSize = 10;

    $tmpFile = tempnam(sys_get_temp_dir(), "xmlss-phpunit");

    try {
        // Write the complete payload compressed, and do not overwrite it with plain text
        // afterwards, so that reading really goes through decompression
        $wp = fopen("compress.zlib://$tmpFile", "wb");
        fwrite($wp, $chunk1 . $chunk2);
        fclose($wp);

        $stream = new File("compress.zlib://$tmpFile", $bufferSize);

        // assertEquals (not assertSame) keeps the original loose comparison semantics
        $this->assertEquals($chunk1, $stream->getChunk(), "First chunk received from the stream should be as expected");
        $this->assertEquals($chunk2, $stream->getChunk(), "Second chunk received from the stream should be as expected");
        $this->assertEquals(false, $stream->getChunk(), "Third chunk received from the stream should be false");
    } finally {
        // Destroy the stream first (its destructor closes the handle), then remove the fixture
        unset($stream);
        unlink($tmpFile);
    }
}
85 |
86 | public function test_remote_stream()
87 | {
88 | if (ini_get("allow_url_fopen") !== "1") {
89 | $this->markTestSkipped("allow_url_fopen is disabled");
90 | }
91 |
92 | $chunk1 = "\nassertEquals($chunk1, $stream->getChunk(), "First chunk received from the stream should be as expected");
99 | }
100 |
/**
 * After rewind() the stream hands out the same chunks again from the beginning
 */
public function test_rewind()
{
    $chunk1 = "1234567890";
    $chunk2 = "abcdefghij";
    $bufferSize = 10;

    $tmpFile = tempnam(sys_get_temp_dir(), "xmlss-phpunit");
    file_put_contents($tmpFile, $chunk1 . $chunk2);

    try {
        $stream = new File($tmpFile, $bufferSize);

        // PHPUnit expects ($expected, $actual). assertEquals (not assertSame) is kept on
        // purpose so the pass/fail semantics of the original loose comparison do not change
        $this->assertEquals($chunk1, $stream->getChunk(), "First chunk received from the stream should be as expected");
        $this->assertEquals($chunk2, $stream->getChunk(), "Second chunk received from the stream should be as expected");

        $stream->rewind();

        $this->assertEquals($chunk1, $stream->getChunk(), "First chunk received from the stream should be as expected");
        $this->assertEquals($chunk2, $stream->getChunk(), "Second chunk received from the stream should be as expected");
        $this->assertEquals(false, $stream->getChunk(), "Third chunk received from the stream should be false");
    } finally {
        // Destroy the stream first (its destructor closes the handle), then remove the fixture
        unset($stream);
        unlink($tmpFile);
    }
}
120 | }
121 |
--------------------------------------------------------------------------------
/tests/integration/XmlStringStreamer/XmlStringStreamerIntegrationTest.php:
--------------------------------------------------------------------------------
1 | 4,
19 | ));
20 | $streamer = new XmlStringStreamer($parser, $stream);
21 |
22 | $expectedValues = array("000000100182", "000000100182");
23 | $foundValues = array();
24 |
25 | while ($node = $streamer->getNode()) {
26 | $xmlNode = simplexml_load_string($node);
27 | $foundValues[] = (string)$xmlNode->field[0]["value"];
28 | }
29 |
30 | $this->assertEquals($expectedValues, $foundValues, "It should only catch two values and abort");
31 | }
32 |
/**
 * UniqueNode on the incomplete.xml fixture: only the two expected row values are collected
 */
public function test_incomplete_file_with_UniqueNode()
{
    $stream = new File(__dir__ . "/../../xml/incomplete.xml", 16384);
    $parser = new UniqueNode(["uniqueNode" => "row"]);
    $streamer = new XmlStringStreamer($parser, $stream);

    $foundValues = [];
    for ($node = $streamer->getNode(); $node; $node = $streamer->getNode()) {
        $row = simplexml_load_string($node);
        $foundValues[] = (string)$row->field[0]["value"];
    }

    $this->assertEquals(
        ["000000100182", "000000100182"],
        $foundValues,
        "It should only catch two values and abort"
    );
}
53 |
/**
 * The StringWalker convenience factory streams every PubmedArticle of the pubmed fixture
 */
public function test_createStringWalkerParser_convenience_method_with_pubmed_xml()
{
    $streamer = XmlStringStreamer::createStringWalkerParser(__dir__ . "/../../xml/pubmed-example.xml");

    $foundPMIDs = [];
    for ($node = $streamer->getNode(); $node; $node = $streamer->getNode()) {
        $article = simplexml_load_string($node);
        $foundPMIDs[] = (string)$article->MedlineCitation->PMID;
    }

    $this->assertEquals(
        ["24531174", "24529294", "24449586"],
        $foundPMIDs,
        "The PMID nodes should be as expected"
    );
}
70 |
/**
 * StringWalker with extractContainer: nodes are streamed and the root element is captured separately
 */
public function test_StringWalker_parser_with_pubmed_xml_and_container_extraction()
{
    $stream = new File(__dir__ . "/../../xml/pubmed-example.xml", 16384);
    $parser = new StringWalker(["extractContainer" => true]);
    $streamer = new XmlStringStreamer($parser, $stream);

    $foundPMIDs = [];
    for ($node = $streamer->getNode(); $node; $node = $streamer->getNode()) {
        $article = simplexml_load_string($node);
        $foundPMIDs[] = (string)$article->MedlineCitation->PMID;
    }

    $this->assertEquals(
        ["24531174", "24529294", "24449586"],
        $foundPMIDs,
        "The PMID nodes should be as expected"
    );

    // The container is only inspected after the whole stream has been consumed
    $container = simplexml_load_string($parser->getExtractedContainer());
    $rootAttributes = $container->attributes();

    $this->assertEquals("PubmedArticleSet", $container->getName(), "Root node should be as expected");
    $this->assertEquals("bar", $rootAttributes->foo, "Attributes should be extracted correctly");
    $this->assertEquals("qux", $rootAttributes->baz, "Attributes should be extracted correctly");
}
96 |
/**
 * The StringWalker convenience factory with captureDepth 3 collects the orphanet OrphaNumbers
 */
public function test_createStringWalkerParser_convenience_method_with_orphanet_xml_and_custom_captureDepth()
{
    $streamer = XmlStringStreamer::createStringWalkerParser(
        __dir__ . "/../../xml/orphanet-xml-example.xml",
        ["captureDepth" => 3]
    );

    $foundOrphaNumbers = [];
    for ($node = $streamer->getNode(); $node; $node = $streamer->getNode()) {
        $disorder = simplexml_load_string($node);
        $foundOrphaNumbers[] = (string)$disorder->OrphaNumber;
    }

    $this->assertEquals(
        ["166024", "166032", "58"],
        $foundOrphaNumbers,
        "The OrphaNumber nodes should be as expected"
    );
}
115 |
/**
 * The UniqueNode convenience factory streams every PubmedArticle of the pubmed fixture
 */
public function test_createUniqueNodeParser_convenience_method_with_pubmed_xml()
{
    $streamer = XmlStringStreamer::createUniqueNodeParser(
        __dir__ . "/../../xml/pubmed-example.xml",
        ["uniqueNode" => "PubmedArticle"]
    );

    $foundPMIDs = [];
    for ($node = $streamer->getNode(); $node; $node = $streamer->getNode()) {
        $article = simplexml_load_string($node);
        $foundPMIDs[] = (string)$article->MedlineCitation->PMID;
    }

    $this->assertEquals(
        ["24531174", "24529294", "24449586"],
        $foundPMIDs,
        "The PMID nodes should be as expected"
    );
}
134 |
/**
 * UniqueNode with extractContainer: nodes are streamed and the root element is captured separately
 */
public function test_UniqueNode_parser_with_pubmed_xml_and_container_extraction()
{
    $stream = new File(__dir__ . "/../../xml/pubmed-example.xml", 512);
    $parser = new UniqueNode([
        "uniqueNode" => "PubmedArticle",
        "extractContainer" => true,
    ]);
    $streamer = new XmlStringStreamer($parser, $stream);

    $foundPMIDs = [];
    for ($node = $streamer->getNode(); $node; $node = $streamer->getNode()) {
        $article = simplexml_load_string($node);
        $foundPMIDs[] = (string)$article->MedlineCitation->PMID;
    }

    $this->assertEquals(
        ["24531174", "24529294", "24449586"],
        $foundPMIDs,
        "The PMID nodes should be as expected"
    );

    // The container is only inspected after the whole stream has been consumed
    $container = simplexml_load_string($parser->getExtractedContainer());
    $rootAttributes = $container->attributes();

    $this->assertEquals("PubmedArticleSet", $container->getName(), "Root node should be as expected");
    $this->assertEquals("bar", $rootAttributes->foo, "Attributes should be extracted correctly");
    $this->assertEquals("qux", $rootAttributes->baz, "Attributes should be extracted correctly");
}
161 |
/**
 * The UniqueNode convenience factory collects the OrphaNumber of every Disorder node
 */
public function test_createUniqueNodeParser_convenience_method_with_orphanet_xml()
{
    $streamer = XmlStringStreamer::createUniqueNodeParser(
        __dir__ . "/../../xml/orphanet-xml-example.xml",
        ["uniqueNode" => "Disorder"]
    );

    $foundOrphaNumbers = [];
    for ($node = $streamer->getNode(); $node; $node = $streamer->getNode()) {
        $disorder = simplexml_load_string($node);
        $foundOrphaNumbers[] = (string)$disorder->OrphaNumber;
    }

    $this->assertEquals(
        ["166024", "166032", "58"],
        $foundOrphaNumbers,
        "The OrphaNumber nodes should be as expected"
    );
}
180 |
/**
 * UniqueNode has to deliver all nodes even when the whole file fits into a single chunk
 */
public function test_UniqueNode_parser_with_file_shorter_than_buffer()
{
    $stream = new XmlStringStreamer\Stream\File(__dir__ . "/../../xml/short.xml", 1024);
    $parser = new XmlStringStreamer\Parser\UniqueNode(["uniqueNode" => "capture"]);
    $streamer = new XmlStringStreamer($parser, $stream);

    $foundNodes = [];
    for ($node = $streamer->getNode(); $node; $node = $streamer->getNode()) {
        $captured = simplexml_load_string($node);
        $foundNodes[] = (string)$captured->node;
    }

    $this->assertEquals(
        ["foo", "bar"],
        $foundNodes,
        "The found nodes should equal the expected nodes"
    );
}
204 |
/**
 * StringWalker has to deliver all nodes even when the whole file fits into a single chunk
 */
public function test_StringWalker_parser_with_file_shorter_than_buffer()
{
    $stream = new XmlStringStreamer\Stream\File(__dir__ . "/../../xml/short.xml", 1024);
    $parser = new XmlStringStreamer\Parser\StringWalker();
    $streamer = new XmlStringStreamer($parser, $stream);

    $foundNodes = [];
    for ($node = $streamer->getNode(); $node; $node = $streamer->getNode()) {
        $captured = simplexml_load_string($node);
        $foundNodes[] = (string)$captured->node;
    }

    $this->assertEquals(
        ["foo", "bar"],
        $foundNodes,
        "The found nodes should equal the expected nodes"
    );
}
226 |
/**
 * Both capture nodes must be found with a 200 byte buffer on the short_last_chunk fixture
 */
public function test_UniqueNode_parser_with_file_with_data_in_last_chunk()
{
    $file = __dir__ . "/../../xml/short_last_chunk.xml";

    $stream = new XmlStringStreamer\Stream\File($file, 200);
    // Single assignment; the original assigned $parser to itself twice
    $parser = new UniqueNode(["uniqueNode" => 'capture']);
    $streamer = new XmlStringStreamer($parser, $stream);

    $foundNodes = 0;
    while ($streamer->getNode()) {
        $foundNodes++;
    }

    $this->assertEquals(2, $foundNodes, "The found nodes should equal the expected nodes number.");
}
242 |
243 | public function test_UniqueNode_parser_reset_working_blob()
244 | {
245 | $file = __dir__ . "/../../xml/rewind_working_blob.xml";
246 |
247 | $stream = new XmlStringStreamer\Stream\File($file, 50);
248 | $parser = new UniqueNode(array("uniqueNode" => 'item'));
249 | $streamer = new XmlStringStreamer($parser, $stream);
250 |
251 | self::assertSame('- 0
', $streamer->getNode());
252 | self::assertSame('- 1
', $streamer->getNode());
253 | self::assertSame('- 2
', $streamer->getNode());
254 |
255 | // at this stage, internal working blob in parser has "preloaded" one extra valid item
256 | self::assertSame("\n - 3
\n - 4
getCurrentWorkingBlob());
257 |
258 | $stream->rewind();
259 | // because internal working blob had one extra valid item, we still get it
260 | self::assertSame('- 3
', $streamer->getNode());
261 |
262 | // now next item will result into fetching previous working blob plus beginning of file after rewinding
263 | self::assertSame("- 4
\n - 0
", $streamer->getNode());
264 |
265 | self::assertSame('- 1
', $streamer->getNode());
266 | self::assertSame('- 2
', $streamer->getNode());
267 |
268 | $stream->rewind(); // rewind stream again
269 | $parser->reset(); // but now also reset internal working blob
270 |
271 | self::assertSame('- 0
', $streamer->getNode());
272 | self::assertSame('- 1
', $streamer->getNode());
273 | self::assertSame('- 2
', $streamer->getNode());
274 |
275 | self::assertSame("\n - 3
\n - 4
getCurrentWorkingBlob());
276 | $parser->reset();
277 | self::assertSame('', $parser->getCurrentWorkingBlob());
278 |
279 | // in opposite case, reseting blob without rewinding will jump over 2 items
280 | self::assertSame('- 5
', $streamer->getNode());
281 | }
282 |
283 | public function test_StringWalker_parser_reset_working_blob()
284 | {
285 | $file = __dir__ . "/../../xml/rewind_working_blob.xml";
286 |
287 | $stream = new XmlStringStreamer\Stream\File($file, 80);
288 | $parser = new XmlStringStreamer\Parser\StringWalker();
289 | $streamer = new XmlStringStreamer($parser, $stream);
290 |
291 | self::assertSame("\n - 0
", $streamer->getNode());
292 | self::assertSame("\n - 1
", $streamer->getNode());
293 | self::assertSame("\n - 2
", $streamer->getNode());
294 |
295 | $stream->rewind();
296 | // after rewind, previous part of chunk with beginning of file is current node
297 | self::assertSame("\n - 3", $streamer->getNode());
298 |
299 | // but after that, we are able to get proper nodes (depends on chunk length)
300 | self::assertSame("\n
- 0
", $streamer->getNode());
301 | self::assertSame("\n - 1
", $streamer->getNode());
302 | self::assertSame("\n - 2
", $streamer->getNode());
303 |
304 | $stream->rewind();
305 | $parser->reset();
306 | // now rewind and reset will cause proper loading from beginning of file
307 | self::assertSame("\n - 0
", $streamer->getNode());
308 | self::assertSame("\n - 1
", $streamer->getNode());
309 | self::assertSame("\n - 2
", $streamer->getNode());
310 |
311 | // in opposite, just resetting parser without rewinding will cause false as result - unable to retrieve
312 | $parser->reset();
313 | self::assertFalse($streamer->getNode());
314 |
315 | // it is possible to recover by rewind/reset again
316 | $stream->rewind();
317 | $parser->reset();
318 | self::assertSame("\n - 0
", $streamer->getNode());
319 | }
320 |
321 | public function test_UniqueNode_parser_stream_seeking()
322 | {
323 | $filePath = __dir__ . '/../../xml/stream_seeking.xml';
324 | $fileHandle = fopen($filePath, 'rb');
325 |
326 | $stream = new XmlStringStreamer\Stream\File($fileHandle, 50);
327 | $parser = new UniqueNode(["uniqueNode" => 'item']);
328 | $streamer = new XmlStringStreamer($parser, $stream);
329 |
330 | self::assertSame('- first item to read
', $streamer->getNode());
331 |
332 | /**
333 | * @see /tests/xml/stream_seeking.xml
334 | * hash character is used as seek target in file, creating case where closing tag precedes opening tag
335 | */
336 | $seekTargetPosition = strpos(file_get_contents($filePath), '#');
337 | fseek($fileHandle, $seekTargetPosition);
338 | $parser->reset();
339 |
340 | self::assertSame('- second item to read
', $streamer->getNode());
341 | }
342 | }
343 |
--------------------------------------------------------------------------------
/tests/unit/XmlStringStreamer/Parser/StringWalkerTest.php:
--------------------------------------------------------------------------------
1 | shouldReceive("getChunk")
20 | ->once()
21 | ->andReturn(substr($fullString, $i, $bufferSize));
22 | }
23 | $stream->shouldReceive("getChunk")
24 | ->andReturn(false);
25 |
26 | return $stream;
27 | }
28 |
/**
 * An empty stream must simply produce false on the first getNodeFrom call
 */
public function test_stringWalker_empty_xml()
{
    $emptyStream = $this->getStreamMock("", 1024);
    $walker = new StringWalker();

    $node = $walker->getNodeFrom($emptyStream);

    $this->assertFalse($node, "An empty stream should just exit nicely");
}
37 |
38 | public function test_default_options()
39 | {
40 | $node1 = <<
42 | Lorem
43 | Ipsum
44 | 1
45 |
46 | eot;
47 | $node2 = <<
49 | Lorem
50 | Ipsum
51 | 2
52 |
53 | eot;
54 | $node3 = <<
56 | Lorem
57 | Ipsum
58 | 3
59 |
60 | eot;
61 | $xml = <<
63 |
64 | $node1
65 | $node2
66 | $node3
67 |
68 | eot;
69 |
70 | $stream = $this->getStreamMock($xml, 50);
71 |
72 | $parser = new StringWalker();
73 |
74 | $this->assertEquals(
75 | trim($node1),
76 | trim($parser->getNodeFrom($stream)),
77 | "Node 1 should be obtained on the first getNodeFrom"
78 | );
79 | $this->assertEquals(
80 | trim($node2),
81 | trim($parser->getNodeFrom($stream)),
82 | "Node 2 should be obtained on the second getNodeFrom"
83 | );
84 | $this->assertEquals(
85 | trim($node3),
86 | trim($parser->getNodeFrom($stream)),
87 | "Node 3 should be obtained on the third getNodeFrom"
88 | );
89 | $this->assertFalse(
90 | false,
91 | "When no nodes are left, false should be returned"
92 | );
93 | }
94 |
95 | public function test_custom_captureDepth()
96 | {
97 | $node1 = <<
99 | Lorem
100 | Ipsum
101 | 1
102 |
103 | eot;
104 | $node2 = <<
106 | Lorem
107 | Ipsum
108 | 2
109 |
110 | eot;
111 | $node3 = <<
113 | Lorem
114 | Ipsum
115 | 3
116 |
117 | eot;
118 | $xml = <<
120 |
121 |
122 | $node1
123 | $node2
124 | $node3
125 |
126 |
127 | eot;
128 |
129 | $stream = $this->getStreamMock($xml, 50);
130 |
131 | $parser = new StringWalker(array(
132 | "captureDepth" => 3,
133 | ));
134 |
135 | $this->assertEquals(
136 | trim($node1),
137 | trim($parser->getNodeFrom($stream)),
138 | "Node 1 should be obtained on the first getNodeFrom"
139 | );
140 | $this->assertEquals(
141 | trim($node2),
142 | trim($parser->getNodeFrom($stream)),
143 | "Node 2 should be obtained on the second getNodeFrom"
144 | );
145 | $this->assertEquals(
146 | trim($node3),
147 | trim($parser->getNodeFrom($stream)),
148 | "Node 3 should be obtained on the third getNodeFrom"
149 | );
150 | $this->assertFalse(
151 | false,
152 | "When no nodes are left, false should be returned"
153 | );
154 | }
155 |
156 | public function test_special_elements()
157 | {
158 | $node1 = <<
160 |
161 | Lorem
162 | Ipsum
163 | 1
164 |
165 | eot;
166 | $node2 = <<
168 | Lorem
169 |
170 | Ipsum
171 | 2
172 |
173 | eot;
174 | $node3 = <<
176 | Lorem
177 | Ipsum
178 | 3
179 |
180 | eot;
181 | $xml = <<
183 |
184 |
185 | $node1
186 | $node2
187 | $node3
188 |
189 | eot;
190 |
191 | $stream = $this->getStreamMock($xml, 50);
192 |
193 | $parser = new StringWalker();
194 |
195 | $this->assertEquals(
196 | trim($node1),
197 | trim($parser->getNodeFrom($stream)),
198 | "Node 1 should be obtained on the first getNodeFrom"
199 | );
200 | $this->assertEquals(
201 | trim($node2),
202 | trim($parser->getNodeFrom($stream)),
203 | "Node 2 should be obtained on the second getNodeFrom"
204 | );
205 | $this->assertEquals(
206 | trim($node3),
207 | trim($parser->getNodeFrom($stream)),
208 | "Node 3 should be obtained on the third getNodeFrom"
209 | );
210 | $this->assertFalse(
211 | false,
212 | "When no nodes are left, false should be returned"
213 | );
214 | }
215 |
216 | public function test_special_elements_with_GT()
217 | {
218 | $node1 = <<
220 |
223 | Lorem
224 | Ipsum
225 | 1
226 |
227 | eot;
228 | $node2 = <<
230 | Lorem
231 | chars
233 | >>><>
234 | ]]>
235 | Ipsum
236 | 2
237 |
238 | eot;
239 | $node3 = <<
241 | Lorem
242 | Ipsum
243 | 3
244 |
245 | eot;
246 | $xml = <<
248 |
249 |
250 | $node1
251 | $node2
252 | $node3
253 |
254 | eot;
255 |
256 | $stream = $this->getStreamMock($xml, 50);
257 |
258 | $parser = new StringWalker(array(
259 | "expectGT" => true,
260 | ));
261 |
262 | $this->assertEquals(
263 | trim($node1),
264 | trim($parser->getNodeFrom($stream)),
265 | "Node 1 should be obtained on the first getNodeFrom"
266 | );
267 | $this->assertEquals(
268 | trim($node2),
269 | trim($parser->getNodeFrom($stream)),
270 | "Node 2 should be obtained on the second getNodeFrom"
271 | );
272 | $this->assertEquals(
273 | trim($node3),
274 | trim($parser->getNodeFrom($stream)),
275 | "Node 3 should be obtained on the third getNodeFrom"
276 | );
277 | $this->assertFalse(
278 | false,
279 | "When no nodes are left, false should be returned"
280 | );
281 | }
282 |
283 | public function test_self_closing_elements()
284 | {
285 | $node1 = <<
287 |
288 |
289 | 1
290 |
291 | eot;
292 | $node2 = <<
294 |
295 |
296 | 2
297 |
298 | eot;
299 | $node3 = <<
301 |
302 |
303 | 3
304 |
305 | eot;
306 | $xml = <<
308 |
309 | $node1
310 | $node2
311 | $node3
312 |
313 | eot;
314 |
315 | $stream = $this->getStreamMock($xml, 50);
316 |
317 | $parser = new StringWalker();
318 |
319 | $this->assertEquals(
320 | trim($node1),
321 | trim($parser->getNodeFrom($stream)),
322 | "Node 1 should be obtained on the first getNodeFrom"
323 | );
324 | $this->assertEquals(
325 | trim($node2),
326 | trim($parser->getNodeFrom($stream)),
327 | "Node 2 should be obtained on the second getNodeFrom"
328 | );
329 | $this->assertEquals(
330 | trim($node3),
331 | trim($parser->getNodeFrom($stream)),
332 | "Node 3 should be obtained on the third getNodeFrom"
333 | );
334 | $this->assertFalse(
335 | false,
336 | "When no nodes are left, false should be returned"
337 | );
338 | }
339 |
340 | public function test_self_closing_elements_at_depth()
341 | {
342 | $xml = <<
344 |
345 | baz
346 |
347 |
348 | eot;
349 |
350 | $stream = $this->getStreamMock($xml, 50);
351 |
352 | $parser = new StringWalker(array(
353 | "captureDepth" => 2,
354 | ));
355 |
356 | $this->assertEquals(
357 | trim("baz"),
358 | trim($parser->getNodeFrom($stream))
359 | );
360 |
361 | $this->assertEquals(
362 | trim(""),
363 | trim($parser->getNodeFrom($stream))
364 | );
365 | }
366 |
367 | public function test_different_capture_node_types()
368 | {
369 | $node1 = <<
371 |
372 |
373 | 1
374 |
375 | eot;
376 | $node2 = <<
378 |
379 |
380 | 2
381 |
382 | eot;
383 | $node3 = <<
385 |
386 |
387 | 3
388 |
389 | eot;
390 | $xml = <<
392 |
393 | $node1
394 | $node2
395 | $node3
396 |
397 | eot;
398 |
399 | $stream = $this->getStreamMock($xml, 50);
400 |
401 | $parser = new StringWalker();
402 |
403 | $this->assertEquals(
404 | trim($node1),
405 | trim($parser->getNodeFrom($stream)),
406 | "Node 1 should be obtained on the first getNodeFrom"
407 | );
408 | $this->assertEquals(
409 | trim($node2),
410 | trim($parser->getNodeFrom($stream)),
411 | "Node 2 should be obtained on the second getNodeFrom"
412 | );
413 | $this->assertEquals(
414 | trim($node3),
415 | trim($parser->getNodeFrom($stream)),
416 | "Node 3 should be obtained on the third getNodeFrom"
417 | );
418 | $this->assertFalse(
419 | false,
420 | "When no nodes are left, false should be returned"
421 | );
422 | }
423 |
424 | public function test_multiple_roots()
425 | {
426 | $node1 = <<
428 |
429 |
430 | 1
431 |
432 | eot;
433 | $node2 = <<
435 |
436 |
437 | 2
438 |
439 | eot;
440 | $node3 = <<
442 |
443 |
444 | 3
445 |
446 | eot;
447 | $node4 = <<
449 |
450 |
451 | 3
452 |
453 | eot;
454 | $xml = <<
456 |
457 | $node1
458 | $node2
459 |
460 |
461 | $node3
462 | $node4
463 |
464 | eot;
465 |
466 | $stream = $this->getStreamMock($xml, 50);
467 |
468 | $parser = new StringWalker();
469 |
470 | $this->assertEquals(
471 | trim($node1),
472 | trim($parser->getNodeFrom($stream)),
473 | "Node 1 should be obtained on the first getNodeFrom from root-a"
474 | );
475 | $this->assertEquals(
476 | trim($node2),
477 | trim($parser->getNodeFrom($stream)),
478 | "Node 2 should be obtained on the second getNodeFrom from root-a"
479 | );
480 | $this->assertEquals(
481 | trim($node3),
482 | trim($parser->getNodeFrom($stream)),
483 | "Node 3 should be obtained on the third getNodeFrom from root-b"
484 | );
485 | $this->assertEquals(
486 | trim($node4),
487 | trim($parser->getNodeFrom($stream)),
488 | "Node 4 should be obtained on the third getNodeFrom from root-b"
489 | );
490 | $this->assertFalse(
491 | false,
492 | "When no nodes are left, false should be returned"
493 | );
494 | }
495 | }
--------------------------------------------------------------------------------
/tests/unit/XmlStringStreamer/Parser/UniqueNodeTest.php:
--------------------------------------------------------------------------------
1 | shouldReceive("getChunk")
19 | ->once()
20 | ->andReturn(substr($fullString, $i, $bufferSize));
21 | }
22 | $stream->shouldReceive("getChunk")
23 | ->andReturn(false);
24 |
25 | return $stream;
26 | }
27 |
28 | public function test_uniqueNode_empty_xml()
29 | {
30 | // An empty document: the parser has nothing to scan and must report "no node" rather than fail.
31 | $emptyStream = $this->getStreamMock("", 1024);
32 | $uniqueNodeParser = new UniqueNode(["uniqueNode" => "child"]);
33 |
34 | $result = $uniqueNodeParser->getNodeFrom($emptyStream);
35 |
36 | $this->assertFalse($result, "An empty stream should just exit nicely");
37 | }
38 |
39 | public function test_uniqueNode_setting()
40 | {
41 | $node1 = <<
43 | Lorem
44 | Ipsum
45 | 1
46 |
47 | eot;
48 | $node2 = <<
50 | Lorem
51 | Ipsum
52 | 2
53 |
54 | eot;
55 | $node3 = <<
57 | Lorem
58 | Ipsum
59 | 3
60 |
61 | eot;
62 | $xml = <<
64 |
65 | $node1
66 | $node2
67 | $node3
68 |
69 | eot;
70 | // Purpose: with "uniqueNode" set to "child", the three fixture nodes must be returned one per call, in order.
71 | $stream = $this->getStreamMock($xml, 50);
72 | // The stream mock is built with a buffer size of 50, much smaller than the whole document.
73 | $parser = new UniqueNode(array(
74 | "uniqueNode" => "child"
75 | ));
76 | // Output is trimmed on both sides so only the node text itself is compared.
77 | $this->assertEquals(
78 | trim($node1),
79 | trim($parser->getNodeFrom($stream)),
80 | "Node 1 should be obtained on the first getNodeFrom"
81 | );
82 | $this->assertEquals(
83 | trim($node2),
84 | trim($parser->getNodeFrom($stream)),
85 | "Node 2 should be obtained on the second getNodeFrom"
86 | );
87 | $this->assertEquals(
88 | trim($node3),
89 | trim($parser->getNodeFrom($stream)),
90 | "Node 3 should be obtained on the third getNodeFrom"
91 | );
92 | $this->assertFalse( // ask the parser once more: asserting the literal false could never fail
93 | $parser->getNodeFrom($stream),
94 | "When no nodes are left, false should be returned"
95 | );
96 | }
97 |
98 | public function test_uniqueNode_memory_leaks()
99 | {
100 | $node = <<
102 | Lorem
103 | Ipsum
104 | 1
105 |
106 | eot;
107 | $content = "";
108 | // Build a large document body by repeating the same fixture node 100 times.
109 | for ($i = 0; $i < 100; $i++) {
110 | $content .= $node;
111 | }
112 |
113 | $xml = <<
115 |
116 | $content
117 |
118 | eot;
119 | // The stream mock is built with this buffer size; it is also the growth limit asserted below.
120 | $BUFFER_SIZE = 50;
121 | $stream = $this->getStreamMock($xml, $BUFFER_SIZE);
122 | // NOTE(review): "unknown" is presumably a tag that never occurs in the fixture, so no node is ever found - confirm.
123 | $parser = new UniqueNode(array(
124 | "uniqueNode" => "unknown"
125 | ));
126 | // The length of the serialized parser is used as a stand-in for its memory footprint.
127 | $memoryFootprintBefore = strlen(serialize($parser));
128 | // A single call; its return value is deliberately ignored, only the parser's state afterwards matters.
129 | $parser->getNodeFrom($stream);
130 |
131 | $memoryFootprintAfter = strlen(serialize($parser));
132 | // The serialized state must have grown by less than one buffer size.
133 | $this->assertLessThan(
134 | $BUFFER_SIZE,
135 | $memoryFootprintAfter - $memoryFootprintBefore,
136 | "Memory shouldn't grow in an uncontrolled manner when the first node isn't found"
137 | );
138 | }
139 |
140 | public function test_uniqueNode_shortClosing_setting() {
141 | $node1 = <<
143 | eot;
144 | $node2 = <<
146 | Lorem
147 | Ipsum
148 | 2
149 |
150 | eot;
151 | $node3 = <<
153 | eot;
154 | $xml = <<
156 |
157 | $node1
158 | $node2
159 | $node3
160 |
161 | eot;
162 | $stream = $this->getStreamMock($xml, 50);
163 | // Purpose: with 'checkShortClosing' enabled, all three fixture nodes must be returned one per call, in order.
164 | $parser = new UniqueNode(array(
165 | "uniqueNode" => "child",
166 | 'checkShortClosing' => true
167 | ));
168 | // NOTE(review): nodes 1 and 3 are presumably the short-closed (self-closing) form - the fixture text is not visible here.
169 | $this->assertEquals(
170 | trim($node1),
171 | trim($parser->getNodeFrom($stream)),
172 | "Node 1 should be obtained on the first getNodeFrom"
173 | );
174 | $this->assertEquals(
175 | trim($node2),
176 | trim($parser->getNodeFrom($stream)),
177 | "Node 2 should be obtained on the second getNodeFrom"
178 | );
179 | $this->assertEquals(
180 | trim($node3),
181 | trim($parser->getNodeFrom($stream)),
182 | "Node 3 should be obtained on the third getNodeFrom"
183 | );
184 | }
185 |
186 | public function test_requires_uniqueNode_setting()
187 | {
188 | // Building the parser with no options at all (so no "uniqueNode") is expected to throw.
189 | $this->expectException(\Exception::class);
190 | new UniqueNode();
191 | }
192 |
193 | public function test_multiple_roots()
194 | {
195 | $node1 = <<
197 |
198 |
199 | 1
200 |
201 | eot;
202 | $node2 = <<
204 |
205 |
206 | 2
207 |
208 | eot;
209 | $node3 = <<
211 |
212 |
213 | 3
214 |
215 | eot;
216 | $node4 = <<
218 |
219 |
220 | 3
221 |
222 | eot;
223 | $xml = <<
225 |
226 | $node1
227 | $node2
228 |
229 |
230 | $node3
231 | $node4
232 |
233 | eot;
234 | // Purpose: nodes 1-2 then nodes 3-4 (grouped as root-a / root-b per the messages below) must come back in document order.
235 | $stream = $this->getStreamMock($xml, 50);
236 | // The stream mock is built with a buffer size of 50, much smaller than the whole document.
237 | $parser = new UniqueNode(array(
238 | "uniqueNode" => "child"
239 | ));
240 | // One getNodeFrom() call per expected node; output is trimmed so surrounding whitespace is not compared.
241 | $this->assertEquals(
242 | trim($node1),
243 | trim($parser->getNodeFrom($stream)),
244 | "Node 1 should be obtained on the first getNodeFrom from root-a"
245 | );
246 | $this->assertEquals(
247 | trim($node2),
248 | trim($parser->getNodeFrom($stream)),
249 | "Node 2 should be obtained on the second getNodeFrom from root-a"
250 | );
251 | $this->assertEquals(
252 | trim($node3),
253 | trim($parser->getNodeFrom($stream)),
254 | "Node 3 should be obtained on the third getNodeFrom from root-b"
255 | );
256 | $this->assertEquals(
257 | trim($node4),
258 | trim($parser->getNodeFrom($stream)),
259 | "Node 4 should be obtained on the fourth getNodeFrom from root-b"
260 | );
261 | $this->assertFalse( // ask the parser once more: asserting the literal false could never fail
262 | $parser->getNodeFrom($stream),
263 | "When no nodes are left, false should be returned"
264 | );
265 | }
266 |
267 | public function test_orphan_closing_tag_is_ignored()
268 | {
269 | $expectedStringToBeFlushed = 'read this';
270 | $xml = <<
272 |
273 |
274 | $expectedStringToBeFlushed
275 |
276 | eot;
277 | // Buffer size equals the document length. NOTE(review): presumably this delivers the XML as a single chunk - confirm in getStreamMock.
278 | $stream = $this->getStreamMock($xml, strlen($xml));
279 |
280 | $parser = new UniqueNode([
281 | "uniqueNode" => "child"
282 | ]);
283 | // The first call must return exactly the expected string; unlike the sibling tests, nothing is trimmed here.
284 | $this->assertEquals(
285 | $expectedStringToBeFlushed,
286 | $parser->getNodeFrom($stream),
287 | "Orphan closing tag must not act as closing tag for first opening tag"
288 | );
289 | }
290 | }
291 |
--------------------------------------------------------------------------------
/tests/unit/XmlStringStreamer/XmlStringStreamerTest.php:
--------------------------------------------------------------------------------
1 | loremipsum";
13 |
14 | $parser = Mockery::mock("\\Prewk\\XmlStringStreamer\\ParserInterface");
15 | $parser->shouldReceive("getNodeFrom")
16 | ->with(Mockery::type("\\Prewk\\XmlStringStreamer\\StreamInterface"))
17 | ->once()
18 | ->andReturn($node);
19 |
20 | $stream = Mockery::mock("\\Prewk\\XmlStringStreamer\\StreamInterface");
21 |
22 | $streamer = new XmlStringStreamer($parser, $stream);
23 |
24 | $this->assertEquals($node, $streamer->getNode(), "Node received from the parser should be what was expected");
25 | }
26 | }
--------------------------------------------------------------------------------
/tests/xml/incomplete.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
2 |
3 |
4 |
5 | 166024
6 | http://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=en&Expert=166024
7 | Multiple epiphyseal dysplasia, Al-Gazali type
8 |
9 |
10 |
11 |
12 | Multiple epiphyseal dysplasia - macrocephaly - distinctive facies
13 |
14 |
15 |
16 | OMIM
17 | 607131
18 |
19 |
20 | ICD10
21 | Q78.8
22 |
23 |
24 |
25 |
26 | 166032
27 | http://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=en&Expert=166032
28 | Multiple epiphyseal dysplasia, with miniepiphyses
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 | OMIM
37 | 609325
38 |
39 |
40 | ICD10
41 | Q78.8
42 |
43 |
44 |
45 |
46 | 58
47 | http://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=en&Expert=58
48 | Alexander disease
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 | OMIM
57 | 203450
58 |
59 |
60 | ICD10
61 | E75.2
62 |
63 |
64 |
65 |
66 |
67 |
--------------------------------------------------------------------------------
/tests/xml/pubmed-example.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | 24531174
8 |
9 | 2014
10 | 02
11 | 17
12 |
13 |
14 | 2014
15 | 04
16 | 01
17 |
18 |
19 |
20 | 1121-7138
21 |
22 | 37
23 | 1
24 |
25 | 2014
26 | Jan
27 |
28 |
29 | The new microbiologica
30 | New Microbiol.
31 |
32 | High prevalence of streptococcal or Epstein-Barr virus infections in children with acute non-septic monoarthritis.
33 |
34 | 81-6
35 |
36 |
37 | To investigate associations between infections and acute monoarthritis, we performed a prospective study on 32 children consecutively hospitalized and 32 age-matched controls. Among 26 (81%) children having infections, the most frequent agents were Group A ?-hemolytic Streptococcus (GAS: 53%) and Epstein-Barr virus (EBV: 37.5%). Among controls, only 5 (16%) were infected with GAS and 2 (6%) with EBV (P<0.005). The most frequently involved joints were hip in 15 children and ankle in 10 children. Our study showed that acute monoarthritis in children may be frequently associated with streptococcal or EBV infections.
38 |
39 |
40 |
41 | Di Loreto
42 | Simona
43 | S
44 | Pediatric Unit and School, University of L'Aquila, San Salvatore Hospital, L'Aquila, Italy.
45 |
46 |
47 | Fabiano
48 | Cecilia
49 | C
50 |
51 |
52 | Nigro
53 | Giovanni
54 | G
55 |
56 |
57 | eng
58 |
59 | Journal Article
60 |
61 |
62 | 2014
63 | 01
64 | 15
65 |
66 |
67 |
68 | Italy
69 | New Microbiol
70 | 9516291
71 | 1121-7138
72 |
73 | IM
74 |
75 |
76 | Arthritis, Juvenile
77 | epidemiology
78 | microbiology
79 | virology
80 |
81 |
82 | Child
83 |
84 |
85 | Child, Preschool
86 |
87 |
88 | Epstein-Barr Virus Infections
89 | epidemiology
90 | virology
91 |
92 |
93 | Female
94 |
95 |
96 | Herpesvirus 4, Human
97 | genetics
98 | isolation & purification
99 |
100 |
101 | Humans
102 |
103 |
104 | Infant
105 |
106 |
107 | Italy
108 | epidemiology
109 |
110 |
111 | Male
112 |
113 |
114 | Prevalence
115 |
116 |
117 | Streptococcal Infections
118 | epidemiology
119 | microbiology
120 |
121 |
122 | Streptococcus pyogenes
123 | genetics
124 | isolation & purification
125 |
126 |
127 |
128 |
129 |
130 |
131 | 2013
132 | 7
133 | 16
134 |
135 |
136 | 2013
137 | 9
138 | 19
139 |
140 |
141 | 2014
142 | 1
143 | 15
144 |
145 |
146 | 2014
147 | 2
148 | 18
149 | 6
150 | 0
151 |
152 |
153 | 2014
154 | 2
155 | 18
156 | 6
157 | 0
158 |
159 |
160 | 2014
161 | 4
162 | 2
163 | 6
164 | 0
165 |
166 |
167 | ppublish
168 |
169 | 24531174
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 | 24529294
178 |
179 | 2014
180 | 02
181 | 24
182 |
183 |
184 | 2014
185 | 04
186 | 08
187 |
188 |
189 |
190 | 0392-856X
191 |
192 | 32
193 | 1 Suppl 80
194 |
195 | 2014 Jan-Feb
196 |
197 |
198 | Clinical and experimental rheumatology
199 | Clin. Exp. Rheumatol.
200 |
201 | Ultrasound imaging in juvenile idiopathic arthritis for the rheumatologist.
202 |
203 | S34-41
204 |
205 |
206 | The present review provides an update of the currently available data and discusses research issues of US imaging in juvenile idiopathic arthritis (JIA). This review also includes a brief description of the normal sonoanatomy of healthy joints in children in order to avoid misinterpretation. Musculoskeletal ultrasound (US) is a quick, inexpensive, bedside method for evaluating children with no need for anaesthesiological support. Until now, the major objective in the application of US in children is to improve clinical diagnosis and patient care in daily practice. Articular disorders in children affect the epiphyseal cartilage leading to alterations in maturation and growth. US imaging allows distinguishing between synovitis and joint cartilage as well as between articular and peri-articular structures. Currently the principal applications for using US in patients with JIA include: detection of synovitis, tenosynovitis, enthesitis and cartilage and bone abnormalities. US is also used to guide needle injection. To date, the role of US in therapy monitoring has not been fully established. Future topics for study include: establishing international definitions (Bmode and Doppler) for joint components in healthy children and for US findings in JIA patients, consensus on scanning protocols and scoring systems, evaluation of the role of US with power Doppler in the assessment of the real state of disease (activity/remission) and developing a specific training programme for paediatric rheumatologists performing US in patients with JIA.
207 |
208 |
209 |
210 | Collado Ramos
211 | Paz
212 | P
213 | Department of Rheumatology/Paediatric Rheumatology Unit, Hospital Universitario Severo Ochoa, Madrid, Spain. paxcollado@yahoo.es.
214 |
215 |
216 | eng
217 |
218 | Journal Article
219 | Review
220 |
221 |
222 | 2014
223 | 02
224 | 17
225 |
226 |
227 |
228 | Italy
229 | Clin Exp Rheumatol
230 | 8308521
231 | 0392-856X
232 |
233 |
234 |
235 | 0
236 | Antirheumatic Agents
237 |
238 |
239 | IM
240 |
241 |
242 | Antirheumatic Agents
243 | therapeutic use
244 |
245 |
246 | Arthritis, Juvenile
247 | drug therapy
248 | ultrasonography
249 |
250 |
251 | Child
252 |
253 |
254 | Child, Preschool
255 |
256 |
257 | Humans
258 |
259 |
260 | Joints
261 | drug effects
262 | ultrasonography
263 |
264 |
265 | Predictive Value of Tests
266 |
267 |
268 | Remission Induction
269 |
270 |
271 | Rheumatology
272 | methods
273 |
274 |
275 | Severity of Illness Index
276 |
277 |
278 | Treatment Outcome
279 |
280 |
281 | Ultrasonography, Doppler
282 |
283 |
284 | Ultrasonography, Doppler, Color
285 |
286 |
287 | Ultrasonography, Interventional
288 |
289 |
290 |
291 |
292 |
293 |
294 | 2013
295 | 7
296 | 30
297 |
298 |
299 | 2013
300 | 11
301 | 18
302 |
303 |
304 | 2014
305 | 2
306 | 17
307 |
308 |
309 | 2014
310 | 2
311 | 18
312 | 6
313 | 0
314 |
315 |
316 | 2014
317 | 2
318 | 18
319 | 6
320 | 0
321 |
322 |
323 | 2014
324 | 4
325 | 9
326 | 6
327 | 0
328 |
329 |
330 | ppublish
331 |
332 | 7428
333 | 24529294
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 | 24449586
342 |
343 | 2014
344 | 01
345 | 22
346 |
347 |
348 | 2014
349 | 03
350 | 18
351 |
352 |
353 | 2014
354 | 03
355 | 31
356 |
357 |
358 |
359 | 2326-5205
360 |
361 | 66
362 | 1
363 |
364 | 2014
365 | Jan
366 |
367 |
368 | Arthritis & rheumatology (Hoboken, N.J.)
369 |
370 | Detection of enthesitis in children with enthesitis-related arthritis: dolorimetry compared to ultrasonography.
371 |
372 | 218-27
373 |
374 | 10.1002/art.38197
375 |
376 | To evaluate the distribution of enthesitis and the accuracy of physical examination with a dolorimeter for the detection of enthesitis in children, using ultrasound (US) assessment as the reference standard.
377 | We performed a prospective cross-sectional study of 30 patients with enthesitis-related arthritis (ERA) and 30 control subjects. The following tendon insertion sites were assessed by standardized physical examination with a dolorimeter and US: common extensor on the lateral humeral epicondyle, common flexor on the medial humeral epicondyle, quadriceps at the superior patella, patellar ligament at the inferior patella, Achilles, and plantar fascia at the calcaneus.
378 | Abnormal findings on US were detected most commonly at the insertion of the quadriceps (30% [18 of 60 sites]), common extensor (12% [7 of 60]), and Achilles (10% [6 of 60]) tendons. The intrarater reliability of US (kappa statistic) was 0.78 (95% confidence interval [95% CI] 0.63-0.93), and the interrater reliability was 0.81 (95% CI 0.67-0.95). Tenderness as detected by standardized dolorimeter examination had poor positive predictive value for US-confirmed enthesitis. In comparison to controls, patients with ERA reported more pain and had lower pain thresholds at every site, including control sites (P < 0.001 for all comparisons). The interrater reliability of dolorimeter examination for detection of enthesitis was low (κ = 0.49 [95% CI 0.33-0.65]).
379 | Compared to US, standardized dolorimeter examination for the detection of enthesitis in children has poor accuracy and reliability. The decreased pain threshold of ERA patients likely contributed to the limited accuracy of the physical examination findings. Further research regarding the utility of US for identifying enthesitis at diagnosis of juvenile idiopathic arthritis, accurately predicting disease progression, and guiding therapeutic decisions is warranted.
380 | Copyright © 2014 by the American College of Rheumatology.
381 |
382 |
383 |
384 | Weiss
385 | Pamela F
386 | PF
387 | Children's Hospital of Philadelphia, Philadelphia, Pennsylvania.
388 |
389 |
390 | Chauvin
391 | Nancy A
392 | NA
393 |
394 |
395 | Klink
396 | Andrew J
397 | AJ
398 |
399 |
400 | Localio
401 | Russell
402 | R
403 |
404 |
405 | Feudtner
406 | Chris
407 | C
408 |
409 |
410 | Jaramillo
411 | Diego
412 | D
413 |
414 |
415 | Colbert
416 | Robert A
417 | RA
418 |
419 |
420 | Sherry
421 | David D
422 | DD
423 |
424 |
425 | Keren
426 | Ron
427 | R
428 |
429 |
430 | eng
431 |
432 |
433 | 1-K23-AR059749-01A1
434 | AR
435 | NIAMS NIH HHS
436 | United States
437 |
438 |
439 | Z01-AR-041184
440 | AR
441 | NIAMS NIH HHS
442 | United States
443 |
444 |
445 |
446 | Journal Article
447 | Research Support, N.I.H., Extramural
448 | Research Support, Non-U.S. Gov't
449 |
450 |
451 |
452 | United States
453 | Arthritis Rheumatol
454 | 101623795
455 |
456 | AIM
457 | IM
458 |
459 |
460 | Adolescent
461 |
462 |
463 | Arthritis, Juvenile
464 | diagnosis
465 |
466 |
467 | Case-Control Studies
468 |
469 |
470 | Child
471 |
472 |
473 | Child, Preschool
474 |
475 |
476 | Cross-Sectional Studies
477 |
478 |
479 | Female
480 |
481 |
482 | Humans
483 |
484 |
485 | Joint Capsule
486 | ultrasonography
487 |
488 |
489 | Male
490 |
491 |
492 | Pain Measurement
493 | methods
494 |
495 |
496 | Pain Threshold
497 |
498 |
499 | Prospective Studies
500 |
501 |
502 | Reproducibility of Results
503 |
504 |
505 | Sensitivity and Specificity
506 |
507 |
508 | Severity of Illness Index
509 |
510 |
511 | Tendons
512 | ultrasonography
513 |
514 |
515 | NIHMS553631 [Available on 01/01/15]
516 | PMC3964147 [Available on 01/01/15]
517 |
518 |
519 |
520 |
521 | 2013
522 | 6
523 | 20
524 |
525 |
526 | 2013
527 | 9
528 | 10
529 |
530 |
531 | 2014
532 | 1
533 | 23
534 | 6
535 | 0
536 |
537 |
538 | 2014
539 | 1
540 | 23
541 | 6
542 | 0
543 |
544 |
545 | 2014
546 | 3
547 | 19
548 | 6
549 | 0
550 |
551 |
552 | 2015
553 | 1
554 | 1
555 | 0
556 | 0
557 |
558 |
559 | ppublish
560 |
561 | 10.1002/art.38197
562 | 24449586
563 | PMC3964147
564 | NIHMS553631
565 |
566 |
567 |
568 |
569 |
--------------------------------------------------------------------------------
/tests/xml/rewind_working_blob.xml:
--------------------------------------------------------------------------------
1 |
2 | - 0
3 | - 1
4 | - 2
5 | - 3
6 | - 4
7 | - 5
8 | - 6
9 | - 7
10 | - 8
11 | - 9
12 |
13 |
--------------------------------------------------------------------------------
/tests/xml/short.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | foo
4 |
5 |
6 | bar
7 |
8 |
9 |
--------------------------------------------------------------------------------
/tests/xml/short_last_chunk.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | foo
4 |
5 |
6 | foo
7 |
8 |
9 | foo
10 |
11 |
12 | foo
13 |
14 |
15 | bar
16 |
17 |
18 |
--------------------------------------------------------------------------------
/tests/xml/stream_seeking.xml:
--------------------------------------------------------------------------------
1 |
2 | - first item to read
3 | - seek will land before hash character: #
4 | - second item to read
5 |
6 |
--------------------------------------------------------------------------------