├── autoload.php ├── composer.json ├── examples ├── example1.php └── example1.xml ├── library └── SimpleXMLReader.php └── readme.md /autoload.php: -------------------------------------------------------------------------------- 1 | =5.3.0", 18 | "lib-libxml" : "*", 19 | "ext-xmlreader" : "*", 20 | "ext-dom" : "*" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /examples/example1.php: -------------------------------------------------------------------------------- 1 | registerCallback("Цена", array($this, "callbackPrice")); 13 | // by xpath 14 | $this->registerCallback("/Данные/Остатки/Остаток", array($this, "callbackRest")); 15 | } 16 | 17 | protected function callbackPrice($reader) 18 | { 19 | $xml = $reader->expandSimpleXml(); 20 | $attributes = $xml->attributes(); 21 | $ref = (string) $attributes->{"Номенклатура"}; 22 | if ($ref) { 23 | $price = floatval((string)$xml); 24 | $xpath = $this->currentXpath(); 25 | echo "$xpath: $ref = $price;\n"; 26 | } 27 | return true; 28 | } 29 | 30 | protected function callbackRest($reader) 31 | { 32 | $xml = $reader->expandSimpleXml(); 33 | $attributes = $xml->attributes(); 34 | $ref = (string) $attributes->{"Номенклатура"}; 35 | if ($ref) { 36 | $rest = floatval((string) $xml); 37 | $xpath = $this->currentXpath(); 38 | echo "$xpath: $ref = $rest;\n"; 39 | } 40 | return true; 41 | } 42 | 43 | } 44 | 45 | echo "
"; 46 | $file = dirname(__FILE__) . "/example1.xml"; 47 | $reader = new ExampleXmlReader1; 48 | $reader->open($file); 49 | $reader->parse(); 50 | $reader->close(); 51 | -------------------------------------------------------------------------------- /library/SimpleXMLReader.php: -------------------------------------------------------------------------------- 1 | 8 | * @url http://github.com/dkrnl/SimpleXMLReader 9 | */ 10 | class SimpleXMLReader extends XMLReader 11 | { 12 | 13 | /** 14 | * Callbacks 15 | * 16 | * @var array 17 | */ 18 | protected $callback = array(); 19 | 20 | 21 | /** 22 | * Depth 23 | * 24 | * @var int 25 | */ 26 | protected $currentDepth = 0; 27 | 28 | 29 | /** 30 | * Previos depth 31 | * 32 | * @var int 33 | */ 34 | protected $prevDepth = 0; 35 | 36 | 37 | /** 38 | * Stack of the parsed nodes 39 | * 40 | * @var array 41 | */ 42 | protected $nodesParsed = array(); 43 | 44 | 45 | /** 46 | * Stack of the node types 47 | * 48 | * @var array 49 | */ 50 | protected $nodesType = array(); 51 | 52 | 53 | /** 54 | * Stack of node position 55 | * 56 | * @var array 57 | */ 58 | protected $nodesCounter = array(); 59 | 60 | /** 61 | * Do not remove redundant white space. 62 | * 63 | * @var bool 64 | */ 65 | public $preserveWhiteSpace = true; 66 | 67 | 68 | /** 69 | * Add node callback 70 | * 71 | * @param string $xpath 72 | * @param callback $callback 73 | * @param integer $nodeType 74 | * @return SimpleXMLReader 75 | */ 76 | public function registerCallback($xpath, $callback, $nodeType = XMLREADER::ELEMENT) 77 | { 78 | if (isset($this->callback[$nodeType][$xpath])) { 79 | throw new Exception("Already exists callback '$xpath':$nodeType."); 80 | } 81 | if (!is_callable($callback)) { 82 | throw new Exception("Not callable callback '$xpath':$nodeType."); 83 | } 84 | $this->callback[$nodeType][$xpath] = $callback; 85 | return $this; 86 | } 87 | 88 | 89 | /** 90 | * Remove node callback 91 | * 92 | * @param string $xpath 93 | * @param integer $nodeType 94 | * @return SimpleXMLReader 95 | */ 96 | public function unRegisterCallback($xpath, $nodeType = XMLREADER::ELEMENT) 97 | { 98 | if (!isset($this->callback[$nodeType][$xpath])) { 99 | throw new Exception("Unknow parser callback '$xpath':$nodeType."); 100 | } 101 | unset($this->callback[$nodeType][$xpath]); 102 | return $this; 103 | } 104 | 105 | /** 106 | * Moves cursor to the next node in the document. 107 | * 108 | * @link http://php.net/manual/en/xmlreader.read.php 109 | * @return bool Returns TRUE on success or FALSE on failure. 110 | */ 111 | public function read() 112 | { 113 | $read = parent::read(); 114 | if ($this->depth < $this->prevDepth) { 115 | if (!isset($this->nodesParsed[$this->depth])) { 116 | throw new Exception("Invalid xml: missing items in SimpleXMLReader::\$nodesParsed"); 117 | } 118 | if (!isset($this->nodesCounter[$this->depth])) { 119 | throw new Exception("Invalid xml: missing items in SimpleXMLReader::\$nodesCounter"); 120 | } 121 | if (!isset($this->nodesType[$this->depth])) { 122 | throw new Exception("Invalid xml: missing items in SimpleXMLReader::\$nodesType"); 123 | } 124 | $this->nodesParsed = array_slice($this->nodesParsed, 0, $this->depth + 1, true); 125 | $this->nodesCounter = array_slice($this->nodesCounter, 0, $this->depth + 1, true); 126 | $this->nodesType = array_slice($this->nodesType, 0, $this->depth + 1, true); 127 | } 128 | if (isset($this->nodesParsed[$this->depth]) && $this->localName == $this->nodesParsed[$this->depth] && $this->nodeType == $this->nodesType[$this->depth]) { 129 | $this->nodesCounter[$this->depth] = $this->nodesCounter[$this->depth] + 1; 130 | } else { 131 | $this->nodesParsed[$this->depth] = $this->localName; 132 | $this->nodesType[$this->depth] = $this->nodeType; 133 | $this->nodesCounter[$this->depth] = 1; 134 | } 135 | $this->prevDepth = $this->depth; 136 | return $read; 137 | } 138 | 139 | /** 140 | * Return current xpath node 141 | * 142 | * @param boolean $nodesCounter 143 | * @return string 144 | */ 145 | public function currentXpath($nodesCounter = false) 146 | { 147 | if (count($this->nodesCounter) != count($this->nodesParsed) && count($this->nodesCounter) != count($this->nodesType)) { 148 | throw new Exception("Empty reader"); 149 | } 150 | $result = ""; 151 | foreach ($this->nodesParsed as $depth => $name) { 152 | switch ($this->nodesType[$depth]) { 153 | case self::ELEMENT: 154 | $result .= "/" . $name; 155 | if ($nodesCounter) { 156 | $result .= "[" . $this->nodesCounter[$depth] . "]"; 157 | } 158 | break; 159 | 160 | case self::TEXT: 161 | case self::CDATA: 162 | $result .= "/text()"; 163 | break; 164 | 165 | case self::COMMENT: 166 | $result .= "/comment()"; 167 | break; 168 | 169 | case self::ATTRIBUTE: 170 | $result .= "[@{$name}]"; 171 | break; 172 | } 173 | } 174 | return $result; 175 | } 176 | 177 | 178 | /** 179 | * Run parser 180 | * 181 | * @return void 182 | */ 183 | public function parse() 184 | { 185 | if (empty($this->callback)) { 186 | throw new Exception("Empty parser callback."); 187 | } 188 | $continue = true; 189 | while ($continue && $this->read()) { 190 | if (!isset($this->callback[$this->nodeType])) { 191 | continue; 192 | } 193 | if (isset($this->callback[$this->nodeType][$this->name])) { 194 | $continue = call_user_func($this->callback[$this->nodeType][$this->name], $this); 195 | } else { 196 | $xpath = $this->currentXpath(false); // without node counter 197 | if (isset($this->callback[$this->nodeType][$xpath])) { 198 | $continue = call_user_func($this->callback[$this->nodeType][$xpath], $this); 199 | } else { 200 | $xpath = $this->currentXpath(true); // with node counter 201 | if (isset($this->callback[$this->nodeType][$xpath])) { 202 | $continue = call_user_func($this->callback[$this->nodeType][$xpath], $this); 203 | } 204 | } 205 | } 206 | } 207 | } 208 | 209 | /** 210 | * Run XPath query on current node 211 | * 212 | * @param string $path 213 | * @param string $version 214 | * @param string $encoding 215 | * @param string $className 216 | * @return array(SimpleXMLElement) 217 | */ 218 | public function expandXpath($path, $version = "1.0", $encoding = "UTF-8", $className = null) 219 | { 220 | return $this->expandSimpleXml($version, $encoding, $className)->xpath($path); 221 | } 222 | 223 | /** 224 | * Expand current node to string 225 | * 226 | * @param string $version 227 | * @param string $encoding 228 | * @param string $className 229 | * @return SimpleXMLElement 230 | */ 231 | public function expandString($version = "1.0", $encoding = "UTF-8", $className = null) 232 | { 233 | return $this->expandSimpleXml($version, $encoding, $className)->asXML(); 234 | } 235 | 236 | /** 237 | * Expand current node to SimpleXMLElement 238 | * 239 | * @param string $version 240 | * @param string $encoding 241 | * @param string $className 242 | * @return SimpleXMLElement 243 | */ 244 | public function expandSimpleXml($version = "1.0", $encoding = "UTF-8", $className = null) 245 | { 246 | $element = $this->expand(); 247 | $document = new DomDocument($version, $encoding); 248 | $document->preserveWhiteSpace = $this->preserveWhiteSpace; 249 | if ($element instanceof DOMCharacterData) { 250 | $nodeName = array_splice($this->nodesParsed, -2, 1); 251 | $nodeName = (isset($nodeName[0]) && $nodeName[0] ? $nodeName[0] : "root"); 252 | $node = $document->createElement($nodeName); 253 | $node->appendChild($element); 254 | $element = $node; 255 | } 256 | $node = $document->importNode($element, true); 257 | $document->appendChild($node); 258 | return simplexml_import_dom($node, $className); 259 | } 260 | 261 | /** 262 | * Expand current node to DomDocument 263 | * 264 | * @param string $version 265 | * @param string $encoding 266 | * @return DomDocument 267 | */ 268 | public function expandDomDocument($version = "1.0", $encoding = "UTF-8") 269 | { 270 | $element = $this->expand(); 271 | $document = new DomDocument($version, $encoding); 272 | $document->preserveWhiteSpace = $this->preserveWhiteSpace; 273 | if ($element instanceof DOMCharacterData) { 274 | $nodeName = array_splice($this->nodesParsed, -2, 1); 275 | $nodeName = (isset($nodeName[0]) && $nodeName[0] ? $nodeName[0] : "root"); 276 | $node = $document->createElement($nodeName); 277 | $node->appendChild($element); 278 | $element = $node; 279 | } 280 | $node = $document->importNode($element, true); 281 | $document->appendChild($node); 282 | return $document; 283 | } 284 | 285 | } 286 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Simple XML Reader 2 | 3 | Wrapper XMLReader(http://php.net/manual/ru/book.xmlreader.php) class, for simple **SAX**-reading(and simple **XPath**-queries) of **huge**(testing over 1G file) xml. 4 | 5 | **Minimum the memory** usage of other xml libraries(SimpleXML, DOMXML). 6 | 7 | Usage example 1: 8 | ```php 9 | $reader = new SimpleXMLReader; 10 | $reader->open("big.xml"); 11 | $reader->registerCallback("by-node-name", function($reader) { 12 | $element = $reader->expandSimpleXml(); // copy of the current node as a SimpleXMLElement object 13 | $attributes = $element->attributes(); // read element attributes 14 | /* ...your code here... */ 15 | return true; 16 | }); 17 | $reader->registerCallback("/by/xpath/query", function($reader) { 18 | $element = $reader->expandDomDocument(); // copy of the current node as a DOMNode object 19 | $attributes = $element->attributes(); // read element attributes 20 | /* ...your code here... */ 21 | return true; 22 | }); 23 | $reader->parse(); 24 | $reader->close(); 25 | 26 | ``` 27 | Usage example 2: http://github.com/dkrnl/SimpleXMLReader/blob/master/examples/example1.php 28 | 29 | License: Public Domain 30 | --------------------------------------------------------------------------------