├── LICENSE ├── README.md ├── autoload.php ├── composer.json ├── src ├── Contract │ └── ReaderInterface.php ├── Excel.php ├── Exception │ ├── ParserException.php │ └── ReaderException.php ├── Parser │ ├── Excel2007.php │ ├── Excel5.php │ ├── Excel5 │ │ ├── OLERead.php │ │ └── RC4.php │ └── Format.php └── Reader │ ├── BaseReader.php │ ├── Csv.php │ ├── Xls.php │ └── Xlsx.php └── tests ├── csvTest.php ├── files ├── 01.csv ├── 01.xls ├── 01.xlsx └── 02.csv ├── xlsTest.php └── xlsxTest.php /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # phpexcel 2 | A lightweight PHP library for reading spreadsheet files 3 | - Based on Generator, SeekableIterator and Countable 4 | - Support for reading by line, read data only 5 | 6 | ### Requirements 7 | 8 | - PHP 7.0 or higher 9 | 10 | ### Installation 11 | 12 | composer require asan/phpexcel 13 | 14 | ## Usage 15 | 16 | ### csv 17 | 18 | ``` 19 | // Simple setting 20 | $reader = Asan\PHPExcel\Excel::load('files/02.csv', 'GBK'); 21 | 22 | // Flexible setting 23 | $reader = Asan\PHPExcel\Excel::load('files/01.csv', function(Asan\PHPExcel\Reader\Csv $reader) { 24 | // Set row limit 25 | $reader->setRowLimit(10); 26 | 27 | // Set column limit 28 | $reader->setColumnLimit(10); 29 | 30 | // Ignore empty row 31 | $reader->ignoreEmptyRow(true); 32 | 33 | // Set encoding 34 | //$reader->setInputEncoding('GBK'); 35 | 36 | // Set delimiter 37 | $reader->setDelimiter("\t"); 38 | }, 'GBK'); 39 | 40 | // skip to row 50 41 | $reader->seek(50); 42 | 43 | // Get 
the current row data 44 | $current = $reader->current(); 45 | 46 | // Get row count 47 | $count = $reader->count(); 48 | ``` 49 | 50 | ### xls 51 | 52 | ``` 53 | $reader = Asan\PHPExcel\Excel::load('files/01.xls', function(Asan\PHPExcel\Reader\Xls $reader) { 54 | // Set row limit 55 | $reader->setRowLimit(10); 56 | 57 | // Set column limit 58 | $reader->setColumnLimit(10); 59 | 60 | // Ignore empty row 61 | $reader->ignoreEmptyRow(true); 62 | 63 | // Select sheet index 64 | $reader->setSheetIndex(1); 65 | }); 66 | 67 | // skip to row 50 68 | $reader->seek(50); 69 | 70 | // Get the current row data 71 | $current = $reader->current(); 72 | 73 | // Get row count 74 | $count = $reader->count(); 75 | 76 | // Get all sheets info 77 | $sheets = $reader->sheets(); 78 | ``` 79 | 80 | ### xlsx 81 | ``` 82 | $reader = Asan\PHPExcel\Excel::load('files/01.xlsx', function(Asan\PHPExcel\Reader\Xlsx $reader) { 83 | // Set row limit 84 | $reader->setRowLimit(10); 85 | 86 | // Set column limit 87 | $reader->setColumnLimit(10); 88 | 89 | // Ignore empty row 90 | $reader->ignoreEmptyRow(true); 91 | 92 | // Select sheet index 93 | $reader->setSheetIndex(0); 94 | }); 95 | 96 | // skip to row 50 97 | $reader->seek(50); 98 | 99 | // Get the current row data 100 | $current = $reader->current(); 101 | 102 | // Get row count 103 | $count = $reader->count(); 104 | 105 | // Get all sheets info 106 | $sheets = $reader->sheets(); 107 | ``` 108 | -------------------------------------------------------------------------------- /autoload.php: -------------------------------------------------------------------------------- 1 | =7.0" 15 | }, 16 | "minimum-stability": "stable", 17 | "autoload": { 18 | "psr-4": {"Asan\\PHPExcel\\": "src/"} 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/Contract/ReaderInterface.php: -------------------------------------------------------------------------------- 1 | setInputEncoding($encoding); 64 | } 65 | 66 | 
return $reader->load($file); 67 | } 68 | 69 | /** 70 | * Identify file format 71 | * 72 | * @param string $ext 73 | * @return string 74 | */ 75 | protected static function getFormatByExtension($ext) { 76 | $formart = ''; 77 | 78 | switch ($ext) { 79 | /* 80 | |-------------------------------------------------------------------------- 81 | | Excel 2007 82 | |-------------------------------------------------------------------------- 83 | */ 84 | case 'xlsx': 85 | case 'xlsm': 86 | case 'xltx': 87 | case 'xltm': 88 | $formart = 'Xlsx'; 89 | break; 90 | 91 | /* 92 | |-------------------------------------------------------------------------- 93 | | Excel5 94 | |-------------------------------------------------------------------------- 95 | */ 96 | case 'xls': 97 | case 'xlt': 98 | $formart = 'Xls'; 99 | break; 100 | 101 | /* 102 | |-------------------------------------------------------------------------- 103 | | CSV 104 | |-------------------------------------------------------------------------- 105 | */ 106 | case 'csv': 107 | case 'txt': 108 | $formart = 'Csv'; 109 | break; 110 | } 111 | 112 | return $formart; 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/Exception/ParserException.php: -------------------------------------------------------------------------------- 1 | false]; 110 | 111 | /** 112 | * Use ZipArchive reader to extract the relevant data streams from the ZipArchive file 113 | * 114 | * @throws ParserException|ReaderException 115 | * @param string $file 116 | */ 117 | public function loadZip($file) { 118 | $this->openFile($file); 119 | 120 | // Setting base date 121 | if (!self::$baseDate) { 122 | self::$baseDate = new \DateTime; 123 | self::$baseDate->setTimezone(new \DateTimeZone('UTC')); 124 | self::$baseDate->setDate(1900, 1, 0); 125 | self::$baseDate->setTime(0, 0, 0); 126 | } 127 | 128 | if (function_exists('gmp_gcd')) { 129 | self::$runtimeInfo['GMPSupported'] = true; 130 | } 131 | } 
/**
 * Enable or disable skipping of empty rows.
 *
 * The flag changes how parseWorksheetInfo() counts rows and whether
 * getRow() checks the row number of each row element.
 *
 * @param bool $ignoreEmpty
 *
 * @return $this
 */
public function ignoreEmptyRow($ignoreEmpty) {
    $this->ignoreEmpty = $ignoreEmpty;

    return $this;
}

/**
 * Whether empty rows are ignored
 *
 * @return bool
 */
public function isIgnoreEmptyRow() {
    return $this->ignoreEmpty;
}

/**
 * Set sheet index
 *
 * When the index actually changes, the worksheet reader is re-opened on
 * the file of the newly selected sheet.
 *
 * @param int $index Zero-based sheet index
 *
 * @return $this
 */
public function setSheetIndex($index) {
    if ($index != $this->sheetIndex) {
        $this->sheetIndex = $index;

        $this->getWorksheetXML();
    }

    return $this;
}

/**
 * Get sheet index
 *
 * @return int
 */
public function getSheetIndex() {
    return $this->sheetIndex;
}

/**
 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
 *
 * The workbook is scanned once; the result is kept in $this->sheets and
 * returned as-is on later calls. Each sheet listed in xl/workbook.xml is
 * extracted to the temp dir as xl/worksheets/sheet{N}.xml (N counted from 1
 * in workbook order) and streamed with XMLReader.
 *
 * @throws ReaderException
 * @return array List of arrays with the keys name, lastColumnLetter,
 *               lastColumnIndex, totalRows and totalColumns
 */
public function parseWorksheetInfo() {
    if ($this->sheets === null) {
        $workbookXML = simplexml_load_string(
            $this->securityScan($this->zip->getFromName('xl/workbook.xml')), 'SimpleXMLElement', self::getLibXmlLoaderOptions()
        );

        $this->sheets = [];
        if (isset($workbookXML->sheets) && $workbookXML->sheets) {
            $xml = new \XMLReader();

            $index = 0;
            foreach ($workbookXML->sheets->sheet as $sheet) {
                $info = [
                    'name' => (string)$sheet['name'], 'lastColumnLetter' => '', 'lastColumnIndex' => 0,
                    'totalRows' => 0, 'totalColumns' => 0
                ];

                $this->zip->extractTo($this->tmpDir, $file = 'xl/worksheets/sheet' . (++$index) . '.xml');
                $xml->open($this->tmpDir . '/' . $file, null, self::getLibXmlLoaderOptions());

                $xml->setParserProperty(\XMLReader::DEFAULTATTRS, true);

                $nonEmpty = false;
                $columnLetter = '';
                while ($xml->read()) {
                    if ($xml->name == 'row') {
                        if (!$this->ignoreEmpty && $xml->nodeType == \XMLReader::ELEMENT) {
                            // Empty rows count: the highest row number seen is the total
                            $info['totalRows'] = (int)$xml->getAttribute('r');
                        } elseif ($xml->nodeType == \XMLReader::END_ELEMENT) {
                            if ($this->ignoreEmpty && $nonEmpty) {
                                $info['totalRows']++;
                                $nonEmpty = false;
                            }

                            // Column letters get longer before they get alphabetically
                            // larger ("Z" comes before "AA"), so a plain string comparison
                            // is wrong past column Z: compare by length first.
                            $lengthDiff = strlen($columnLetter) - strlen($info['lastColumnLetter']);
                            if ($lengthDiff > 0
                                || ($lengthDiff == 0 && strcmp($columnLetter, $info['lastColumnLetter']) > 0)) {

                                $info['lastColumnLetter'] = $columnLetter;
                            }
                        }
                    } elseif ($xml->name == 'c' && $xml->nodeType == \XMLReader::ELEMENT) {
                        // Keep only the letters of the cell reference, e.g. "AB12" gives "AB"
                        $columnLetter = preg_replace('{[^[:alpha:]]}S', '', $xml->getAttribute('r'));
                    } elseif ($this->ignoreEmpty && !$nonEmpty && $xml->name == 'v'
                        && $xml->nodeType == \XMLReader::ELEMENT && trim($xml->readString()) !== '') {

                        $nonEmpty = true;
                    }
                }

                if ($info['lastColumnLetter']) {
                    $info['totalColumns'] = Format::columnIndexFromString($info['lastColumnLetter']);
                    $info['lastColumnIndex'] = $info['totalColumns'] - 1;
                }

                $this->sheets[] = $info;
            }

            $xml->close();
        }
    }

    return $this->sheets;
}

/**
 * Get shared string
 *
 * xl/sharedStrings.xml is read as a forward-only stream. The reader is
 * re-opened from the start whenever the requested position is not strictly
 * ahead of the current one.
 *
 * @param int $position Zero-based index of the shared string item
 * @return string Concatenated, trimmed text of the item, or '' if not found
 */
protected function getSharedString($position) {
    $value = '';

    $file = 'xl/sharedStrings.xml';
    if ($this->sharedStringsXML === null) {
        $this->sharedStringsXML = new \XMLReader();

        $this->zip->extractTo($this->tmpDir, $file);
    }

    // Rewind when the wanted item is at or before the current position.
    // "At" matters: after returning item N the reader already sits past it,
    // so asking for N again would otherwise scan to the end and return ''.
    if ($this->sharedStringsPosition < 0 || $position <= $this->sharedStringsPosition) {
        $this->sharedStringsXML->open($this->tmpDir . '/' . $file, null, self::getLibXmlLoaderOptions());

        $this->sharedStringsPosition = -1;
    }

    while ($this->sharedStringsXML->read()) {
        $name = $this->sharedStringsXML->name;
        $nodeType = $this->sharedStringsXML->nodeType;

        if ($name == 'si') {
            if ($nodeType == \XMLReader::ELEMENT) {
                $this->sharedStringsPosition++;
            } elseif ($position == $this->sharedStringsPosition && $nodeType == \XMLReader::END_ELEMENT) {
                // End of the requested item: stop here so later items can be
                // read without rewinding
                break;
            }
        } elseif ($name == 't' && $position == $this->sharedStringsPosition && $nodeType == \XMLReader::ELEMENT) {
            // An item may hold several text runs; join them
            $value .= trim($this->sharedStringsXML->readString());
        }
    }

    return $value;
}

/**
 * Parse styles info
 *
 * Fills $this->styleXfs (cell style index to number format ID) and
 * $this->formats (custom number format ID to format code) from
 * xl/styles.xml. Runs only once; later calls return immediately.
 *
 * @throws ReaderException
 */
protected function parseStyles() {
    if ($this->styleXfs !== null) {
        return;
    }

    $stylesXML = simplexml_load_string(
        $this->securityScan($this->zip->getFromName('xl/styles.xml')), 'SimpleXMLElement', self::getLibXmlLoaderOptions()
    );

    $this->styleXfs = $this->formats = [];
    if (!$stylesXML) {
        return;
    }

    if (isset($stylesXML->cellXfs->xf) && $stylesXML->cellXfs->xf) {
        foreach ($stylesXML->cellXfs->xf as $xf) {
            $numFmtId = isset($xf['numFmtId']) ? (int)$xf['numFmtId'] : 0;
            if (isset($xf['applyNumberFormat']) || $numFmtId == 0) {
                // If format ID >= 164, it is a custom format and should be read from styleSheet\numFmts
                $this->styleXfs[] = $numFmtId;
            } else {
                // The number format is not applied: fall back to "General"
                $this->styleXfs[] = Format::FORMAT_GENERAL;
            }
        }
    }

    if (isset($stylesXML->numFmts->numFmt) && $stylesXML->numFmts->numFmt) {
        foreach ($stylesXML->numFmts->numFmt as $numFmt) {
            if (isset($numFmt['numFmtId'], $numFmt['formatCode'])) {
                $this->formats[(int)$numFmt['numFmtId']] = (string)$numFmt['formatCode'];
            }
        }
    }
}

/**
 * Get worksheet XMLReader
 *
 * Creates the reader on first use and (re)opens it on the extracted file of
 * the currently selected sheet, which positions it at the start of the sheet.
 */
protected function getWorksheetXML() {
    if ($this->worksheetXML === null) {
        $this->worksheetXML = new \XMLReader();
    }

    $this->worksheetXML->open(
        $this->tmpDir . '/xl/worksheets/sheet' . ($this->getSheetIndex() + 1) . '.xml',
        null, self::getLibXmlLoaderOptions()
    );
}

/**
 * Get row data
 *
 * Reads forward in the worksheet stream until the end of the next row.
 * Passing row index 0 re-opens the worksheet first.
 *
 * @param int $rowIndex    Zero-based index of the row being requested
 * @param int $columnLimit Maximum number of columns to read, 0 for no limit
 *
 * @throws ReaderException
 * @return array|bool Cell values keyed by zero-based column index, or false
 *                    when the end of the worksheet was reached
 */
public function getRow($rowIndex, $columnLimit = 0) {
    $this->parseStyles();
    if ($rowIndex === 0) {
        $this->getWorksheetXML();
    }

    $sharedString = false;
    $index = $styleId = 0;
    $row = $columnLimit ? array_fill(0, $columnLimit, '') : [];

    while ($canRead = $this->worksheetXML->read()) {
        $name = $this->worksheetXML->name;
        $type = $this->worksheetXML->nodeType;

        if ($name == 'row') {
            // The next stored row is not the requested one (rows without
            // cells are not stored): report the requested row as empty.
            // NOTE(review): the row start element has been consumed at this
            // point, so the following call reads this row's cells whatever
            // its number is - confirm callers cope with gaps of 2+ rows.
            if (!$this->ignoreEmpty && $type == \XMLReader::ELEMENT
                && $rowIndex+1 != (int)$this->worksheetXML->getAttribute('r')) {

                $this->worksheetXML->moveToElement();
                break;
            }

            // End of row
            if ($type == \XMLReader::END_ELEMENT) {
                break;
            }
        }

        // Past the column limit: skip everything up to the end of the row
        if ($columnLimit > 0 && $index >= $columnLimit) {
            continue;
        }

        switch ($name) {
            // Cell
            case 'c':
                if ($type == \XMLReader::END_ELEMENT) {
                    // "continue 2" targets the while loop; a bare "continue"
                    // inside switch acts like "break" and warns on PHP 7.3+
                    continue 2;
                }

                $styleId = (int)$this->worksheetXML->getAttribute('s');
                $letter = preg_replace('{[^[:alpha:]]}S', '', $this->worksheetXML->getAttribute('r'));
                $index = Format::columnIndexFromString($letter) - 1;

                // Determine cell type
                $sharedString = false;
                if ($this->worksheetXML->getAttribute('t') == self::CELL_TYPE_SHARED_STR) {
                    $sharedString = true;
                }

                break;

            // Cell value
            case 'v':
            case 'is':
                if ($type == \XMLReader::END_ELEMENT) {
                    continue 2;
                }

                $value = $this->worksheetXML->readString();
                if ($sharedString) {
                    // The stored value is an index into the shared strings table
                    $value = $this->getSharedString($value);
                }

                // Format value if necessary
                if ($value !== '' && $styleId && isset($this->styleXfs[$styleId])) {
                    $value = $this->formatValue($value, $styleId);
                } elseif ($value && is_numeric($value)) {
                    $value = (float)$value;
                }

                $row[$index] = $value;
                break;
        }
    }

    // read() returned false: nothing left in the worksheet
    if ($canRead === false) {
        return false;
    }

    return $row;
}

/**
 * Close ZipArchive and XMLReader instances and remove the temp dir
 */
public function __destruct() {
    if ($this->zip && $this->tmpDir) {
        $this->zip->close();
    }

    if ($this->worksheetXML) {
        $this->worksheetXML->close();
    }

    if ($this->sharedStringsXML) {
        $this->sharedStringsXML->close();
    }

    $this->removeDir($this->tmpDir);

    $this->zip = null;
    $this->worksheetXML = null;
    $this->sharedStringsXML = null;
    $this->tmpDir = null;
}

/**
 * Remove a directory and everything below it
 *
 * Does nothing when $dir is empty or is not a directory.
 *
 * @param string $dir
 */
protected function removeDir($dir) {
    if (!$dir || !is_dir($dir)) {
        return;
    }

    $handle = opendir($dir);
    if ($handle !== false) {
        // Compare with false explicitly: an entry named "0" is falsy and
        // would otherwise end the loop early, leaving the directory non-empty
        while (false !== ($item = readdir($handle))) {
            if ($item === '.' || $item === '..') {
                continue;
            }

            $path = $dir . '/' . $item;
            if (is_file($path)) {
                unlink($path);
            } else {
                $this->removeDir($path);
            }
        }

        closedir($handle);
    }

    rmdir($dir);
}

/**
 * Formats the value according to the index
 *
 * Non-numeric values and values whose style has no number format are
 * returned unchanged. Parsed formats are cached in $this->parsedFormats,
 * one entry per format section (positive / negative / zero), because the
 * section that applies depends on the value being formatted.
 *
 * @param string $value
 * @param int $index Format index
 *
 * @throws \Exception
 * @return string Formatted cell value
 */
private function formatValue($value, $index) {
    if (!is_numeric($value)) {
        return $value;
    }

    // Translate the cell style index into a number format ID
    if (isset($this->styleXfs[$index]) && $this->styleXfs[$index] !== false) {
        $index = $this->styleXfs[$index];
    } else {
        return $value;
    }

    // A special case for the "General" format
    if ($index == 0) {
        return (float)$value;
    }

    // Raw format code: built-in formats first, then the custom ones from styles.xml
    $code = false;
    if (isset(Format::$buildInFormats[$index])) {
        $code = Format::$buildInFormats[$index];
    } elseif (isset($this->formats[$index])) {
        $code = str_replace('"', '', $this->formats[$index]);
    }

    // A code may hold up to four ";"-separated sections: positive, negative,
    // zero, text. The section has to be chosen for every value - choosing it
    // only while filling the cache would apply the first value's section to
    // all later values with the same format.
    $section = 0;
    if ($code) {
        $sections = explode(';', $code);
        $sectionCount = count($sections);

        if ($sectionCount >= 2 && $sectionCount <= 4) {
            if ($value < 0) {
                $section = 1;
            } elseif ($sectionCount > 2 && $value == 0) {
                $section = 2;
            }

            // The chosen section is responsible for showing the sign
            $value = abs($value);
        }

        $code = $sections[$section];
    }

    // The first section keeps the plain format ID as its cache key
    $cacheKey = $section ? $index . '/' . $section : $index;
    $format = $this->parsedFormats[$cacheKey] ?? [];

    if (empty($format)) {
        $format = [
            'code' => $code, 'type' => false, 'scale' => 1, 'thousands' => false, 'currency' => false
        ];

        // Stripping colors
        $format['code'] = trim(preg_replace('/^\\[[a-zA-Z]+\\]/', '', $format['code']));

        // Percentages
        if (substr($format['code'], -1) == '%') {
            $format['type'] = 'Percentage';
        } elseif (preg_match('/(\[\$[A-Z]*-[0-9A-F]*\])*[hmsdy]/i', $format['code'])) {
            $format['type'] = 'DateTime';
            $format['code'] = trim(preg_replace('/^(\[\$[A-Z]*-[0-9A-F]*\])/i', '', $format['code']));
            $format['code'] = strtolower($format['code']);
            $format['code'] = strtr($format['code'], Format::$dateFormatReplacements);

            if (strpos($format['code'], 'A') === false) {
                $format['code'] = strtr($format['code'], Format::$dateFormatReplacements24);
            } else {
                $format['code'] = strtr($format['code'], Format::$dateFormatReplacements12);
            }
        } elseif ($format['code'] == '[$EUR ]#,##0.00_-') {
            $format['type'] = 'Euro';
        } else {
            // Removing skipped characters
            $format['code'] = preg_replace('/_./', '', $format['code']);

            // Removing unnecessary escaping
            $format['code'] = preg_replace("/\\\\/", '', $format['code']);

            // Removing string quotes
            $format['code'] = str_replace(['"', '*'], '', $format['code']);

            // Removing thousands separator
            if (strpos($format['code'], '0,0') !== false || strpos($format['code'], '#,#') !== false) {
                $format['thousands'] = true;
            }

            $format['code'] = str_replace(['0,0', '#,#'], ['00', '##'], $format['code']);

            // Scaling (Commas indicate the power)
            $scale = 1;
            $matches = [];

            if (preg_match('/(0|#)(,+)/', $format['code'], $matches)) {
                $scale = pow(1000, strlen($matches[2]));

                // Removing the commas
                $format['code'] = preg_replace(['/0,+/', '/#,+/'], ['0', '#'], $format['code']);
            }

            $format['scale'] = $scale;
            if (preg_match('/#?.*\?\/\?/', $format['code'])) {
                $format['type'] = 'Fraction';
            } else {
                $format['code'] = str_replace('#', '', $format['code']);
                $matches = [];

                if (preg_match('/(0+)(\.?)(0*)/', preg_replace('/\[[^\]]+\]/', '', $format['code']), $matches)) {
                    list(, $integer, $decimalPoint, $decimal) = $matches;

                    $format['minWidth'] = strlen($integer) + strlen($decimalPoint) + strlen($decimal);
                    $format['decimals'] = $decimal;
                    $format['precision'] = strlen($format['decimals']);
                    $format['pattern'] = '%0' . $format['minWidth'] . '.' . $format['precision'] . 'f';
                }
            }

            $matches = [];
            if (preg_match('/\[\$(.*)\]/u', $format['code'], $matches)) {
                $currencyCode = explode('-', $matches[1]);
                if ($currencyCode) {
                    $currencyCode = $currencyCode[0];
                }

                if (!$currencyCode) {
                    $currencyCode = self::$currencyCode;
                }

                $format['currency'] = $currencyCode;
            }

            $format['code'] = trim($format['code']);
        }

        $this->parsedFormats[$cacheKey] = $format;
    }

    // Applying format to value
    if ($format['code'] == '@') {
        return (string)$value;
    }

    if ($format['type'] == 'Percentage') { // Percentages
        if ($format['code'] === '0%') {
            $value = round(100*$value, 0) . '%';
        } else {
            $value = sprintf('%.2f%%', round(100*$value, 2));
        }
    } elseif ($format['type'] == 'DateTime') { // Dates and times
        $days = (int)$value;

        // Correcting for Feb 29, 1900
        if ($days > 60) {
            $days--;
        }

        // At this point time is a fraction of a day
        $time = ($value - (int)$value);

        // Here time is converted to seconds. Round instead of truncating:
        // the stored fraction is rarely exact, and truncation turns e.g.
        // 1:00:00 into 0:59:59.
        $seconds = $time ? (int)round($time*86400) : 0;

        $value = clone self::$baseDate;
        $value->add(new \DateInterval('P' . $days . 'D' . ($seconds ? 'T' . $seconds . 'S' : '')));

        $value = $value->format($format['code']);
    } elseif ($format['type'] == 'Euro') {
        $value = 'EUR ' . sprintf('%1.2f', $value);
    } else {
        // Fractional numbers
        if ($format['type'] == 'Fraction' && ($value != (int)$value)) {
            $sign = $value < 0 ? '-' : '';
            $integer = floor(abs($value));
            $decimal = fmod(abs($value), 1);

            // Removing the integer part and decimal point
            $decimal *= pow(10, strlen($decimal) - 2);
            $decimalDivisor = pow(10, strlen($decimal));

            if (self::$runtimeInfo['GMPSupported']) {
                $GCD = gmp_strval(gmp_gcd($decimal, $decimalDivisor));
            } else {
                $GCD = self::GCD($decimal, $decimalDivisor);
            }

            $adjDecimal = $decimal/$GCD;
            $adjDecimalDivisor = $decimalDivisor/$GCD;

            if (strpos($format['code'], '0') !== false || strpos($format['code'], '#') !== false
                || substr($format['code'], 0, 3) == '? ?') {

                // The integer part is shown separately apart from the fraction.
                // The inner ternary needs its own parentheses: "." binds
                // tighter than "?:", so without them the whole left-hand
                // concatenation becomes the condition.
                $value = $sign . ($integer ? $integer . ' ' : '') . $adjDecimal . '/' . $adjDecimalDivisor;
            } else {
                // The fraction includes the integer part
                $adjDecimal += $integer * $adjDecimalDivisor;
                $value = $sign . $adjDecimal . '/' . $adjDecimalDivisor;
            }
        } else {
            // Scaling
            $value = $value/$format['scale'];
            if (!empty($format['minWidth']) && $format['decimals']) {
                if ($format['thousands']) {
                    $value = number_format(
                        $value, $format['precision'], self::$decimalSeparator, self::$thousandSeparator
                    );

                    $value = preg_replace('/(0+)(\.?)(0*)/', $value, $format['code']);
                } else {
                    if (preg_match('/[0#]E[+-]0/i', $format['code'])) {
                        // Scientific format
                        $value = sprintf('%5.2E', $value);
                    } else {
                        $value = sprintf($format['pattern'], $value);
                        $value = preg_replace('/(0+)(\.?)(0*)/', $value, $format['code']);
                    }
                }
            }
        }

        // Currency/Accounting: swap the "[$...]" token that came along with the
        // format code for the currency symbol. Only strings can contain the
        // token; plain numbers are left untouched. A callback is used so that
        // "$" or "\" in the symbol is not read as a back-reference.
        if ($format['currency'] && is_string($value)) {
            $currency = $format['currency'];
            $replaced = preg_replace_callback('/\[\$.*\]/u', function () use ($currency) {
                return $currency;
            }, $value);

            if ($replaced !== null) {
                $value = $replaced;
            }
        }
    }

    return $value;
}

/**
 * Greatest common divisor calculation in case GMP extension is not enabled
 *
 * @param int $number1
 * @param int $number2
 *
 * @return int 0 when both numbers are 0, the greatest common divisor otherwise
 */
private static function GCD($number1, $number2) {
    // Callers may pass floats. "%" truncates its operands, so a fractional
    // value below 1 would become 0 and raise a modulo-by-zero error;
    // normalise to integers up front.
    $number1 = (int)round(abs($number1));
    $number2 = (int)round(abs($number2));

    if ($number1 + $number2 == 0) {
        return 0;
    }

    // Euclid's algorithm
    $number = 1;
    while ($number1 > 0) {
        $number = $number1;
        $number1 = $number2 % $number1;
        $number2 = $number;
    }

    return $number;
}

/**
 * Open file for reading
 *
 * @param string $file
 *
 * @throws ParserException|ReaderException
 */
public function openFile($file) {
    // Check if file exists
    if (!file_exists($file) || !is_readable($file)) {
        throw new ReaderException("Could not open file [$file] for reading! 
File does not exist."); 752 | } 753 | 754 | $this->zip = new \ZipArchive(); 755 | 756 | $xl = false; 757 | if ($this->zip->open($file) === true) { 758 | $this->tmpDir = sys_get_temp_dir() . '/' . uniqid(); 759 | 760 | // check if it is an OOXML archive 761 | $rels = simplexml_load_string( 762 | $this->securityScan($this->zip->getFromName('_rels/.rels')), 763 | 'SimpleXMLElement', self::getLibXmlLoaderOptions() 764 | ); 765 | 766 | if ($rels !== false) { 767 | foreach ($rels->Relationship as $rel) { 768 | switch ($rel["Type"]) { 769 | case "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument": 770 | if ($rel["Target"] == 'xl/workbook.xml') { 771 | $xl = true; 772 | } 773 | 774 | break; 775 | } 776 | } 777 | } 778 | } 779 | 780 | if ($xl === false) { 781 | throw new ParserException("The file [$file] is not recognised as a zip archive"); 782 | } 783 | } 784 | 785 | /** 786 | * Scan theXML for use of = 0) { 816 | @libxml_disable_entity_loader($options == (LIBXML_DTDLOAD | LIBXML_DTDATTR)); 817 | } 818 | 819 | self::$libXmlLoaderOptions = $options; 820 | } 821 | 822 | /** 823 | * Get default options for libxml loader. 824 | * Defaults to LIBXML_DTDLOAD | LIBXML_DTDATTR when not set explicitly. 
*
     * When no options were set explicitly and the libxml constants are available, the default
     * option set is installed through setLibXmlLoaderOptions() before it is returned.
     *
     * @return int Default options for libxml loader
     */
    public static function getLibXmlLoaderOptions() {
        // defined() takes the NAME of a constant. Passing the bare constant handed it the integer
        // value instead, so this check did not test for the libxml constant and the defaults
        // were not applied.
        if (is_null(self::$libXmlLoaderOptions) && defined('LIBXML_DTDLOAD')) {
            self::setLibXmlLoaderOptions(LIBXML_DTDLOAD | LIBXML_DTDATTR);
        }

        // The entity loader is disabled exactly when the default option set is in effect
        if (version_compare(PHP_VERSION, '5.2.11') >= 0) {
            @libxml_disable_entity_loader(self::$libXmlLoaderOptions == (LIBXML_DTDLOAD | LIBXML_DTDATTR));
        }

        return self::$libXmlLoaderOptions;
    }
}
--------------------------------------------------------------------------------
/src/Parser/Excel5.php:
--------------------------------------------------------------------------------
        '#NULL!',
        0x07 => '#DIV/0!',
        0x0F => '#VALUE!',
        0x17 => '#REF!',
        0x1D => '#NAME?',
        0x24 => '#NUM!',
        0x2A => '#N/A'
    ];

    /**
     * Base calendar year to use for calculations
     *
     * @var int
     */
    private static $excelBaseDate = Format::CALENDAR_WINDOWS_1900;

    /**
     * Decimal separator
     *
     * @var string
     */
    private static $decimalSeparator;

    /**
     * Thousands separator
     *
     * @var string
     */
    private static $thousandsSeparator;

    /**
     * Currency code
     *
     * @var string
     */
    private static $currencyCode;

    /**
     * Workbook stream data
     *
     * @var string
     */
    private $data;

    /**
     * Size in bytes of $this->data
     *
     * @var int
     */
    private $dataSize;

    /**
     * Current position in stream
     *
     * @var int
     */
    private $pos;

    /**
     * Worksheets
     *
     * @var array
     */
    private $sheets;

    /**
     * BIFF version
     *
     * @var int
     */
    private $version;

    /**
     * Codepage set in the Excel file being read.
Only important for BIFF5 (Excel 5.0 - Excel 95) 135 | * For BIFF8 (Excel 97 - Excel 2003) this will always have the value 'UTF-16LE' 136 | * 137 | * @var string 138 | */ 139 | private $codePage; 140 | 141 | /** 142 | * Row data 143 | * 144 | * @var array 145 | */ 146 | private $row; 147 | 148 | /** 149 | * Shared formats 150 | * 151 | * @var array 152 | */ 153 | private $formats; 154 | 155 | /** 156 | * The current sheet of the file 157 | * 158 | * @var int 159 | */ 160 | private $sheetIndex = 0; 161 | 162 | /** 163 | * Ignore empty row 164 | * 165 | * @var bool 166 | */ 167 | private $ignoreEmpty = false; 168 | 169 | /** 170 | * The current row index of the sheet 171 | * 172 | * @var int 173 | */ 174 | private $rowIndex = 0; 175 | 176 | /** 177 | * Max column number 178 | * 179 | * @var int 180 | */ 181 | private $columnLimit = 0; 182 | 183 | /** 184 | * Whether to the end of the row 185 | * 186 | * @var bool 187 | */ 188 | private $eor = false; 189 | 190 | /** 191 | * Extended format record 192 | * 193 | * @var array 194 | */ 195 | private $xfRecords = []; 196 | 197 | /** 198 | * Shared strings. Only applies to BIFF8. 
*
     * @var array
     */
    private $sst = [];

    /**
     * The type of encryption in use
     *
     * @var int
     */
    private $encryption = 0;

    /**
     * The position in the stream after which contents are encrypted.
     * Starts out as false; readFilepass() assigns the actual position.
     *
     * @var int|false
     */
    private $encryptionStartPos = false;

    /**
     * The current RC4 decryption object (null until a key has been created)
     *
     * @var RC4|null
     */
    private $rc4Key = null;

    /**
     * The position in the stream that the RC4 decryption object was left at
     *
     * @var int
     */
    private $rc4Pos = 0;

    /**
     * The current MD5 context state (filled in by verifyPassword() through its by-reference argument)
     *
     * @var string|null
     */
    private $md5Ctxt = null;

    /**
     * Use OLE reader to extract the relevant data streams from the OLE file
     *
     * Reads the file with OLERead and keeps the stream identified by the reader's
     * `workbook` property in $this->data.
     *
     * @param string $file Path of the file to read
     */
    public function loadOLE($file) {
        $oleRead = new OLERead();
        $oleRead->read($file);
        $this->data = $oleRead->getStream($oleRead->workbook);
    }

    /**
     * Set whether empty rows are ignored
     *
     * @param bool $ignoreEmpty
     *
     * @return $this
     */
    public function ignoreEmptyRow($ignoreEmpty) {
        $this->ignoreEmpty = $ignoreEmpty;

        return $this;
    }

    /**
     * Whether empty rows are ignored
     *
     * @return bool
     */
    public function isIgnoreEmptyRow() {
        return $this->ignoreEmpty;
    }

    /**
     * Set the index of the sheet to read from
     *
     * @param int $index
     *
     * @return $this
     */
    public function setSheetIndex($index) {
        $this->sheetIndex = $index;

        return $this;
    }

    /**
     * Get the index of the sheet currently read from
     *
     * @return int
     */
    public function getSheetIndex() {
        return $this->sheetIndex;
    }

    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
     *
     * @throws
ParserException
     * @return array
     */
    public function parseWorksheetInfo() {
        // Parsed on an earlier call: hand back the cached result
        if ($this->sheets !== null) {
            return $this->sheets;
        }

        // total byte size of Excel data (workbook global substream + sheet substreams)
        $this->dataSize = strlen($this->data);
        $this->pos = 0;
        $this->codePage = 'CP1252';
        $this->sheets = [];

        // Walk the Workbook Global Substream up to and including its EOF record
        while ($this->pos < $this->dataSize) {
            $recordType = Format::getUInt2d($this->data, $this->pos);

            if ($recordType == self::XLS_TYPE_EOF) {
                $this->readDefault();
                break;
            }

            switch ($recordType) {
                case self::XLS_TYPE_BOF:
                    $this->readBof();
                    break;

                case self::XLS_TYPE_FILEPASS:
                    $this->readFilepass();
                    break;

                case self::XLS_TYPE_CODEPAGE:
                    $this->readCodepage();
                    break;

                case self::XLS_TYPE_DATEMODE:
                    $this->readDateMode();
                    break;

                case self::XLS_TYPE_FORMAT:
                    $this->readFormat();
                    break;

                case self::XLS_TYPE_XF:
                    $this->readXf();
                    break;

                case self::XLS_TYPE_SST:
                    $this->readSst();
                    break;

                case self::XLS_TYPE_SHEET:
                    $this->readSheet();
                    break;

                default:
                    $this->readDefault();
            }
        }

        // Record types that carry a cell (row index at offset 0, column index at offset 2)
        $cellRecordTypes = [
            self::XLS_TYPE_RK,
            self::XLS_TYPE_LABELSST,
            self::XLS_TYPE_NUMBER,
            self::XLS_TYPE_FORMULA,
            self::XLS_TYPE_BOOLERR,
            self::XLS_TYPE_LABEL,
        ];

        // Measure every worksheet substream
        foreach ($this->sheets as $sheetKey => $info) {
            // 0x00: Worksheet, 0x02: Chart, 0x06: Visual Basic module - only worksheets are measured
            if ($info['sheetType'] != 0x00) {
                continue;
            }

            $info = array_merge($info, [
                'lastColumnLetter' => '',
                'lastColumnIndex' => null,
                'totalRows' => 0,
                'totalColumns' => 0,
            ]);

            $previousRow = 0;
            $this->pos = $info['offset'];

            while ($this->pos <= $this->dataSize - 4) {
                $recordType = Format::getUInt2d($this->data, $this->pos);

                if (in_array($recordType, $cellRecordTypes)) {
                    $length = Format::getUInt2d($this->data, $this->pos + 2);
                    $payload = substr($this->data, $this->pos + 4, $length);

                    // move stream pointer to next record
                    $this->pos += 4 + $length;

                    $rowNumber = Format::getUInt2d($payload, 0) + 1;
                    $columnIndex = Format::getUInt2d($payload, 2);

                    if (!$this->ignoreEmpty) {
                        // Empty rows count: the total is the highest row number seen
                        $info['totalRows'] = max($info['totalRows'], $rowNumber);
                    } else {
                        // Empty rows are skipped: count a row each time the row number increases
                        if ($previousRow < $rowNumber) {
                            $info['totalRows']++;
                        }

                        $previousRow = $rowNumber;
                    }

                    $info['lastColumnIndex'] = max($columnIndex, $info['lastColumnIndex']);
                } elseif ($recordType == self::XLS_TYPE_BOF) {
                    $this->readBof();
                } elseif ($recordType == self::XLS_TYPE_EOF) {
                    $this->readDefault();
                    break;
                } else {
                    $this->readDefault();
                }
            }

            if ($info['lastColumnIndex'] === null) {
                // No cell record was found in this sheet
                $info['lastColumnIndex'] = 0;
            } else {
                $info['lastColumnLetter'] = Format::stringFromColumnIndex($info['lastColumnIndex']);
            }

            if ($info['lastColumnLetter']) {
                $info['totalColumns'] = $info['lastColumnIndex'] + 1;
            }

            $this->sheets[$sheetKey] = $info;
        }

        $this->pos = 0;

        return $this->sheets;
    }

    /**
     * Read the cells of one row from the current sheet
     *
     * Reading continues from the current stream position; the position is reset to the start of
     * the sheet when $rowIndex is 0 or the position lies before the sheet's offset.
     *
     * @param int $rowIndex    Index of the row to collect
     * @param int $columnLimit Number of columns to return, 0 for no limit
     *
     * @throws ParserException
     * @return array|bool The row's cell values, or false when the position is already at the end of the sheet
     */
    public function getRow($rowIndex, $columnLimit = 0) {
        $this->parseWorksheetInfo();

        // Rewind or change sheet
        $sheetOffset = $this->sheets[$this->sheetIndex]['offset'];
        if ($rowIndex === 0 || $this->pos < $sheetOffset) {
            $this->pos = $sheetOffset;
        }

        // The sheet ends where the next one starts, or at the end of the stream
        $nextSheet = $this->sheetIndex + 1;
        $endPos = isset($this->sheets[$nextSheet]['offset'])
            ? $this->sheets[$nextSheet]['offset'] - 4
            : $this->dataSize - 4;

        if ($this->pos >= $endPos) {
            return false;
        }

        $this->rowIndex = $rowIndex;
        $this->columnLimit = $columnLimit;
        $this->eor = false;

        $this->row = [];
        if ($columnLimit) {
            $this->row = array_fill(0, $columnLimit, '');
        }

        while ($this->pos <= $endPos) {
            // Remember where this record starts so it can be re-read for the next row
            $recordStart = $this->pos;
            $recordType = Format::getUInt2d($this->data, $this->pos);

            if ($recordType == self::XLS_TYPE_EOF) {
                $this->readDefault();
                break;
            }

            switch ($recordType) {
                case self::XLS_TYPE_BOF:
                    $this->readBof();
                    break;

                case self::XLS_TYPE_RK:
                    $this->readRk();
                    break;

                case self::XLS_TYPE_LABELSST:
                    $this->readLabelSst();
                    break;

                case self::XLS_TYPE_MULRK:
                    $this->readMulRk();
                    break;

                case self::XLS_TYPE_NUMBER:
                    $this->readNumber();
                    break;

                case self::XLS_TYPE_FORMULA:
                    $this->readFormula();
                    break;

                case self::XLS_TYPE_BOOLERR:
                    $this->readBoolErr();
                    break;

                case self::XLS_TYPE_MULBLANK:
                case self::XLS_TYPE_BLANK:
                    $this->readBlank();
                    break;

                case self::XLS_TYPE_LABEL:
                    $this->readLabel();
                    break;

                default:
                    $this->readDefault();
            }

            // End of row: step back to the record that belongs to another row
            if ($this->eor) {
                $this->pos = $recordStart;
                break;
            }
        }

        return $this->row;
    }

    /**
     * Store a cell value in the row being collected
     *
     * A cell that belongs to a different row than the one being collected marks the end of the row.
     *
     * @param int   $row     Row index of the cell
     * @param int   $column  Column index of the cell
     * @param mixed $value   Raw cell value
     * @param int   $xfIndex Index into the XF records, used to look up the number format
     *
     * @return bool false when the cell belongs to another row, true otherwise
     */
    private function addCell($row, $column, $value, $xfIndex) {
        if ($this->rowIndex != $row) {
            $this->eor = true;

            return false;
        }

        // Cells at or past the column limit are accepted but not stored
        $beyondLimit = $this->columnLimit && $column >= $this->columnLimit;
        if (!$beyondLimit) {
            $numberFormat = $this->xfRecords[$xfIndex]['format'];
            $this->row[$column] = self::toFormattedString($value, $numberFormat);
        }

        return true;
    }

    /**
     * Read a BOF record
     *
     * For the workbook globals substream the BIFF version is validated and stored. Worksheet
     * substreams need no handling. Any other substream is skipped up to its EOF record.
     *
     * @throws ParserException When the workbook is neither BIFF8 nor BIFF7
     */
    private function readBof() {
        $length = Format::getUInt2d($this->data, $this->pos + 2);
        $payload = substr($this->data, $this->pos + 4, $length);

        // move stream pointer to next record
        $this->pos += 4 + $length;

        // offset: 2; size: 2; type of the following data
        $substreamType = Format::getUInt2d($payload, 2);

        if ($substreamType == self::XLS_WORKBOOKGLOBALS) {
            $version = Format::getUInt2d($payload, 0);
            if (!($version == self::XLS_BIFF8 || $version == self::XLS_BIFF7)) {
                throw new ParserException('Cannot read this Excel file. Version is too old.', 1);
            }

            $this->version = $version;

            return;
        }

        if ($substreamType == self::XLS_WORKSHEET) {
            // do not use this version information for anything
            // it is unreliable (OpenOffice doc, 5.8), use only version information from the global stream
            return;
        }

        // substream, e.g. chart
        // just skip the entire substream
        do {
            $recordType = Format::getUInt2d($this->data, $this->pos);
            $this->readDefault();
        } while ($recordType != self::XLS_TYPE_EOF && $this->pos < $this->dataSize);
    }

    /**
     * SHEET
     *
     * This record is located in the Workbook Globals Substream and represents a sheet inside the workbook.
     * One SHEET record is written for each sheet. It stores the sheet name and a stream offset to the BOF
     * record of the respective Sheet Substream within the Workbook Stream.
*/
    private function readSheet() {
        $length = Format::getUInt2d($this->data, $this->pos + 2);
        $recordData = substr($this->data, $this->pos + 4, $length);

        // offset: 0; size: 4; absolute stream position of the BOF record of the sheet
        // NOTE: not encrypted
        $offset = Format::getInt4d($this->data, $this->pos + 4);

        // move stream pointer to next record
        $this->pos += 4 + $length;

        // offset: 4; size: 1; sheet state
        // Square brackets are used for the byte offsets: the curly-brace form ($str{4}) is
        // deprecated as of PHP 7.4 and no longer parses in PHP 8.
        switch (ord($recordData[4])) {
            case 0x01:
                $sheetState = self::SHEETSTATE_HIDDEN;
                break;

            case 0x02:
                $sheetState = self::SHEETSTATE_VERYHIDDEN;
                break;

            default:
                // 0x00 and any unknown state are treated as visible
                $sheetState = self::SHEETSTATE_VISIBLE;
                break;
        }

        // offset: 5; size: 1; sheet type
        $sheetType = ord($recordData[5]);

        // offset: 6; size: var; sheet name (left empty when the BIFF version is neither 8 nor 7)
        $name = '';
        if ($this->version == self::XLS_BIFF8) {
            $string = self::readUnicodeStringShort(substr($recordData, 6));
            $name = $string['value'];
        } elseif ($this->version == self::XLS_BIFF7) {
            $string = $this->readByteStringShort(substr($recordData, 6));
            $name = $string['value'];
        }

        // ignore hidden sheet
        if ($sheetState == self::SHEETSTATE_VISIBLE) {
            $this->sheets[] = [
                'name' => $name, 'offset' => $offset, 'sheetState' => $sheetState, 'sheetType' => $sheetType
            ];
        }
    }

    /**
     * Reads a general type of BIFF record.
     * Does nothing except for moving stream pointer forward to next record.
*/
    private function readDefault() {
        // offset: 2; size: 2; length of the record data that follows the 4-byte record header
        $length = Format::getUInt2d($this->data, $this->pos + 2);
        //$recordData = $this->readRecordData($this->data, $this->pos + 4, $length);

        // move stream pointer to next record
        $this->pos += 4 + $length;
    }

    /**
     * FILEPASS
     *
     * This record is part of the File Protection Block. It contains information about the read/write password of
     * the file. All record contents following this record will be encrypted.
     * The decryption functions and objects used from here on in are based on the source of Spreadsheet-ParseExcel:
     * http://search.cpan.org/~jmcnamara/Spreadsheet-ParseExcel/
     *
     * Only a 54-byte record is accepted, and the only password tried is the fixed string 'VelvetSweatshop'.
     *
     * @throws ParserException When the record length is not 54 or the password check fails
     */
    private function readFilepass() {
        $length = Format::getUInt2d($this->data, $this->pos + 2);

        if ($length != 54) {
            throw new ParserException('Unexpected file pass record length', 2);
        }

        $recordData = $this->readRecordData($this->data, $this->pos + 4, $length);

        // move stream pointer to next record
        $this->pos += 4 + $length;

        // Three 16-byte fields at offsets 6, 22 and 38 are passed on as document id, salt and hashed salt;
        // verifyPassword() stores its MD5 context in $this->md5Ctxt
        if (!$this->verifyPassword('VelvetSweatshop', substr($recordData, 6, 16), substr($recordData, 22, 16),
            substr($recordData, 38, 16), $this->md5Ctxt)) {

            throw new ParserException('Decryption password incorrect', 3);
        }

        $this->encryption = self::MS_BIFF_CRYPTO_RC4;

        // Decryption required from the record after next onwards
        $this->encryptionStartPos = $this->pos + Format::getUInt2d($this->data, $this->pos + 2);
    }

    /**
     * Read record data from stream, decrypting as required
     *
     * Data is returned as-is when no encryption is active or the record lies before the encryption
     * start position. For RC4 the decryptor is re-keyed at every REKEY_BLOCK boundary.
     *
     * @param string $data Data stream to read from
     * @param int    $pos  Position to start reading from
     * @param int    $len  Record data length
     *
     * @throws ParserException When the file uses XOR encryption
     * @return string Record data
     */
    private function readRecordData($data, $pos, $len) {
        $data = substr($data, $pos, $len);

        // File not encrypted, or record before encryption start point
        if ($this->encryption == self::MS_BIFF_CRYPTO_NONE || $pos < $this->encryptionStartPos) {
            return $data;
        }

        $recordData = '';
        if ($this->encryption == self::MS_BIFF_CRYPTO_RC4) {
            // Block numbers of the decryptor's last position, of the record start and of the record end
            $oldBlock = floor($this->rc4Pos / self::REKEY_BLOCK);
            $block = floor($pos / self::REKEY_BLOCK);
            $endBlock = floor(($pos + $len) / self::REKEY_BLOCK);

            // Spin an RC4 decryptor to the right spot. If we have a decryptor sitting
            // at a point earlier in the current block, re-use it as we can save some time.
            if ($block != $oldBlock || $pos < $this->rc4Pos || !$this->rc4Key) {
                $this->rc4Key = $this->makeKey($block, $this->md5Ctxt);
                $step = $pos % self::REKEY_BLOCK;
            } else {
                $step = $pos - $this->rc4Pos;
            }

            // Advance the key stream by $step bytes; the output is discarded
            $this->rc4Key->RC4(str_repeat("\0", $step));

            // Decrypt record data (re-keying at the end of every block)
            while ($block != $endBlock) {
                $step = self::REKEY_BLOCK - ($pos % self::REKEY_BLOCK);
                $recordData .= $this->rc4Key->RC4(substr($data, 0, $step));

                $data = substr($data, $step);
                $pos += $step;
                $len -= $step;
                $block++;

                $this->rc4Key = $this->makeKey($block, $this->md5Ctxt);
            }

            $recordData .= $this->rc4Key->RC4(substr($data, 0, $len));

            // Keep track of the position of this decryptor.
            // We'll try and re-use it later if we can to speed things up
            $this->rc4Pos = $pos + $len;

        } elseif ($this->encryption == self::MS_BIFF_CRYPTO_XOR) {
            throw new ParserException('XOr encryption not supported', 4);
        }

        return $recordData;
    }

    /**
     * Make an RC4 decryptor for the given block
     *
     * Builds a 64-byte buffer from the first 5 bytes of $valContext and the little-endian block
     * number, and keys an RC4 object with the md5() of that buffer.
     *
     * NOTE(review): md5() is called without its raw-output flag, so the RC4 key is the 32-character
     * hex digest of the full 64-byte buffer - confirm that this is the intended key material.
     *
     * @param int    $block      Block for which to create decryptor
     * @param string $valContext MD5 context state
     *
     * @return RC4
     */
    private function makeKey($block, $valContext) {
        $pw = str_repeat("\0", 64);

        for ($i = 0; $i < 5; $i++) {
            $pw[$i] = $valContext[$i];
        }

        // Block number, least significant byte first
        $pw[5] = chr($block & 0xff);
        $pw[6] = chr(($block >> 8) & 0xff);
        $pw[7] = chr(($block >> 16) & 0xff);
        $pw[8] = chr(($block >> 24) & 0xff);

        $pw[9] = "\x80";
        $pw[56] = "\x48";

        return new RC4(md5($pw));
    }

    /**
     * Verify RC4 file password
     *
     * NOTE(review): both md5() calls return hex digests (no raw-output flag), while $hashedsalt is
     * the RC4 output for the 16-byte $hashedsalt_data passed in by readFilepass(). Presumably the
     * final comparison is meant to be between two 16-byte values - verify against the RC4 class
     * and an encrypted sample file.
     *
     * @param string $password        Password to check
     * @param string $docid           Document id
     * @param string $salt_data       Salt data
     * @param string $hashedsalt_data Hashed salt data
     * @param string &$valContext     Set to the MD5 context of the value
     *
     * @return bool Success
     */
    private function verifyPassword($password, $docid, $salt_data, $hashedsalt_data, &$valContext) {
        $pw = str_repeat("\0", 64);

        // Spread each password byte over two bytes, low byte first
        for ($i = 0; $i < strlen($password); $i++) {
            $o = ord(substr($password, $i, 1));
            $pw[2 * $i] = chr($o & 0xff);
            $pw[2 * $i + 1] = chr(($o >> 8) & 0xff);
        }

        $pw[2 * $i] = chr(0x80);
        $pw[56] = chr(($i << 4) & 0xff);

        $mdContext1 = md5($pw);

        $offset = 0;
        $keyOffset = 0;
        $toCopy = 5;

        // Fill the 64-byte buffer with alternating pieces of $mdContext1 and the 16-byte document id,
        // wrapping around at offset 64, until the write offset lands on 16
        while ($offset != 16) {
            if ((64 - $offset) < 5) {
                $toCopy = 64 - $offset;
            }

            for ($i = 0; $i <= $toCopy; $i++) {
                $pw[$offset + $i] = $mdContext1[$keyOffset + $i];
            }

            $offset += $toCopy;

            if ($offset == 64) {
                // Buffer full: continue with the rest of the current piece at the start of the buffer
                $keyOffset = $toCopy;
                $toCopy = 5 - $toCopy;
                $offset = 0;
                continue;
            }

            $keyOffset = 0;
            $toCopy = 5;
            for ($i = 0; $i < 16; $i++) {
                $pw[$offset + $i] = $docid[$i];
            }
            $offset += 16;
        }

        $pw[16] = "\x80";
        for ($i = 0; $i < 47; $i++) {
            $pw[17 + $i] = "\0";
        }
        $pw[56] = "\x80";
        $pw[57] = "\x0a";

        // Handed back to the caller through the by-reference parameter
        $valContext = md5($pw);

        $key = $this->makeKey(0, $valContext);

        $salt = $key->RC4($salt_data);
        $hashedsalt = $key->RC4($hashedsalt_data);

        $salt .= "\x80" . str_repeat("\0", 47);
        $salt[56] = "\x80";

        $mdContext2 = md5($salt);

        return $mdContext2 == $hashedsalt;
    }

    /**
     * CODEPAGE
     *
     * This record stores the text encoding used to write byte strings, stored as MS Windows code page identifier.
     * The identifier is translated with NumberToName() and stored in $this->codePage.
     *
     * @throws ParserException
     */
    private function readCodepage() {
        $length = Format::getUInt2d($this->data, $this->pos + 2);
        $recordData = substr($this->data, $this->pos + 4, $length);

        // move stream pointer to next record
        $this->pos += 4 + $length;

        // offset: 0; size: 2; code page identifier
        $codePage = Format::getUInt2d($recordData, 0);
        $this->codePage = self::NumberToName($codePage);
    }

    /**
     * DATEMODE
     * This record specifies the base date for displaying date values. All dates are stored as count of days
     * past this base date. In BIFF2-BIFF4 this record is part of the Calculation Settings Block. In BIFF5-BIFF8
     * it is stored in the Workbook Globals Substream.
*/
    private function readDateMode() {
        $length = Format::getUInt2d($this->data, $this->pos + 2);
        $recordData = substr($this->data, $this->pos + 4, $length);

        // move stream pointer to next record
        $this->pos += 4 + $length;

        // offset: 0; size: 2; 0 = base 1900, 1 = base 1904
        // The byte is read with square brackets: the curly-brace form ($str{0}) is deprecated
        // as of PHP 7.4 and no longer parses in PHP 8.
        self::$excelBaseDate = Format::CALENDAR_WINDOWS_1900;
        if (ord($recordData[0]) == 1) {
            self::$excelBaseDate = Format::CALENDAR_MAC_1904;
        }
    }

    /**
     * FORMAT
     *
     * This record contains information about a number format. All FORMAT records occur together in a sequential list.
     * In BIFF2-BIFF4 other records referencing a FORMAT record contain a zero-based index into this list. From BIFF5
     * on the FORMAT record contains the index itself that will be used by other records.
     *
     * The format string is stored in $this->formats under the index read from the record.
     */
    private function readFormat() {
        $length = Format::getUInt2d($this->data, $this->pos + 2);
        $recordData = substr($this->data, $this->pos + 4, $length);

        // move stream pointer to next record
        $this->pos += 4 + $length;

        // offset: 0; size: 2; format index used by other records
        $indexCode = Format::getUInt2d($recordData, 0);

        // offset: 2; size: var; format string, read as Unicode for BIFF8 and as byte string otherwise
        if ($this->version == self::XLS_BIFF8) {
            $string = self::readUnicodeStringLong(substr($recordData, 2));
        } else {
            // BIFF7
            $string = $this->readByteStringShort(substr($recordData, 2));
        }

        $formatString = $string['value'];
        $this->formats[$indexCode] = $formatString;
    }

    /**
     * XF - Extended Format
     *
     * This record contains formatting information for cells, rows, columns or styles.
     * According to http://support.microsoft.com/kb/147732 there are always at least 15 cell style XF and 1 cell XF.
     * Inspection of Excel files generated by MS Office Excel shows that XF records 0-14 are cell style XF and XF
     * record 15 is a cell XF.
We only read the first cell style XF and skip the remaining cell style XF records
     * We read all cell XF records.
     */
    private function readXf() {
        $recordLength = Format::getUInt2d($this->data, $this->pos + 2);
        $payload = substr($this->data, $this->pos + 4, $recordLength);

        // advance the stream pointer past this record
        $this->pos += 4 + $recordLength;

        // offset: 2; size: 2; Index to FORMAT record
        $formatIndex = Format::getUInt2d($payload, 2);

        // Resolution order: format code read from this workbook, then the built-in table,
        // and the general format code when neither knows the index
        $formatCode = Format::FORMAT_GENERAL;
        if (isset($this->formats[$formatIndex])) {
            $formatCode = $this->formats[$formatIndex];
        } elseif (isset(Format::$buildInFormats[$formatIndex])) {
            $formatCode = Format::$buildInFormats[$formatIndex];
        }

        $this->xfRecords[] = ['index' => $formatIndex, 'format' => $formatCode];
    }

    /**
     * SST - Shared String Table
     *
     * This record contains a list of all strings used anywhere in the workbook. Each string occurs only once.
     * The workbook uses indexes into the list to reference the strings.
**/
    private function readSst() {
        // offset within (spliced) record data
        $pos = 0;

        // get spliced record data
        $splicedRecordData = $this->getSplicedRecordData();
        $recordData = $splicedRecordData['recordData'];
        $spliceOffsets = $splicedRecordData['spliceOffsets'];

        // offset: 0; size: 4; total number of strings in the workbook (skipped, not read)
        $pos += 4;

        // offset: 4; size: 4; number of following strings ($nm)
        $nm = Format::getInt4d($recordData, 4);

        $pos += 4;

        // loop through the Unicode strings (16-bit length)
        for ($i = 0; $i < $nm; ++$i) {
            // stop when the data ends before the 3-byte string header (length + option flags)
            if (!isset($recordData[$pos + 2])) {
                break;
            }

            // number of characters in the Unicode string
            $numChars = Format::getUInt2d($recordData, $pos);
            $pos += 2;

            // option flags
            $optionFlags = ord($recordData[$pos]);
            ++$pos;

            // bit: 0; mask: 0x01; 0 = compressed; 1 = uncompressed
            $isCompressed = (($optionFlags & 0x01) == 0) ;

            // bit: 2; mask: 0x04; 0 = ordinary; 1 = Asian phonetic
            $hasAsian = (($optionFlags & 0x04) != 0);

            // bit: 3; mask: 0x08; 0 = ordinary; 1 = Rich-Text
            $formattingRuns = 0;
            $hasRichText = (($optionFlags & 0x08) != 0);
            if ($hasRichText && isset($recordData[$pos])) {
                // number of Rich-Text formatting runs
                $formattingRuns = Format::getUInt2d($recordData, $pos);
                $pos += 2;
            }

            $extendedRunLength = 0;
            if ($hasAsian && isset($recordData[$pos])) {
                // size of Asian phonetic setting
                $extendedRunLength = Format::getInt4d($recordData, $pos);
                $pos += 4;
            }

            // expected byte length of character array if not split
            $len = ($isCompressed) ? $numChars : $numChars * 2;

            // look up limit position: the first splice offset at or after the current position
            $limitPos = 0;
            foreach ($spliceOffsets as $spliceOffset) {
                // it can happen that the string is empty, therefore we need
                // <= and not just <
                if ($pos <= $spliceOffset) {
                    $limitPos = $spliceOffset;
                    break;
                }
            }

            if ($pos + $len <= $limitPos) {
                // character array is not split between records
                $retStr = substr($recordData, $pos, $len);
                $pos += $len;
            } else {
                // character array is split between records
                // first part of character array
                $retStr = substr($recordData, $pos, $limitPos - $pos);
                $bytesRead = $limitPos - $pos;

                // remaining characters in Unicode string
                $charsLeft = $numChars - (($isCompressed) ? $bytesRead : ($bytesRead / 2));
                $pos = $limitPos;

                // keep reading the characters
                while ($charsLeft > 0) {
                    // look up next limit position, in case the string span more than one continue record
                    foreach ($spliceOffsets as $spliceOffset) {
                        if ($pos < $spliceOffset) {
                            $limitPos = $spliceOffset;
                            break;
                        }
                    }

                    // data exhausted before the string was complete
                    if (!isset($recordData[$pos])) {
                        break;
                    }

                    // repeated option flags
                    // OpenOffice.org documentation 5.21
                    $option = ord($recordData[$pos]);
                    ++$pos;

                    if ($isCompressed && ($option == 0)) {
                        // 1st fragment compressed
                        // this fragment compressed
                        $len = min($charsLeft, $limitPos - $pos);
                        $retStr .= substr($recordData, $pos, $len);
                        $charsLeft -= $len;
                        $isCompressed = true;
                    } elseif (!$isCompressed && ($option != 0)) {
                        // 1st fragment uncompressed
                        // this fragment uncompressed
                        $len = min($charsLeft * 2, $limitPos - $pos);
                        $retStr .= substr($recordData, $pos, $len);
                        $charsLeft -= $len / 2;
                        $isCompressed = false;
                    } elseif (!$isCompressed && ($option == 0)) {
                        // 1st fragment uncompressed
                        // this fragment compressed: widen each byte to two bytes by appending a zero byte
                        $len = min($charsLeft, $limitPos - $pos);
                        for ($j = 0; $j < $len; ++$j) {
                            if (!isset($recordData[$pos + $j])) {
                                break;
                            }

                            $retStr .= $recordData[$pos + $j] . chr(0);
                        }

                        $charsLeft -= $len;
                        $isCompressed = false;
                    } else {
                        // 1st fragment compressed
                        // this fragment uncompressed: widen what was collected so far, then append
                        $newStr = '';
                        $jMax = strlen($retStr);
                        for ($j = 0; $j < $jMax; ++$j) {
                            $newStr .= $retStr[$j] . chr(0);
                        }

                        $retStr = $newStr;
                        $len = min($charsLeft * 2, $limitPos - $pos);
                        $retStr .= substr($recordData, $pos, $len);
                        $charsLeft -= $len / 2;
                        $isCompressed = false;
                    }

                    $pos += $len;
                }
            }

            // convert to UTF-8
            $retStr = self::encodeUTF16($retStr, $isCompressed);

            // read additional Rich-Text information, if any
            // $fmtRuns = [];
            if ($hasRichText) {
                // list of formatting runs
                /*for ($j = 0; $j < $formattingRuns; ++$j) {
                    // first formatted character; zero-based
                    $charPos = Format::getUInt2d($recordData, $pos + $j * 4);

                    // index to font record
                    $fontIndex = Format::getUInt2d($recordData, $pos + 2 + $j * 4);
                    $fmtRuns[] = ['charPos' => $charPos, 'fontIndex' => $fontIndex];
                }*/

                // each formatting run occupies 4 bytes; the runs are skipped, not parsed
                $pos += 4 * $formattingRuns;
            }

            // read additional Asian phonetics information, if any
            if ($hasAsian) {
                // For Asian phonetic settings, we skip the extended string data
                $pos += $extendedRunLength;
            }

            // store the shared string
            $this->sst[] = ['value' => $retStr];
        }
    }

    /**
     * Read RK record
     *
     * This record represents a cell that contains an RK value (encoded integer or floating-point value).
If a floating-point value cannot be encoded to an RK value, a NUMBER record will be written. This record replaces
 * the record INTEGER written in BIFF2.
 */
private function readRk() {
    // record header: 2-byte identifier followed by the 2-byte payload length
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; index to row
    $row = Format::getUInt2d($recordData, 0);

    // offset: 2; size: 2; index to column
    $column = Format::getUInt2d($recordData, 2);

    // offset: 4; size: 2; index to XF record
    $xfIndex = Format::getUInt2d($recordData, 4);

    // offset: 6; size: 4; RK value
    $numValue = self::getIEEE754(Format::getInt4d($recordData, 6));

    // add cell
    $this->addCell($row, $column, $numValue, $xfIndex);
}

/**
 * Read LABELSST record
 *
 * This record represents a cell that contains a string. It replaces the LABEL record and RSTRING record used in
 * BIFF2-BIFF5.
 */
private function readLabelSst() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; index to row
    $row = Format::getUInt2d($recordData, 0);

    // offset: 2; size: 2; index to column
    $column = Format::getUInt2d($recordData, 2);

    // offset: 4; size: 2; index to XF record
    $xfIndex = Format::getUInt2d($recordData, 4);

    // offset: 6; size: 4; index to SST record
    $index = Format::getInt4d($recordData, 6);

    // readSst() stops early on truncated data, so the referenced entry may be missing; the cell then
    // receives null (the value the unguarded lookup produced) without raising undefined-offset warnings
    $value = isset($this->sst[$index]) ? $this->sst[$index]['value'] : null;

    $this->addCell($row, $column, $value, $xfIndex);
}

/**
 * Read MULRK record
 *
 * This record represents a cell range containing RK value cells. All cells are located in the same row.
*/
private function readMulRk() {
    // record header: the payload length sits 2 bytes after the current stream position
    $payloadSize = Format::getUInt2d($this->data, $this->pos + 2);
    $payload = substr($this->data, $this->pos + 4, $payloadSize);

    // advance the stream pointer past this record
    $this->pos += 4 + $payloadSize;

    // offset: 0; size: 2; index to row
    $row = Format::getUInt2d($payload, 0);

    // offset: 2; size: 2; index to first column
    $firstColumn = Format::getUInt2d($payload, 2);

    // last 2 bytes of the record; index to last column
    $lastColumn = Format::getUInt2d($payload, $payloadSize - 2);

    // the cells follow from offset 4 as 6-byte entries: 2-byte XF index + 4-byte RK value
    for ($column = $firstColumn, $cursor = 4; $column <= $lastColumn; ++$column, $cursor += 6) {
        $xfIndex = Format::getUInt2d($payload, $cursor);
        $number = self::getIEEE754(Format::getInt4d($payload, $cursor + 2));

        $this->addCell($row, $column, $number, $xfIndex);
    }
}

/**
 * Read NUMBER record
 *
 * This record represents a cell that contains a floating-point value.
*/
private function readNumber() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; index to row
    $row = Format::getUInt2d($recordData, 0);

    // offset: 2; size: 2; index to column
    $column = Format::getUInt2d($recordData, 2);

    // offset: 4; size: 2; index to XF record
    $xfIndex = Format::getUInt2d($recordData, 4);

    // offset: 6; size: 8; the number itself
    $numValue = self::extractNumber(substr($recordData, 6, 8));

    $this->addCell($row, $column, $numValue, $xfIndex);
}

/**
 * Read FORMULA record + perhaps a following STRING record if formula result is a string
 * This record contains the token array and the result of a formula cell.
 */
private function readFormula() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; row index
    $row = Format::getUInt2d($recordData, 0);

    // offset: 2; size: 2; col index
    $column = Format::getUInt2d($recordData, 2);

    // offset: 4; size: 2; index to XF record
    $xfIndex = Format::getUInt2d($recordData, 4);

    // offset: 6; size: 8; result of the formula
    // A non-numeric result is marked by 0xFFFF in the last two bytes of the result field; the first byte
    // then tells which kind of result it is. Square-bracket offsets are used because the curly-brace
    // form ($str{n}) no longer parses on PHP 8.
    $resultType = ord($recordData[6]);
    $isSpecialResult = (ord($recordData[12]) == 255) && (ord($recordData[13]) == 255);

    if ($isSpecialResult && ($resultType == 0)) {
        // string result: stored in the STRING record that follows (this advances the stream pointer)
        $value = $this->readString();
    } elseif ($isSpecialResult && ($resultType == 1)) {
        // Boolean formula. Result is in +2; 0=false, 1=true
        $value = (bool) ord($recordData[8]);
    } elseif ($isSpecialResult && ($resultType == 2)) {
        // Error formula. Error code is in +2
        $value = self::mapErrorCode(ord($recordData[8]));
    } elseif ($isSpecialResult && ($resultType == 3)) {
        // Formula result is a null string
        $value = '';
    } else {
        // formula result is a number, decoded like the value of a NUMBER record
        $value = self::extractNumber(substr($recordData, 6, 8));
    }

    $this->addCell($row, $column, $value, $xfIndex);
}

/**
 * Read a STRING record from current stream position and advance the stream pointer to next record.
 * This record is used for storing result from FORMULA record when it is a string, and it occurs
 * directly after the FORMULA record
 *
 * @return string The string contents as UTF-8
 */
private function readString() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // BIFF8 stores a Unicode string, other versions a code-page byte string
    $string = ($this->version == self::XLS_BIFF8)
        ? self::readUnicodeStringLong($recordData)
        : $this->readByteStringLong($recordData);

    return $string['value'];
}

/**
 * Read BOOLERR record
 *
 * This record represents a Boolean value or error value cell.
*/
private function readBoolErr() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; row index
    $row = Format::getUInt2d($recordData, 0);

    // offset: 2; size: 2; column index
    $column = Format::getUInt2d($recordData, 2);

    // offset: 4; size: 2; index to XF record
    $xfIndex = Format::getUInt2d($recordData, 4);

    // offset: 6; size: 1; the boolean value or error value
    // (square-bracket offsets: the curly-brace form $str{n} no longer parses on PHP 8)
    $boolError = ord($recordData[6]);

    // offset: 7; size: 1; 0=boolean; 1=error
    $isError = ord($recordData[7]);

    if ($isError === 0) {
        // boolean
        $value = (bool) $boolError;
    } elseif ($isError === 1) {
        // error type
        $value = self::mapErrorCode($boolError);
    } else {
        // any other type marker: no cell is added
        return;
    }

    // add cell value
    $this->addCell($row, $column, $value, $xfIndex);
}

/**
 * Read BLANK record
 */
private function readBlank() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; row index
    $row = Format::getUInt2d($recordData, 0);

    // offset: 2; size: 2; col index
    $column = Format::getUInt2d($recordData, 2);

    // offset: 4; size: 2; XF index
    $xfIndex = Format::getUInt2d($recordData, 4);

    // a blank cell is stored with an empty string as its value
    $this->addCell($row, $column, '', $xfIndex);
}

/**
 * Read LABEL record
 *
 * This record represents a cell that contains a string.
In BIFF8 it is usually replaced by the LABELSST record.
 * Excel still uses this record, if it copies unformatted text cells to the clipboard.
 */
private function readLabel() {
    // record header: the payload length sits 2 bytes after the current stream position
    $payloadSize = Format::getUInt2d($this->data, $this->pos + 2);
    $payload = substr($this->data, $this->pos + 4, $payloadSize);

    // advance the stream pointer past this record
    $this->pos += 4 + $payloadSize;

    // offset: 0; size: 2; index to row
    $row = Format::getUInt2d($payload, 0);

    // offset: 2; size: 2; index to column
    $column = Format::getUInt2d($payload, 2);

    // offset: 4; size: 2; XF index
    $xfIndex = Format::getUInt2d($payload, 4);

    // offset: 6; the string itself: Unicode for BIFF8, a code-page byte string otherwise
    $stringData = substr($payload, 6);
    if ($this->version == self::XLS_BIFF8) {
        $decoded = self::readUnicodeStringLong($stringData);
    } else {
        $decoded = $this->readByteStringLong($stringData);
    }

    // add cell value
    $this->addCell($row, $column, $decoded['value'], $xfIndex);
}

/**
 * Map error code, e.g.
'#N/A'
 *
 * Looks the code up in self::$errorCode.
 *
 * @param int $code
 * @return string|false The mapped error string, or false when the code is not in self::$errorCode
 */
private static function mapErrorCode($code) {
    if (isset(self::$errorCode[$code])) {
        return self::$errorCode[$code];
    }

    return false;
}

/**
 * Convert a value in a pre-defined format to a PHP string
 *
 * Non-numeric values and the 'General' / 'Text' formats are returned untouched. Otherwise the section of
 * the format code that applies to the value is selected and the value is rendered as a date, a percentage,
 * a fraction or a plain number, with currency placeholders substituted at the end.
 *
 * @param mixed $value Value to format
 * @param string $format Format code
 * @return string
 */
private static function toFormattedString($value = '0', $format = Format::FORMAT_GENERAL) {
    // For now we do not treat strings although section 4 of a format code affects strings
    if (!is_numeric($value)) {
        return $value;
    }

    // For 'General' format code, we just pass the value although this is not entirely the way Excel does it,
    // it seems to round numbers to a total of 10 digits.
    if (($format === Format::FORMAT_GENERAL) || ($format === Format::FORMAT_TEXT)) {
        return $value;
    }

    // Convert any other escaped characters to quoted strings, e.g. (\T to "T")
    $format = preg_replace('/(\\\(.))(?=(?:[^"]|"[^"]*")*$)/u', '"${2}"', $format);

    // Get the sections, there can be up to four sections, separated with a semi-colon (but only if not a quoted literal)
    $sections = preg_split('/(;)(?=(?:[^"]|"[^"]*")*$)/u', $format);

    // Extract the relevant section depending on whether number is positive, negative, or zero?
    // Text not supported yet.
    // Here is how the sections apply to various values in Excel:
    // 1 section: [POSITIVE/NEGATIVE/ZERO/TEXT]
    // 2 sections: [POSITIVE/ZERO/TEXT] [NEGATIVE]
    // 3 sections: [POSITIVE/TEXT] [NEGATIVE] [ZERO]
    // 4 sections: [POSITIVE] [NEGATIVE] [ZERO] [TEXT]
    switch (count($sections)) {
        case 1:
            $format = $sections[0];
            break;

        case 2:
            $format = ($value >= 0) ? $sections[0] : $sections[1];
            $value = abs($value); // Use the absolute value
            break;

        case 3:
            $format = ($value > 0) ? $sections[0] : ( ($value < 0) ? $sections[1] : $sections[2]);
            $value = abs($value); // Use the absolute value
            break;

        case 4:
            // same selection as for 3 sections; the 4th (text) section is never used for numbers
            $format = ($value > 0) ? $sections[0] : ( ($value < 0) ? $sections[1] : $sections[2]);
            $value = abs($value); // Use the absolute value
            break;

        default:
            // something is wrong, just use first section
            $format = $sections[0];
            break;
    }

    // In Excel formats, "_" is used to add spacing,
    // The following character indicates the size of the spacing, which we can't do in HTML, so we just use a standard space
    $format = preg_replace('/_./', ' ', $format);

    // Strip color information (a leading [Name] block)
    $colorRegex = '/^\\[[a-zA-Z]+\\]/';
    $format = preg_replace($colorRegex, '', $format);

    // Let's begin inspecting the format and converting the value to a formatted string
    // Check for date/time characters (not inside quotes)
    if (preg_match('/(\[\$[A-Z]*-[0-9A-F]*\])*[hmsdy](?=(?:[^"]|"[^"]*")*$)/miu', $format, $matches)) {
        // datetime format ($value is rewritten by reference)
        self::formatAsDate($value, $format);
    } elseif (preg_match('/%$/', $format)) {
        // % number format ($value is rewritten by reference)
        self::formatAsPercentage($value, $format);
    } else {
        if ($format === Format::FORMAT_CURRENCY_EUR_SIMPLE) {
            $value = 'EUR ' . sprintf('%1.2f', $value);
        } else {
            // Some non-number strings are quoted, so we'll get rid of the quotes, likewise any positional * symbols
            $format = str_replace(['"', '*'], '', $format);

            // Find out if we need thousands separator
            // This is indicated by a comma enclosed by a digit placeholder:
            // #,# or 0,0
            $useThousands = preg_match('/(#,#|0,0)/', $format);
            if ($useThousands) {
                $format = preg_replace('/0,0/', '00', $format);
                $format = preg_replace('/#,#/', '##', $format);
            }

            // Scale thousands, millions,...
            // This is indicated by a number of commas after a digit placeholder:
            // #, or 0.0,,
            $scale = 1; // same as no scale
            $matches = [];
            if (preg_match('/(#|0)(,+)/', $format, $matches)) {
                $scale = pow(1000, strlen($matches[2]));

                // strip the commas
                $format = preg_replace('/0,+/', '0', $format);
                $format = preg_replace('/#,+/', '#', $format);
            }

            if (preg_match('/#?.*\?\/\?/', $format, $m)) {
                // format mask is fractional; whole numbers are left as they are
                if ($value != (int)$value) {
                    self::formatAsFraction($value, $format);
                }
            } else {
                // Handle the number itself
                // scale number
                $value = $value / $scale;

                // Strip #
                $format = preg_replace('/\\#/', '0', $format);
                // $m is the format without any [...] blocks; it is only used to find the digit mask
                $n = "/\[[^\]]+\]/";
                $m = preg_replace($n, '', $format);
                $numberRegex = "/(0+)(\.?)(0*)/";
                if (preg_match($numberRegex, $m, $matches)) {
                    $left = $matches[1];
                    $dec = $matches[2];
                    $right = $matches[3];

                    // minimum width of formatted number (including dot)
                    $minWidth = strlen($left) + strlen($dec) + strlen($right);
                    if ($useThousands) {
                        $value = number_format(
                            $value,
                            strlen($right),
                            self::getDecimalSeparator(),
                            self::getThousandsSeparator()
                        );

                        // put the rendered number in place of the digit mask
                        $value = preg_replace($numberRegex, $value, $format);
                    } else {
                        if (preg_match('/[0#]E[+-]0/i', $format)) {
                            // Scientific format
                            $value = sprintf('%5.2E', $value);
                        } elseif (preg_match('/0([^\d\.]+)0/', $format)) {
                            // digit blocks separated by other characters
                            $value = self::complexNumberFormatMask($value, $format);
                        } else {
                            // zero-padded fixed-point number, then put it in place of the digit mask
                            $sprintfPattern = "%0$minWidth." . strlen($right) . "f";
                            $value = sprintf($sprintfPattern, $value);
                            $value = preg_replace($numberRegex, $value, $format);
                        }
                    }
                }
            }

            if (preg_match('/\[\$(.*)\]/u', $format, $m)) {
                // Currency or Accounting
                // keep only the part before the first '-' of the [$...] block
                $currencyCode = $m[1];
                list($currencyCode) = explode('-', $currencyCode);

                if ($currencyCode == '') {
                    $currencyCode = self::getCurrencyCode();
                }

                $value = preg_replace('/\[\$([^\]]*)\]/u', $currencyCode, $value);
            }
        }
    }

    return $value;
}

/**
 * Reads a record from current position in data stream and continues reading data as long as CONTINUE records
 * are found. Splices the record data pieces and returns the combined string as if record data is in one piece.
 * Moves to next current position in data stream to start of next record different from a CONTINUE record
 *
 * @return array ['recordData' => spliced payload, 'spliceOffsets' => cumulative payload lengths, starting with 0]
 */
private function getSplicedRecordData() {
    $i = 0;
    $data = '';
    $spliceOffsets = [0];

    do {
        ++$i;
        // offset: 0; size: 2; identifier (not needed for the current record)

        // offset: 2; size: 2; length
        $length = Format::getUInt2d($this->data, $this->pos + 2);
        $data .= substr($this->data, $this->pos + 4, $length);
        // end position of this piece within the spliced data
        $spliceOffsets[$i] = $spliceOffsets[$i - 1] + $length;

        $this->pos += 4 + $length;
        // peek at the identifier of the record that follows
        $nextIdentifier = Format::getUInt2d($this->data, $this->pos);
    } while ($nextIdentifier == self::XLS_TYPE_CONTINUE);

    return ['recordData' => $data, 'spliceOffsets' => $spliceOffsets];
}

/**
 * Get the decimal separator. If it has not yet been set explicitly, try to obtain number formatting
 * information from locale.
*
 * @return string
 */
private static function getDecimalSeparator() {
    if (!isset(self::$decimalSeparator)) {
        $localeconv = localeconv();

        // prefer the numeric setting, fall back to the monetary one
        self::$decimalSeparator = ($localeconv['decimal_point'] != '') ? $localeconv['decimal_point']
            : $localeconv['mon_decimal_point'];

        if (self::$decimalSeparator == '') {
            // Default to .
            self::$decimalSeparator = '.';
        }
    }

    return self::$decimalSeparator;
}

/**
 * Get the thousands separator. If it has not yet been set explicitly, try to obtain number formatting
 * information from locale.
 *
 * @return string
 */
private static function getThousandsSeparator() {
    if (!isset(self::$thousandsSeparator)) {
        $localeconv = localeconv();

        // prefer the numeric setting, fall back to the monetary one
        self::$thousandsSeparator = ($localeconv['thousands_sep'] != '') ? $localeconv['thousands_sep']
            : $localeconv['mon_thousands_sep'];

        if (self::$thousandsSeparator == '') {
            // Default to ,
            self::$thousandsSeparator = ',';
        }
    }

    return self::$thousandsSeparator;
}

/**
 * Get the currency code. If it has not yet been set explicitly, try to obtain the symbol information from locale.
 *
 * @return string
 */
private static function getCurrencyCode() {
    if (!isset(self::$currencyCode)) {
        $localeconv = localeconv();

        // prefer the local symbol, fall back to the international one
        self::$currencyCode = ($localeconv['currency_symbol'] != '') ? $localeconv['currency_symbol']
            : $localeconv['int_curr_symbol'];

        if (self::$currencyCode == '') {
            // Default to $
            self::$currencyCode = '$';
        }
    }

    return self::$currencyCode;
}

/**
 * Format a number with a mask whose zero blocks are separated by other characters.
 *
 * A mask containing '.' is split there: the integer part is formatted with the left half, the decimals
 * (reversed, so that they fill from the dot outwards) with the right half.
 *
 * @param mixed $number Number to format
 * @param string $mask Format mask
 * @return string
 */
private static function complexNumberFormatMask($number, $mask) {
    $sign = ($number < 0.0);
    $number = abs($number);

    if (strpos($mask, '.') !== false) {
        // the appended '.0' guarantees that both halves exist
        $numbers = explode('.', $number . '.0');
        $masks = explode('.', $mask . '.0');
        $result1 = self::complexNumberFormatMask($numbers[0], $masks[0]);
        $result2 = strrev(self::complexNumberFormatMask(strrev($numbers[1]), strrev($masks[1])));

        return (($sign) ? '-' : '') . $result1 . '.' . $result2;
    }

    $r = preg_match_all('/0+/', $mask, $result, PREG_OFFSET_CAPTURE);
    if ($r > 1) {
        // fill the zero blocks from right to left with the matching low-order digits
        $result = array_reverse($result[0]);

        $offset = 0;
        foreach ($result as $block) {
            // '1' followed by the block's zeros, e.g. '000' gives 1000
            $divisor = 1 . $block[0];
            $size = strlen($block[0]);
            $offset = $block[1];
            $blockValue = sprintf('%0' . $size . 'd', fmod($number, $divisor));

            $number = floor($number / $divisor);
            $mask = substr_replace($mask, $blockValue, $offset, $size);
        }

        if ($number > 0) {
            // digits that did not fit are inserted in front of the leftmost block
            $mask = substr_replace($mask, $number, $offset, 0);
        }

        $result = $mask;
    } else {
        $result = $number;
    }

    return (($sign) ? '-' : '') . $result;
}

/**
 * Convert Microsoft Code Page Identifier to Code Page Name which iconv and mbstring understands
 *
 * @param int $codePage Microsoft Code Page Identifier
 *
 * @throws ParserException When the code page is unsupported (720, 32769) or unknown
 * @return string Code Page Name
 */
private static function NumberToName($codePage = 1252) {
    static $names = [
        367 => 'ASCII',                 // ASCII
        437 => 'CP437',                 // OEM US
        737 => 'CP737',                 // OEM Greek
        775 => 'CP775',                 // OEM Baltic
        850 => 'CP850',                 // OEM Latin I
        852 => 'CP852',                 // OEM Latin II (Central European)
        855 => 'CP855',                 // OEM Cyrillic
        857 => 'CP857',                 // OEM Turkish
        858 => 'CP858',                 // OEM Multilingual Latin I with Euro
        860 => 'CP860',                 // OEM Portuguese
        861 => 'CP861',                 // OEM Icelandic
        862 => 'CP862',                 // OEM Hebrew
        863 => 'CP863',                 // OEM Canadian (French)
        864 => 'CP864',                 // OEM Arabic
        865 => 'CP865',                 // OEM Nordic
        866 => 'CP866',                 // OEM Cyrillic (Russian)
        869 => 'CP869',                 // OEM Greek (Modern)
        874 => 'CP874',                 // ANSI Thai
        932 => 'CP932',                 // ANSI Japanese Shift-JIS
        936 => 'CP936',                 // ANSI Chinese Simplified GBK
        949 => 'CP949',                 // ANSI Korean (Wansung)
        950 => 'CP950',                 // ANSI Chinese Traditional BIG5
        1200 => 'UTF-16LE',             // UTF-16 (BIFF8)
        1250 => 'CP1250',               // ANSI Latin II (Central European)
        1251 => 'CP1251',               // ANSI Cyrillic
        0 => 'CP1252',                  // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
        1252 => 'CP1252',               // ANSI Latin I (BIFF4-BIFF7)
        1253 => 'CP1253',               // ANSI Greek
        1254 => 'CP1254',               // ANSI Turkish
        1255 => 'CP1255',               // ANSI Hebrew
        1256 => 'CP1256',               // ANSI Arabic
        1257 => 'CP1257',               // ANSI Baltic
        1258 => 'CP1258',               // ANSI Vietnamese
        1361 => 'CP1361',               // ANSI Korean (Johab)
        10000 => 'MAC',                 // Apple Roman
        10001 => 'CP932',               // Macintosh Japanese
        10002 => 'CP950',               // Macintosh Chinese Traditional
        10003 => 'CP1361',              // Macintosh Korean
        10004 => 'MACARABIC',           // Apple Arabic
        10005 => 'MACHEBREW',           // Apple Hebrew
        10006 => 'MACGREEK',            // Macintosh Greek
        10007 => 'MACCYRILLIC',         // Macintosh Cyrillic
        10008 => 'CP936',               // Macintosh - Simplified Chinese (GB 2312)
        10010 => 'MACROMANIA',          // Macintosh Romania
        10017 => 'MACUKRAINE',          // Macintosh Ukraine
        10021 => 'MACTHAI',             // Macintosh Thai
        10029 => 'MACCENTRALEUROPE',    // Macintosh Central Europe
        10079 => 'MACICELAND',          // Macintosh Icelandic
        10081 => 'MACTURKISH',          // Macintosh Turkish
        10082 => 'MACCROATIAN',         // Macintosh Croatian
        21010 => 'UTF-16LE',            // This isn't correct, but some Excel writer libraries erroneously
                                        // use Codepage 21010 for UTF-16LE
        32768 => 'MAC',                 // Apple Roman
        65000 => 'UTF-7',               // Unicode (UTF-7)
        65001 => 'UTF-8',               // Unicode (UTF-8)
    ];

    if (isset($names[$codePage])) {
        return $names[$codePage];
    }

    // known code pages without a usable converter name
    if ($codePage == 720) {
        throw new ParserException('Code page 720 not supported.', 5); //OEM Arabic
    }

    if ($codePage == 32769) {
        throw new ParserException('Code page 32769 not supported.', 6); //ANSI Latin I (BIFF2-BIFF3)
    }

    throw new ParserException("Unknown codepage: $codePage", 7);
}

/**
 * Read byte string (8-bit string length). OpenOffice documentation: 2.5.2
 *
 * @param string $subData
 *
 * @return array ['value' => decoded string, 'size' => size in bytes of the data structure]
 */
private function readByteStringShort($subData) {
    // offset: 0; size: 1; length of the string (character count)
    $ln = ord($subData[0]);

    // offset: 1: size: var; character array (8-bit characters)
    $value = $this->decodeCodepage(substr($subData, 1, $ln));

    // size in bytes of data structure
    return ['value' => $value, 'size' => 1 + $ln];
}

/**
 * Read byte string (16-bit string length). OpenOffice documentation: 2.5.2
 *
 * @param string $subData
 * @return array ['value' => decoded string, 'size' => size in bytes of the data structure]
 */
private function readByteStringLong($subData) {
    // offset: 0; size: 2; length of the string (character count)
    $ln = Format::getUInt2d($subData, 0);

    // offset: 2: size: var; character array (8-bit characters)
    // NOTE(review): everything after the length field is decoded, $ln is not applied here (unlike
    // readByteStringShort) - confirm that callers never pass trailing data
    $value = $this->decodeCodepage(substr($subData, 2));

    // size in bytes of data structure
    return ['value' => $value, 'size' => 2 + $ln];
}

/**
 * Render an Excel date/time value with an Excel date format code.
 *
 * The format code is translated block by block (outside of quoted literals) with the replacement tables
 * of Format; the result is written back to $value. $format is rewritten as well.
 *
 * @param mixed $value Excel date/time value; receives the formatted string
 * @param string $format Excel format code; receives the translated format
 */
private static function formatAsDate(&$value, &$format) {
    // strip off a leading [$...-...] block, e.g. [$-F800] or [$USD-409]
    // (letters, a dash, then language info in hexadecimal)
    $format = preg_replace('/^(\[\$[A-Z]*-[0-9A-F]*\])/i', '', $format);

    // OpenOffice.org uses upper-case number formats, e.g. 'YYYY', convert to lower-case;
    // but we don't want to change any quoted strings
    // ([__CLASS__, ...] instead of ['self', ...]: the latter callable form is deprecated as of PHP 8.2)
    $format = preg_replace_callback('/(?:^|")([^"]*)(?:$|")/', [__CLASS__, 'setLowercaseCallback'], $format);

    // Only process the non-quoted blocks for date format characters
    $blocks = explode('"', $format);

    foreach ($blocks as $key => &$block) {
        // even keys are the parts outside of quotes
        if ($key % 2 == 0) {
            $block = strtr($block, Format::$dateFormatReplacements);
            if (strpos($block, 'A') === false) {
                // 24-hour time format
                $block = strtr($block, Format::$dateFormatReplacements24);
            } else {
                // 12-hour time format
                $block = strtr($block, Format::$dateFormatReplacements12);
            }
        }
    }
    // break the reference so that $block can never alias the last array element again
    unset($block);

    $format = implode('"', $blocks);

    // escape any quoted characters so that DateTime format() will render them correctly
    $format = preg_replace_callback('/"(.*)"/U', [__CLASS__, 'escapeQuotesCallback'], $format);
    $dateObj = self::ExcelToPHPObject($value);

    $value = $dateObj->format($format);
}

/**
 * preg_replace_callback() helper: lower-cases the whole match.
 *
 * @param array $matches
 * @return string
 */
private static function setLowercaseCallback($matches) {
    return mb_strtolower($matches[0]);
}

/**
 * preg_replace_callback() helper: puts a backslash in front of every character of the first group.
 *
 * @param array $matches
 * @return string
 */
private static function escapeQuotesCallback($matches) {
    return '\\' . implode('\\', str_split($matches[1]));
}

/**
 * Convert a date from Excel to a PHP Date/Time object
 *
 * @param int $dateValue Excel date/time value
 *
 * @return \DateTime PHP date/time object
 */
private static function ExcelToPHPObject($dateValue = 0) {
    $dateTime = self::ExcelToPHP($dateValue);

    // split the timestamp into whole days and the seconds within the day
    $days = floor($dateTime / 86400);
    $time = round((($dateTime / 86400) - $days) * 86400);

    // $hours is rounded, so $minutes / $seconds can come out negative; the three parts still add up to
    // $time and are handed to setTime() as they are
    $hours = round($time / 3600);
    $minutes = round($time / 60) - ($hours * 60);
    $seconds = round($time) - ($hours * 3600) - ($minutes * 60);

    $dateObj = new \DateTime("1-Jan-1970+$days days");
    $dateObj->setTime($hours, $minutes, $seconds);

    return $dateObj;
}

/**
 * Convert a date from Excel to PHP
 *
 * @param int $dateValue Excel date/time value
 *
 * @return int PHP serialized date/time
 */
private static function ExcelToPHP($dateValue = 0) {
    // Excel serial number of 1-Jan-1970 for the calendar in use
    if (self::$excelBaseDate == Format::CALENDAR_WINDOWS_1900) {
        $excelBaseDate = 25569;

        //Adjust for the spurious 29-Feb-1900 (Day 60)
        if ($dateValue < 60) {
            --$excelBaseDate;
        }
    } else {
        $excelBaseDate = 24107;
    }

    // Perform conversion
    if ($dateValue >= 1) {
        $utcDays = $dateValue - $excelBaseDate;
        $returnValue = round($utcDays * 86400);

        // only values inside the integer range are cast, anything else stays a float
        if (($returnValue <= PHP_INT_MAX) && ($returnValue >= -PHP_INT_MAX)) {
            $returnValue = (int) $returnValue;
        }
    } else {
        // values below 1 carry a time only
        $hours = round($dateValue * 24);
        $mins = round($dateValue * 1440) - round($hours * 60);
        $secs = round($dateValue * 86400) - round($hours * 3600) - round($mins * 60);

        $returnValue = (int) gmmktime($hours, $mins, $secs);
    }

    return $returnValue;
}

/**
 * Render a value with a percentage format code; the result is written back to $value.
 *
 * @param mixed $value Number to format; receives the formatted string
 * @param string $format Format code; rewritten into a sprintf() pattern unless it is Format::FORMAT_PERCENTAGE
 */
private static function formatAsPercentage(&$value, &$format) {
    if ($format === Format::FORMAT_PERCENTAGE) {
        $value = round((100 * $value), 0) . '%';
    } else {
        // turn the decimals mask (e.g. '.00') into a sprintf precision (e.g. '.2')
        if (preg_match('/\.[#0]+/i', $format, $m)) {
            $s = substr($m[0], 0, 1) . (strlen($m[0]) - 1);
            $format = str_replace($m[0], $s, $format);
        }

        // turn the leading digit mask into a sprintf width
        if (preg_match('/^[#0]+/', $format, $m)) {
            $format = str_replace($m[0], strlen($m[0]), $format);
        }

        $format = '%' . str_replace('%', 'f%%', $format);
        $value = sprintf($format, 100 * $value);
    }
}

/**
 * Render a value as a fraction; the result is written back to $value.
 *
 * The decimals are read as numerator over a power of ten and reduced with GCD(). Formats containing
 * '0' or '#', or starting with '? ?', keep the integer part separate ("1 1/2"); all others fold it into
 * the numerator ("3/2").
 *
 * @param mixed $value Number to format; receives the formatted string
 * @param string $format Format code
 */
private static function formatAsFraction(&$value, &$format) {
    $sign = ($value < 0) ? '-' : '';
    $integerPart = floor(abs($value));

    // Digits after the decimal point with trailing zeros removed. Leading zeros are kept: they do not
    // change the numerator but they do count towards the power of ten used as divisor (0.05 is 5/100,
    // not 5/10).
    $fractionText = (string) fmod(abs($value), 1);
    $dotPosition = strpos($fractionText, '.');
    $decimalDigits = ($dotPosition === false) ? '' : rtrim(substr($fractionText, $dotPosition + 1), '0');

    $decimalLength = strlen($decimalDigits);
    $decimalDivisor = pow(10, $decimalLength);
    $decimalPart = ($decimalDigits === '') ? 0 : $decimalDigits + 0;

    $GCD = self::GCD([$decimalPart, $decimalDivisor]);
    $adjustedDecimalPart = $decimalPart/$GCD;
    $adjustedDecimalDivisor = $decimalDivisor/$GCD;

    if ((strpos($format, '0') !== false) || (strpos($format, '#') !== false) || (substr($format, 0, 3) == '? ?')) {
        // mixed fraction; a zero integer part is left out
        if ($integerPart == 0) {
            $integerPart = '';
        }

        $value = "$sign$integerPart $adjustedDecimalPart/$adjustedDecimalDivisor";
    } else {
        // improper fraction
        $adjustedDecimalPart += $integerPart * $adjustedDecimalDivisor;
        $value = "$sign$adjustedDecimalPart/$adjustedDecimalDivisor";
    }
}

/**
 * GCD
 *
 * Returns the greatest common divisor of a series of numbers. The greatest common divisor is the largest
 * integer that divides both number1 and number2 without a remainder.
2105 | * Excel Function: 2106 | * GCD(number1[,number2[, ...]]) 2107 | * 2108 | * @param array $params 2109 | * 2110 | * @return integer Greatest Common Divisor 2111 | */ 2112 | private static function GCD($params) { 2113 | $returnValue = 1; 2114 | $allValuesFactors = []; 2115 | 2116 | // Loop through arguments 2117 | $flattenArr = self::flattenArray($params); 2118 | foreach ($flattenArr as $value) { 2119 | if (!is_numeric($value)) { 2120 | return '#VALUE!'; 2121 | } elseif ($value == 0) { 2122 | continue; 2123 | } elseif ($value < 0) { 2124 | return '#NULL!'; 2125 | } 2126 | 2127 | $factors = self::factors($value); 2128 | $countedFactors = array_count_values($factors); 2129 | $allValuesFactors[] = $countedFactors; 2130 | } 2131 | 2132 | $allValuesCount = count($allValuesFactors); 2133 | if ($allValuesCount == 0) { 2134 | return 0; 2135 | } 2136 | 2137 | $mergedArray = $allValuesFactors[0]; 2138 | for ($i=1; $i < $allValuesCount; ++$i) { 2139 | $mergedArray = array_intersect_key($mergedArray, $allValuesFactors[$i]); 2140 | } 2141 | 2142 | $mergedArrayValues = count($mergedArray); 2143 | 2144 | if ($mergedArrayValues == 0) { 2145 | return $returnValue; 2146 | } elseif ($mergedArrayValues > 1) { 2147 | foreach ($mergedArray as $mergedKey => $mergedValue) { 2148 | foreach ($allValuesFactors as $highestPowerTest) { 2149 | foreach ($highestPowerTest as $testKey => $testValue) { 2150 | if (($testKey == $mergedKey) && ($testValue < $mergedValue)) { 2151 | $mergedArray[$mergedKey] = $testValue; 2152 | $mergedValue = $testValue; 2153 | } 2154 | } 2155 | } 2156 | } 2157 | 2158 | $returnValue = 1; 2159 | foreach ($mergedArray as $key => $value) { 2160 | $returnValue *= pow($key, $value); 2161 | } 2162 | 2163 | return $returnValue; 2164 | } else { 2165 | $keys = array_keys($mergedArray); 2166 | $key = $keys[0]; 2167 | $value = $mergedArray[$key]; 2168 | 2169 | foreach ($allValuesFactors as $testValue) { 2170 | foreach ($testValue as $mergedKey => $mergedValue) { 2171 | if 
(($mergedKey == $key) && ($mergedValue < $value)) { 2172 | $value = $mergedValue; 2173 | } 2174 | } 2175 | } 2176 | 2177 | return pow($key, $value); 2178 | } 2179 | } 2180 | 2181 | /** 2182 | * Convert a multi-dimensional array to a simple 1-dimensional array 2183 | * 2184 | * @param array $array Array to be flattened 2185 | * 2186 | * @return array Flattened array 2187 | */ 2188 | private static function flattenArray($array) { 2189 | if (!is_array($array)) { 2190 | return (array) $array; 2191 | } 2192 | 2193 | $arrayValues = []; 2194 | foreach ($array as $value) { 2195 | if (is_array($value)) { 2196 | foreach ($value as $val) { 2197 | if (is_array($val)) { 2198 | foreach ($val as $v) { 2199 | $arrayValues[] = $v; 2200 | } 2201 | } else { 2202 | $arrayValues[] = $val; 2203 | } 2204 | } 2205 | } else { 2206 | $arrayValues[] = $value; 2207 | } 2208 | } 2209 | 2210 | return $arrayValues; 2211 | } 2212 | 2213 | /** 2214 | * Return an array of the factors of the input value 2215 | * 2216 | * @param int $value 2217 | * 2218 | * @return array 2219 | */ 2220 | private static function factors($value) { 2221 | $startVal = floor(sqrt($value)); 2222 | $factorArray = []; 2223 | 2224 | for ($i = $startVal; $i > 1; --$i) { 2225 | if (($value % $i) == 0) { 2226 | $factorArray = array_merge($factorArray, self::factors($value / $i)); 2227 | $factorArray = array_merge($factorArray, self::factors($i)); 2228 | 2229 | if ($i <= sqrt($value)) { 2230 | break; 2231 | } 2232 | } 2233 | } 2234 | 2235 | if (!empty($factorArray)) { 2236 | rsort($factorArray); 2237 | 2238 | return $factorArray; 2239 | } 2240 | 2241 | return [(int) $value]; 2242 | } 2243 | 2244 | /** 2245 | * Read Unicode string with no string length field, but with known character count this function is under 2246 | * construction, needs to support rich text, and Asian phonetic settings 2247 | * 2248 | * @param string $subData 2249 | * @param int $characterCount 2250 | * 2251 | * @return array 2252 | */ 2253 | private static 
function readUnicodeString($subData, $characterCount) { 2254 | // offset: 0: size: 1; option flags 2255 | // bit: 0; mask: 0x01; character compression (0 = compressed 8-bit, 1 = uncompressed 16-bit) 2256 | $isCompressed = !((0x01 & ord($subData[0])) >> 0); 2257 | 2258 | // offset: 1: size: var; character array 2259 | // this offset assumes richtext and Asian phonetic settings are off which is generally wrong 2260 | // needs to be fixed 2261 | $value = self::encodeUTF16( 2262 | substr($subData, 1, $isCompressed ? $characterCount : 2 * $characterCount), $isCompressed 2263 | ); 2264 | 2265 | // the size in bytes including the option flags 2266 | return ['value' => $value, 'size' => $isCompressed ? 1 + $characterCount : 1 + 2 * $characterCount]; 2267 | } 2268 | 2269 | /** 2270 | * Extracts an Excel Unicode short string (8-bit string length), this function will automatically find out 2271 | * where the Unicode string ends. 2272 | * 2273 | * @param string $subData 2274 | * 2275 | * @return array 2276 | */ 2277 | private static function readUnicodeStringShort($subData) { 2278 | // offset: 0: size: 1; length of the string (character count) 2279 | $characterCount = ord($subData[0]); 2280 | $string = self::readUnicodeString(substr($subData, 1), $characterCount); 2281 | 2282 | // add 1 for the string length 2283 | $string['size'] += 1; 2284 | 2285 | return $string; 2286 | } 2287 | 2288 | /** 2289 | * Extracts an Excel Unicode long string (16-bit string length), this function is under construction, 2290 | * needs to support rich text, and Asian phonetic settings 2291 | * 2292 | * @param string $subData 2293 | * 2294 | * @return array 2295 | */ 2296 | private static function readUnicodeStringLong($subData) { 2297 | // offset: 0: size: 2; length of the string (character count) 2298 | $characterCount = Format::getUInt2d($subData, 0); 2299 | $string = self::readUnicodeString(substr($subData, 2), $characterCount); 2300 | 2301 | // add 2 for the string length 2302 | $string['size'] 
+= 2; 2303 | 2304 | return $string; 2305 | } 2306 | 2307 | private static function getIEEE754($rkNum) { 2308 | if (($rkNum & 0x02) != 0) { 2309 | $value = $rkNum >> 2; 2310 | } else { 2311 | // changes by mmp, info on IEEE754 encoding from 2312 | // research.microsoft.com/~hollasch/cgindex/coding/ieeefloat.html 2313 | // The RK format calls for using only the most significant 30 bits of the 64 bit floating point value. 2314 | // The other 34 bits are assumed to be 0 so we use the upper 30 bits of $rknum as follows... 2315 | $sign = ($rkNum & 0x80000000) >> 31; 2316 | $exp = ($rkNum & 0x7ff00000) >> 20; 2317 | 2318 | $mantissa = (0x100000 | ($rkNum & 0x000ffffc)); 2319 | $value = $mantissa / pow(2, (20- ($exp - 1023))); 2320 | 2321 | if ($sign) { 2322 | $value = -1 * $value; 2323 | } 2324 | //end of changes by mmp 2325 | } 2326 | 2327 | if (($rkNum & 0x01) != 0) { 2328 | $value /= 100; 2329 | } 2330 | 2331 | return $value; 2332 | } 2333 | 2334 | /** 2335 | * Get UTF-8 string from (compressed or uncompressed) UTF-16 string 2336 | * 2337 | * @param string $string 2338 | * @param bool $compressed 2339 | * 2340 | * @return string 2341 | */ 2342 | private static function encodeUTF16($string, $compressed = false) { 2343 | if ($compressed) { 2344 | $string = self::uncompressByteString($string); 2345 | } 2346 | 2347 | return mb_convert_encoding($string, 'UTF-8', 'UTF-16LE'); 2348 | } 2349 | 2350 | /** 2351 | * Convert string to UTF-8. Only used for BIFF5. 2352 | * 2353 | * @param string $string 2354 | * 2355 | * @return string 2356 | */ 2357 | private function decodeCodepage($string) { 2358 | return mb_convert_encoding($string, 'UTF-8', $this->codePage); 2359 | } 2360 | 2361 | /** 2362 | * Convert UTF-16 string in compressed notation to uncompressed form. Only used for BIFF8. 
2363 | * 2364 | * @param string $string 2365 | * 2366 | * @return string 2367 | */ 2368 | private static function uncompressByteString($string) { 2369 | $uncompressedString = ''; 2370 | $strLen = strlen($string); 2371 | 2372 | for ($i = 0; $i < $strLen; ++$i) { 2373 | $uncompressedString .= $string[$i] . "\0"; 2374 | } 2375 | 2376 | return $uncompressedString; 2377 | } 2378 | 2379 | /** 2380 | * Reads first 8 bytes of a string and return IEEE 754 float 2381 | * 2382 | * @param string $data Binary string that is at least 8 bytes long 2383 | * 2384 | * @return float 2385 | */ 2386 | private static function extractNumber($data) { 2387 | $rkNumHigh = Format::getInt4d($data, 4); 2388 | $rkNumLow = Format::getInt4d($data, 0); 2389 | 2390 | $sign = ($rkNumHigh & 0x80000000) >> 31; 2391 | $exp = (($rkNumHigh & 0x7ff00000) >> 20) - 1023; 2392 | $mantissa = (0x100000 | ($rkNumHigh & 0x000fffff)); 2393 | 2394 | $mantissaLow1 = ($rkNumLow & 0x80000000) >> 31; 2395 | $mantissaLow2 = ($rkNumLow & 0x7fffffff); 2396 | $value = $mantissa / pow(2, (20 - $exp)); 2397 | 2398 | if ($mantissaLow1 != 0) { 2399 | $value += 1 / pow(2, (21 - $exp)); 2400 | } 2401 | 2402 | $value += $mantissaLow2 / pow(2, (52 - $exp)); 2403 | 2404 | if ($sign) { 2405 | $value *= -1; 2406 | } 2407 | 2408 | return $value; 2409 | } 2410 | } 2411 | -------------------------------------------------------------------------------- /src/Parser/Excel5/OLERead.php: -------------------------------------------------------------------------------- 1 | openFile($file); 70 | 71 | // Total number of sectors used for the SAT 72 | $this->numBigBlockDepotBlocks = Format::getInt4d($this->data, self::NUM_BIG_BLOCK_DEPOT_BLOCKS_POS); 73 | 74 | // SecID of the first sector of the directory stream 75 | $this->rootStartBlock = Format::getInt4d($this->data, self::ROOT_START_BLOCK_POS); 76 | 77 | // SecID of the first sector of the SSAT (or -2 if not extant) 78 | $this->sbdStartBlock = Format::getInt4d($this->data, 
self::SMALL_BLOCK_DEPOT_BLOCK_POS); 79 | 80 | // SecID of the first sector of the MSAT (or -2 if no additional sectors are used) 81 | $this->extensionBlock = Format::getInt4d($this->data, self::EXTENSION_BLOCK_POS); 82 | 83 | // Total number of sectors used by MSAT 84 | $this->numExtensionBlocks = Format::getInt4d($this->data, self::NUM_EXTENSION_BLOCK_POS); 85 | 86 | $bigBlockDepotBlocks = []; 87 | $pos = self::BIG_BLOCK_DEPOT_BLOCKS_POS; 88 | $bbdBlocks = $this->numBigBlockDepotBlocks; 89 | if ($this->numExtensionBlocks != 0) { 90 | $bbdBlocks = (self::BIG_BLOCK_SIZE - self::BIG_BLOCK_DEPOT_BLOCKS_POS) / 4; 91 | } 92 | 93 | for ($i = 0; $i < $bbdBlocks; ++$i) { 94 | $bigBlockDepotBlocks[$i] = Format::getInt4d($this->data, $pos); 95 | $pos += 4; 96 | } 97 | 98 | for ($j = 0; $j < $this->numExtensionBlocks; ++$j) { 99 | $pos = ($this->extensionBlock + 1) * self::BIG_BLOCK_SIZE; 100 | $blocksToRead = min($this->numBigBlockDepotBlocks - $bbdBlocks, self::BIG_BLOCK_SIZE / 4 - 1); 101 | 102 | for ($i = $bbdBlocks; $i < $bbdBlocks + $blocksToRead; ++$i) { 103 | $bigBlockDepotBlocks[$i] = Format::getInt4d($this->data, $pos); 104 | $pos += 4; 105 | } 106 | 107 | $bbdBlocks += $blocksToRead; 108 | if ($bbdBlocks < $this->numBigBlockDepotBlocks) { 109 | $this->extensionBlock = Format::getInt4d($this->data, $pos); 110 | } 111 | } 112 | 113 | $this->bigBlockChain = ''; 114 | $bbs = self::BIG_BLOCK_SIZE / 4; 115 | for ($i = 0; $i < $this->numBigBlockDepotBlocks; ++$i) { 116 | $pos = ($bigBlockDepotBlocks[$i] + 1) * self::BIG_BLOCK_SIZE; 117 | $this->bigBlockChain .= substr($this->data, $pos, 4 * $bbs); 118 | } 119 | 120 | $sbdBlock = $this->sbdStartBlock; 121 | $this->smallBlockChain = ''; 122 | while ($sbdBlock != -2) { 123 | $pos = ($sbdBlock + 1) * self::BIG_BLOCK_SIZE; 124 | $this->smallBlockChain .= substr($this->data, $pos, 4 * $bbs); 125 | 126 | $sbdBlock = Format::getInt4d($this->bigBlockChain, $sbdBlock * 4); 127 | } 128 | 129 | // read the directory stream 130 | 
$block = $this->rootStartBlock; 131 | $this->entry = $this->readData($block); 132 | $this->readPropertySets(); 133 | } 134 | 135 | /** 136 | * Open file for reading 137 | * 138 | * @param string $file 139 | * 140 | * @throws ReaderException|ParserException 141 | */ 142 | public function openFile($file) { 143 | // Check if file exists 144 | if (!file_exists($file) || !is_readable($file)) { 145 | throw new ReaderException("Could not open file [$file] for reading! File does not exist."); 146 | } 147 | 148 | // Get the file data 149 | $this->data = file_get_contents($file); 150 | 151 | // Check OLE identifier 152 | if (empty($this->data) || substr($this->data, 0, 8) != self::IDENTIFIER_OLE) { 153 | throw new ParserException("The file [$file] is not recognised as an OLE file"); 154 | } 155 | } 156 | 157 | /** 158 | * Extract binary stream data. 159 | * 160 | * @param int $stream 161 | * 162 | * @return string|null 163 | */ 164 | public function getStream($stream) { 165 | if ($stream === null) { 166 | return null; 167 | } 168 | 169 | $streamData = ''; 170 | if ($this->props[$stream]['size'] < self::SMALL_BLOCK_THRESHOLD) { 171 | $rootData = $this->readData($this->props[$this->rootEntry]['startBlock']); 172 | $block = $this->props[$stream]['startBlock']; 173 | 174 | while ($block != -2) { 175 | $pos = $block * self::SMALL_BLOCK_SIZE; 176 | $streamData .= substr($rootData, $pos, self::SMALL_BLOCK_SIZE); 177 | $block = Format::getInt4d($this->smallBlockChain, $block * 4); 178 | } 179 | 180 | return $streamData; 181 | } 182 | 183 | $numBlocks = $this->props[$stream]['size'] / self::BIG_BLOCK_SIZE; 184 | if ($this->props[$stream]['size'] % self::BIG_BLOCK_SIZE != 0) { 185 | ++$numBlocks; 186 | } 187 | 188 | if ($numBlocks == 0) { 189 | return ''; 190 | } 191 | 192 | $block = $this->props[$stream]['startBlock']; 193 | while ($block != -2) { 194 | $pos = ($block + 1) * self::BIG_BLOCK_SIZE; 195 | $streamData .= substr($this->data, $pos, self::BIG_BLOCK_SIZE); 196 | $block = 
Format::getInt4d($this->bigBlockChain, $block * 4); 197 | } 198 | 199 | return $streamData; 200 | } 201 | 202 | /** 203 | * Read a standard stream (by joining sectors using information from SAT). 204 | * 205 | * @param int $bl Sector ID where the stream starts 206 | * 207 | * @return string 208 | */ 209 | protected function readData($bl) { 210 | $block = $bl; 211 | $data = ''; 212 | 213 | while ($block != -2) { 214 | $pos = ($block + 1) * self::BIG_BLOCK_SIZE; 215 | $data .= substr($this->data, $pos, self::BIG_BLOCK_SIZE); 216 | $block = Format::getInt4d($this->bigBlockChain, $block * 4); 217 | } 218 | 219 | return $data; 220 | } 221 | 222 | /** 223 | * Read entries in the directory stream. 224 | */ 225 | protected function readPropertySets() { 226 | $offset = 0; 227 | 228 | // loop through entires, each entry is 128 bytes 229 | $entryLen = strlen($this->entry); 230 | while ($offset < $entryLen) { 231 | // entry data (128 bytes) 232 | $d = substr($this->entry, $offset, self::PROPERTY_STORAGE_BLOCK_SIZE); 233 | 234 | // size in bytes of name 235 | $nameSize = ord($d[self::SIZE_OF_NAME_POS]) | (ord($d[self::SIZE_OF_NAME_POS + 1]) << 8); 236 | 237 | // type of entry 238 | $type = ord($d[self::TYPE_POS]); 239 | 240 | // sectorID of first sector or short sector, if this entry refers to a stream (the case with workbook) 241 | // sectorID of first sector of the short-stream container stream, if this entry is root entry 242 | $startBlock = Format::getInt4d($d, self::START_BLOCK_POS); 243 | $size = Format::getInt4d($d, self::SIZE_POS); 244 | $name = str_replace("\x00", '', substr($d, 0, $nameSize)); 245 | $this->props[] = [ 246 | 'name' => $name, 247 | 'type' => $type, 248 | 'startBlock' => $startBlock, 249 | 'size' => $size, 250 | ]; 251 | 252 | // tmp helper to simplify checks 253 | $upName = strtoupper($name); 254 | 255 | // Workbook directory entry (BIFF5 uses Book, BIFF8 uses Workbook) 256 | if (($upName === 'WORKBOOK') || ($upName === 'BOOK')) { 257 | $this->workbook 
= count($this->props) - 1; 258 | } elseif ($upName === 'ROOT ENTRY' || $upName === 'R') { 259 | // Root entry 260 | $this->rootEntry = count($this->props) - 1; 261 | } 262 | 263 | // Summary information 264 | if ($name == chr(5) . 'SummaryInformation') { 265 | $this->summaryInformation = count($this->props) - 1; 266 | } 267 | 268 | // Additional Document Summary information 269 | if ($name == chr(5) . 'DocumentSummaryInformation') { 270 | $this->documentSummaryInformation = count($this->props) - 1; 271 | } 272 | 273 | $offset += self::PROPERTY_STORAGE_BLOCK_SIZE; 274 | } 275 | } 276 | } 277 | -------------------------------------------------------------------------------- /src/Parser/Excel5/RC4.php: -------------------------------------------------------------------------------- 1 | i = 0; $this->i < 256; $this->i++) { 25 | $this->s[$this->i] = $this->i; 26 | } 27 | 28 | $this->j = 0; 29 | for ($this->i = 0; $this->i < 256; $this->i++) { 30 | $this->j = ($this->j + $this->s[$this->i] + ord($key[$this->i % $len])) % 256; 31 | $t = $this->s[$this->i]; 32 | $this->s[$this->i] = $this->s[$this->j]; 33 | $this->s[$this->j] = $t; 34 | } 35 | 36 | $this->i = $this->j = 0; 37 | } 38 | 39 | /** 40 | * Symmetric decryption/encryption function 41 | * 42 | * @param string $data Data to encrypt/decrypt 43 | * 44 | * @return string 45 | */ 46 | public function RC4($data) { 47 | $len = strlen($data); 48 | 49 | for ($c = 0; $c < $len; $c++) { 50 | $this->i = ($this->i + 1) % 256; 51 | $this->j = ($this->j + $this->s[$this->i]) % 256; 52 | $t = $this->s[$this->i]; 53 | $this->s[$this->i] = $this->s[$this->j]; 54 | $this->s[$this->j] = $t; 55 | 56 | $t = ($this->s[$this->i] + $this->s[$this->j]) % 256; 57 | 58 | $data[$c] = chr(ord($data[$c]) ^ $this->s[$t]); 59 | } 60 | 61 | return $data; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/Parser/Format.php: 
-------------------------------------------------------------------------------- 1 | self::FORMAT_GENERAL, 29 | 1 => '0', 30 | 2 => '0.00', 31 | 3 => '#,##0', 32 | 4 => '#,##0.00', 33 | 5 => '"$"#,##0_),("$"#,##0)', 34 | 6 => '"$"#,##0_),[Red]("$"#,##0)', 35 | 7 => '"$"#,##0.00_),("$"#,##0.00)', 36 | 8 => '"$"#,##0.00_),[Red]("$"#,##0.00)', 37 | 9 => '0%', 38 | 10 => '0.00%', 39 | //11 => '0.00E+00', 40 | 12 => '# ?/?', 41 | 13 => '# ??/??', 42 | 14 => 'yyyy/m/d', 43 | 15 => 'd-mmm-yy', 44 | 16 => 'd-mmm', 45 | 17 => 'mmm-yy', 46 | 18 => 'h:mm AM/PM', 47 | 19 => 'h:mm:ss AM/PM', 48 | 20 => 'h:mm', 49 | 21 => 'h:mm:ss', 50 | 22 => 'yyyy/m/d h:mm', 51 | 52 | // 补充 53 | 28 => 'm月d日', 54 | 31 => 'yyyy年m月d日', 55 | 32 => 'h时i分', 56 | 33 => 'h时i分ss秒', 57 | 34 => 'AM/PM h时i分', 58 | 35 => 'AM/PM h时i分ss秒', 59 | 55 => 'AM/PM h时i分', 60 | 56 => 'AM/PM h时i分ss秒', 61 | 58 => 'm月d日', 62 | 63 | 37 => '#,##0_),(#,##0)', 64 | 38 => '#,##0_),[Red](#,##0)', 65 | 39 => '#,##0.00_),(#,##0.00)', 66 | 40 => '#,##0.00_),[Red](#,##0.00)', 67 | 41 => '_("$"* #,##0_),_("$"* (#,##0),_("$"* "-"_),_(@_)', 68 | 42 => '_(* #,##0_),_(* (#,##0),_(* "-"_),_(@_)', 69 | 43 => '_(* #,##0.00_),_(* (#,##0.00),_(* "-"??_),_(@_)', 70 | 44 => '_("$"* #,##0.00_),_("$"* \(#,##0.00\),_("$"* "-"??_),_(@_)', 71 | 45 => 'mm:ss', 72 | 46 => '[h]:mm:ss', 73 | 47 => 'mm:ss.0', 74 | 48 => '##0.0E+0', 75 | 49 => '@', 76 | 77 | // CHT 78 | 27 => 'yyyy年m月', 79 | 30 => 'm/d/yy', 80 | 36 => '[$-404]e/m/d', 81 | 50 => '[$-404]e/m/d', 82 | 57 => 'yyyy年m月', 83 | 84 | // THA 85 | 59 => 't0', 86 | 60 => 't0.00', 87 | 61 => 't#,##0', 88 | 62 => 't#,##0.00', 89 | 67 => 't0%', 90 | 68 => 't0.00%', 91 | 69 => 't# ?/?', 92 | 70 => 't# ??/??' 
93 | ]; 94 | 95 | /** 96 | * Search/replace values to convert Excel date/time format masks to PHP format masks 97 | * 98 | * @var array 99 | */ 100 | public static $dateFormatReplacements = [ 101 | // first remove escapes related to non-format characters 102 | '\\' => '', 103 | 104 | // 12-hour suffix 105 | 'am/pm' => 'A', 106 | 107 | // 2-digit year 108 | 'e' => 'Y', 109 | 'yyyy' => 'Y', 110 | 'yy' => 'y', 111 | 112 | // first letter of month - no php equivalent 113 | 'mmmmm' => 'M', 114 | 115 | // full month name 116 | 'mmmm' => 'F', 117 | 118 | // short month name 119 | 'mmm' => 'M', 120 | 121 | // mm is minutes if time, but can also be month w/leading zero 122 | // so we try to identify times be the inclusion of a : separator in the mask 123 | // It isn't perfect, but the best way I know how 124 | ':mm' => ':i', 125 | 'mm:' => 'i:', 126 | 127 | // month leading zero 128 | 'mm' => 'm', 129 | 'm' => 'n', 130 | 131 | // full day of week name 132 | 'dddd' => 'l', 133 | 134 | // short day of week name 135 | 'ddd' => 'D', 136 | 137 | // days leading zero 138 | 'dd' => 'd', 139 | 'd' => 'j', 140 | 141 | // seconds 142 | 'ss' => 's', 143 | 144 | // fractional seconds - no php equivalent 145 | '.s' => '' 146 | ]; 147 | 148 | /** 149 | * Search/replace values to convert Excel date/time format masks hours to PHP format masks (24 hr clock) 150 | * 151 | * @var array 152 | */ 153 | public static $dateFormatReplacements24 = [ 154 | 'hh' => 'H', 155 | 'h' => 'G' 156 | ]; 157 | 158 | /** 159 | * Search/replace values to convert Excel date/time format masks hours to PHP format masks (12 hr clock) 160 | * 161 | * @var array 162 | */ 163 | public static $dateFormatReplacements12 = [ 164 | 'hh' => 'h', 165 | 'h' => 'g' 166 | ]; 167 | 168 | /** 169 | * Column index from string 170 | * 171 | * @param string $label 172 | * 173 | * @throws \Exception 174 | * @return int 175 | */ 176 | public static function columnIndexFromString($label = 'A') { 177 | // Using a lookup cache adds a 
slight memory overhead, but boosts speed 178 | // caching using a static within the method is faster than a class static, 179 | // though it's additional memory overhead 180 | static $indexCache = []; 181 | 182 | if (isset($indexCache[$label])) { 183 | return $indexCache[$label]; 184 | } 185 | 186 | // It's surprising how costly the strtoupper() and ord() calls actually are, so we use a lookup array rather 187 | // than use ord() and make it case insensitive to get rid of the strtoupper() as well. Because it's a static, 188 | // there's no significant memory overhead either 189 | static $columnLookup = [ 190 | 'A' => 1, 'B' => 2, 'C' => 3, 'D' => 4, 'E' => 5, 'F' => 6, 'G' => 7, 'H' => 8, 'I' => 9, 'J' => 10, 191 | 'K' => 11, 'L' => 12, 'M' => 13, 'N' => 14, 'O' => 15, 'P' => 16, 'Q' => 17, 'R' => 18, 'S' => 19, 192 | 'T' => 20, 'U' => 21, 'V' => 22, 'W' => 23, 'X' => 24, 'Y' => 25, 'Z' => 26, 'a' => 1, 'b' => 2, 'c' => 3, 193 | 'd' => 4, 'e' => 5, 'f' => 6, 'g' => 7, 'h' => 8, 'i' => 9, 'j' => 10, 'k' => 11, 'l' => 12, 'm' => 13, 194 | 'n' => 14, 'o' => 15, 'p' => 16, 'q' => 17, 'r' => 18, 's' => 19, 't' => 20, 'u' => 21, 'v' => 22, 195 | 'w' => 23, 'x' => 24, 'y' => 25, 'z' => 26 196 | ]; 197 | 198 | // We also use the language construct isset() rather than the more costly strlen() function to match the length 199 | // of $pString for improved performance 200 | if (!isset($indexCache[$label])) { 201 | if (!isset($label{0}) || isset($label{3})) { 202 | throw new ParserException('Column string can not be empty or longer than 3 characters'); 203 | } 204 | 205 | if (!isset($label{1})) { 206 | $indexCache[$label] = $columnLookup[$label]; 207 | } elseif (!isset($label{2})) { 208 | $indexCache[$label] = $columnLookup[$label{0}] * 26 + $columnLookup[$label{1}]; 209 | } else { 210 | $indexCache[$label] = $columnLookup[$label{0}] * 676 + $columnLookup[$label{1}] * 26 211 | + $columnLookup[$label{2}]; 212 | } 213 | } 214 | 215 | return $indexCache[$label]; 216 | } 217 | 218 
/**
 * Convert a zero-based column index into its spreadsheet column label.
 *
 * Examples: 0 => 'A', 25 => 'Z', 26 => 'AA', 701 => 'ZZ', 702 => 'AAA'.
 * Labels of up to three letters are produced; results are memoised per index.
 *
 * @param int $column Zero-based column index
 * @return string Column label
 */
public static function stringFromColumnIndex($column = 0) {
    // Using a lookup cache adds a slight memory overhead, but boosts speed;
    // a static within the method is faster than a class static.
    static $stringCache = [];

    // Normalise once so the cache key and every division below work on a real integer.
    $column = (int) $column;

    if (!isset($stringCache[$column])) {
        if ($column < 26) {
            // Single letter: A..Z
            $stringCache[$column] = chr(65 + $column);
        } elseif ($column < 702) {
            // Two letters: AA..ZZ. intdiv() avoids handing a fractional float to chr(),
            // which is deprecated since PHP 8.1 and only worked through silent truncation.
            $stringCache[$column] = chr(64 + intdiv($column, 26)) . chr(65 + $column % 26);
        } else {
            // Three letters: AAA onwards
            $stringCache[$column] = chr(64 + intdiv($column - 26, 676))
                . chr(65 + intdiv(($column - 26) % 676, 26))
                . chr(65 + $column % 26);
        }
    }

    return $stringCache[$column];
}

/**
 * Read a 16-bit unsigned integer stored low byte first.
 *
 * @param string $data Binary string; bytes $pos and $pos + 1 must exist
 * @param int    $pos  Offset of the low byte
 * @return int Value in the range 0..65535
 */
public static function getUInt2d($data, $pos) {
    return ord($data[$pos]) | (ord($data[$pos + 1]) << 8);
}

/**
 * Read a 32-bit signed integer stored low byte first.
 *
 * @param string $data Binary string; bytes $pos .. $pos + 3 must exist
 * @param int    $pos  Offset of the lowest byte
 * @return int Signed value
 */
public static function getInt4d($data, $pos) {
    // FIX: represent numbers correctly on 64-bit system
    // http://sourceforge.net/tracker/index.php?func=detail&aid=1487372&group_id=99160&atid=623334
    // Hacked by Andreas Rehm 2006 to ensure correct result of the <<24 block on 32 and 64bit systems
    $ord24 = ord($data[$pos + 3]);

    if ($ord24 >= 128) {
        // Top bit set: negative number. Build the high byte as a negative quantity so that
        // OR-ing in the lower three bytes yields the sign-extended value on 64-bit integers too.
        $ord24 = -abs((256 - $ord24) << 24);
    } else {
        $ord24 = ($ord24 & 127) << 24;
    }

    return ord($data[$pos]) | (ord($data[$pos + 1]) << 8) | (ord($data[$pos + 2]) << 16) | $ord24;
}
} // closes the enclosing class, whose header lies outside this chunk
-------------------------------------------------------------------------------- /src/Reader/BaseReader.php: -------------------------------------------------------------------------------- 1 | generator->current(); 48 | } 49 | 50 | /** 51 | * Move forward to next element 52 | */ 53 | public function next() { 54 | $this->generator->next(); 55 | } 56 | 57 | /** 58 | * Return the key of the current element 59 | * 60 | * @return int 61 | */ 62 | public function key() { 63 | return $this->generator->key(); 64 | } 65 | 66 | /** 67 | * Checks if current position is valid 68 | * 69 | * @return bool 70 | */ 71 | public function valid() { 72 | return $this->generator->valid(); 73 | } 74 | 75 | /** 76 | * Rewind the Iterator to the first element 77 | */ 78 | public function rewind() { 79 | $this->generator = $this->makeGenerator(); 80 | } 81 | 82 | /** 83 | * Make the generator 84 | */ 85 | protected function makeGenerator() { 86 | 87 | } 88 | 89 | /** 90 | * Ignore empty row 91 | * 92 | * @param bool $ignoreEmpty 93 | */ 94 | public function ignoreEmptyRow($ignoreEmpty = false) { 95 | 96 | } 97 | 98 | /** 99 | * Set row limit 100 | * 101 | * @param int $limit 102 | * @return $this 103 | */ 104 | public function setRowLimit($limit = null) { 105 | $this->rowLimit = $limit; 106 | 107 | return $this; 108 | } 109 | 110 | /** 111 | * Get row limit 112 | * 113 | * @return int 114 | */ 115 | public function getRowLimit() { 116 | return $this->rowLimit; 117 | } 118 | 119 | /** 120 | * Set column limit 121 | * 122 | * @param int $limit 123 | * @return $this 124 | */ 125 | public function setColumnLimit($limit = null) { 126 | $this->columnLimit = $limit; 127 | 128 | return $this; 129 | } 130 | 131 | /** 132 | * Takes a row and traverses the file to that row 133 | * 134 | * @param int $row 135 | */ 136 | public function seek($row) { 137 | if ($row <= 0) { 138 | throw new \InvalidArgumentException("Row $row is invalid"); 139 | } 140 | 141 | $key = $this->key(); 142 | 143 | if ($key !== 
--$row) { 144 | if ($row < $key || is_null($key) || $row == 0) { 145 | $this->rewind(); 146 | } 147 | 148 | while ($this->valid() && $row > $this->key()) { 149 | $this->next(); 150 | } 151 | } 152 | } 153 | 154 | /** 155 | * Get column limit 156 | * 157 | * @return int 158 | */ 159 | public function getColumnLimit() { 160 | return $this->columnLimit; 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /src/Reader/Csv.php: -------------------------------------------------------------------------------- 1 | openFile($file); 69 | 70 | $this->autoDetection(); 71 | 72 | $this->generator = $this->makeGenerator(); 73 | 74 | ini_set('auto_detect_line_endings', $lineEnding); 75 | 76 | return $this; 77 | } 78 | 79 | /** 80 | * Count elements of the selected sheet 81 | * 82 | * @return int 83 | */ 84 | public function count() { 85 | if ($this->count === null) { 86 | $position = ftell($this->fileHandle); 87 | $this->count = iterator_count($this->makeGenerator(true)); 88 | fseek($this->fileHandle, $position); 89 | } 90 | 91 | return $this->count; 92 | } 93 | 94 | /** 95 | * Make the generator 96 | * 97 | * @param bool $calculate 98 | * @return \Generator 99 | */ 100 | protected function makeGenerator($calculate = false) { 101 | fseek($this->fileHandle, $this->start); 102 | 103 | $finish = 0; 104 | while (($row = fgetcsv($this->fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) { 105 | if ($this->ignoreEmpty && (empty($row) || trim(implode('', $row)) === '')) { 106 | continue; 107 | } 108 | 109 | if ($calculate) { 110 | yield; 111 | continue; 112 | } 113 | 114 | if ($this->rowLimit > 0 && ++$finish > $this->rowLimit) { 115 | break; 116 | } 117 | 118 | if ($this->columnLimit > 0) { 119 | $row = array_slice($row, 0, $this->columnLimit); 120 | } 121 | 122 | foreach ($row as &$value) { 123 | if ($value != '') { 124 | if (is_numeric($value)) { 125 | $value = (float)$value; 126 | } 127 | 128 | // Convert encoding if 
necessary 129 | if ($this->inputEncoding !== 'UTF-8') { 130 | $value = mb_convert_encoding($value, 'UTF-8', $this->inputEncoding); 131 | } 132 | } 133 | } 134 | 135 | unset($value); 136 | 137 | yield $row; 138 | } 139 | } 140 | 141 | /** 142 | * Detect the file delimiter and encoding 143 | */ 144 | protected function autoDetection() { 145 | if (($this->delimiter !== null && $this->inputEncoding !== null) 146 | || ($line = fgets($this->fileHandle)) === false) { 147 | 148 | return; 149 | } 150 | 151 | if ($this->delimiter === null) { 152 | $this->delimiter = ','; 153 | 154 | if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) { 155 | $this->delimiter = substr($line, 4, 1); 156 | } 157 | } 158 | 159 | if ($this->inputEncoding === null) { 160 | $this->inputEncoding = 'UTF-8'; 161 | 162 | if (($bom = substr($line, 0, 4)) == "\xFF\xFE\x00\x00" || $bom == "\x00\x00\xFE\xFF") { 163 | $this->start = 4; 164 | $this->inputEncoding = 'UTF-32'; 165 | } elseif (($bom = substr($line, 0, 2)) == "\xFF\xFE" || $bom == "\xFE\xFF") { 166 | $this->start = 2; 167 | $this->inputEncoding = 'UTF-16'; 168 | } elseif (($bom = substr($line, 0, 3)) == "\xEF\xBB\xBF") { 169 | $this->start = 3; 170 | } 171 | 172 | if (!$this->start) { 173 | $encoding = mb_detect_encoding($line, 'ASCII, UTF-8, GB2312, GBK'); 174 | 175 | if ($encoding) { 176 | if ($encoding == 'EUC-CN') { 177 | $encoding = 'GB2312'; 178 | } elseif ($encoding == 'CP936') { 179 | $encoding = 'GBK'; 180 | } 181 | 182 | $this->inputEncoding = $encoding; 183 | } 184 | } 185 | } 186 | 187 | fseek($this->fileHandle, $this->start); 188 | } 189 | 190 | /** 191 | * Ignore empty row 192 | * 193 | * @param bool $ignoreEmpty 194 | * 195 | * @return $this 196 | */ 197 | public function ignoreEmptyRow($ignoreEmpty = false) { 198 | $this->ignoreEmpty = $ignoreEmpty; 199 | 200 | return $this; 201 | } 202 | 203 | /** 204 | * Set input encoding 205 | * 206 | * @param string $encoding 207 | * @return $this 208 | */ 209 | public 
function setInputEncoding($encoding = 'UTF-8') { 210 | $this->inputEncoding = $encoding; 211 | 212 | return $this; 213 | } 214 | 215 | /** 216 | * Get input encoding 217 | * 218 | * @return string 219 | */ 220 | public function getInputEncoding() { 221 | return $this->inputEncoding; 222 | } 223 | 224 | /** 225 | * Set delimiter 226 | * 227 | * @param string $delimiter Delimiter, defaults to , 228 | * @return $this 229 | */ 230 | public function setDelimiter($delimiter = ',') { 231 | $this->delimiter = $delimiter; 232 | 233 | return $this; 234 | } 235 | 236 | /** 237 | * Get delimiter 238 | * 239 | * @return string 240 | */ 241 | public function getDelimiter() { 242 | return $this->delimiter; 243 | } 244 | 245 | /** 246 | * Set enclosure 247 | * 248 | * @param string $enclosure Enclosure, defaults to " 249 | * @return $this 250 | */ 251 | public function setEnclosure($enclosure = '"') { 252 | if ($enclosure == '') { 253 | $enclosure = '"'; 254 | } 255 | 256 | $this->enclosure = $enclosure; 257 | 258 | return $this; 259 | } 260 | 261 | /** 262 | * Get enclosure 263 | * 264 | * @return string 265 | */ 266 | public function getEnclosure() { 267 | return $this->enclosure; 268 | } 269 | 270 | /** 271 | * Can the current Reader read the file? 272 | * 273 | * @param string $file 274 | * 275 | * @return bool 276 | */ 277 | public function canRead($file) { 278 | try { 279 | $this->openFile($file); 280 | } catch (\Exception $e) { 281 | return false; 282 | } 283 | 284 | fclose($this->fileHandle); 285 | 286 | return true; 287 | } 288 | 289 | /** 290 | * Open file for reading 291 | * 292 | * @param string $file 293 | * 294 | * @throws ReaderException 295 | */ 296 | protected function openFile($file) { 297 | // Check if file exists 298 | if (!file_exists($file) || !is_readable($file)) { 299 | throw new ReaderException("Could not open file [$file] for reading! 
File does not exist."); 300 | } 301 | 302 | // Open file 303 | $this->fileHandle = fopen($file, 'r'); 304 | if ($this->fileHandle === false) { 305 | throw new ReaderException("Could not open file [$file] for reading."); 306 | } 307 | } 308 | 309 | /** 310 | * Close file and release generator 311 | */ 312 | public function __destruct() { 313 | if ($this->fileHandle) { 314 | fclose($this->fileHandle); 315 | } 316 | 317 | $this->generator = null; 318 | } 319 | } 320 | -------------------------------------------------------------------------------- /src/Reader/Xls.php: -------------------------------------------------------------------------------- 1 | parser = new Excel5(); 30 | } 31 | 32 | /** 33 | * Loads Excel from file 34 | * 35 | * @param string $file 36 | * 37 | * @return $this 38 | */ 39 | public function load($file) { 40 | $this->parser->loadOLE($file); 41 | 42 | $this->generator = $this->makeGenerator(); 43 | 44 | return $this; 45 | } 46 | 47 | /** 48 | * Count elements of the selected sheet 49 | * 50 | * @param bool $all 51 | * @return int|array 52 | */ 53 | public function count($all = false) { 54 | if ($this->count === null) { 55 | $row = $column = 0; 56 | if ($sheet = $this->sheets($this->parser->getSheetIndex())) { 57 | $row = $sheet['totalRows'] ?? 0; 58 | $column = $sheet['totalColumns'] ?? 0; 59 | } 60 | 61 | $this->count = [ 62 | $this->rowLimit > 0 ? min($row, $this->rowLimit) : $row, 63 | $this->columnLimit > 0 ? min($column, $this->columnLimit) : $column 64 | ]; 65 | } 66 | 67 | return $all ? $this->count : $this->count[0]; 68 | } 69 | 70 | /** 71 | * Get the work sheets info 72 | * 73 | * @param int $index 74 | * @return array 75 | */ 76 | public function sheets($index = null) { 77 | $sheets = $this->parser->parseWorksheetInfo(); 78 | 79 | if ($index !== null) { 80 | return $sheets[$index] ?? 
[]; 81 | } 82 | 83 | return $sheets; 84 | } 85 | 86 | /** 87 | * Make the generator 88 | * 89 | * @return \Generator 90 | */ 91 | protected function makeGenerator() { 92 | list($rowLimit, $columnLimit) = $this->count(true); 93 | 94 | $line = $finish = 0; 95 | while ($finish < $rowLimit && ($row = $this->parser->getRow($line++, $columnLimit)) !== false) { 96 | if ($this->parser->isIgnoreEmptyRow() && trim(implode('', $row)) === '') { 97 | continue; 98 | } 99 | 100 | $finish++; 101 | yield $row; 102 | } 103 | } 104 | 105 | /** 106 | * Ignore empty row 107 | * 108 | * @param bool $ignoreEmpty 109 | * 110 | * @return $this 111 | */ 112 | public function ignoreEmptyRow($ignoreEmpty = false) { 113 | $this->parser->ignoreEmptyRow($ignoreEmpty); 114 | 115 | return $this; 116 | } 117 | 118 | /** 119 | * Set sheet index 120 | * 121 | * @param int $index 122 | * @return $this 123 | */ 124 | public function setSheetIndex($index) { 125 | if ($index != $this->parser->getSheetIndex()) { 126 | $this->parser->setSheetIndex($index); 127 | 128 | $this->count = null; 129 | $this->rewind(); 130 | } 131 | 132 | return $this; 133 | } 134 | 135 | /** 136 | * Can the current Reader read the file? 137 | * 138 | * @param string $file 139 | * 140 | * @return bool 141 | */ 142 | public function canRead($file) { 143 | try { 144 | // Use ParseXL for the hard work. 
145 | $ole = new OLERead(); 146 | 147 | // open file 148 | $ole->openFile($file); 149 | } catch (\Exception $e) { 150 | return false; 151 | } 152 | 153 | return true; 154 | } 155 | 156 | /** 157 | * Release parser and generator 158 | */ 159 | public function __destruct() { 160 | $this->parser = null; 161 | $this->generator = null; 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/Reader/Xlsx.php: -------------------------------------------------------------------------------- 1 | parser = new Excel2007(); 29 | } 30 | 31 | /** 32 | * Loads Excel from file 33 | * 34 | * @param string $file 35 | * 36 | * @return $this 37 | */ 38 | public function load($file) { 39 | $this->parser->loadZip($file); 40 | 41 | $this->generator = $this->makeGenerator(); 42 | 43 | return $this; 44 | } 45 | 46 | /** 47 | * Count elements of an object 48 | * 49 | * @param bool $all 50 | * @return int|array 51 | */ 52 | public function count($all = false) { 53 | if ($this->count === null) { 54 | $row = $column = 0; 55 | if ($sheet = $this->sheets($this->parser->getSheetIndex())) { 56 | $row = $sheet['totalRows'] ?? 0; 57 | $column = $sheet['totalColumns'] ?? 0; 58 | } 59 | 60 | $this->count = [ 61 | $this->rowLimit > 0 ? min($row, $this->rowLimit) : $row, 62 | $this->columnLimit > 0 ? min($column, $this->columnLimit) : $column 63 | ]; 64 | } 65 | 66 | return $all ? $this->count : $this->count[0]; 67 | } 68 | 69 | /** 70 | * Get the work sheets info 71 | * 72 | * @param int $index 73 | * @return array 74 | */ 75 | public function sheets($index = null) { 76 | $sheets = $this->parser->parseWorksheetInfo(); 77 | 78 | if ($index !== null) { 79 | return $sheets[$index] ?? 
[]; 80 | } 81 | 82 | return $sheets; 83 | } 84 | 85 | /** 86 | * Make the generator 87 | * 88 | * @return \Generator 89 | */ 90 | protected function makeGenerator() { 91 | list($rowLimit, $columnLimit) = $this->count(true); 92 | 93 | $line = $finish = 0; 94 | while ($finish < $rowLimit && ($row = $this->parser->getRow($line++, $columnLimit)) !== false) { 95 | if ($this->parser->isIgnoreEmptyRow() && trim(implode('', $row)) === '') { 96 | continue; 97 | } 98 | 99 | $finish++; 100 | yield $row; 101 | } 102 | } 103 | 104 | /** 105 | * Ignore empty row 106 | * 107 | * @param bool $ignoreEmpty 108 | * 109 | * @return $this 110 | */ 111 | public function ignoreEmptyRow($ignoreEmpty = false) { 112 | $this->parser->ignoreEmptyRow($ignoreEmpty); 113 | 114 | return $this; 115 | } 116 | 117 | /** 118 | * Set sheet index 119 | * 120 | * @param int $index 121 | * @return $this 122 | */ 123 | public function setSheetIndex($index = 0) { 124 | if ($index != $this->parser->getSheetIndex()) { 125 | $this->parser->setSheetIndex($index); 126 | 127 | $this->count = null; 128 | $this->rewind(); 129 | } 130 | 131 | return $this; 132 | } 133 | 134 | /** 135 | * Can the current Reader read the file? 
136 | * 137 | * @param string $file 138 | * 139 | * @return bool 140 | */ 141 | public function canRead($file) { 142 | try { 143 | $parser = new Excel2007(); 144 | 145 | // open file 146 | $parser->openFile($file); 147 | } catch (\Exception $e) { 148 | return false; 149 | } 150 | 151 | return true; 152 | } 153 | 154 | /** 155 | * Release parser and generator 156 | */ 157 | public function __destruct() { 158 | $this->parser = null; 159 | $this->generator = null; 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /tests/csvTest.php: -------------------------------------------------------------------------------- 1 | setRowLimit(5); 15 | $reader->setColumnLimit(10); 16 | 17 | $reader->ignoreEmptyRow(true); 18 | 19 | //$reader->setInputEncoding('UTF-8'); 20 | $reader->setDelimiter("\t"); 21 | }); 22 | 23 | foreach ($reader as $row) { 24 | var_dump($row); 25 | } 26 | 27 | $reader->seek(2); 28 | 29 | $count = $reader->count(); 30 | //$reader->seek(1); 31 | $current = $reader->current(); 32 | 33 | $time = microtime(true) - $start; 34 | $use = memory_get_usage() - $memory; 35 | var_dump($current, $count, $time, $use/1024/1024); 36 | -------------------------------------------------------------------------------- /tests/files/01.csv: -------------------------------------------------------------------------------- 1 | 姓名,称呼,性别,QQ,手机,电话,邮箱,传真,公司,职务,网址,地址,备注 2 | 巴蒂,黑曼巴,男,654333,18643910100,0755-07551255,5310100@sina.com,7.55576E+11,xxxx有限公司,测试工程师,www.baidu1.com,美国洛杉矶,NBA球员 3 | -------------------------------------------------------------------------------- /tests/files/01.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Janson-Leung/PHPExcel/9df8bd178a41de108ebdc65e90a335a28f1c5959/tests/files/01.xls -------------------------------------------------------------------------------- /tests/files/01.xlsx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Janson-Leung/PHPExcel/9df8bd178a41de108ebdc65e90a335a28f1c5959/tests/files/01.xlsx -------------------------------------------------------------------------------- /tests/files/02.csv: -------------------------------------------------------------------------------- 1 | 10 2 | 21 22 3 | 311 4 | 5 | 407 6 | 7 | 8 | 9 | 10 | 104 -------------------------------------------------------------------------------- /tests/xlsTest.php: -------------------------------------------------------------------------------- 1 | setRowLimit(5); 15 | $reader->setColumnLimit(10); 16 | 17 | //$reader->setSheetIndex(1); 18 | }); 19 | 20 | foreach ($reader as $row) { 21 | var_dump($row); 22 | } 23 | 24 | $reader->seek(50); 25 | 26 | //$reader->seek(5); 27 | $count = $reader->count(); 28 | $current = $reader->current(); 29 | 30 | $sheets = $reader->sheets(); 31 | 32 | $time = microtime(true) - $start; 33 | $use = memory_get_usage() - $memory; 34 | 35 | var_dump($current, $count, $sheets, $time, $use/1024/1024); 36 | -------------------------------------------------------------------------------- /tests/xlsxTest.php: -------------------------------------------------------------------------------- 1 | setRowLimit(10); 15 | $reader->setColumnLimit(10); 16 | 17 | $reader->ignoreEmptyRow(true); 18 | 19 | //$reader->setSheetIndex(0); 20 | }); 21 | 22 | foreach ($reader as $row) { 23 | var_dump($row); 24 | } 25 | 26 | //$reader->seek(50); 27 | 28 | $count = $reader->count(); 29 | $reader->seek(2); 30 | $current = $reader->current(); 31 | 32 | $sheets = $reader->sheets(); 33 | 34 | $time = microtime(true) - $start; 35 | $use = memory_get_usage() - $memory; 36 | 37 | var_dump($current, $count, $sheets, $time, $use/1024/1024); --------------------------------------------------------------------------------