├── LICENSE
├── README.md
├── autoload.php
├── composer.json
├── src
├── Contract
│ └── ReaderInterface.php
├── Excel.php
├── Exception
│ ├── ParserException.php
│ └── ReaderException.php
├── Parser
│ ├── Excel2007.php
│ ├── Excel5.php
│ ├── Excel5
│ │ ├── OLERead.php
│ │ └── RC4.php
│ └── Format.php
└── Reader
│ ├── BaseReader.php
│ ├── Csv.php
│ ├── Xls.php
│ └── Xlsx.php
└── tests
├── csvTest.php
├── files
├── 01.csv
├── 01.xls
├── 01.xlsx
└── 02.csv
├── xlsTest.php
└── xlsxTest.php
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # phpexcel
2 | A lightweight PHP library for reading spreadsheet files
3 | - Based on Generator, SeekableIterator and Countable
4 | - Supports reading row by row; reads data only
5 |
6 | ### Requirements
7 |
8 | - PHP 7.0 or higher
9 |
10 | ### Installation
11 |
12 | composer require asan/phpexcel
13 |
14 | ## Usage
15 |
16 | ### csv
17 |
18 | ```
19 | // Simple setting
20 | $reader = Asan\PHPExcel\Excel::load('files/02.csv', 'GBK');
21 |
22 | // Flexible setting
23 | $reader = Asan\PHPExcel\Excel::load('files/01.csv', function(Asan\PHPExcel\Reader\Csv $reader) {
24 | // Set row limit
25 | $reader->setRowLimit(10);
26 |
27 | // Set column limit
28 | $reader->setColumnLimit(10);
29 |
30 | // Ignore emoty row
31 | $reader->ignoreEmptyRow(true);
32 |
33 | // Set encoding
34 | //$reader->setInputEncoding('GBK');
35 |
36 | // Set delimiter
37 | $reader->setDelimiter("\t");
38 | }, 'GBK');
39 |
40 | // skip to row 50
41 | $reader->seek(50);
42 |
43 | // Get the current row data
44 | $current = $reader->current();
45 |
46 | // Get row count
47 | $count = $reader->count();
48 | ```
49 |
50 | ### xls
51 |
52 | ```
53 | $reader = Asan\PHPExcel\Excel::load('files/01.xls', function(Asan\PHPExcel\Reader\Xls $reader) {
54 | // Set row limit
55 | $reader->setRowLimit(10);
56 |
57 | // Set column limit
58 | $reader->setColumnLimit(10);
59 |
60 | // Ignore emoty row
61 | $reader->ignoreEmptyRow(true);
62 |
63 | // Select sheet index
64 | $reader->setSheetIndex(1);
65 | });
66 |
67 | // skip to row 50
68 | $reader->seek(50);
69 |
70 | // Get the current row data
71 | $current = $reader->current();
72 |
73 | // Get row count
74 | $count = $reader->count();
75 |
76 | // Get all sheets info
77 | $sheets = $reader->sheets();
78 | ```
79 |
80 | ### xlsx
81 | ```
82 | $reader = Asan\PHPExcel\Excel::load('files/01.xlsx', function(Asan\PHPExcel\Reader\Xlsx $reader) {
83 | // Set row limit
84 | $reader->setRowLimit(10);
85 |
86 | // Set column limit
87 | $reader->setColumnLimit(10);
88 |
89 | // Ignore emoty row
90 | $reader->ignoreEmptyRow(true);
91 |
92 | // Select sheet index
93 | $reader->setSheetIndex(0);
94 | });
95 |
96 | // skip to row 50
97 | $reader->seek(50);
98 |
99 | // Get the current row data
100 | $current = $reader->current();
101 |
102 | // Get row count
103 | $count = $reader->count();
104 |
105 | // Get all sheets info
106 | $sheets = $reader->sheets();
107 | ```
108 |
--------------------------------------------------------------------------------
/autoload.php:
--------------------------------------------------------------------------------
1 | =7.0"
15 | },
16 | "minimum-stability": "stable",
17 | "autoload": {
18 | "psr-4": {"Asan\\PHPExcel\\": "src/"}
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/src/Contract/ReaderInterface.php:
--------------------------------------------------------------------------------
1 | setInputEncoding($encoding);
64 | }
65 |
66 | return $reader->load($file);
67 | }
68 |
/**
 * Map a file extension to the name of the spreadsheet format it belongs to
 *
 * The extension is compared as given (no case folding).
 *
 * @param string $ext File extension without the leading dot
 * @return string 'Xlsx', 'Xls' or 'Csv'; an empty string when the extension is not recognised
 */
protected static function getFormatByExtension($ext) {
    // Format name => extensions that belong to it, in lookup order
    $extensionsByFormat = [
        // Excel 2007
        'Xlsx' => ['xlsx', 'xlsm', 'xltx', 'xltm'],
        // Excel5
        'Xls' => ['xls', 'xlt'],
        // CSV
        'Csv' => ['csv', 'txt'],
    ];

    foreach ($extensionsByFormat as $format => $extensions) {
        // Loose comparison on purpose: it applies the same matching rules as a switch statement
        if (in_array($ext, $extensions)) {
            return $format;
        }
    }

    return '';
}
114 | }
115 |
--------------------------------------------------------------------------------
/src/Exception/ParserException.php:
--------------------------------------------------------------------------------
1 | false];
110 |
/**
 * Open the given file and prepare the shared (static) runtime state
 *
 * Opening is delegated to openFile(). Afterwards the static base date used for date
 * conversion is created once (1900-01-00 00:00:00 UTC, which PHP normalises to
 * 1899-12-31), and availability of the GMP extension is recorded.
 *
 * @throws ParserException|ReaderException
 * @param string $file
 */
public function loadZip($file) {
    $this->openFile($file);

    // The base date is shared by all instances, so build it only once
    if (!self::$baseDate) {
        $epoch = new \DateTime;
        $epoch->setTimezone(new \DateTimeZone('UTC'));
        $epoch->setDate(1900, 1, 0);
        $epoch->setTime(0, 0, 0);

        self::$baseDate = $epoch;
    }

    // gmp_gcd() only exists when the GMP extension is loaded
    if (function_exists('gmp_gcd')) {
        self::$runtimeInfo['GMPSupported'] = true;
    }
}
132 |
/**
 * Choose whether empty rows are skipped
 *
 * @param bool $ignoreEmpty True to skip empty rows, false to keep them
 *
 * @return $this Same instance, for method chaining
 */
public function ignoreEmptyRow($ignoreEmpty) {
    $this->ignoreEmpty = $ignoreEmpty;

    return $this;
}
145 |
/**
 * Tell whether empty rows are currently skipped
 *
 * @return bool The value last passed to ignoreEmptyRow()
 */
public function isIgnoreEmptyRow() {
    return $this->ignoreEmpty;
}
154 |
/**
 * Select the worksheet to read from
 *
 * When the index differs from the current one, the worksheet reader is reopened on the
 * newly selected sheet; selecting the current sheet again does nothing.
 *
 * @param int $index Zero-based sheet index
 *
 * @return $this Same instance, for method chaining
 */
public function setSheetIndex($index) {
    // Nothing to do when the requested sheet is already selected
    if ($index == $this->sheetIndex) {
        return $this;
    }

    $this->sheetIndex = $index;
    $this->getWorksheetXML();

    return $this;
}
171 |
/**
 * Return the index of the currently selected worksheet
 *
 * @return int Zero-based sheet index
 */
public function getSheetIndex() {
    return $this->sheetIndex;
}
180 |
/**
 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
 *
 * The information is collected on the first call and cached in $this->sheets; later calls
 * return the cached list. Each sheet listed in xl/workbook.xml is extracted to the temp dir
 * as xl/worksheets/sheet<N>.xml (N counted from 1 in workbook order) and scanned once.
 *
 * Row counting depends on the ignore-empty setting: when empty rows are kept, totalRows is
 * the "r" attribute of the last <row> seen; when they are ignored, only rows containing at
 * least one non-blank <v> value are counted.
 *
 * @throws ReaderException
 * @return array List of arrays with the keys name, lastColumnLetter, lastColumnIndex,
 *               totalRows and totalColumns
 */
public function parseWorksheetInfo() {
    if ($this->sheets === null) {
        $workbookXML = simplexml_load_string(
            $this->securityScan($this->zip->getFromName('xl/workbook.xml')), 'SimpleXMLElement', self::getLibXmlLoaderOptions()
        );

        $this->sheets = [];
        if (isset($workbookXML->sheets) && $workbookXML->sheets) {
            $xml = new \XMLReader();

            $index = 0;
            foreach ($workbookXML->sheets->sheet as $sheet) {
                $info = [
                    'name' => (string)$sheet['name'], 'lastColumnLetter' => '', 'lastColumnIndex' => 0,
                    'totalRows' => 0, 'totalColumns' => 0
                ];

                $this->zip->extractTo($this->tmpDir, $file = 'xl/worksheets/sheet' . (++$index) . '.xml');
                $xml->open($this->tmpDir . '/' . $file, null, self::getLibXmlLoaderOptions());

                $xml->setParserProperty(\XMLReader::DEFAULTATTRS, true);

                $nonEmpty = false;
                $columnLetter = '';
                while ($xml->read()) {
                    if ($xml->name == 'row') {
                        if (!$this->ignoreEmpty && $xml->nodeType == \XMLReader::ELEMENT) {
                            $info['totalRows'] = (int)$xml->getAttribute('r');
                        } elseif ($xml->nodeType == \XMLReader::END_ELEMENT) {
                            if ($this->ignoreEmpty && $nonEmpty) {
                                $info['totalRows']++;
                                $nonEmpty = false;
                            }

                            // Column letters work like base-26 numbers: a longer reference is always
                            // further to the right ('AA' comes after 'Z'). A plain string comparison
                            // would rank 'Z' above 'AA', so compare the lengths first.
                            $lengthDiff = strlen($columnLetter) - strlen($info['lastColumnLetter']);
                            if ($lengthDiff > 0
                                || ($lengthDiff === 0 && strcmp($columnLetter, $info['lastColumnLetter']) > 0)) {

                                $info['lastColumnLetter'] = $columnLetter;
                            }
                        }
                    } elseif ($xml->name == 'c' && $xml->nodeType == \XMLReader::ELEMENT) {
                        // Keep only the letters of the cell reference, e.g. "AB12" => "AB"
                        $columnLetter = preg_replace('{[^[:alpha:]]}S', '', $xml->getAttribute('r'));
                    } elseif ($this->ignoreEmpty && !$nonEmpty && $xml->name == 'v'
                        && $xml->nodeType == \XMLReader::ELEMENT && trim($xml->readString()) !== '') {

                        $nonEmpty = true;
                    }
                }

                if ($info['lastColumnLetter']) {
                    $info['totalColumns'] = Format::columnIndexFromString($info['lastColumnLetter']);
                    $info['lastColumnIndex'] = $info['totalColumns'] - 1;
                }

                $this->sheets[] = $info;
            }

            $xml->close();
        }
    }

    return $this->sheets;
}
248 |
/**
 * Get shared string
 *
 * xl/sharedStrings.xml is extracted to the temp dir on first use and read with a
 * forward-only XMLReader whose current entry index is kept in $this->sharedStringsPosition.
 * The text of every <t> node inside the requested <si> entry is trimmed and concatenated.
 *
 * @param int $position Zero-based index of the <si> entry (numeric strings are accepted)
 * @return string The entry's text, or an empty string when the entry does not exist
 */
protected function getSharedString($position) {
    // Callers pass the raw cell text; normalise it once so all comparisons below are numeric
    $position = (int)$position;
    $value = '';

    $file = 'xl/sharedStrings.xml';
    if ($this->sharedStringsXML === null) {
        $this->sharedStringsXML = new \XMLReader();

        $this->zip->extractTo($this->tmpDir, $file);
    }

    // The reader cannot move backwards. Rewind when the wanted entry lies at or before the
    // current one: after a lookup the cursor already sits on that entry's closing tag, so
    // asking for the same index again would otherwise scan to the end and return ''.
    if ($this->sharedStringsPosition < 0 || $position <= $this->sharedStringsPosition) {
        $this->sharedStringsXML->open($this->tmpDir . '/' . $file, null, self::getLibXmlLoaderOptions());

        $this->sharedStringsPosition = -1;
    }

    while ($this->sharedStringsXML->read()) {
        $name = $this->sharedStringsXML->name;
        $nodeType = $this->sharedStringsXML->nodeType;

        if ($name == 'si') {
            if ($nodeType == \XMLReader::ELEMENT) {
                // Entering the next shared string entry
                $this->sharedStringsPosition++;
            } elseif ($position == $this->sharedStringsPosition && $nodeType == \XMLReader::END_ELEMENT) {
                // The requested entry has been read completely
                break;
            }
        } elseif ($name == 't' && $position == $this->sharedStringsPosition && $nodeType == \XMLReader::ELEMENT) {
            $value .= trim($this->sharedStringsXML->readString());
        }
    }

    return $value;
}
288 |
/**
 * Load number format information from xl/styles.xml
 *
 * Runs only once: as soon as $this->styleXfs is set, later calls return immediately.
 * Fills $this->styleXfs with one number format id per <xf> entry of cellXfs, and
 * $this->formats with the format codes declared in numFmts, keyed by format id.
 *
 * @throws ReaderException
 */
protected function parseStyles() {
    // Already parsed
    if ($this->styleXfs !== null) {
        return;
    }

    $stylesXML = simplexml_load_string(
        $this->securityScan($this->zip->getFromName('xl/styles.xml')), 'SimpleXMLElement', self::getLibXmlLoaderOptions()
    );

    $this->styleXfs = $this->formats = [];
    if (!$stylesXML) {
        return;
    }

    if (isset($stylesXML->cellXfs->xf) && $stylesXML->cellXfs->xf) {
        foreach ($stylesXML->cellXfs->xf as $xf) {
            $numFmtId = isset($xf['numFmtId']) ? (int)$xf['numFmtId'] : 0;

            // The id is kept when the xf carries an applyNumberFormat attribute or uses id 0;
            // ids >= 164 are custom formats whose code comes from styleSheet\numFmts.
            // Every other xf falls back to the "General" format.
            $usesOwnFormat = isset($xf['applyNumberFormat']) || $numFmtId == 0;
            $this->styleXfs[] = $usesOwnFormat ? $numFmtId : Format::FORMAT_GENERAL;
        }
    }

    if (isset($stylesXML->numFmts->numFmt) && $stylesXML->numFmts->numFmt) {
        foreach ($stylesXML->numFmts->numFmt as $numFmt) {
            // Entries lacking either attribute are skipped
            if (!isset($numFmt['numFmtId'], $numFmt['formatCode'])) {
                continue;
            }

            $this->formats[(int)$numFmt['numFmtId']] = (string)$numFmt['formatCode'];
        }
    }
}
325 |
/**
 * (Re)open the worksheet XMLReader on the currently selected sheet
 *
 * The reader object is created on first use and reused afterwards. It is pointed at
 * xl/worksheets/sheet<N>.xml inside the temp dir, where N is the sheet index plus one.
 */
protected function getWorksheetXML() {
    if ($this->worksheetXML === null) {
        $this->worksheetXML = new \XMLReader();
    }

    $sheetNumber = $this->getSheetIndex() + 1;
    $sheetPath = $this->tmpDir . '/xl/worksheets/sheet' . $sheetNumber . '.xml';

    $this->worksheetXML->open($sheetPath, null, self::getLibXmlLoaderOptions());
}
339 |
/**
 * Read the next row from the worksheet stream
 *
 * Styles are parsed on demand, and the worksheet reader is (re)opened when $rowIndex is 0.
 * Any other call continues from wherever the previous call left the reader.
 *
 * @param int $rowIndex    Zero-based index of the row the caller expects next
 * @param int $columnLimit Maximum number of columns to read, 0 for no limit
 *
 * @throws ReaderException
 * @return array|bool Cell values keyed by zero-based column index (pre-filled with ''
 *                    for every column when a limit is given), or false once the reader
 *                    has reached the end of the worksheet
 */
public function getRow($rowIndex, $columnLimit = 0) {
    $this->parseStyles();
    $rowIndex === 0 && $this->getWorksheetXML();

    $sharedString = false;
    $index = $styleId = 0;
    $row = $columnLimit ? array_fill(0, $columnLimit, '') : [];

    while ($canRead = $this->worksheetXML->read()) {
        $name = $this->worksheetXML->name;
        $type = $this->worksheetXML->nodeType;

        // End of row
        if ($name == 'row') {
            // The stored row number is ahead of the expected one: report an empty row
            // NOTE(review): the reader stays on this <row>, so the next call's read() moves
            // into its cells - confirm the caller accounts for gaps larger than one row.
            if (!$this->ignoreEmpty && $type == \XMLReader::ELEMENT
                && $rowIndex+1 != (int)$this->worksheetXML->getAttribute('r')) {

                $this->worksheetXML->moveToElement();
                break;
            }

            if ($type == \XMLReader::END_ELEMENT) {
                break;
            }
        }

        // Past the column limit: keep consuming nodes until the row ends
        if ($columnLimit > 0 && $index >= $columnLimit) {
            continue;
        }

        switch ($name) {
            // Cell
            case 'c':
                // "continue 2" targets the while loop; a bare "continue" inside a switch
                // behaves like "break" and raises a warning on PHP >= 7.3
                if ($type == \XMLReader::END_ELEMENT) {
                    continue 2;
                }

                $styleId = (int)$this->worksheetXML->getAttribute('s');
                $letter = preg_replace('{[^[:alpha:]]}S', '', $this->worksheetXML->getAttribute('r'));
                $index = Format::columnIndexFromString($letter) - 1;

                // Determine cell type
                $sharedString = false;
                if ($this->worksheetXML->getAttribute('t') == self::CELL_TYPE_SHARED_STR) {
                    $sharedString = true;
                }

                break;

            // Cell value
            case 'v':
            case 'is':
                if ($type == \XMLReader::END_ELEMENT) {
                    continue 2;
                }

                $value = $this->worksheetXML->readString();
                if ($sharedString) {
                    // For shared strings the node text is an index into the shared string table
                    $value = $this->getSharedString($value);
                }

                // Format value if necessary
                if ($value !== '' && $styleId && isset($this->styleXfs[$styleId])) {
                    $value = $this->formatValue($value, $styleId);
                } elseif ($value && is_numeric($value)) {
                    $value = (float)$value;
                }

                $row[$index] = $value;
                break;
        }
    }

    // read() returned false: nothing is left in the worksheet
    if ($canRead === false) {
        return false;
    }

    return $row;
}
428 |
/**
 * Release all resources held by the parser
 *
 * Closes the ZipArchive (only when a temp dir is set), closes both XMLReaders if they
 * were created, removes the temp dir and finally resets the related properties to null.
 */
public function __destruct() {
    if ($this->zip && $this->tmpDir) {
        $this->zip->close();
    }

    // Close whichever XML readers were actually created
    foreach ([$this->worksheetXML, $this->sharedStringsXML] as $xmlReader) {
        if ($xmlReader) {
            $xmlReader->close();
        }
    }

    $this->removeDir($this->tmpDir);

    $this->zip = null;
    $this->worksheetXML = null;
    $this->sharedStringsXML = null;
    $this->tmpDir = null;
}
452 |
/**
 * Recursively delete a directory and everything inside it
 *
 * Does nothing when $dir is empty, is not a directory, or cannot be opened.
 * Symbolic links are removed themselves and never followed.
 *
 * @param string $dir Path of the directory to remove
 */
protected function removeDir($dir) {
    if (!$dir || !is_dir($dir)) {
        return;
    }

    $handle = opendir($dir);
    if ($handle === false) {
        // Directory is not readable; there is nothing that can be cleaned up
        return;
    }

    // Compare with false explicitly: an entry named "0" is falsy and would otherwise
    // end the loop before the directory has been emptied
    while (($item = readdir($handle)) !== false) {
        if ($item == '.' || $item == '..') {
            continue;
        }

        $path = $dir . '/' . $item;
        if (is_dir($path) && !is_link($path)) {
            $this->removeDir($path);
        } else {
            // Regular files and links; a link to a directory is unlinked rather than
            // recursed into, so nothing outside $dir gets deleted
            unlink($path);
        }
    }

    closedir($handle);
    rmdir($dir);
}
472 |
473 | /**
474 | * Formats the value according to the index
475 | *
476 | * @param string $value
477 | * @param int $index Format index
478 | *
479 | * @throws \Exception
480 | * @return string Formatted cell value
481 | */
482 | private function formatValue($value, $index) {
483 | if (!is_numeric($value)) {
484 | return $value;
485 | }
486 |
487 | if (isset($this->styleXfs[$index]) && $this->styleXfs[$index] !== false) {
488 | $index = $this->styleXfs[$index];
489 | } else {
490 | return $value;
491 | }
492 |
493 | // A special case for the "General" format
494 | if ($index == 0) {
495 | return is_numeric($value) ? (float)$value : $value;
496 | }
497 |
498 | $format = $this->parsedFormats[$index] ?? [];
499 |
500 | if (empty($format)) {
501 | $format = [
502 | 'code' => false, 'type' => false, 'scale' => 1, 'thousands' => false, 'currency' => false
503 | ];
504 |
505 | if (isset(Format::$buildInFormats[$index])) {
506 | $format['code'] = Format::$buildInFormats[$index];
507 | } elseif (isset($this->formats[$index])) {
508 | $format['code'] = str_replace('"', '', $this->formats[$index]);
509 | }
510 |
511 | // Format code found, now parsing the format
512 | if ($format['code']) {
513 | $sections = explode(';', $format['code']);
514 | $format['code'] = $sections[0];
515 |
516 | switch (count($sections)) {
517 | case 2:
518 | if ($value < 0) {
519 | $format['code'] = $sections[1];
520 | }
521 |
522 | $value = abs($value);
523 | break;
524 |
525 | case 3:
526 | case 4:
527 | if ($value < 0) {
528 | $format['code'] = $sections[1];
529 | } elseif ($value == 0) {
530 | $format['code'] = $sections[2];
531 | }
532 |
533 | $value = abs($value);
534 | break;
535 | }
536 | }
537 |
538 | // Stripping colors
539 | $format['code'] = trim(preg_replace('/^\\[[a-zA-Z]+\\]/', '', $format['code']));
540 |
541 | // Percentages
542 | if (substr($format['code'], -1) == '%') {
543 | $format['type'] = 'Percentage';
544 | } elseif (preg_match('/(\[\$[A-Z]*-[0-9A-F]*\])*[hmsdy]/i', $format['code'])) {
545 | $format['type'] = 'DateTime';
546 | $format['code'] = trim(preg_replace('/^(\[\$[A-Z]*-[0-9A-F]*\])/i', '', $format['code']));
547 | $format['code'] = strtolower($format['code']);
548 | $format['code'] = strtr($format['code'], Format::$dateFormatReplacements);
549 |
550 | if (strpos($format['code'], 'A') === false) {
551 | $format['code'] = strtr($format['code'], Format::$dateFormatReplacements24);
552 | } else {
553 | $format['code'] = strtr($format['code'], Format::$dateFormatReplacements12);
554 | }
555 | } elseif ($format['code'] == '[$EUR ]#,##0.00_-') {
556 | $format['type'] = 'Euro';
557 | } else {
558 | // Removing skipped characters
559 | $format['code'] = preg_replace('/_./', '', $format['code']);
560 |
561 | // Removing unnecessary escaping
562 | $format['code'] = preg_replace("/\\\\/", '', $format['code']);
563 |
564 | // Removing string quotes
565 | $format['code'] = str_replace(['"', '*'], '', $format['code']);
566 |
567 | // Removing thousands separator
568 | if (strpos($format['code'], '0,0') !== false || strpos($format['code'], '#,#') !== false) {
569 | $format['thousands'] = true;
570 | }
571 |
572 | $format['code'] = str_replace(['0,0', '#,#'], ['00', '##'], $format['code']);
573 |
574 | // Scaling (Commas indicate the power)
575 | $scale = 1;
576 | $matches = [];
577 |
578 | if (preg_match('/(0|#)(,+)/', $format['code'], $matches)) {
579 | $scale = pow(1000, strlen($matches[2]));
580 |
581 | // Removing the commas
582 | $format['code'] = preg_replace(['/0,+/', '/#,+/'], ['0', '#'], $format['code']);
583 | }
584 |
585 | $format['scale'] = $scale;
586 | if (preg_match('/#?.*\?\/\?/', $format['code'])) {
587 | $format['type'] = 'Fraction';
588 | } else {
589 | $format['code'] = str_replace('#', '', $format['code']);
590 | $matches = [];
591 |
592 | if (preg_match('/(0+)(\.?)(0*)/', preg_replace('/\[[^\]]+\]/', '', $format['code']), $matches)) {
593 | list(, $integer, $decimalPoint, $decimal) = $matches;
594 |
595 | $format['minWidth'] = strlen($integer) + strlen($decimalPoint) + strlen($decimal);
596 | $format['decimals'] = $decimal;
597 | $format['precision'] = strlen($format['decimals']);
598 | $format['pattern'] = '%0' . $format['minWidth'] . '.' . $format['precision'] . 'f';
599 | }
600 | }
601 |
602 | $matches = [];
603 | if (preg_match('/\[\$(.*)\]/u', $format['code'], $matches)) {
604 | $currencyCode = explode('-', $matches[1]);
605 | if ($currencyCode) {
606 | $currencyCode = $currencyCode[0];
607 | }
608 |
609 | if (!$currencyCode) {
610 | $currencyCode = self::$currencyCode;
611 | }
612 |
613 | $format['currency'] = $currencyCode;
614 | }
615 |
616 | $format['code'] = trim($format['code']);
617 | }
618 |
619 | $this->parsedFormats[$index] = $format;
620 | }
621 |
622 | // Applying format to value
623 | if ($format) {
624 | if ($format['code'] == '@') {
625 | return (string)$value;
626 | } elseif ($format['type'] == 'Percentage') { // Percentages
627 | if ($format['code'] === '0%') {
628 | $value = round(100*$value, 0) . '%';
629 | } else {
630 | $value = sprintf('%.2f%%', round(100*$value, 2));
631 | }
632 | } elseif ($format['type'] == 'DateTime') { // Dates and times
633 | $days = (int)$value;
634 |
635 | // Correcting for Feb 29, 1900
636 | if ($days > 60) {
637 | $days--;
638 | }
639 |
640 | // At this point time is a fraction of a day
641 | $time = ($value - (int)$value);
642 |
643 | // Here time is converted to seconds
644 | // Some loss of precision will occur
645 | $seconds = $time ? (int)($time*86400) : 0;
646 |
647 | $value = clone self::$baseDate;
648 | $value->add(new \DateInterval('P' . $days . 'D' . ($seconds ? 'T' . $seconds . 'S' : '')));
649 |
650 | $value = $value->format($format['code']);
651 | } elseif ($format['type'] == 'Euro') {
652 | $value = 'EUR ' . sprintf('%1.2f', $value);
653 | } else {
654 | // Fractional numbers
655 | if ($format['type'] == 'Fraction' && ($value != (int)$value)) {
656 | $integer = floor(abs($value));
657 | $decimal = fmod(abs($value), 1);
658 |
659 | // Removing the integer part and decimal point
660 | $decimal *= pow(10, strlen($decimal) - 2);
661 | $decimalDivisor = pow(10, strlen($decimal));
662 |
663 | if (self::$runtimeInfo['GMPSupported']) {
664 | $GCD = gmp_strval(gmp_gcd($decimal, $decimalDivisor));
665 | } else {
666 | $GCD = self::GCD($decimal, $decimalDivisor);
667 | }
668 |
669 | $adjDecimal = $decimal/$GCD;
670 | $adjDecimalDivisor = $decimalDivisor/$GCD;
671 |
672 | if (strpos($format['code'], '0') !== false || strpos($format['code'], '#') !== false
673 | || substr($format['code'], 0, 3) == '? ?') {
674 |
675 | // The integer part is shown separately apart from the fraction
676 | $value = ($value < 0 ? '-' : '') . $integer ? $integer . ' '
677 | : '' . $adjDecimal . '/' . $adjDecimalDivisor;
678 | } else {
679 | // The fraction includes the integer part
680 | $adjDecimal += $integer * $adjDecimalDivisor;
681 | $value = ($value < 0 ? '-' : '') . $adjDecimal . '/' . $adjDecimalDivisor;
682 | }
683 | } else {
684 | // Scaling
685 | $value = $value/$format['scale'];
686 | if (!empty($format['minWidth']) && $format['decimals']) {
687 | if ($format['thousands']) {
688 | $value = number_format(
689 | $value, $format['precision'], self::$decimalSeparator, self::$thousandSeparator
690 | );
691 |
692 | $value = preg_replace('/(0+)(\.?)(0*)/', $value, $format['code']);
693 | } else {
694 | if (preg_match('/[0#]E[+-]0/i', $format['code'])) {
695 | // Scientific format
696 | $value = sprintf('%5.2E', $value);
697 | } else {
698 | $value = sprintf($format['pattern'], $value);
699 | $value = preg_replace('/(0+)(\.?)(0*)/', $value, $format['code']);
700 | }
701 | }
702 | }
703 | }
704 |
705 | // currency/Accounting
706 | if ($format['currency']) {
707 | $value = preg_replace('', $format['currency'], $value);
708 | }
709 | }
710 | }
711 |
712 | return $value;
713 | }
714 |
/**
 * Greatest common divisor calculation in case GMP extension is not enabled
 *
 * Iterative Euclidean algorithm on the absolute values of both arguments.
 * If exactly one argument is zero the other one is returned (gcd(0, n) = n);
 * if both are zero the result is 0.
 *
 * @param int $number1
 * @param int $number2
 *
 * @return int
 */
private static function GCD($number1, $number2) {
    $a = abs($number1);
    $b = abs($number2);

    // Replace (a, b) by (b, a mod b) until the remainder vanishes.
    // Starting the loop on $b (instead of $a) makes the result symmetric,
    // so a zero first argument no longer yields 1.
    while ($b != 0) {
        $remainder = $a % $b;
        $a = $b;
        $b = $remainder;
    }

    return $a;
}
740 |
/**
 * Open a file and make sure it is an OOXML spreadsheet package
 *
 * The file is opened as a zip archive and its "_rels/.rels" part is searched for an
 * officeDocument relationship that targets "xl/workbook.xml".
 *
 * @param string $file Path of the file to open
 *
 * @throws ReaderException When the file does not exist or is not readable
 * @throws ParserException When no such relationship is found (or the archive cannot be opened)
 */
public function openFile($file) {
    $accessible = file_exists($file) && is_readable($file);
    if (!$accessible) {
        throw new ReaderException("Could not open file [$file] for reading! File does not exist.");
    }

    $this->zip = new \ZipArchive();
    $isWorkbook = false;

    if ($this->zip->open($file) === true) {
        $this->tmpDir = sys_get_temp_dir() . '/' . uniqid();

        // The package relationships tell us whether this zip is a spreadsheet
        $relsXml = $this->securityScan($this->zip->getFromName('_rels/.rels'));
        $rels = simplexml_load_string($relsXml, 'SimpleXMLElement', self::getLibXmlLoaderOptions());

        if ($rels !== false) {
            foreach ($rels->Relationship as $rel) {
                $isOfficeDocument = $rel["Type"]
                    == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";

                if ($isOfficeDocument && $rel["Target"] == 'xl/workbook.xml') {
                    $isWorkbook = true;
                }
            }
        }
    }

    if ($isWorkbook === false) {
        throw new ParserException("The file [$file] is not recognised as a zip archive");
    }
}
784 |
785 | /**
786 | * Scan theXML for use of = 0) {
816 | @libxml_disable_entity_loader($options == (LIBXML_DTDLOAD | LIBXML_DTDATTR));
817 | }
818 |
819 | self::$libXmlLoaderOptions = $options;
820 | }
821 |
/**
 * Get default options for libxml loader.
 * Defaults to LIBXML_DTDLOAD | LIBXML_DTDATTR when not set explicitly.
 *
 * As a side effect the libxml entity loader is switched off whenever the
 * effective options equal LIBXML_DTDLOAD | LIBXML_DTDATTR, and switched on otherwise.
 *
 * @return int Default options for libxml loader
 */
public static function getLibXmlLoaderOptions() {
    // defined() takes the constant NAME as a string. Passing the constant itself
    // hands over its integer value, which never names a constant, so the default
    // below was never applied.
    if (is_null(self::$libXmlLoaderOptions) && defined('LIBXML_DTDLOAD')) {
        self::setLibXmlLoaderOptions(LIBXML_DTDLOAD | LIBXML_DTDATTR);
    }

    if (version_compare(PHP_VERSION, '5.2.11') >= 0) {
        @libxml_disable_entity_loader(self::$libXmlLoaderOptions == (LIBXML_DTDLOAD | LIBXML_DTDATTR));
    }

    return self::$libXmlLoaderOptions;
}
839 | }
840 |
--------------------------------------------------------------------------------
/src/Parser/Excel5.php:
--------------------------------------------------------------------------------
1 | '#NULL!',
62 | 0x07 => '#DIV/0!',
63 | 0x0F => '#VALUE!',
64 | 0x17 => '#REF!',
65 | 0x1D => '#NAME?',
66 | 0x24 => '#NUM!',
67 | 0x2A => '#N/A'
68 | ];
69 |
70 | /**
71 | * Base calendar year to use for calculations
72 | *
73 | * @var int
74 | */
75 | private static $excelBaseDate = Format::CALENDAR_WINDOWS_1900;
76 |
77 | /**
78 | * Decimal separator
79 | *
80 | * @var string
81 | */
82 | private static $decimalSeparator;
83 |
84 | /**
85 | * Thousands separator
86 | *
87 | * @var string
88 | */
89 | private static $thousandsSeparator;
90 |
91 | /**
92 | * Currency code
93 | *
94 | * @var string
95 | */
96 | private static $currencyCode;
97 |
98 | /**
99 | * Workbook stream data
100 | *
101 | * @var string
102 | */
103 | private $data;
104 |
105 | /**
106 | * Size in bytes of $this->data
107 | *
108 | * @var int
109 | */
110 | private $dataSize;
111 |
112 | /**
113 | * Current position in stream
114 | *
115 | * @var integer
116 | */
117 | private $pos;
118 |
119 | /**
120 | * Worksheets
121 | *
122 | * @var array
123 | */
124 | private $sheets;
125 |
126 | /**
127 | * BIFF version
128 | *
129 | * @var int
130 | */
131 | private $version;
132 |
133 | /**
134 | * Codepage set in the Excel file being read. Only important for BIFF5 (Excel 5.0 - Excel 95)
135 | * For BIFF8 (Excel 97 - Excel 2003) this will always have the value 'UTF-16LE'
136 | *
137 | * @var string
138 | */
139 | private $codePage;
140 |
141 | /**
142 | * Row data
143 | *
144 | * @var array
145 | */
146 | private $row;
147 |
148 | /**
149 | * Shared formats
150 | *
151 | * @var array
152 | */
153 | private $formats;
154 |
155 | /**
156 | * The current sheet of the file
157 | *
158 | * @var int
159 | */
160 | private $sheetIndex = 0;
161 |
162 | /**
163 | * Ignore empty row
164 | *
165 | * @var bool
166 | */
167 | private $ignoreEmpty = false;
168 |
169 | /**
170 | * The current row index of the sheet
171 | *
172 | * @var int
173 | */
174 | private $rowIndex = 0;
175 |
176 | /**
177 | * Max column number
178 | *
179 | * @var int
180 | */
181 | private $columnLimit = 0;
182 |
183 | /**
184 | * Whether the end of the current row has been reached
185 | *
186 | * @var bool
187 | */
188 | private $eor = false;
189 |
190 | /**
191 | * Extended format record
192 | *
193 | * @var array
194 | */
195 | private $xfRecords = [];
196 |
197 | /**
198 | * Shared strings. Only applies to BIFF8.
199 | *
200 | * @var array
201 | */
202 | private $sst = [];
203 |
204 | /**
205 | * The type of encryption in use
206 | *
207 | * @var int
208 | */
209 | private $encryption = 0;
210 |
211 | /**
212 | * The position in the stream after which contents are encrypted
213 | *
214 | * @var int|bool
215 | */
216 | private $encryptionStartPos = false;
217 |
218 | /**
219 | * The current RC4 decryption object
220 | *
221 | * @var RC4
222 | */
223 | private $rc4Key = null;
224 |
225 | /**
226 | * The position in the stream that the RC4 decryption object was left at
227 | *
228 | * @var int
229 | */
230 | private $rc4Pos = 0;
231 |
232 | /**
233 | * The current MD5 context state
234 | *
235 | * @var string
236 | */
237 | private $md5Ctxt = null;
238 |
/**
 * Load the workbook stream of an OLE file
 *
 * Reads the file with an OLERead instance and keeps the stream identified by
 * its "workbook" property in $this->data.
 *
 * @param string $file Path of the file to read
 */
public function loadOLE($file) {
    $ole = new OLERead();
    $ole->read($file);

    $workbookStream = $ole->getStream($ole->workbook);
    $this->data = $workbookStream;
}
249 |
/**
 * Choose whether empty rows are ignored
 *
 * @param bool $ignoreEmpty New value of the flag
 *
 * @return $this The parser itself, so calls can be chained
 */
public function ignoreEmptyRow($ignoreEmpty) {
    $this->ignoreEmpty = $ignoreEmpty;

    return $this;
}
262 |
/**
 * Tell whether empty rows are ignored
 *
 * @return bool Current value of the ignore-empty flag
 */
public function isIgnoreEmptyRow() {
    return $this->ignoreEmpty;
}
271 |
/**
 * Select the sheet to work on
 *
 * @param int $index Index of the sheet
 *
 * @return $this The parser itself, so calls can be chained
 */
public function setSheetIndex($index) {
    $this->sheetIndex = $index;

    return $this;
}
284 |
/**
 * Return the index of the currently selected sheet
 *
 * @return int
 */
public function getSheetIndex() {
    return $this->sheetIndex;
}
293 |
/**
 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
 *
 * The workbook stream is scanned on the first call only; the result is kept in
 * $this->sheets and handed back unchanged on later calls.
 *
 * @throws ParserException
 * @return array
 */
public function parseWorksheetInfo() {
    // Already scanned: nothing to do
    if ($this->sheets !== null) {
        return $this->sheets;
    }

    // total byte size of Excel data (workbook global substream + sheet substreams)
    $this->dataSize = strlen($this->data);
    $this->pos = 0;
    $this->codePage = 'CP1252';
    $this->sheets = [];

    // Pass 1: Workbook Global Substream, up to and including its EOF record
    while ($this->pos < $this->dataSize) {
        $recordType = Format::getUInt2d($this->data, $this->pos);

        if ($recordType == self::XLS_TYPE_BOF) {
            $this->readBof();
        } elseif ($recordType == self::XLS_TYPE_FILEPASS) {
            $this->readFilepass();
        } elseif ($recordType == self::XLS_TYPE_CODEPAGE) {
            $this->readCodepage();
        } elseif ($recordType == self::XLS_TYPE_DATEMODE) {
            $this->readDateMode();
        } elseif ($recordType == self::XLS_TYPE_FORMAT) {
            $this->readFormat();
        } elseif ($recordType == self::XLS_TYPE_XF) {
            $this->readXf();
        } elseif ($recordType == self::XLS_TYPE_SST) {
            $this->readSst();
        } elseif ($recordType == self::XLS_TYPE_SHEET) {
            $this->readSheet();
        } elseif ($recordType == self::XLS_TYPE_EOF) {
            $this->readDefault();
            break;
        } else {
            $this->readDefault();
        }
    }

    // Record types whose first four data bytes are the row and column index of a cell
    $cellRecordTypes = [
        self::XLS_TYPE_RK,
        self::XLS_TYPE_LABELSST,
        self::XLS_TYPE_NUMBER,
        self::XLS_TYPE_FORMULA,
        self::XLS_TYPE_BOOLERR,
        self::XLS_TYPE_LABEL,
    ];

    // Pass 2: the individual sheets
    foreach ($this->sheets as $key => $sheet) {
        // 0x00: Worksheet, 0x02: Chart, 0x06: Visual Basic module
        // Only worksheets are measured; other entries stay as they are
        if ($sheet['sheetType'] != 0x00) {
            continue;
        }

        $sheet['lastColumnLetter'] = '';
        $sheet['lastColumnIndex'] = null;
        $sheet['totalRows'] = 0;
        $sheet['totalColumns'] = 0;

        $previousRow = 0;
        $this->pos = $sheet['offset'];

        while ($this->pos <= $this->dataSize - 4) {
            $recordType = Format::getUInt2d($this->data, $this->pos);

            if (in_array($recordType, $cellRecordTypes)) {
                $length = Format::getUInt2d($this->data, $this->pos + 2);
                $recordData = substr($this->data, $this->pos + 4, $length);

                // move stream pointer to next record
                $this->pos += 4 + $length;

                $rowIndex = Format::getUInt2d($recordData, 0) + 1;
                $columnIndex = Format::getUInt2d($recordData, 2);

                if (!$this->ignoreEmpty) {
                    // Empty rows count: the total is the highest row number seen
                    $sheet['totalRows'] = max($sheet['totalRows'], $rowIndex);
                } else {
                    // Empty rows are ignored: count a row each time the row number grows
                    if ($previousRow < $rowIndex) {
                        $sheet['totalRows']++;
                    }

                    $previousRow = $rowIndex;
                }

                $sheet['lastColumnIndex'] = max($columnIndex, $sheet['lastColumnIndex']);
            } elseif ($recordType == self::XLS_TYPE_BOF) {
                $this->readBof();
            } elseif ($recordType == self::XLS_TYPE_EOF) {
                $this->readDefault();
                break;
            } else {
                $this->readDefault();
            }
        }

        if ($sheet['lastColumnIndex'] === null) {
            // No cell record was found in this sheet
            $sheet['lastColumnIndex'] = 0;
        } else {
            $sheet['lastColumnLetter'] = Format::stringFromColumnIndex($sheet['lastColumnIndex']);
        }

        if ($sheet['lastColumnLetter']) {
            $sheet['totalColumns'] = $sheet['lastColumnIndex'] + 1;
        }

        $this->sheets[$key] = $sheet;
    }

    $this->pos = 0;

    return $this->sheets;
}
435 |
/**
 * Get row data
 *
 * Reads records of the current sheet from the current stream position until a cell
 * of another row is met (the position is then put back to that record), the sheet's
 * EOF record is consumed, or the end position is passed.
 *
 * @param int $rowIndex    Index of the row to collect; 0 rewinds to the start of the sheet
 * @param int $columnLimit Number of columns to return; 0 means no limit
 *
 * @throws ParserException
 * @return array|bool Row values, or false when the position is already at or past the end position
 */
public function getRow($rowIndex, $columnLimit = 0) {
    $this->parseWorksheetInfo();

    // Rewind or change sheet
    $sheetOffset = $this->sheets[$this->sheetIndex]['offset'];
    if ($rowIndex === 0 || $this->pos < $sheetOffset) {
        $this->pos = $sheetOffset;
    }

    // The sheet ends where the next listed sheet begins, or at the end of the data
    $nextSheetIndex = $this->sheetIndex + 1;
    if (isset($this->sheets[$nextSheetIndex]['offset'])) {
        $endPos = $this->sheets[$nextSheetIndex]['offset'] - 4;
    } else {
        $endPos = $this->dataSize - 4;
    }

    if ($this->pos >= $endPos) {
        return false;
    }

    $this->rowIndex = $rowIndex;
    $this->columnLimit = $columnLimit;
    $this->eor = false;
    $this->row = [];
    if ($columnLimit) {
        $this->row = array_fill(0, $columnLimit, '');
    }

    while ($this->pos <= $endPos) {
        // Remember where this record starts so it can be re-read by the next call
        $recordStart = $this->pos;
        $recordType = Format::getUInt2d($this->data, $this->pos);

        if ($recordType == self::XLS_TYPE_BOF) {
            $this->readBof();
        } elseif ($recordType == self::XLS_TYPE_RK) {
            $this->readRk();
        } elseif ($recordType == self::XLS_TYPE_LABELSST) {
            $this->readLabelSst();
        } elseif ($recordType == self::XLS_TYPE_MULRK) {
            $this->readMulRk();
        } elseif ($recordType == self::XLS_TYPE_NUMBER) {
            $this->readNumber();
        } elseif ($recordType == self::XLS_TYPE_FORMULA) {
            $this->readFormula();
        } elseif ($recordType == self::XLS_TYPE_BOOLERR) {
            $this->readBoolErr();
        } elseif ($recordType == self::XLS_TYPE_MULBLANK || $recordType == self::XLS_TYPE_BLANK) {
            $this->readBlank();
        } elseif ($recordType == self::XLS_TYPE_LABEL) {
            $this->readLabel();
        } elseif ($recordType == self::XLS_TYPE_EOF) {
            $this->readDefault();
            break;
        } else {
            $this->readDefault();
        }

        // End of row: the record just read belongs to another row
        if ($this->eor) {
            $this->pos = $recordStart;
            break;
        }
    }

    return $this->row;
}
529 |
/**
 * Add cell data to the row being collected
 *
 * @param int   $row     Row index of the cell
 * @param int   $column  Column index of the cell
 * @param mixed $value   Cell value
 * @param int   $xfIndex Index into the XF records; its format is applied to the value
 *
 * @return bool False when the cell belongs to another row (the end-of-row flag is then set),
 *              true otherwise
 */
private function addCell($row, $column, $value, $xfIndex) {
    if ($this->rowIndex != $row) {
        $this->eor = true;

        return false;
    }

    // Cells beyond the column limit are accepted but not stored
    $withinLimit = !$this->columnLimit || $column < $this->columnLimit;
    if ($withinLimit) {
        $format = $this->xfRecords[$xfIndex]['format'];
        $this->row[$column] = self::toFormattedString($value, $format);
    }

    return true;
}
553 |
/**
 * Read BOF
 *
 * For the workbook globals substream the BIFF version is checked and stored.
 * A worksheet BOF is only stepped over. For any other substream type all
 * records up to and including its EOF record are skipped.
 *
 * @throws ParserException When the workbook version is neither BIFF8 nor BIFF7
 */
private function readBof() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 2; size: 2; type of the following data
    $substreamType = Format::getUInt2d($recordData, 2);

    if ($substreamType == self::XLS_WORKBOOKGLOBALS) {
        $version = Format::getUInt2d($recordData, 0);
        $supported = ($version == self::XLS_BIFF8) || ($version == self::XLS_BIFF7);
        if (!$supported) {
            throw new ParserException('Cannot read this Excel file. Version is too old.', 1);
        }

        $this->version = $version;

        return;
    }

    if ($substreamType == self::XLS_WORKSHEET) {
        // do not use this version information for anything
        // it is unreliable (OpenOffice doc, 5.8), use only version information from the global stream
        return;
    }

    // substream, e.g. chart
    // just skip the entire substream
    do {
        $skippedType = Format::getUInt2d($this->data, $this->pos);
        $this->readDefault();
    } while ($skippedType != self::XLS_TYPE_EOF && $this->pos < $this->dataSize);
}
595 |
/**
 * SHEET
 *
 * This record is located in the Workbook Globals Substream and represents a sheet inside the workbook.
 * One SHEET record is written for each sheet. It stores the sheet name and a stream offset to the BOF
 * record of the respective Sheet Substream within the Workbook Stream.
 *
 * Only sheets whose state is "visible" are appended to $this->sheets.
 */
private function readSheet() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // offset: 0; size: 4; absolute stream position of the BOF record of the sheet
    // NOTE: not encrypted
    $offset = Format::getInt4d($this->data, $this->pos + 4);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 4; size: 1; sheet state
    // Byte offsets use square brackets: the curly-brace form ($string{4}) is
    // deprecated since PHP 7.4 and is a parse error from PHP 8.0 on.
    switch (ord($recordData[4])) {
        case 0x01:
            $sheetState = self::SHEETSTATE_HIDDEN;
            break;

        case 0x02:
            $sheetState = self::SHEETSTATE_VERYHIDDEN;
            break;

        default:
            // 0x00 and any unknown value are treated as visible
            $sheetState = self::SHEETSTATE_VISIBLE;
            break;
    }

    // offset: 5; size: 1; sheet type
    $sheetType = ord($recordData[5]);

    // offset: 6; size: var; sheet name
    $name = '';
    if ($this->version == self::XLS_BIFF8) {
        $string = self::readUnicodeStringShort(substr($recordData, 6));
        $name = $string['value'];
    } elseif ($this->version == self::XLS_BIFF7) {
        $string = $this->readByteStringShort(substr($recordData, 6));
        $name = $string['value'];
    }

    // ignore hidden sheet
    if ($sheetState == self::SHEETSTATE_VISIBLE) {
        $this->sheets[] = [
            'name' => $name, 'offset' => $offset, 'sheetState' => $sheetState, 'sheetType' => $sheetType
        ];
    }
}
653 |
/**
 * Skip a BIFF record of any type.
 * Nothing is parsed; the stream pointer is simply advanced to the next record.
 */
private function readDefault() {
    // A record is a 4-byte header (2 bytes type, 2 bytes length) followed by its data
    $dataLength = Format::getUInt2d($this->data, $this->pos + 2);

    $this->pos = $this->pos + 4 + $dataLength;
}
665 |
/**
 * FILEPASS
 *
 * This record is part of the File Protection Block. It contains information about the read/write password of
 * the file. All record contents following this record will be encrypted.
 * The decryption functions and objects used from here on in are based on the source of Spreadsheet-ParseExcel:
 * http://search.cpan.org/~jmcnamara/Spreadsheet-ParseExcel/
 *
 * Only the fixed password 'VelvetSweatshop' is tried.
 *
 * @throws ParserException When the record is not 54 bytes long or the password check fails
 */
private function readFilepass() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    if ($length != 54) {
        throw new ParserException('Unexpected file pass record length', 2);
    }

    $recordData = $this->readRecordData($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // Three consecutive 16-byte fields starting at offset 6
    $docId = substr($recordData, 6, 16);
    $saltData = substr($recordData, 22, 16);
    $hashedSaltData = substr($recordData, 38, 16);

    $passwordAccepted = $this->verifyPassword(
        'VelvetSweatshop', $docId, $saltData, $hashedSaltData, $this->md5Ctxt
    );
    if (!$passwordAccepted) {
        throw new ParserException('Decryption password incorrect', 3);
    }

    $this->encryption = self::MS_BIFF_CRYPTO_RC4;

    // Decryption required from the record after next onwards:
    // current position plus the length field of the record that follows
    $followingLength = Format::getUInt2d($this->data, $this->pos + 2);
    $this->encryptionStartPos = $this->pos + $followingLength;
}
699 |
/**
 * Read record data from stream, decrypting as required
 *
 * @param string $data Data stream to read from
 * @param int    $pos  Position to start reading from
 * @param int    $len  Record data length
 *
 * @throws ParserException When XOR encryption is in use
 * @return string Record data
 */
private function readRecordData($data, $pos, $len) {
    $chunk = substr($data, $pos, $len);

    // File not encrypted, or record before encryption start point
    $isPlain = $this->encryption == self::MS_BIFF_CRYPTO_NONE || $pos < $this->encryptionStartPos;
    if ($isPlain) {
        return $chunk;
    }

    if ($this->encryption != self::MS_BIFF_CRYPTO_RC4) {
        if ($this->encryption == self::MS_BIFF_CRYPTO_XOR) {
            throw new ParserException('XOr encryption not supported', 4);
        }

        // Any other encryption value yields no data
        return '';
    }

    $previousBlock = floor($this->rc4Pos / self::REKEY_BLOCK);
    $block = floor($pos / self::REKEY_BLOCK);
    $lastBlock = floor(($pos + $len) / self::REKEY_BLOCK);

    // Spin an RC4 decryptor to the right spot. A decryptor that sits earlier
    // in the same block is re-used; otherwise a fresh one is keyed for the block.
    $needsFreshKey = $block != $previousBlock || $pos < $this->rc4Pos || !$this->rc4Key;
    if ($needsFreshKey) {
        $this->rc4Key = $this->makeKey($block, $this->md5Ctxt);
        $skip = $pos % self::REKEY_BLOCK;
    } else {
        $skip = $pos - $this->rc4Pos;
    }

    // Advance the key stream by discarding $skip bytes
    $this->rc4Key->RC4(str_repeat("\0", $skip));

    // Decrypt record data (re-keying at the end of every block)
    $plain = '';
    while ($block != $lastBlock) {
        $take = self::REKEY_BLOCK - ($pos % self::REKEY_BLOCK);
        $plain .= $this->rc4Key->RC4(substr($chunk, 0, $take));

        $chunk = substr($chunk, $take);
        $pos += $take;
        $len -= $take;
        $block++;

        $this->rc4Key = $this->makeKey($block, $this->md5Ctxt);
    }

    $plain .= $this->rc4Key->RC4(substr($chunk, 0, $len));

    // Keep track of the position of this decryptor so it can be re-used later
    $this->rc4Pos = $pos + $len;

    return $plain;
}
760 |
/**
 * Make an RC4 decryptor for the given block
 *
 * A 64-byte buffer is filled with the first five bytes of the context, the block
 * number as four little-endian bytes, a 0x80 marker at offset 9 and 0x48 at
 * offset 56; the RC4 object is keyed with md5() of that buffer.
 *
 * NOTE(review): the buffer looks like a hand-padded MD5 input block (0x48 = 72 bits
 * = 9 bytes), yet md5() applies its own padding and returns a 32-character hex
 * string here - confirm this is the key material RC4 is meant to receive.
 *
 * @param int    $block      Block for which to create decryptor
 * @param string $valContext MD5 context state
 *
 * @return RC4
 */
private function makeKey($block, $valContext) {
    $buffer = str_repeat("\0", 64);

    // bytes 0-4: start of the context
    for ($index = 0; $index < 5; $index++) {
        $buffer[$index] = $valContext[$index];
    }

    // bytes 5-8: block number, least significant byte first
    for ($byte = 0; $byte < 4; $byte++) {
        $buffer[5 + $byte] = chr(($block >> (8 * $byte)) & 0xff);
    }

    $buffer[9] = "\x80";
    $buffer[56] = "\x48";

    return new RC4(md5($buffer));
}
786 |
/**
 * Verify RC4 file password
 *
 * NOTE(review): every md5() call below returns a 32-character hex string and is fed
 * a buffer that already carries hand-written padding bytes. The final comparison
 * sets such a hex string against the RC4 output of a 16-byte field, so it is
 * unlikely ever to succeed - confirm against the RC4 class and the file format
 * specification before relying on this check.
 *
 * @param string $password        Password to check
 * @param string $docid           Document id
 * @param string $salt_data       Salt data
 * @param string $hashedsalt_data Hashed salt data
 * @param string &$valContext     Set to the MD5 context of the value
 *
 * @return bool Success
 */
private function verifyPassword($password, $docid, $salt_data, $hashedsalt_data, &$valContext) {
    $buffer = str_repeat("\0", 64);

    // Password characters as two bytes each, low byte first
    $charCount = strlen($password);
    for ($n = 0; $n < $charCount; $n++) {
        $charCode = ord(substr($password, $n, 1));
        $buffer[2 * $n] = chr($charCode & 0xff);
        $buffer[2 * $n + 1] = chr(($charCode >> 8) & 0xff);
    }

    $buffer[2 * $charCount] = chr(0x80);
    $buffer[56] = chr(($charCount << 4) & 0xff);

    $passwordDigest = md5($buffer);

    // Interleave pieces of the digest with the document id, wrapping around
    // the 64-byte buffer, until the write position lands on 16
    $offset = 0;
    $keyOffset = 0;
    $toCopy = 5;

    while ($offset != 16) {
        if ((64 - $offset) < 5) {
            $toCopy = 64 - $offset;
        }

        for ($k = 0; $k <= $toCopy; $k++) {
            $buffer[$offset + $k] = $passwordDigest[$keyOffset + $k];
        }

        $offset += $toCopy;

        if ($offset == 64) {
            // Buffer full: continue the interrupted digest piece at the start
            $keyOffset = $toCopy;
            $toCopy = 5 - $toCopy;
            $offset = 0;
            continue;
        }

        $keyOffset = 0;
        $toCopy = 5;
        for ($k = 0; $k < 16; $k++) {
            $buffer[$offset + $k] = $docid[$k];
        }
        $offset += 16;
    }

    $buffer[16] = "\x80";
    for ($k = 0; $k < 47; $k++) {
        $buffer[17 + $k] = "\0";
    }
    $buffer[56] = "\x80";
    $buffer[57] = "\x0a";

    $valContext = md5($buffer);

    // Both fields are run through the same block-0 decryptor, in this order
    $decryptor = $this->makeKey(0, $valContext);

    $salt = $decryptor->RC4($salt_data);
    $hashedSalt = $decryptor->RC4($hashedsalt_data);

    $salt .= "\x80" . str_repeat("\0", 47);
    $salt[56] = "\x80";

    $saltDigest = md5($salt);

    return $saltDigest == $hashedSalt;
}
863 |
/**
 * CODEPAGE
 *
 * This record stores the text encoding used to write byte strings, stored as MS Windows code page identifier.
 * The identifier is translated by NumberToName() and kept in $this->codePage.
 *
 * @throws ParserException
 */
private function readCodepage() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; code page identifier
    $identifier = Format::getUInt2d($recordData, 0);
    $codePageName = self::NumberToName($identifier);

    $this->codePage = $codePageName;
}
882 |
/**
 * DATEMODE
 *
 * This record specifies the base date for displaying date values. All dates are stored as count of days
 * past this base date. In BIFF2-BIFF4 this record is part of the Calculation Settings Block. In BIFF5-BIFF8
 * it is stored in the Workbook Globals Substream.
 *
 * Sets the static base date to the 1904 calendar when the first data byte is 1,
 * and to the 1900 calendar in every other case (including an empty record).
 */
private function readDateMode() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; 0 = base 1900, 1 = base 1904
    // Square brackets replace the curly-brace offset ($recordData{0}), which is
    // deprecated since PHP 7.4 and a parse error from PHP 8.0 on. The isset()
    // guard avoids reading an offset of an empty record.
    $uses1904 = isset($recordData[0]) && ord($recordData[0]) == 1;

    self::$excelBaseDate = $uses1904 ? Format::CALENDAR_MAC_1904 : Format::CALENDAR_WINDOWS_1900;
}
902 |
/**
 * FORMAT
 *
 * This record contains information about a number format. All FORMAT records occur together in a sequential list.
 * In BIFF2-BIFF4 other records referencing a FORMAT record contain a zero-based index into this list. From BIFF5
 * on the FORMAT record contains the index itself that will be used by other records.
 *
 * The format string is stored in $this->formats under the index read from the record.
 */
private function readFormat() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; format index, followed by the format string
    $indexCode = Format::getUInt2d($recordData, 0);
    $encodedString = substr($recordData, 2);

    if ($this->version == self::XLS_BIFF8) {
        $string = self::readUnicodeStringLong($encodedString);
    } else {
        // BIFF7
        $string = $this->readByteStringShort($encodedString);
    }

    $this->formats[$indexCode] = $string['value'];
}
928 |
/**
 * XF - Extended Format
 *
 * This record contains formatting information for cells, rows, columns or styles.
 * Only the number format is evaluated here: for every XF record one entry holding
 * the format index and the resolved format code is appended to $this->xfRecords.
 * The code comes from the formats read from the file, else from the built-in
 * formats, else it is the general format.
 */
private function readXf() {
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 2; size: 2; Index to FORMAT record
    $numberFormatIndex = Format::getUInt2d($recordData, 2);

    // general format code unless a more specific one is known
    $numberFormat = Format::FORMAT_GENERAL;
    if (isset($this->formats[$numberFormatIndex])) {
        // user-defined format code
        $numberFormat = $this->formats[$numberFormatIndex];
    } elseif (isset(Format::$buildInFormats[$numberFormatIndex])) {
        // built-in format code
        $numberFormat = Format::$buildInFormats[$numberFormatIndex];
    }

    $this->xfRecords[] = ['index' => $numberFormatIndex, 'format' => $numberFormat];
}
960 |
961 | /**
962 | * SST - Shared String Table
963 | *
964 | * This record contains a list of all strings used anywhere in the workbook. Each string occurs only once.
965 | * The workbook uses indexes into the list to reference the strings.
966 | **/
967 | private function readSst() {
968 | // offset within (spliced) record data
969 | $pos = 0;
970 |
971 | // get spliced record data
972 | $splicedRecordData = $this->getSplicedRecordData();
973 | $recordData = $splicedRecordData['recordData'];
974 | $spliceOffsets = $splicedRecordData['spliceOffsets'];
975 |
976 | // offset: 0; size: 4; total number of strings in the workbook
977 | $pos += 4;
978 |
979 | // offset: 4; size: 4; number of following strings ($nm)
980 | $nm = Format::getInt4d($recordData, 4);
981 |
982 | $pos += 4;
983 |
984 | // loop through the Unicode strings (16-bit length)
985 | for ($i = 0; $i < $nm; ++$i) {
986 | if (!isset($recordData[$pos + 2])) {
987 | break;
988 | }
989 |
990 | // number of characters in the Unicode string
991 | $numChars = Format::getUInt2d($recordData, $pos);
992 | $pos += 2;
993 |
994 | // option flags
995 | $optionFlags = ord($recordData[$pos]);
996 | ++$pos;
997 |
998 | // bit: 0; mask: 0x01; 0 = compressed; 1 = uncompressed
999 | $isCompressed = (($optionFlags & 0x01) == 0) ;
1000 |
1001 | // bit: 2; mask: 0x02; 0 = ordinary; 1 = Asian phonetic
1002 | $hasAsian = (($optionFlags & 0x04) != 0);
1003 |
1004 | // bit: 3; mask: 0x03; 0 = ordinary; 1 = Rich-Text
1005 | $formattingRuns = 0;
1006 | $hasRichText = (($optionFlags & 0x08) != 0);
1007 | if ($hasRichText && isset($recordData[$pos])) {
1008 | // number of Rich-Text formatting runs
1009 | $formattingRuns = Format::getUInt2d($recordData, $pos);
1010 | $pos += 2;
1011 | }
1012 |
1013 | $extendedRunLength = 0;
1014 | if ($hasAsian && isset($recordData[$pos])) {
1015 | // size of Asian phonetic setting
1016 | $extendedRunLength = Format::getInt4d($recordData, $pos);
1017 | $pos += 4;
1018 | }
1019 |
1020 | // expected byte length of character array if not split
1021 | $len = ($isCompressed) ? $numChars : $numChars * 2;
1022 |
1023 | // look up limit position
1024 | $limitPos = 0;
1025 | foreach ($spliceOffsets as $spliceOffset) {
1026 | // it can happen that the string is empty, therefore we need
1027 | // <= and not just <
1028 | if ($pos <= $spliceOffset) {
1029 | $limitPos = $spliceOffset;
1030 | break;
1031 | }
1032 | }
1033 |
1034 | if ($pos + $len <= $limitPos) {
1035 | // character array is not split between records
1036 | $retStr = substr($recordData, $pos, $len);
1037 | $pos += $len;
1038 | } else {
1039 | // character array is split between records
1040 | // first part of character array
1041 | $retStr = substr($recordData, $pos, $limitPos - $pos);
1042 | $bytesRead = $limitPos - $pos;
1043 |
1044 | // remaining characters in Unicode string
1045 | $charsLeft = $numChars - (($isCompressed) ? $bytesRead : ($bytesRead / 2));
1046 | $pos = $limitPos;
1047 |
1048 | // keep reading the characters
1049 | while ($charsLeft > 0) {
1050 | // look up next limit position, in case the string span more than one continue record
1051 | foreach ($spliceOffsets as $spliceOffset) {
1052 | if ($pos < $spliceOffset) {
1053 | $limitPos = $spliceOffset;
1054 | break;
1055 | }
1056 | }
1057 |
1058 | if (!isset($recordData[$pos])) {
1059 | break;
1060 | }
1061 |
1062 | // repeated option flags
1063 | // OpenOffice.org documentation 5.21
1064 | $option = ord($recordData[$pos]);
1065 | ++$pos;
1066 |
1067 | if ($isCompressed && ($option == 0)) {
1068 | // 1st fragment compressed
1069 | // this fragment compressed
1070 | $len = min($charsLeft, $limitPos - $pos);
1071 | $retStr .= substr($recordData, $pos, $len);
1072 | $charsLeft -= $len;
1073 | $isCompressed = true;
1074 | } elseif (!$isCompressed && ($option != 0)) {
1075 | // 1st fragment uncompressed
1076 | // this fragment uncompressed
1077 | $len = min($charsLeft * 2, $limitPos - $pos);
1078 | $retStr .= substr($recordData, $pos, $len);
1079 | $charsLeft -= $len / 2;
1080 | $isCompressed = false;
1081 | } elseif (!$isCompressed && ($option == 0)) {
1082 | // 1st fragment uncompressed
1083 | // this fragment compressed
1084 | $len = min($charsLeft, $limitPos - $pos);
1085 | for ($j = 0; $j < $len; ++$j) {
1086 | if (!isset($recordData[$pos + $j])) {
1087 | break;
1088 | }
1089 |
1090 | $retStr .= $recordData[$pos + $j] . chr(0);
1091 | }
1092 |
1093 | $charsLeft -= $len;
1094 | $isCompressed = false;
1095 | } else {
1096 | // 1st fragment compressed
1097 | // this fragment uncompressed
1098 | $newStr = '';
1099 | $jMax = strlen($retStr);
1100 | for ($j = 0; $j < $jMax; ++$j) {
1101 | $newStr .= $retStr[$j] . chr(0);
1102 | }
1103 |
1104 | $retStr = $newStr;
1105 | $len = min($charsLeft * 2, $limitPos - $pos);
1106 | $retStr .= substr($recordData, $pos, $len);
1107 | $charsLeft -= $len / 2;
1108 | $isCompressed = false;
1109 | }
1110 |
1111 | $pos += $len;
1112 | }
1113 | }
1114 |
1115 | // convert to UTF-8
1116 | $retStr = self::encodeUTF16($retStr, $isCompressed);
1117 |
1118 | // read additional Rich-Text information, if any
1119 | // $fmtRuns = [];
1120 | if ($hasRichText) {
1121 | // list of formatting runs
1122 | /*for ($j = 0; $j < $formattingRuns; ++$j) {
1123 | // first formatted character; zero-based
1124 | $charPos = Format::getUInt2d($recordData, $pos + $j * 4);
1125 |
1126 | // index to font record
1127 | $fontIndex = Format::getUInt2d($recordData, $pos + 2 + $j * 4);
1128 | $fmtRuns[] = ['charPos' => $charPos, 'fontIndex' => $fontIndex];
1129 | }*/
1130 |
1131 | $pos += 4 * $formattingRuns;
1132 | }
1133 |
1134 | // read additional Asian phonetics information, if any
1135 | if ($hasAsian) {
1136 | // For Asian phonetic settings, we skip the extended string data
1137 | $pos += $extendedRunLength;
1138 | }
1139 |
1140 | // store the shared sting
1141 | $this->sst[] = ['value' => $retStr];
1142 | }
1143 | }
1144 |
1145 | /**
1146 | * Read RK record
1147 | *
1148 | * This record represents a cell that contains an RK value (encoded integer or floating-point value). If a
1149 | * floating-point value cannot be encoded to an RK value, a NUMBER record will be written. This record replaces
1150 | * the record INTEGER written in BIFF2.
1151 | */
private function readRk() {
    // record header: bytes 2-3 carry the payload size, the payload itself begins at byte 4
    $size = Format::getUInt2d($this->data, $this->pos + 2);
    $payload = substr($this->data, $this->pos + 4, $size);

    // step the stream pointer over this record (4 header bytes + payload)
    $this->pos += 4 + $size;

    // payload layout: row (2 bytes), column (2 bytes), XF index (2 bytes), RK value (4 bytes)
    $rowIndex = Format::getUInt2d($payload, 0);
    $columnIndex = Format::getUInt2d($payload, 2);
    $xf = Format::getUInt2d($payload, 4);

    // decode the RK-encoded number
    $decoded = self::getIEEE754(Format::getInt4d($payload, 6));

    $this->addCell($rowIndex, $columnIndex, $decoded, $xf);
}
1175 |
1176 | /**
1177 | * Read LABELSST record
1178 | *
1179 | * This record represents a cell that contains a string. It replaces the LABEL record and RSTRING record used in
1180 | * BIFF2-BIFF5.
1181 | */
private function readLabelSst() {
    // record header: bytes 2-3 carry the payload size, the payload itself begins at byte 4
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; index to row
    $row = Format::getUInt2d($recordData, 0);

    // offset: 2; size: 2; index to column
    $column = Format::getUInt2d($recordData, 2);

    // offset: 4; size: 2; index to XF record
    $xfIndex = Format::getUInt2d($recordData, 4);

    // offset: 6; size: 4; index to SST record
    $index = Format::getInt4d($recordData, 6);

    // The record may point at a shared string that was never loaded (e.g. a truncated or
    // corrupt SST); fall back to null instead of raising an undefined-index error.
    $value = isset($this->sst[$index]['value']) ? $this->sst[$index]['value'] : null;

    $this->addCell($row, $column, $value, $xfIndex);
}
1195 |
1196 | /**
1197 | * Read MULRK record
1198 | *
1199 | * This record represents a cell range containing RK value cells. All cells are located in the same row.
1200 | */
private function readMulRk() {
    // record header: bytes 2-3 carry the payload size, the payload itself begins at byte 4
    $size = Format::getUInt2d($this->data, $this->pos + 2);
    $payload = substr($this->data, $this->pos + 4, $size);

    // step the stream pointer over this record
    $this->pos += 4 + $size;

    // first two fields: row index and first column index (2 bytes each)
    $rowIndex = Format::getUInt2d($payload, 0);
    $firstColumn = Format::getUInt2d($payload, 2);

    // the last column index sits in the final two bytes of the payload
    $lastColumn = Format::getUInt2d($payload, $size - 2);

    // between them: one 6-byte entry per column (2-byte XF index followed by a 4-byte RK value)
    for ($column = $firstColumn, $cursor = 4; $column <= $lastColumn; ++$column, $cursor += 6) {
        $xf = Format::getUInt2d($payload, $cursor);
        $decoded = self::getIEEE754(Format::getInt4d($payload, $cursor + 2));

        $this->addCell($rowIndex, $column, $decoded, $xf);
    }
}
1232 |
1233 | /**
1234 | * Read NUMBER record
1235 | *
1236 | * This record represents a cell that contains a floating-point value.
1237 | */
private function readNumber() {
    // record header: bytes 2-3 carry the payload size, the payload itself begins at byte 4
    $size = Format::getUInt2d($this->data, $this->pos + 2);
    $payload = substr($this->data, $this->pos + 4, $size);

    // step the stream pointer over this record
    $this->pos += 4 + $size;

    // payload layout: row (2 bytes), column (2 bytes), XF index (2 bytes), 8-byte number
    $rowIndex = Format::getUInt2d($payload, 0);
    $columnIndex = Format::getUInt2d($payload, 2);
    $xf = Format::getUInt2d($payload, 4);
    $number = self::extractNumber(substr($payload, 6, 8));

    $this->addCell($rowIndex, $columnIndex, $number, $xf);
}
1257 |
1258 | /**
1259 | * Read FORMULA record + perhaps a following STRING record if formula result is a string
1260 | * This record contains the token array and the result of a formula cell.
1261 | */
private function readFormula() {
    // record header: bytes 2-3 carry the payload size, the payload itself begins at byte 4
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; row index
    $row = Format::getUInt2d($recordData, 0);

    // offset: 2; size: 2; col index
    $column = Format::getUInt2d($recordData, 2);

    // offset 4; size: 2; index to XF record
    $xfIndex = Format::getUInt2d($recordData, 4);

    // offset: 6; size: 8; result of the formula.
    // When bytes 12 and 13 are both 0xFF the result is not a number and byte 6 tells its type.
    // Square-bracket offsets are used because the curly-brace form is a parse error on PHP 8;
    // the isset() guard keeps short (malformed) records on the numeric path without warnings.
    $isSpecialResult = isset($recordData[13])
        && (ord($recordData[12]) == 255)
        && (ord($recordData[13]) == 255);
    $resultType = $isSpecialResult ? ord($recordData[6]) : -1;

    if ($resultType == 0) {
        // String result: the text is stored in the STRING record that follows
        $value = $this->readString();
    } elseif ($resultType == 1) {
        // Boolean formula. Result is in +2; 0=false, 1=true
        $value = (bool) ord($recordData[8]);
    } elseif ($resultType == 2) {
        // Error formula. Error code is in +2
        $value = self::mapErrorCode(ord($recordData[8]));
    } elseif ($resultType == 3) {
        // Formula result is a null string
        $value = '';
    } else {
        // formula result is a number, first 14 bytes like _NUMBER record
        $value = self::extractNumber(substr($recordData, 6, 8));
    }

    $this->addCell($row, $column, $value, $xfIndex);
}
1298 |
1299 | /**
1300 | * Read a STRING record from current stream position and advance the stream pointer to next record.
1301 | * This record is used for storing result from FORMULA record when it is a string, and it occurs
1302 | * directly after the FORMULA record
1303 | *
1304 | * @return string The string contents as UTF-8
1305 | */
private function readString() {
    // record header: bytes 2-3 carry the payload size, the payload itself begins at byte 4
    $size = Format::getUInt2d($this->data, $this->pos + 2);
    $payload = substr($this->data, $this->pos + 4, $size);

    // step the stream pointer over this record
    $this->pos += 4 + $size;

    // BIFF8 stores the text as a Unicode string, other versions as a code-page byte string
    $parsed = ($this->version == self::XLS_BIFF8)
        ? self::readUnicodeStringLong($payload)
        : $this->readByteStringLong($payload);

    return $parsed['value'];
}
1322 |
1323 | /**
1324 | * Read BOOLERR record
1325 | *
1326 | * This record represents a Boolean value or error value cell.
1327 | */
private function readBoolErr() {
    // record header: bytes 2-3 carry the payload size, the payload itself begins at byte 4
    $length = Format::getUInt2d($this->data, $this->pos + 2);
    $recordData = substr($this->data, $this->pos + 4, $length);

    // move stream pointer to next record
    $this->pos += 4 + $length;

    // offset: 0; size: 2; row index
    $row = Format::getUInt2d($recordData, 0);

    // offset: 2; size: 2; column index
    $column = Format::getUInt2d($recordData, 2);

    // offset: 4; size: 2; index to XF record
    $xfIndex = Format::getUInt2d($recordData, 4);

    // Square-bracket offsets are used below because the curly-brace form is a parse error on PHP 8.

    // offset: 6; size: 1; the boolean value or error value
    $boolError = ord($recordData[6]);

    // offset: 7; size: 1; 0=boolean; 1=error
    $isError = ord($recordData[7]);

    // any other flag value is ignored and no cell is added
    switch ($isError) {
        case 0: // boolean
            $this->addCell($row, $column, (bool) $boolError, $xfIndex);
            break;

        case 1: // error type
            $this->addCell($row, $column, self::mapErrorCode($boolError), $xfIndex);
            break;
    }
}
1365 |
1366 | /**
1367 | * Read BLANK record
1368 | */
private function readBlank() {
    // record header: bytes 2-3 carry the payload size, the payload itself begins at byte 4
    $size = Format::getUInt2d($this->data, $this->pos + 2);
    $payload = substr($this->data, $this->pos + 4, $size);

    // step the stream pointer over this record
    $this->pos += 4 + $size;

    // payload layout: row (2 bytes), column (2 bytes), XF index (2 bytes)
    $rowIndex = Format::getUInt2d($payload, 0);
    $columnIndex = Format::getUInt2d($payload, 2);
    $xf = Format::getUInt2d($payload, 4);

    // a blank cell is stored with an empty string as its value
    $this->addCell($rowIndex, $columnIndex, '', $xf);
}
1387 |
1388 | /**
1389 | * Read LABEL record
1390 | *
1391 | * This record represents a cell that contains a string. In BIFF8 it is usually replaced by the LABELSST record.
1392 | * Excel still uses this record, if it copies unformatted text cells to the clipboard.
1393 | */
private function readLabel() {
    // record header: bytes 2-3 carry the payload size, the payload itself begins at byte 4
    $size = Format::getUInt2d($this->data, $this->pos + 2);
    $payload = substr($this->data, $this->pos + 4, $size);

    // step the stream pointer over this record
    $this->pos += 4 + $size;

    // payload layout: row (2 bytes), column (2 bytes), XF index (2 bytes), then the string
    $rowIndex = Format::getUInt2d($payload, 0);
    $columnIndex = Format::getUInt2d($payload, 2);
    $xf = Format::getUInt2d($payload, 4);
    $stringData = substr($payload, 6);

    // BIFF8 stores the text as a Unicode string, other versions as a code-page byte string
    $parsed = ($this->version == self::XLS_BIFF8)
        ? self::readUnicodeStringLong($stringData)
        : $this->readByteStringLong($stringData);

    $this->addCell($rowIndex, $columnIndex, $parsed['value'], $xf);
}
1421 |
1422 | /**
1423 | * Map error code, e.g. '#N/A'
1424 | *
1425 | * @param int $code
1426 | * @return string
1427 | */
private static function mapErrorCode($code) {
    // look the code up in the class-level error table; unknown codes yield false
    return isset(self::$errorCode[$code]) ? self::$errorCode[$code] : false;
}
1435 |
1436 | /**
1437 | * Convert a value in a pre-defined format to a PHP string
1438 | *
1439 | * @param mixed $value Value to format
1440 | * @param string $format Format code
1441 | * @return string
1442 | */
private static function toFormattedString($value = '0', $format = Format::FORMAT_GENERAL) {
    // Non-numeric values are returned untouched (the text section of a format code is not applied),
    // and so are numbers under the 'General' and 'Text' format codes.
    if (!is_numeric($value) || ($format === Format::FORMAT_GENERAL) || ($format === Format::FORMAT_TEXT)) {
        return $value;
    }

    // Turn backslash-escaped characters outside quotes into quoted literals, e.g. \T becomes "T"
    $format = preg_replace('/(\\\(.))(?=(?:[^"]|"[^"]*")*$)/u', '"${2}"', $format);

    // Split into sections on semicolons that are not inside a quoted literal.
    // How the sections apply to a value:
    //   1 section:  [POSITIVE/NEGATIVE/ZERO/TEXT]
    //   2 sections: [POSITIVE/ZERO/TEXT] [NEGATIVE]
    //   3 sections: [POSITIVE/TEXT] [NEGATIVE] [ZERO]
    //   4 sections: [POSITIVE] [NEGATIVE] [ZERO] [TEXT]
    $sections = preg_split('/(;)(?=(?:[^"]|"[^"]*")*$)/u', $format);
    $sectionCount = count($sections);

    if ($sectionCount === 2) {
        $format = ($value >= 0) ? $sections[0] : $sections[1];
        $value = abs($value); // the chosen section carries the sign handling
    } elseif (($sectionCount === 3) || ($sectionCount === 4)) {
        $format = ($value > 0) ? $sections[0] : (($value < 0) ? $sections[1] : $sections[2]);
        $value = abs($value); // the chosen section carries the sign handling
    } else {
        // a single section, or an unexpected count: use the first section as-is
        $format = $sections[0];
    }

    // "_x" reserves the width of character x in Excel; a plain space is substituted here
    $format = preg_replace('/_./', ' ', $format);

    // Drop a leading color token such as [Red]
    $format = preg_replace('/^\\[[a-zA-Z]+\\]/', '', $format);

    // Date/time format: any of h, m, s, d, y outside quotes
    if (preg_match('/(\[\$[A-Z]*-[0-9A-F]*\])*[hmsdy](?=(?:[^"]|"[^"]*")*$)/miu', $format)) {
        self::formatAsDate($value, $format);

        return $value;
    }

    // Percentage format: the code ends with %
    if (preg_match('/%$/', $format)) {
        self::formatAsPercentage($value, $format);

        return $value;
    }

    if ($format === Format::FORMAT_CURRENCY_EUR_SIMPLE) {
        return 'EUR ' . sprintf('%1.2f', $value);
    }

    // Remove the quotes around literal text, and any positional * symbols
    $format = str_replace(['"', '*'], '', $format);

    // A comma enclosed by digit placeholders (#,# or 0,0) requests a thousands separator
    $useThousands = preg_match('/(#,#|0,0)/', $format);
    if ($useThousands) {
        $format = preg_replace('/0,0/', '00', $format);
        $format = preg_replace('/#,#/', '##', $format);
    }

    // Commas directly after a digit placeholder (#, or 0.0,,) scale the number by 1000 each
    $scale = 1;
    $matches = [];
    if (preg_match('/(#|0)(,+)/', $format, $matches)) {
        $scale = pow(1000, strlen($matches[2]));

        // the scaling commas are not part of the output
        $format = preg_replace('/0,+/', '0', $format);
        $format = preg_replace('/#,+/', '#', $format);
    }

    if (preg_match('/#?.*\?\/\?/', $format, $m)) {
        // Fractional mask: only values with a fractional part are reformatted
        if ($value != (int)$value) {
            self::formatAsFraction($value, $format);
        }
    } else {
        // Plain number: apply the scale, then fill the digit placeholders
        $value = $value / $scale;

        // treat every # as 0
        $format = preg_replace('/\\#/', '0', $format);
        $n = "/\[[^\]]+\]/";
        $m = preg_replace($n, '', $format);
        $numberRegex = "/(0+)(\.?)(0*)/";
        if (preg_match($numberRegex, $m, $matches)) {
            $left = $matches[1];
            $dec = $matches[2];
            $right = $matches[3];

            // minimum width of the formatted number (including the dot)
            $minWidth = strlen($left) + strlen($dec) + strlen($right);

            if ($useThousands) {
                $value = number_format(
                    $value,
                    strlen($right),
                    self::getDecimalSeparator(),
                    self::getThousandsSeparator()
                );

                $value = preg_replace($numberRegex, $value, $format);
            } elseif (preg_match('/[0#]E[+-]0/i', $format)) {
                // scientific notation
                $value = sprintf('%5.2E', $value);
            } elseif (preg_match('/0([^\d\.]+)0/', $format)) {
                // digit blocks separated by literal characters
                $value = self::complexNumberFormatMask($value, $format);
            } else {
                $sprintfPattern = "%0$minWidth." . strlen($right) . "f";
                $value = sprintf($sprintfPattern, $value);
                $value = preg_replace($numberRegex, $value, $format);
            }
        }
    }

    // Currency or accounting token, e.g. [$USD-409]: substitute the currency code into the result
    if (preg_match('/\[\$(.*)\]/u', $format, $m)) {
        $tokenParts = explode('-', $m[1]);
        $currencyCode = $tokenParts[0];

        if ($currencyCode == '') {
            $currencyCode = self::getCurrencyCode();
        }

        $value = preg_replace('/\[\$([^\]]*)\]/u', $currencyCode, $value);
    }

    return $value;
}
1605 |
1606 | /**
1607 | * Reads a record from current position in data stream and continues reading data as long as CONTINUE records
1608 | * are found. Splices the record data pieces and returns the combined string as if record data is in one piece.
1609 | * Moves the current position in the data stream to the start of the next record that is not a CONTINUE record
1610 | *
1611 | * @return array
1612 | */
private function getSplicedRecordData() {
    $combined = '';

    // cumulative payload sizes: entry k is the offset in $combined where the k-th piece ends
    $offsets = [0];
    $total = 0;

    do {
        // bytes 2-3 of the record header hold the payload size; the payload begins at byte 4
        $pieceLength = Format::getUInt2d($this->data, $this->pos + 2);
        $combined .= substr($this->data, $this->pos + 4, $pieceLength);

        $total += $pieceLength;
        $offsets[] = $total;

        // step over this record and peek at the identifier of the one that follows
        $this->pos += 4 + $pieceLength;
        $upcomingIdentifier = Format::getUInt2d($this->data, $this->pos);
    } while ($upcomingIdentifier == self::XLS_TYPE_CONTINUE);

    return ['recordData' => $combined, 'spliceOffsets' => $offsets];
}
1634 |
1635 | /**
1636 | * Get the decimal separator. If it has not yet been set explicitly, try to obtain number formatting
1637 | * information from locale.
1638 | *
1639 | * @return string
1640 | */
private static function getDecimalSeparator() {
    // already resolved (or set explicitly): reuse it
    if (isset(self::$decimalSeparator)) {
        return self::$decimalSeparator;
    }

    // try the numeric setting of the locale first, then the monetary one
    $locale = localeconv();
    $separator = $locale['decimal_point'];
    if ($separator == '') {
        $separator = $locale['mon_decimal_point'];
    }

    // the locale gave nothing usable: fall back to a dot
    if ($separator == '') {
        $separator = '.';
    }

    self::$decimalSeparator = $separator;

    return self::$decimalSeparator;
}
1656 |
1657 | /**
1658 | * Get the thousands separator. If it has not yet been set explicitly, try to obtain number formatting
1659 | * information from locale.
1660 | *
1661 | * @return string
1662 | */
private static function getThousandsSeparator() {
    // already resolved (or set explicitly): reuse it
    if (isset(self::$thousandsSeparator)) {
        return self::$thousandsSeparator;
    }

    // try the numeric setting of the locale first, then the monetary one
    $locale = localeconv();
    $separator = $locale['thousands_sep'];
    if ($separator == '') {
        $separator = $locale['mon_thousands_sep'];
    }

    // the locale gave nothing usable: fall back to a comma
    if ($separator == '') {
        $separator = ',';
    }

    self::$thousandsSeparator = $separator;

    return self::$thousandsSeparator;
}
1678 |
1679 | /**
1680 | * Get the currency code. If it has not yet been set explicitly, try to obtain the symbol information from locale.
1681 | *
1682 | * @return string
1683 | */
private static function getCurrencyCode() {
    // already resolved (or set explicitly): reuse it
    if (isset(self::$currencyCode)) {
        return self::$currencyCode;
    }

    // try the local currency symbol first, then the international one
    $locale = localeconv();
    $symbol = $locale['currency_symbol'];
    if ($symbol == '') {
        $symbol = $locale['int_curr_symbol'];
    }

    // the locale gave nothing usable: fall back to a dollar sign
    if ($symbol == '') {
        $symbol = '$';
    }

    self::$currencyCode = $symbol;

    return self::$currencyCode;
}
1699 |
private static function complexNumberFormatMask($number, $mask) {
    // Fills the runs of zeros in $mask with the digits of $number, keeping the literal
    // characters between the runs (e.g. 123456789 with mask 000-00-0000 gives 123-45-6789).
    $signPrefix = ($number < 0.0) ? '-' : '';
    $number = abs($number);

    // Mask with a decimal point: format both sides separately; the fractional side is
    // processed reversed and reversed back afterwards
    if (strpos($mask, '.') !== false) {
        $numberParts = explode('.', $number . '.0');
        $maskParts = explode('.', $mask . '.0');
        $integerText = self::complexNumberFormatMask($numberParts[0], $maskParts[0]);
        $fractionText = strrev(self::complexNumberFormatMask(strrev($numberParts[1]), strrev($maskParts[1])));

        return $signPrefix . $integerText . '.' . $fractionText;
    }

    // With at most one run of zeros there is nothing to interleave
    $runCount = preg_match_all('/0+/', $mask, $found, PREG_OFFSET_CAPTURE);
    if ($runCount <= 1) {
        return $signPrefix . $number;
    }

    // Fill the runs from right to left, peeling the matching number of digits off each time
    $offset = 0;
    foreach (array_reverse($found[0]) as $run) {
        $zeros = $run[0];
        $offset = $run[1];
        $width = strlen($zeros);
        $divisor = 1 . $zeros;
        $digits = sprintf('%0' . $width . 'd', fmod($number, $divisor));

        $number = floor($number / $divisor);
        $mask = substr_replace($mask, $digits, $offset, $width);
    }

    // Digits that did not fit go in front of the left-most run
    if ($number > 0) {
        $mask = substr_replace($mask, $number, $offset, 0);
    }

    return $signPrefix . $mask;
}
1739 |
1740 | /**
1741 | * Convert Microsoft Code Page Identifier to Code Page Name which iconv and mbstring understands
1742 | *
1743 | * @param int $codePage Microsoft Code Page Identifier
1744 | *
1745 | * @throws ParserException
1746 | * @return string Code Page Name
1747 | */
private static function NumberToName($codePage = 1252) {
    // One line per mapping; identifiers that share a target name are grouped together.
    switch ($codePage) {
        case 367: return 'ASCII'; // ASCII

        // OEM code pages
        case 437: return 'CP437'; // OEM US
        case 720: throw new ParserException('Code page 720 not supported.', 5); // OEM Arabic
        case 737: return 'CP737'; // OEM Greek
        case 775: return 'CP775'; // OEM Baltic
        case 850: return 'CP850'; // OEM Latin I
        case 852: return 'CP852'; // OEM Latin II (Central European)
        case 855: return 'CP855'; // OEM Cyrillic
        case 857: return 'CP857'; // OEM Turkish
        case 858: return 'CP858'; // OEM Multilingual Latin I with Euro
        case 860: return 'CP860'; // OEM Portuguese
        case 861: return 'CP861'; // OEM Icelandic
        case 862: return 'CP862'; // OEM Hebrew
        case 863: return 'CP863'; // OEM Canadian (French)
        case 864: return 'CP864'; // OEM Arabic
        case 865: return 'CP865'; // OEM Nordic
        case 866: return 'CP866'; // OEM Cyrillic (Russian)
        case 869: return 'CP869'; // OEM Greek (Modern)

        // ANSI code pages
        case 874: return 'CP874'; // ANSI Thai
        case 949: return 'CP949'; // ANSI Korean (Wansung)
        case 1250: return 'CP1250'; // ANSI Latin II (Central European)
        case 1251: return 'CP1251'; // ANSI Cyrillic
        case 0: // CodePage is not always correctly set when the xls file was saved by Apple's Numbers program
        case 1252: return 'CP1252'; // ANSI Latin I (BIFF4-BIFF7)
        case 1253: return 'CP1253'; // ANSI Greek
        case 1254: return 'CP1254'; // ANSI Turkish
        case 1255: return 'CP1255'; // ANSI Hebrew
        case 1256: return 'CP1256'; // ANSI Arabic
        case 1257: return 'CP1257'; // ANSI Baltic
        case 1258: return 'CP1258'; // ANSI Vietnamese

        // East-Asian code pages and the Macintosh identifiers mapped onto them
        case 932: // ANSI Japanese Shift-JIS
        case 10001: return 'CP932'; // Macintosh Japanese
        case 936: // ANSI Chinese Simplified GBK
        case 10008: return 'CP936'; // Macintosh - Simplified Chinese (GB 2312)
        case 950: // ANSI Chinese Traditional BIG5
        case 10002: return 'CP950'; // Macintosh Chinese Traditional
        case 1361: // ANSI Korean (Johab)
        case 10003: return 'CP1361'; // Macintosh Korean

        // Macintosh code pages
        case 10000: // Apple Roman
        case 32768: return 'MAC'; // Apple Roman
        case 10004: return 'MACARABIC'; // Apple Arabic
        case 10005: return 'MACHEBREW'; // Apple Hebrew
        case 10006: return 'MACGREEK'; // Macintosh Greek
        case 10007: return 'MACCYRILLIC'; // Macintosh Cyrillic
        case 10010: return 'MACROMANIA'; // Macintosh Romania
        case 10017: return 'MACUKRAINE'; // Macintosh Ukraine
        case 10021: return 'MACTHAI'; // Macintosh Thai
        case 10029: return 'MACCENTRALEUROPE'; // Macintosh Central Europe
        case 10079: return 'MACICELAND'; // Macintosh Icelandic
        case 10081: return 'MACTURKISH'; // Macintosh Turkish
        case 10082: return 'MACCROATIAN'; // Macintosh Croatian

        // Unicode
        case 1200: // UTF-16 (BIFF8)
        case 21010: return 'UTF-16LE'; // not correct, but some Excel writer libraries erroneously
                                       // use Codepage 21010 for UTF-16LE
        case 65000: return 'UTF-7'; // Unicode (UTF-7)
        case 65001: return 'UTF-8'; // Unicode (UTF-8)

        case 32769: throw new ParserException('Code page 32769 not supported.', 6); // ANSI Latin I (BIFF2-BIFF3)
    }

    throw new ParserException("Unknown codepage: $codePage", 7);
}
1920 |
1921 | /**
1922 | * Read byte string (8-bit string length). OpenOffice documentation: 2.5.2
1923 | *
1924 | * @param string $subData
1925 | *
1926 | * @return array
1927 | */
private function readByteStringShort($subData) {
    // first byte: length of the string (character count)
    $charCount = ord($subData[0]);

    // the 8-bit characters follow immediately after the length byte
    $rawText = substr($subData, 1, $charCount);
    $decoded = $this->decodeCodepage($rawText);

    // 'size' is the number of bytes the whole structure occupies (length byte + characters)
    return ['value' => $decoded, 'size' => 1 + $charCount];
}
1938 |
1939 | /**
1940 | * Read byte string (16-bit string length). OpenOffice documentation: 2.5.2
1941 | *
1942 | * @param string $subData
1943 | * @return array
1944 | */
private function readByteStringLong($subData) {
    // offset: 0; size: 2; length of the string (character count)
    $ln = Format::getUInt2d($subData, 0);

    // offset: 2: size: var; character array (8-bit characters).
    // Only the $ln announced characters belong to the string; without the limit any bytes
    // that follow it in $subData would be decoded as text too. This mirrors
    // readByteStringShort() and keeps the value consistent with the reported size.
    $value = $this->decodeCodepage(substr($subData, 2, $ln));

    // size in bytes of data structure
    return ['value' => $value, 'size' => 2 + $ln];
}
1955 |
private static function formatAsDate(&$value, &$format) {
    // Rewrites $value in place as a formatted date string; $format is modified in place as well.

    // strip off first part containing e.g. [$-F800] or [$USD-409]
    // general syntax: [$<Currency string>-<language info>]
    // language info is in hexadecimal
    $format = preg_replace('/^(\[\$[A-Z]*-[0-9A-F]*\])/i', '', $format);

    // OpenOffice.org uses upper-case number formats, e.g. 'YYYY', convert to lower-case;
    // but we don't want to change any quoted strings.
    // The class is named through __CLASS__ because ['self', ...] callables are deprecated as of PHP 8.2.
    $format = preg_replace_callback('/(?:^|")([^"]*)(?:$|")/', [__CLASS__, 'setLowercaseCallback'], $format);

    // Only process the non-quoted blocks for date format characters:
    // after splitting on quotes, the even-numbered pieces are the unquoted ones
    $blocks = explode('"', $format);

    foreach ($blocks as $key => &$block) {
        if ($key % 2 == 0) {
            $block = strtr($block, Format::$dateFormatReplacements);
            if (strpos($block, 'A') === false) {
                // 24-hour time format
                $block = strtr($block, Format::$dateFormatReplacements24);
            } else {
                // 12-hour time format
                $block = strtr($block, Format::$dateFormatReplacements12);
            }
        }
    }

    // break the reference so a later use of $block cannot overwrite the last array element
    unset($block);

    $format = implode('"', $blocks);

    // escape any quoted characters so that DateTime format() will render them correctly
    $format = preg_replace_callback('/"(.*)"/U', [__CLASS__, 'escapeQuotesCallback'], $format);
    $dateObj = self::ExcelToPHPObject($value);

    $value = $dateObj->format($format);
}
1990 |
private static function setLowercaseCallback($matches) {
    // preg_replace_callback handler: lower-cases the entire matched text
    $matchedText = $matches[0];

    return mb_strtolower($matchedText);
}
1994 |
private static function escapeQuotesCallback($matches) {
    // preg_replace_callback handler: puts a backslash in front of every byte of the first capture group
    $pieces = str_split($matches[1]);

    return '\\' . implode('\\', $pieces);
}
1998 |
1999 | /**
2000 | * Convert a date from Excel to a PHP Date/Time object
2001 | *
2002 | * @param int $dateValue Excel date/time value
2003 | *
2004 | * @return \DateTime PHP date/time object
2005 | */
private static function ExcelToPHPObject($dateValue = 0) {
    // seconds-based timestamp for the Excel value
    $timestamp = self::ExcelToPHP($dateValue);

    // split into whole days since 1-Jan-1970 and the seconds remaining within the day
    $days = floor($timestamp / 86400);
    $secondsOfDay = round((($timestamp / 86400) - $days) * 86400);

    // Hours are rounded rather than truncated, so minutes/seconds can come out negative;
    // the three parts still add up to $secondsOfDay.
    // NOTE(review): this relies on DateTime::setTime() normalising out-of-range values - confirm.
    $hours = round($secondsOfDay / 3600);
    $minutes = round($secondsOfDay / 60) - ($hours * 60);
    $seconds = round($secondsOfDay) - ($hours * 3600) - ($minutes * 60);

    $result = new \DateTime("1-Jan-1970+$days days");
    $result->setTime($hours, $minutes, $seconds);

    return $result;
}
2020 |
/**
 * Convert an Excel date/time value to a second count relative to 1970-01-01.
 *
 * Values of 1 or more are treated as dates and shifted by the serial number
 * of 1970-01-01 in the active calendar. Values below 1 are treated as a pure
 * time of day and are combined with the current date by gmmktime().
 *
 * @param int|float $dateValue Excel date/time value
 *
 * @return int|float Seconds since 1970-01-01; stays a float only when the
 *                   result does not fit into a PHP integer
 */
private static function ExcelToPHP($dateValue = 0) {
    if (self::$excelBaseDate == Format::CALENDAR_WINDOWS_1900) {
        // serial number of 1970-01-01 in the 1900 date system
        $excelBaseDate = 25569;

        // Adjust for the spurious 29-Feb-1900 (Day 60)
        if ($dateValue < 60) {
            --$excelBaseDate;
        }
    } else {
        // serial number of 1970-01-01 in the other (non-1900) date system
        $excelBaseDate = 24107;
    }

    if ($dateValue >= 1) {
        $utcDays = $dateValue - $excelBaseDate;
        $returnValue = round($utcDays * 86400);

        if (($returnValue <= PHP_INT_MAX) && ($returnValue >= -PHP_INT_MAX)) {
            $returnValue = (int) $returnValue;
        }

        return $returnValue;
    }

    // Time-only value: split the day fraction into h/m/s. round() may push a
    // component up; the following components then go negative and gmmktime()
    // normalises them.
    $hours = round($dateValue * 24);
    $mins = round($dateValue * 1440) - round($hours * 60);
    $secs = round($dateValue * 86400) - round($hours * 3600) - round($mins * 60);

    // explicit int casts: gmmktime() takes integers and round() returns floats
    return (int) gmmktime((int) $hours, (int) $mins, (int) $secs);
}
2058 |
/**
 * Format a number as a percentage according to an Excel percentage mask.
 *
 * The plain Format::FORMAT_PERCENTAGE mask is rendered as a rounded integer.
 * Any other mask is rewritten into a sprintf() pattern: the decimal
 * placeholders become a precision and the leading digit placeholders become
 * a width. Both arguments are modified in place.
 *
 * @param mixed  $value  Numeric value (1 == 100%); replaced by the formatted string
 * @param string $format Excel format mask; replaced by the derived sprintf() pattern
 */
private static function formatAsPercentage(&$value, &$format) {
    if ($format === Format::FORMAT_PERCENTAGE) {
        $value = round((100 * $value), 0) . '%';

        return;
    }

    // ".00" -> ".2": the number of decimal placeholders becomes the precision
    if (preg_match('/\.[#0]+/i', $format, $m)) {
        $s = substr($m[0], 0, 1) . (strlen($m[0]) - 1);
        $format = str_replace($m[0], $s, $format);
    }

    // Leading "000" -> "3": the number of integer placeholders becomes the width.
    // Only the leading run is rewritten; a blanket str_replace() would also
    // rewrite matching digits inside the precision written above (".10" -> ".11").
    if (preg_match('/^[#0]+/', $format, $m)) {
        $format = strlen($m[0]) . substr($format, strlen($m[0]));
    }

    $format = '%' . str_replace('%', 'f%%', $format);
    $value = sprintf($format, 100 * $value);
}
2076 |
/**
 * Format a number as a fraction ("2 1/2" or "5/2").
 *
 * The decimal part is read as a decimal digit string, turned into
 * numerator/10^n and reduced by the greatest common divisor. Masks containing
 * '0' or '#', or starting with '? ?', produce a mixed fraction (integer part,
 * space, fraction); all other masks produce an improper fraction. $value is
 * replaced by the formatted string.
 *
 * @param mixed  $value  Numeric value; replaced by the formatted string
 * @param string $format Excel fraction format mask (only inspected)
 */
private static function formatAsFraction(&$value, &$format) {
    $sign = ($value < 0) ? '-' : '';
    $absolute = abs($value);
    $integerPart = floor($absolute);

    // Fixed notation with 14 decimals avoids exponent notation for small
    // fractions and keeps leading zeros (0.05 must become "05", not "5").
    $fractionText = sprintf('%.14F', $absolute - $integerPart);

    if ($fractionText[0] === '1') {
        // the fractional part rounded up to a whole unit
        $integerPart += 1;
        $decimalDigits = '';
    } else {
        $decimalDigits = rtrim(substr($fractionText, 2), '0');
    }

    if ($decimalDigits === '') {
        // whole number: there is nothing to reduce
        $numerator = 0;
        $denominator = 1;
    } else {
        $numerator = (int) $decimalDigits;
        $denominator = pow(10, strlen($decimalDigits));

        // Euclid's algorithm
        $a = $numerator;
        $b = $denominator;
        while ($b != 0) {
            $remainder = $a % $b;
            $a = $b;
            $b = $remainder;
        }

        $numerator = $numerator / $a;
        $denominator = $denominator / $a;
    }

    $mixed = (strpos($format, '0') !== false)
        || (strpos($format, '#') !== false)
        || (substr($format, 0, 3) == '? ?');

    if ($mixed) {
        if ($numerator == 0) {
            // no fractional part left: print the plain integer
            $value = "$sign$integerPart";

            return;
        }

        if ($integerPart == 0) {
            $integerPart = '';
        }

        $value = "$sign$integerPart $numerator/$denominator";
    } else {
        $numerator += $integerPart * $denominator;
        $value = "$sign$numerator/$denominator";
    }
}
2099 |
/**
 * GCD
 *
 * Greatest common divisor of a series of numbers, computed from their prime
 * factorisations: the result is the product of every prime shared by all
 * values, raised to the lowest power in which it occurs.
 * Excel Function:
 *     GCD(number1[,number2[, ...]])
 *
 * @param array $params Numbers (nested arrays are flattened first)
 *
 * @return int|string Greatest common divisor; 0 when no non-zero value is
 *                    given, '#VALUE!' for a non-numeric value, '#NULL!' for
 *                    a negative value
 */
private static function GCD($params) {
    $factorCounts = [];

    foreach (self::flattenArray($params) as $number) {
        if (!is_numeric($number)) {
            return '#VALUE!';
        }

        if ($number == 0) {
            // zeros do not take part in the computation
            continue;
        }

        if ($number < 0) {
            return '#NULL!';
        }

        // prime => number of times it occurs in this value
        $factorCounts[] = array_count_values(self::factors($number));
    }

    if (count($factorCounts) == 0) {
        return 0;
    }

    // keep only the primes that occur in every value
    $shared = $factorCounts[0];
    foreach (array_slice($factorCounts, 1) as $counts) {
        $shared = array_intersect_key($shared, $counts);
    }

    $divisor = 1;
    foreach ($shared as $prime => $power) {
        // lowest power of this prime across all values
        foreach ($factorCounts as $counts) {
            if ($counts[$prime] < $power) {
                $power = $counts[$prime];
            }
        }

        $divisor *= pow($prime, $power);
    }

    return $divisor;
}
2180 |
/**
 * Flatten a nested array into a simple list.
 *
 * Only the first three nesting levels are unpacked; arrays found deeper
 * than that are appended unchanged. Keys are discarded.
 *
 * @param array $array Array to be flattened (a non-array is cast to an array)
 *
 * @return array Flattened array
 */
private static function flattenArray($array) {
    if (!is_array($array)) {
        return (array) $array;
    }

    $flat = [];

    foreach ($array as $first) {
        if (!is_array($first)) {
            $flat[] = $first;
            continue;
        }

        foreach ($first as $second) {
            if (!is_array($second)) {
                $flat[] = $second;
                continue;
            }

            foreach ($second as $third) {
                $flat[] = $third;
            }
        }
    }

    return $flat;
}
2212 |
/**
 * Factorise a value into its factors, largest first.
 *
 * Searches downwards from floor(sqrt($value)) for the first divisor, then
 * factorises the divisor and the quotient recursively. A value without such
 * a divisor is returned as a single-element list.
 *
 * @param int|float $value
 *
 * @return array List of integer factors in descending order
 */
private static function factors($value) {
    $candidate = floor(sqrt($value));

    while ($candidate > 1) {
        if (($value % $candidate) == 0) {
            $found = array_merge(self::factors($value / $candidate), self::factors($candidate));
            rsort($found);

            return $found;
        }

        --$candidate;
    }

    return [(int) $value];
}
2243 |
/**
 * Read a Unicode string that has no length field but a known character count.
 *
 * Under construction: rich-text and Asian phonetic settings are not
 * supported, the character array is assumed to start right after the
 * option flags.
 *
 * @param string $subData        Data starting at the option flags byte
 * @param int    $characterCount Number of characters in the string
 *
 * @return array 'value' => decoded string, 'size' => bytes consumed including the option flags
 */
private static function readUnicodeString($subData, $characterCount) {
    // offset: 0: size: 1; option flags
    // bit: 0; mask: 0x01; character compression (0 = compressed 8-bit, 1 = uncompressed 16-bit)
    $flags = ord($subData[0]);
    $isCompressed = ($flags & 0x01) === 0;

    // one byte per character when compressed, two otherwise
    $byteLength = $isCompressed ? $characterCount : 2 * $characterCount;

    // offset: 1: size: var; character array
    // this offset assumes richtext and Asian phonetic settings are off, which is generally wrong
    $characters = substr($subData, 1, $byteLength);

    return [
        'value' => self::encodeUTF16($characters, $isCompressed),
        'size' => 1 + $byteLength,
    ];
}
2268 |
/**
 * Extract an Excel Unicode short string (8-bit character count prefix).
 *
 * @param string $subData Data starting at the length byte
 *
 * @return array Result of readUnicodeString() with 'size' increased by the length byte
 */
private static function readUnicodeStringShort($subData) {
    // offset: 0: size: 1; length of the string (character count)
    $length = ord($subData[0]);

    $result = self::readUnicodeString(substr($subData, 1), $length);

    // account for the one-byte length field
    $result['size'] = $result['size'] + 1;

    return $result;
}
2287 |
/**
 * Extract an Excel Unicode long string (16-bit character count prefix).
 *
 * Under construction: rich-text and Asian phonetic settings are not supported.
 *
 * @param string $subData Data starting at the two length bytes
 *
 * @return array Result of readUnicodeString() with 'size' increased by the length field
 */
private static function readUnicodeStringLong($subData) {
    // offset: 0: size: 2; length of the string (character count)
    $length = Format::getUInt2d($subData, 0);

    $result = self::readUnicodeString(substr($subData, 2), $length);

    // account for the two-byte length field
    $result['size'] = $result['size'] + 2;

    return $result;
}
2306 |
/**
 * Decode an RK-encoded number.
 *
 * Bit 1 (0x02) selects an integer stored in the upper 30 bits; otherwise the
 * upper 30 bits are the most significant bits of a 64-bit IEEE 754 value
 * whose remaining 34 bits are taken to be zero. Bit 0 (0x01) means the
 * decoded number has to be divided by 100.
 *
 * @param int $rkNum 32-bit RK value
 *
 * @return int|float Decoded number
 */
private static function getIEEE754($rkNum) {
    $storedAsInteger = ($rkNum & 0x02) != 0;
    $dividedBy100 = ($rkNum & 0x01) != 0;

    if ($storedAsInteger) {
        $number = $rkNum >> 2;
    } else {
        // IEEE 754 layout of the high dword: sign bit, 11 exponent bits, top 20 mantissa bits
        $negative = ($rkNum & 0x80000000) >> 31;
        $exponent = ($rkNum & 0x7ff00000) >> 20;

        // restore the implicit leading 1 and drop the two flag bits
        $significand = (0x100000 | ($rkNum & 0x000ffffc));
        $number = $significand / pow(2, (20 - ($exponent - 1023)));

        if ($negative) {
            $number = -1 * $number;
        }
    }

    if ($dividedBy100) {
        $number /= 100;
    }

    return $number;
}
2333 |
/**
 * Convert a (compressed or uncompressed) UTF-16LE string to UTF-8.
 *
 * @param string $string     Raw string bytes
 * @param bool   $compressed True when $string uses the 8-bit compressed notation
 *
 * @return string UTF-8 string
 */
private static function encodeUTF16($string, $compressed = false) {
    // compressed strings are expanded to 16-bit units before conversion
    $utf16 = $compressed ? self::uncompressByteString($string) : $string;

    return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16LE');
}
2349 |
/**
 * Convert a string from the code page held in $this->codePage to UTF-8.
 * Only used for BIFF5.
 *
 * @param string $string String encoded in $this->codePage
 *
 * @return string UTF-8 string
 */
private function decodeCodepage($string) {
    return mb_convert_encoding($string, 'UTF-8', $this->codePage);
}
2360 |
/**
 * Expand a UTF-16 string in compressed (one byte per character) notation to
 * its uncompressed form by appending a zero byte to every byte.
 * Only used for BIFF8.
 *
 * @param string $string Compressed string
 *
 * @return string Uncompressed string, twice as long as the input
 */
private static function uncompressByteString($string) {
    $length = strlen($string);
    $expanded = '';
    $index = 0;

    while ($index < $length) {
        $expanded .= $string[$index] . "\0";
        ++$index;
    }

    return $expanded;
}
2378 |
/**
 * Read the first 8 bytes of a string as a little-endian IEEE 754 double.
 *
 * The bytes are decoded with unpack() rather than rebuilt arithmetically, so
 * zero, subnormal and non-finite values come out exactly as stored.
 *
 * @param string $data Binary string that is at least 8 bytes long
 *
 * @return float
 */
private static function extractNumber($data) {
    $bytes = substr($data, 0, 8);

    // unpack('d') reads a double in machine byte order while the stored value
    // is little-endian, so the bytes are reversed on big-endian machines
    if (pack('S', 1) !== "\x01\x00") {
        $bytes = strrev($bytes);
    }

    $unpacked = unpack('dvalue', $bytes);

    return $unpacked['value'];
}
2410 | }
2411 |
--------------------------------------------------------------------------------
/src/Parser/Excel5/OLERead.php:
--------------------------------------------------------------------------------
1 | openFile($file);
70 |
71 | // Total number of sectors used for the SAT
72 | $this->numBigBlockDepotBlocks = Format::getInt4d($this->data, self::NUM_BIG_BLOCK_DEPOT_BLOCKS_POS);
73 |
74 | // SecID of the first sector of the directory stream
75 | $this->rootStartBlock = Format::getInt4d($this->data, self::ROOT_START_BLOCK_POS);
76 |
77 | // SecID of the first sector of the SSAT (or -2 if not extant)
78 | $this->sbdStartBlock = Format::getInt4d($this->data, self::SMALL_BLOCK_DEPOT_BLOCK_POS);
79 |
80 | // SecID of the first sector of the MSAT (or -2 if no additional sectors are used)
81 | $this->extensionBlock = Format::getInt4d($this->data, self::EXTENSION_BLOCK_POS);
82 |
83 | // Total number of sectors used by MSAT
84 | $this->numExtensionBlocks = Format::getInt4d($this->data, self::NUM_EXTENSION_BLOCK_POS);
85 |
86 | $bigBlockDepotBlocks = [];
87 | $pos = self::BIG_BLOCK_DEPOT_BLOCKS_POS;
88 | $bbdBlocks = $this->numBigBlockDepotBlocks;
89 | if ($this->numExtensionBlocks != 0) {
90 | $bbdBlocks = (self::BIG_BLOCK_SIZE - self::BIG_BLOCK_DEPOT_BLOCKS_POS) / 4;
91 | }
92 |
93 | for ($i = 0; $i < $bbdBlocks; ++$i) {
94 | $bigBlockDepotBlocks[$i] = Format::getInt4d($this->data, $pos);
95 | $pos += 4;
96 | }
97 |
98 | for ($j = 0; $j < $this->numExtensionBlocks; ++$j) {
99 | $pos = ($this->extensionBlock + 1) * self::BIG_BLOCK_SIZE;
100 | $blocksToRead = min($this->numBigBlockDepotBlocks - $bbdBlocks, self::BIG_BLOCK_SIZE / 4 - 1);
101 |
102 | for ($i = $bbdBlocks; $i < $bbdBlocks + $blocksToRead; ++$i) {
103 | $bigBlockDepotBlocks[$i] = Format::getInt4d($this->data, $pos);
104 | $pos += 4;
105 | }
106 |
107 | $bbdBlocks += $blocksToRead;
108 | if ($bbdBlocks < $this->numBigBlockDepotBlocks) {
109 | $this->extensionBlock = Format::getInt4d($this->data, $pos);
110 | }
111 | }
112 |
113 | $this->bigBlockChain = '';
114 | $bbs = self::BIG_BLOCK_SIZE / 4;
115 | for ($i = 0; $i < $this->numBigBlockDepotBlocks; ++$i) {
116 | $pos = ($bigBlockDepotBlocks[$i] + 1) * self::BIG_BLOCK_SIZE;
117 | $this->bigBlockChain .= substr($this->data, $pos, 4 * $bbs);
118 | }
119 |
120 | $sbdBlock = $this->sbdStartBlock;
121 | $this->smallBlockChain = '';
122 | while ($sbdBlock != -2) {
123 | $pos = ($sbdBlock + 1) * self::BIG_BLOCK_SIZE;
124 | $this->smallBlockChain .= substr($this->data, $pos, 4 * $bbs);
125 |
126 | $sbdBlock = Format::getInt4d($this->bigBlockChain, $sbdBlock * 4);
127 | }
128 |
129 | // read the directory stream
130 | $block = $this->rootStartBlock;
131 | $this->entry = $this->readData($block);
132 | $this->readPropertySets();
133 | }
134 |
/**
 * Load a file into memory and verify that it is an OLE container.
 *
 * The whole file is stored in $this->data.
 *
 * @param string $file Path of the file to read
 *
 * @throws ReaderException When the file does not exist or is not readable
 * @throws ParserException When the file is empty or does not start with the OLE identifier
 */
public function openFile($file) {
    $usable = file_exists($file) && is_readable($file);
    if (!$usable) {
        throw new ReaderException("Could not open file [$file] for reading! File does not exist.");
    }

    $this->data = file_get_contents($file);

    // the first 8 bytes must be the OLE identifier
    $isOle = !empty($this->data) && substr($this->data, 0, 8) == self::IDENTIFIER_OLE;
    if (!$isOle) {
        throw new ParserException("The file [$file] is not recognised as an OLE file");
    }
}
156 |
/**
 * Extract the binary data of a stream.
 *
 * Streams smaller than SMALL_BLOCK_THRESHOLD are collected from the data
 * that starts at the root entry's start block, following the small block
 * chain; larger streams are collected from the file data, following the big
 * block chain. Whole blocks are returned, the result is not cut to the
 * stream size.
 *
 * @param int|null $stream Index of the directory entry in $this->props
 *
 * @return string|null Stream data, or null when $stream is null
 */
public function getStream($stream) {
    if ($stream === null) {
        return null;
    }

    $size = $this->props[$stream]['size'];
    $contents = '';

    if ($size < self::SMALL_BLOCK_THRESHOLD) {
        $container = $this->readData($this->props[$this->rootEntry]['startBlock']);

        // -2 marks the end of a chain
        for (
            $sector = $this->props[$stream]['startBlock'];
            $sector != -2;
            $sector = Format::getInt4d($this->smallBlockChain, $sector * 4)
        ) {
            $contents .= substr($container, $sector * self::SMALL_BLOCK_SIZE, self::SMALL_BLOCK_SIZE);
        }

        return $contents;
    }

    $blockCount = $size / self::BIG_BLOCK_SIZE;
    if ($size % self::BIG_BLOCK_SIZE != 0) {
        ++$blockCount;
    }

    if ($blockCount == 0) {
        return '';
    }

    for (
        $sector = $this->props[$stream]['startBlock'];
        $sector != -2;
        $sector = Format::getInt4d($this->bigBlockChain, $sector * 4)
    ) {
        // sector n starts at byte (n + 1) * BIG_BLOCK_SIZE of the file
        $contents .= substr($this->data, ($sector + 1) * self::BIG_BLOCK_SIZE, self::BIG_BLOCK_SIZE);
    }

    return $contents;
}
201 |
/**
 * Read a standard stream by joining its sectors in big block chain order.
 *
 * @param int $bl Sector ID where the stream starts
 *
 * @return string Concatenated sector data
 */
protected function readData($bl) {
    $joined = '';

    // -2 marks the end of the chain
    for ($sector = $bl; $sector != -2; $sector = Format::getInt4d($this->bigBlockChain, $sector * 4)) {
        // sector n starts at byte (n + 1) * BIG_BLOCK_SIZE of the file
        $joined .= substr($this->data, ($sector + 1) * self::BIG_BLOCK_SIZE, self::BIG_BLOCK_SIZE);
    }

    return $joined;
}
221 |
/**
 * Read the entries of the directory stream held in $this->entry.
 *
 * Every entry is appended to $this->props; the indexes of the workbook,
 * root entry, summary and document summary entries are remembered in the
 * corresponding properties.
 */
protected function readPropertySets() {
    $total = strlen($this->entry);

    // entries have a fixed size of PROPERTY_STORAGE_BLOCK_SIZE bytes
    for ($offset = 0; $offset < $total; $offset += self::PROPERTY_STORAGE_BLOCK_SIZE) {
        $d = substr($this->entry, $offset, self::PROPERTY_STORAGE_BLOCK_SIZE);

        // size in bytes of name (16-bit little-endian)
        $nameSize = ord($d[self::SIZE_OF_NAME_POS]) | (ord($d[self::SIZE_OF_NAME_POS + 1]) << 8);

        // type of entry
        $type = ord($d[self::TYPE_POS]);

        // sectorID of first sector or short sector, if this entry refers to a stream (the case with workbook)
        // sectorID of first sector of the short-stream container stream, if this entry is root entry
        $startBlock = Format::getInt4d($d, self::START_BLOCK_POS);
        $size = Format::getInt4d($d, self::SIZE_POS);

        // zero bytes are stripped from the stored name
        $name = str_replace("\x00", '', substr($d, 0, $nameSize));

        $this->props[] = [
            'name' => $name,
            'type' => $type,
            'startBlock' => $startBlock,
            'size' => $size,
        ];
        $index = count($this->props) - 1;

        // upper-cased copy to simplify the checks
        $upName = strtoupper($name);

        if (($upName === 'WORKBOOK') || ($upName === 'BOOK')) {
            // Workbook directory entry (BIFF5 uses Book, BIFF8 uses Workbook)
            $this->workbook = $index;
        } elseif ($upName === 'ROOT ENTRY' || $upName === 'R') {
            // Root entry
            $this->rootEntry = $index;
        }

        // Summary information
        if ($name == chr(5) . 'SummaryInformation') {
            $this->summaryInformation = $index;
        }

        // Additional Document Summary information
        if ($name == chr(5) . 'DocumentSummaryInformation') {
            $this->documentSummaryInformation = $index;
        }
    }
}
276 | }
277 |
--------------------------------------------------------------------------------
/src/Parser/Excel5/RC4.php:
--------------------------------------------------------------------------------
1 | i = 0; $this->i < 256; $this->i++) {
25 | $this->s[$this->i] = $this->i;
26 | }
27 |
28 | $this->j = 0;
29 | for ($this->i = 0; $this->i < 256; $this->i++) {
30 | $this->j = ($this->j + $this->s[$this->i] + ord($key[$this->i % $len])) % 256;
31 | $t = $this->s[$this->i];
32 | $this->s[$this->i] = $this->s[$this->j];
33 | $this->s[$this->j] = $t;
34 | }
35 |
36 | $this->i = $this->j = 0;
37 | }
38 |
/**
 * Symmetric RC4 decryption/encryption function.
 *
 * Continues the key stream from the current state ($this->i, $this->j,
 * $this->s), so consecutive calls process consecutive parts of one stream.
 *
 * @param string $data Data to encrypt/decrypt
 *
 * @return string Processed data, same length as the input
 */
public function RC4($data) {
    $length = strlen($data);
    $position = 0;

    while ($position < $length) {
        $this->i = ($this->i + 1) % 256;
        $this->j = ($this->j + $this->s[$this->i]) % 256;

        // swap s[i] and s[j]
        $swap = $this->s[$this->i];
        $this->s[$this->i] = $this->s[$this->j];
        $this->s[$this->j] = $swap;

        // key stream byte for this position
        $keyIndex = ($this->s[$this->i] + $this->s[$this->j]) % 256;

        $data[$position] = chr(ord($data[$position]) ^ $this->s[$keyIndex]);
        ++$position;
    }

    return $data;
}
63 | }
64 |
--------------------------------------------------------------------------------
/src/Parser/Format.php:
--------------------------------------------------------------------------------
1 | self::FORMAT_GENERAL,
29 | 1 => '0',
30 | 2 => '0.00',
31 | 3 => '#,##0',
32 | 4 => '#,##0.00',
33 | 5 => '"$"#,##0_),("$"#,##0)',
34 | 6 => '"$"#,##0_),[Red]("$"#,##0)',
35 | 7 => '"$"#,##0.00_),("$"#,##0.00)',
36 | 8 => '"$"#,##0.00_),[Red]("$"#,##0.00)',
37 | 9 => '0%',
38 | 10 => '0.00%',
39 | //11 => '0.00E+00',
40 | 12 => '# ?/?',
41 | 13 => '# ??/??',
42 | 14 => 'yyyy/m/d',
43 | 15 => 'd-mmm-yy',
44 | 16 => 'd-mmm',
45 | 17 => 'mmm-yy',
46 | 18 => 'h:mm AM/PM',
47 | 19 => 'h:mm:ss AM/PM',
48 | 20 => 'h:mm',
49 | 21 => 'h:mm:ss',
50 | 22 => 'yyyy/m/d h:mm',
51 |
52 | // 补充
53 | 28 => 'm月d日',
54 | 31 => 'yyyy年m月d日',
55 | 32 => 'h时i分',
56 | 33 => 'h时i分ss秒',
57 | 34 => 'AM/PM h时i分',
58 | 35 => 'AM/PM h时i分ss秒',
59 | 55 => 'AM/PM h时i分',
60 | 56 => 'AM/PM h时i分ss秒',
61 | 58 => 'm月d日',
62 |
63 | 37 => '#,##0_),(#,##0)',
64 | 38 => '#,##0_),[Red](#,##0)',
65 | 39 => '#,##0.00_),(#,##0.00)',
66 | 40 => '#,##0.00_),[Red](#,##0.00)',
67 | 41 => '_("$"* #,##0_),_("$"* (#,##0),_("$"* "-"_),_(@_)',
68 | 42 => '_(* #,##0_),_(* (#,##0),_(* "-"_),_(@_)',
69 | 43 => '_(* #,##0.00_),_(* (#,##0.00),_(* "-"??_),_(@_)',
70 | 44 => '_("$"* #,##0.00_),_("$"* \(#,##0.00\),_("$"* "-"??_),_(@_)',
71 | 45 => 'mm:ss',
72 | 46 => '[h]:mm:ss',
73 | 47 => 'mm:ss.0',
74 | 48 => '##0.0E+0',
75 | 49 => '@',
76 |
77 | // CHT
78 | 27 => 'yyyy年m月',
79 | 30 => 'm/d/yy',
80 | 36 => '[$-404]e/m/d',
81 | 50 => '[$-404]e/m/d',
82 | 57 => 'yyyy年m月',
83 |
84 | // THA
85 | 59 => 't0',
86 | 60 => 't0.00',
87 | 61 => 't#,##0',
88 | 62 => 't#,##0.00',
89 | 67 => 't0%',
90 | 68 => 't0.00%',
91 | 69 => 't# ?/?',
92 | 70 => 't# ??/??'
93 | ];
94 |
/**
 * Search/replace values to convert Excel date/time format masks to PHP format masks.
 *
 * Applied with strtr() to the lower-cased, unquoted parts of a mask. Hours
 * are handled separately by the 12/24-hour tables.
 *
 * @var array
 */
public static $dateFormatReplacements = [
    // first remove escapes related to non-format characters
    '\\' => '',

    // 12-hour suffix
    'am/pm' => 'A',

    // year: 'e' and 'yyyy' map to the 4-digit year, 'yy' to the 2-digit year
    'e' => 'Y',
    'yyyy' => 'Y',
    'yy' => 'y',

    // first letter of month - no php equivalent, the short month name is used
    'mmmmm' => 'M',

    // full month name
    'mmmm' => 'F',

    // short month name
    'mmm' => 'M',

    // mm is minutes if time, but can also be month w/leading zero
    // so we try to identify times by the inclusion of a : separator in the mask
    // It isn't perfect, but the best way I know how
    ':mm' => ':i',
    'mm:' => 'i:',

    // month leading zero
    'mm' => 'm',
    'm' => 'n',

    // full day of week name
    'dddd' => 'l',

    // short day of week name
    'ddd' => 'D',

    // days leading zero
    'dd' => 'd',
    'd' => 'j',

    // seconds
    'ss' => 's',

    // fractional seconds - no php equivalent, dropped
    '.s' => ''
];
147 |
/**
 * Search/replace values to convert the hour placeholders of Excel date/time
 * format masks to PHP format masks (24 hr clock).
 *
 * @var array
 */
public static $dateFormatReplacements24 = [
    'hh' => 'H',
    'h' => 'G'
];
157 |
/**
 * Search/replace values to convert the hour placeholders of Excel date/time
 * format masks to PHP format masks (12 hr clock).
 *
 * @var array
 */
public static $dateFormatReplacements12 = [
    'hh' => 'h',
    'h' => 'g'
];
167 |
/**
 * Column index from string.
 *
 * Converts a column label of one to three letters (case-insensitive) to its
 * 1-based index: 'A' => 1, 'Z' => 26, 'AA' => 27.
 *
 * @param string $label Column label
 *
 * @throws ParserException When the label is empty or longer than 3 characters
 * @return int
 */
public static function columnIndexFromString($label = 'A') {
    // Using a lookup cache adds a slight memory overhead, but boosts speed
    // caching using a static within the method is faster than a class static,
    // though it's additional memory overhead
    static $indexCache = [];

    if (isset($indexCache[$label])) {
        return $indexCache[$label];
    }

    // It's surprising how costly the strtoupper() and ord() calls actually are, so we use a lookup array rather
    // than use ord() and make it case insensitive to get rid of the strtoupper() as well. Because it's a static,
    // there's no significant memory overhead either
    static $columnLookup = [
        'A' => 1, 'B' => 2, 'C' => 3, 'D' => 4, 'E' => 5, 'F' => 6, 'G' => 7, 'H' => 8, 'I' => 9, 'J' => 10,
        'K' => 11, 'L' => 12, 'M' => 13, 'N' => 14, 'O' => 15, 'P' => 16, 'Q' => 17, 'R' => 18, 'S' => 19,
        'T' => 20, 'U' => 21, 'V' => 22, 'W' => 23, 'X' => 24, 'Y' => 25, 'Z' => 26, 'a' => 1, 'b' => 2, 'c' => 3,
        'd' => 4, 'e' => 5, 'f' => 6, 'g' => 7, 'h' => 8, 'i' => 9, 'j' => 10, 'k' => 11, 'l' => 12, 'm' => 13,
        'n' => 14, 'o' => 15, 'p' => 16, 'q' => 17, 'r' => 18, 's' => 19, 't' => 20, 'u' => 21, 'v' => 22,
        'w' => 23, 'x' => 24, 'y' => 25, 'z' => 26
    ];

    // We use the language construct isset() on string offsets rather than the more costly strlen() function
    // to check the length of $label. Square brackets are used because the curly-brace offset syntax
    // is no longer accepted by PHP 8.
    if (!isset($label[0]) || isset($label[3])) {
        throw new ParserException('Column string can not be empty or longer than 3 characters');
    }

    if (!isset($label[1])) {
        $indexCache[$label] = $columnLookup[$label];
    } elseif (!isset($label[2])) {
        $indexCache[$label] = $columnLookup[$label[0]] * 26 + $columnLookup[$label[1]];
    } else {
        $indexCache[$label] = $columnLookup[$label[0]] * 676 + $columnLookup[$label[1]] * 26
            + $columnLookup[$label[2]];
    }

    return $indexCache[$label];
}
217 |
/**
 * String from column index.
 *
 * Converts a 0-based column index to its label: 0 => 'A', 25 => 'Z',
 * 26 => 'AA', 702 => 'AAA'.
 *
 * @param int $column 0-based column index
 * @return string
 */
public static function stringFromColumnIndex($column = 0) {
    // Using a lookup cache adds a slight memory overhead, but boosts speed
    // caching using a static within the method is faster than a class static,
    // though it's additional memory overhead
    static $stringCache = [];

    if (!isset($stringCache[$column])) {
        // Quotients are cast to int explicitly: handing a fractional float to
        // chr() is an implicit, lossy conversion that newer PHP versions flag.
        if ($column < 26) {
            $stringCache[$column] = chr(65 + $column);
        } elseif ($column < 702) {
            $stringCache[$column] = chr(64 + (int) ($column / 26)) . chr(65 + $column % 26);
        } else {
            $stringCache[$column] = chr(64 + (int) (($column - 26) / 676))
                . chr(65 + (int) ((($column - 26) % 676) / 26))
                . chr(65 + $column % 26);
        }
    }

    return $stringCache[$column];
}
244 |
/**
 * Read a 16-bit unsigned little-endian integer.
 *
 * @param string $data Binary data
 * @param int    $pos  Offset of the low byte
 * @return int
 */
public static function getUInt2d($data, $pos) {
    $lowByte = ord($data[$pos]);
    $highByte = ord($data[$pos + 1]);

    return ($highByte << 8) | $lowByte;
}
255 |
/**
 * Read a 32-bit signed little-endian integer.
 *
 * The most significant byte is handled separately so that the sign comes
 * out the same on 32-bit and 64-bit builds of PHP.
 *
 * @param string $data Binary data
 * @param int    $pos  Offset of the least significant byte
 * @return int
 */
public static function getInt4d($data, $pos) {
    $topByte = ord($data[$pos + 3]);

    if ($topByte >= 128) {
        // sign bit set: build the negative contribution of the top byte explicitly
        $topPart = -abs((256 - $topByte) << 24);
    } else {
        $topPart = ($topByte & 127) << 24;
    }

    $lowPart = ord($data[$pos]) | (ord($data[$pos + 1]) << 8) | (ord($data[$pos + 2]) << 16);

    return $lowPart | $topPart;
}
278 | }
279 |
--------------------------------------------------------------------------------
/src/Reader/BaseReader.php:
--------------------------------------------------------------------------------
1 | generator->current();
48 | }
49 |
/**
 * Move forward to the next element by advancing the underlying generator.
 */
public function next() {
    $this->generator->next();
}
56 |
/**
 * Return the key of the current element, as reported by the underlying generator.
 *
 * @return int
 */
public function key() {
    return $this->generator->key();
}
65 |
/**
 * Check whether the current position is valid, i.e. whether the underlying
 * generator still has an element to offer.
 *
 * @return bool
 */
public function valid() {
    return $this->generator->valid();
}
74 |
/**
 * Rewind the iterator to the first element.
 *
 * The current generator is replaced by a fresh one from makeGenerator().
 */
public function rewind() {
    $this->generator = $this->makeGenerator();
}
81 |
/**
 * Make the generator.
 *
 * This implementation has an empty body and returns nothing.
 * NOTE(review): presumably concrete readers override it to return a
 * \Generator yielding rows - confirm against the subclasses.
 */
protected function makeGenerator() {

}
88 |
/**
 * Ignore empty row.
 *
 * This implementation has an empty body: the flag is neither stored nor
 * returned.
 * NOTE(review): presumably concrete readers override it - confirm.
 *
 * @param bool $ignoreEmpty
 */
public function ignoreEmptyRow($ignoreEmpty = false) {

}
97 |
/**
 * Set the row limit.
 *
 * @param int|null $limit Value stored in $this->rowLimit (null by default)
 * @return $this
 */
public function setRowLimit($limit = null) {
    $this->rowLimit = $limit;

    return $this;
}
109 |
/**
 * Get the row limit.
 *
 * @return int|null Value last stored by setRowLimit()
 */
public function getRowLimit() {
    return $this->rowLimit;
}
118 |
/**
 * Set the column limit.
 *
 * @param int|null $limit Value stored in $this->columnLimit (null by default)
 * @return $this
 */
public function setColumnLimit($limit = null) {
    $this->columnLimit = $limit;

    return $this;
}
130 |
/**
 * Move the iterator to the given row.
 *
 * Rows are numbered from 1 while iterator keys start at 0. The iterator is
 * rewound when the target lies before the current position (or there is no
 * current key) and then advanced until the target key is reached or the
 * data runs out.
 *
 * @param int $row 1-based row number
 *
 * @throws \InvalidArgumentException When $row is not greater than 0
 */
public function seek($row) {
    if ($row <= 0) {
        throw new \InvalidArgumentException("Row $row is invalid");
    }

    $current = $this->key();

    // switch to the 0-based key of the target row
    --$row;

    if ($current === $row) {
        return;
    }

    $mustRestart = is_null($current) || $row < $current || $row == 0;
    if ($mustRestart) {
        $this->rewind();
    }

    while ($this->valid() && $row > $this->key()) {
        $this->next();
    }
}
153 |
/**
 * Get the column limit.
 *
 * @return int|null Value last stored by setColumnLimit()
 */
public function getColumnLimit() {
    return $this->columnLimit;
}
162 | }
163 |
--------------------------------------------------------------------------------
/src/Reader/Csv.php:
--------------------------------------------------------------------------------
1 | openFile($file);
69 |
70 | $this->autoDetection();
71 |
72 | $this->generator = $this->makeGenerator();
73 |
74 | ini_set('auto_detect_line_endings', $lineEnding);
75 |
76 | return $this;
77 | }
78 |
/**
 * Count the rows of the file.
 *
 * The count is computed once by running a counting generator over the file
 * and is cached afterwards; the file position is restored when counting is
 * done.
 *
 * @return int
 */
public function count() {
    if ($this->count !== null) {
        return $this->count;
    }

    $resumeAt = ftell($this->fileHandle);
    $this->count = iterator_count($this->makeGenerator(true));
    fseek($this->fileHandle, $resumeAt);

    return $this->count;
}
93 |
/**
 * Make the generator that reads the file row by row.
 *
 * Reading starts at $this->start. In counting mode an empty value is yielded
 * for every row and neither the limits nor the value conversion are applied.
 * Otherwise each row is cut to the column limit, its values are converted to
 * UTF-8 and numeric values are cast to float; reading stops once the row
 * limit is exceeded.
 *
 * @param bool $calculate True to only count rows
 * @return \Generator
 */
protected function makeGenerator($calculate = false) {
    fseek($this->fileHandle, $this->start);

    $finish = 0;
    while (($row = fgetcsv($this->fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) {
        if ($this->ignoreEmpty && (empty($row) || trim(implode('', $row)) === '')) {
            continue;
        }

        if ($calculate) {
            yield;
            continue;
        }

        if ($this->rowLimit > 0 && ++$finish > $this->rowLimit) {
            break;
        }

        if ($this->columnLimit > 0) {
            $row = array_slice($row, 0, $this->columnLimit);
        }

        foreach ($row as &$value) {
            if ($value != '') {
                // Convert the encoding first and cast afterwards: casting first
                // would hand a float to mb_convert_encoding(), which turns it
                // back into a string for every non-UTF-8 file.
                if ($this->inputEncoding !== 'UTF-8') {
                    $value = mb_convert_encoding($value, 'UTF-8', $this->inputEncoding);
                }

                if (is_numeric($value)) {
                    $value = (float)$value;
                }
            }
        }

        // break the reference so the next row does not write through it
        unset($value);

        yield $row;
    }
}
140 |
141 | /**
142 |  * Detect the delimiter and input encoding from the start of the file when unset
143 |  */
144 | protected function autoDetection() {
145 |     $needDelimiter = $this->delimiter === null;
146 |     $needEncoding = $this->inputEncoding === null;
147 |     if (!$needDelimiter && !$needEncoding) {
148 |         return;
149 |     }
150 |     // Apply the defaults before reading so an empty file never leaves them null
151 |     $this->delimiter = $needDelimiter ? ',' : $this->delimiter;
152 |     $this->inputEncoding = $needEncoding ? 'UTF-8' : $this->inputEncoding;
153 |     if (($line = fgets($this->fileHandle)) === false) {
154 |         return;
155 |     }
156 |     $sample = $line;
157 |     $sepLength = 0;
158 |     if ($needDelimiter && strlen(trim($line, "\r\n")) == 5 && stripos($line, 'sep=') === 0) {
159 |         // "sep=X" is a directive rather than data: use X and skip the line
160 |         $this->delimiter = substr($line, 4, 1);
161 |         $sepLength = strlen($line);
162 |         if (($next = fgets($this->fileHandle)) !== false) {
163 |             $sample = $next; // guess the encoding from real content instead
164 |         }
165 |     }
166 |     if ($needEncoding) {
167 |         if (($bom = substr($line, 0, 4)) == "\xFF\xFE\x00\x00" || $bom == "\x00\x00\xFE\xFF") {
168 |             $this->start = 4;
169 |             $this->inputEncoding = 'UTF-32';
170 |         } elseif (($bom = substr($line, 0, 2)) == "\xFF\xFE" || $bom == "\xFE\xFF") {
171 |             $this->start = 2;
172 |             $this->inputEncoding = 'UTF-16';
173 |         } elseif (substr($line, 0, 3) == "\xEF\xBB\xBF") {
174 |             $this->start = 3;
175 |         }
176 |         if (!$this->start) {
177 |             $encoding = mb_detect_encoding($sample, 'ASCII, UTF-8, GB2312, GBK');
178 |             if ($encoding) {
179 |                 $aliases = ['EUC-CN' => 'GB2312', 'CP936' => 'GBK'];
180 |                 $this->inputEncoding = $aliases[$encoding] ?? $encoding;
181 |             }
182 |         }
183 |     }
184 |     if ($sepLength) {
185 |         $this->start = $sepLength;
186 |     }
187 |     fseek($this->fileHandle, $this->start);
188 | }
189 |
190 | /**
191 |  * Choose whether rows without any content are skipped while reading
192 |  *
193 |  * @param bool $ignoreEmpty True to skip them; omitting the argument stores false
194 |  *
195 |  * @return $this The reader itself, so calls can be chained
196 |  */
197 | public function ignoreEmptyRow($ignoreEmpty = false) {
198 |     $this->ignoreEmpty = $ignoreEmpty;
199 |
200 |     return $this;
201 | }
202 |
203 | /**
204 |  * Set the encoding of the file; cells are converted only when it is not 'UTF-8'
205 |  *
206 |  * @param string $encoding Encoding name, 'UTF-8' when omitted
207 |  * @return $this The reader itself, so calls can be chained
208 |  */
209 | public function setInputEncoding($encoding = 'UTF-8') {
210 |     $this->inputEncoding = $encoding;
211 |
212 |     return $this;
213 | }
214 |
215 | /**
216 |  * Return the input encoding, as set explicitly or chosen by auto-detection
217 |  *
218 |  * @return string
219 |  */
220 | public function getInputEncoding() {
221 |     return $this->inputEncoding;
222 | }
223 |
224 | /**
225 |  * Set the field delimiter passed to fgetcsv()
226 |  *
227 |  * @param string $delimiter Delimiter, defaults to ,
228 |  * @return $this The reader itself, so calls can be chained
229 |  */
230 | public function setDelimiter($delimiter = ',') {
231 |     $this->delimiter = $delimiter;
232 |
233 |     return $this;
234 | }
235 |
236 | /**
237 |  * Return the field delimiter, as set explicitly or chosen by auto-detection
238 |  *
239 |  * @return string
240 |  */
241 | public function getDelimiter() {
242 |     return $this->delimiter;
243 | }
244 |
245 | /**
246 |  * Set the enclosure character passed to fgetcsv()
247 |  *
248 |  * A value loosely equal to the empty string is replaced by a double quote.
249 |  *
250 |  * @param string $enclosure Enclosure, defaults to "
251 |  * @return $this The reader itself, so calls can be chained
252 |  */
253 | public function setEnclosure($enclosure = '"') {
254 |     // Fall back to the double quote rather than storing an empty enclosure
255 |     $fallback = '"';
256 |     $this->enclosure = $enclosure == '' ? $fallback : $enclosure;
257 |
258 |     return $this;
259 | }
260 |
261 | /**
262 |  * Return the enclosure character currently stored on the reader
263 |  *
264 |  * @return string
265 |  */
266 | public function getEnclosure() {
267 |     return $this->enclosure;
268 | }
269 |
270 | /**
271 |  * Check whether the file can be opened, without disturbing an already loaded file
272 |  *
273 |  * @param string $file Path of the file to probe
274 |  * @return bool True if openFile() succeeded, false if it threw
275 |  */
276 | public function canRead($file) {
277 |     $previous = $this->fileHandle; // openFile() overwrites this property
278 |     try {
279 |         $this->openFile($file);
280 |         fclose($this->fileHandle);
281 |     } catch (\Exception $e) {
282 |         return false;
283 |     } finally {
284 |         $this->fileHandle = $previous; // never leave a closed or failed handle behind
285 |     }
286 |     return true;
287 | }
288 |
289 | /**
290 |  * Open a regular, readable file and keep its handle in $this->fileHandle
291 |  *
292 |  * @param string $file Path of the file to open
293 |  * @throws ReaderException If the path is not a regular file, is unreadable, or fopen() fails
294 |  */
295 | protected function openFile($file) {
296 |     // is_file() also rejects directories, which file_exists() would accept
297 |     if (!is_file($file)) {
298 |         throw new ReaderException("Could not open file [$file] for reading! File does not exist.");
299 |     }
300 |     if (!is_readable($file)) {
301 |         throw new ReaderException("Could not open file [$file] for reading! File is not readable.");
302 |     }
303 |     $this->fileHandle = fopen($file, 'r');
304 |     if ($this->fileHandle === false) {
305 |         throw new ReaderException("Could not open file [$file] for reading.");
306 |     }
307 | }
308 |
309 | /**
310 |  * Close the file if it is still open (is_resource() is false once closed) and release the generator
311 |  */
312 | public function __destruct() {
313 |     if (is_resource($this->fileHandle)) {
314 |         fclose($this->fileHandle);
315 |     }
316 |
317 |     $this->generator = null;
318 | }
319 | }
320 |
--------------------------------------------------------------------------------
/src/Reader/Xls.php:
--------------------------------------------------------------------------------
1 | parser = new Excel5();
30 | }
31 |
32 | /**
33 |  * Load an Excel file through the parser and prepare a fresh row generator
34 |  *
35 |  * @param string $file Path handed to the parser's loadOLE()
36 |  * @return $this
37 |  */
38 | public function load($file) {
39 |     $this->parser->loadOLE($file);
40 |     // A count cached for a previously loaded file must not leak into this one
41 |     $this->count = null;
42 |     $this->generator = $this->makeGenerator();
43 |
44 |     return $this;
45 | }
46 |
47 | /**
48 |  * Count the rows of the selected sheet, capped by the row limit when it is positive
49 |  *
50 |  * @param bool $all True to get [rows, columns] instead of the row count alone
51 |  * @return int|array
52 |  */
53 | public function count($all = false) {
54 |     if ($this->count === null) {
55 |         $sheet = $this->sheets($this->parser->getSheetIndex());
56 |         $rows = $sheet ? ($sheet['totalRows'] ?? 0) : 0;
57 |         $columns = $sheet ? ($sheet['totalColumns'] ?? 0) : 0;
58 |         if ($this->rowLimit > 0) {
59 |             $rows = min($rows, $this->rowLimit);
60 |         }
61 |         if ($this->columnLimit > 0) {
62 |             $columns = min($columns, $this->columnLimit);
63 |         }
64 |         $this->count = [$rows, $columns];
65 |     }
66 |
67 |     return $all ? $this->count : $this->count[0];
68 | }
69 |
70 | /**
71 |  * Return the parser's worksheet info, for every sheet or for a single one
72 |  *
73 |  * @param int $index Sheet to pick; null (the default) returns all sheets
74 |  * @return array
75 |  */
76 | public function sheets($index = null) {
77 |     $info = $this->parser->parseWorksheetInfo();
78 |     if ($index === null) {
79 |         return $info;
80 |     }
81 |
82 |     // An index with no entry yields an empty array
83 |     return $info[$index] ?? [];
84 | }
85 |
86 | /**
87 |  * Build a generator that yields parser rows until the counted row total is reached
88 |  *
89 |  * @return \Generator
90 |  */
91 | protected function makeGenerator() {
92 |     list($maxRows, $maxColumns) = $this->count(true);
93 |     for ($line = 0, $emitted = 0; $emitted < $maxRows; $line++) {
94 |         if (($row = $this->parser->getRow($line, $maxColumns)) === false) {
95 |             break;
96 |         }
97 |         if ($this->parser->isIgnoreEmptyRow() && trim(implode('', $row)) === '') {
98 |             continue;
99 |         }
100 |         $emitted++;
101 |         yield $row;
102 |     }
103 | }
104 |
105 | /**
106 |  * Forward the ignore-empty-row setting to the parser
107 |  *
108 |  * @param bool $ignoreEmpty True to skip empty rows; omitting the argument passes false
109 |  *
110 |  * @return $this The reader itself, so calls can be chained
111 |  */
112 | public function ignoreEmptyRow($ignoreEmpty = false) {
113 |     $this->parser->ignoreEmptyRow($ignoreEmpty);
114 |
115 |     return $this;
116 | }
117 |
118 | /**
119 |  * Select another sheet; the cached count is dropped and the iterator rewound
120 |  *
121 |  * @param int $index Sheet index; nothing happens if it is already selected
122 |  * @return $this
123 |  */
124 | public function setSheetIndex($index) {
125 |     if ($index == $this->parser->getSheetIndex()) {
126 |         return $this;
127 |     }
128 |
129 |     $this->parser->setSheetIndex($index);
130 |     $this->count = null;
131 |     $this->rewind();
132 |     return $this;
133 | }
134 |
135 | /**
136 |  * Check whether the file can be opened by this reader
137 |  *
138 |  * The probe uses a separate OLERead instance, not the reader's own parser.
139 |  *
140 |  * @param string $file Path of the file to probe
141 |  * @return bool False if opening threw an exception, true otherwise
142 |  */
143 | public function canRead($file) {
144 |     $readable = true;
145 |
146 |     try {
147 |         // Use ParseXL for the hard work.
148 |         (new OLERead())->openFile($file);
149 |     } catch (\Exception $e) {
150 |         $readable = false;
151 |     }
152 |
153 |     return $readable;
154 | }
155 |
156 | /**
157 |  * Drop the references to the generator and the parser
158 |  */
159 | public function __destruct() {
160 |     $this->generator = null;
161 |     $this->parser = null;
162 | }
163 | }
164 |
--------------------------------------------------------------------------------
/src/Reader/Xlsx.php:
--------------------------------------------------------------------------------
1 | parser = new Excel2007();
29 | }
30 |
31 | /**
32 |  * Load an Excel file through the parser and prepare a fresh row generator
33 |  *
34 |  * @param string $file Path handed to the parser's loadZip()
35 |  * @return $this
36 |  */
37 | public function load($file) {
38 |     $this->parser->loadZip($file);
39 |     // A count cached for a previously loaded file must not leak into this one
40 |     $this->count = null;
41 |     $this->generator = $this->makeGenerator();
42 |
43 |     return $this;
44 | }
45 |
46 | /**
47 |  * Count the rows of the selected sheet, capped by the row limit when it is positive
48 |  *
49 |  * @param bool $all True to get [rows, columns] instead of the row count alone
50 |  * @return int|array
51 |  */
52 | public function count($all = false) {
53 |     if ($this->count === null) {
54 |         $info = $this->sheets($this->parser->getSheetIndex());
55 |         $totalRows = $info ? ($info['totalRows'] ?? 0) : 0;
56 |         $totalColumns = $info ? ($info['totalColumns'] ?? 0) : 0;
57 |         if ($this->rowLimit > 0) {
58 |             $totalRows = min($totalRows, $this->rowLimit);
59 |         }
60 |         if ($this->columnLimit > 0) {
61 |             $totalColumns = min($totalColumns, $this->columnLimit);
62 |         }
63 |         $this->count = [$totalRows, $totalColumns];
64 |     }
65 |
66 |     return $all ? $this->count : $this->count[0];
67 | }
68 |
69 | /**
70 |  * Return the parser's worksheet info, for every sheet or for a single one
71 |  *
72 |  * @param int $index Sheet to pick; null (the default) returns all sheets
73 |  * @return array
74 |  */
75 | public function sheets($index = null) {
76 |     $all = $this->parser->parseWorksheetInfo();
77 |     if ($index === null) {
78 |         return $all;
79 |     }
80 |
81 |     // An index with no entry yields an empty array
82 |     return $all[$index] ?? [];
83 | }
84 |
85 | /**
86 |  * Build a generator that yields parser rows until the counted row total is reached
87 |  *
88 |  * @return \Generator
89 |  */
90 | protected function makeGenerator() {
91 |     list($maxRows, $maxColumns) = $this->count(true);
92 |     for ($line = 0, $emitted = 0; $emitted < $maxRows; $line++) {
93 |         if (($row = $this->parser->getRow($line, $maxColumns)) === false) {
94 |             break;
95 |         }
96 |         if ($this->parser->isIgnoreEmptyRow() && trim(implode('', $row)) === '') {
97 |             continue;
98 |         }
99 |         $emitted++;
100 |         yield $row;
101 |     }
102 | }
103 |
104 | /**
105 |  * Forward the ignore-empty-row setting to the parser
106 |  *
107 |  * @param bool $ignoreEmpty True to skip empty rows; omitting the argument passes false
108 |  *
109 |  * @return $this The reader itself, so calls can be chained
110 |  */
111 | public function ignoreEmptyRow($ignoreEmpty = false) {
112 |     $this->parser->ignoreEmptyRow($ignoreEmpty);
113 |
114 |     return $this;
115 | }
116 |
117 | /**
118 |  * Select another sheet; the cached count is dropped and the iterator rewound
119 |  *
120 |  * @param int $index Sheet index (0 when omitted); no-op if already selected
121 |  * @return $this
122 |  */
123 | public function setSheetIndex($index = 0) {
124 |     if ($index == $this->parser->getSheetIndex()) {
125 |         return $this;
126 |     }
127 |
128 |     $this->parser->setSheetIndex($index);
129 |     $this->count = null;
130 |     $this->rewind();
131 |     return $this;
132 | }
133 |
134 | /**
135 |  * Check whether the file can be opened by a separate Excel2007 parser instance
136 |  *
137 |  * @param string $file Path of the file to probe
138 |  *
139 |  * @return bool False if opening threw an exception, true otherwise
140 |  */
141 | public function canRead($file) {
142 |     $readable = true;
143 |
144 |     try {
145 |         // open file
146 |         (new Excel2007())->openFile($file);
147 |     } catch (\Exception $e) {
148 |         $readable = false;
149 |     }
150 |
151 |     return $readable;
152 | }
153 |
154 | /**
155 |  * Drop the references to the generator and the parser
156 |  */
157 | public function __destruct() {
158 |     $this->generator = null;
159 |     $this->parser = null;
160 | }
161 | }
162 |
--------------------------------------------------------------------------------
/tests/csvTest.php:
--------------------------------------------------------------------------------
1 | setRowLimit(5);
15 | $reader->setColumnLimit(10);
16 |
17 | $reader->ignoreEmptyRow(true);
18 |
19 | //$reader->setInputEncoding('UTF-8');
20 | $reader->setDelimiter("\t");
21 | });
22 |
23 | foreach ($reader as $row) {
24 | var_dump($row);
25 | }
26 |
27 | $reader->seek(2);
28 |
29 | $count = $reader->count();
30 | //$reader->seek(1);
31 | $current = $reader->current();
32 |
33 | $time = microtime(true) - $start;
34 | $use = memory_get_usage() - $memory;
35 | var_dump($current, $count, $time, $use/1024/1024);
36 |
--------------------------------------------------------------------------------
/tests/files/01.csv:
--------------------------------------------------------------------------------
1 | 姓名,称呼,性别,QQ,手机,电话,邮箱,传真,公司,职务,网址,地址,备注
2 | 巴蒂,黑曼巴,男,654333,18643910100,0755-07551255,5310100@sina.com,7.55576E+11,xxxx有限公司,测试工程师,www.baidu1.com,美国洛杉矶,NBA球员
3 |
--------------------------------------------------------------------------------
/tests/files/01.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Janson-Leung/PHPExcel/9df8bd178a41de108ebdc65e90a335a28f1c5959/tests/files/01.xls
--------------------------------------------------------------------------------
/tests/files/01.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Janson-Leung/PHPExcel/9df8bd178a41de108ebdc65e90a335a28f1c5959/tests/files/01.xlsx
--------------------------------------------------------------------------------
/tests/files/02.csv:
--------------------------------------------------------------------------------
1 | 10
2 | 21 22
3 | 311
4 |
5 | 407
6 |
7 |
8 |
9 |
10 | 104
--------------------------------------------------------------------------------
/tests/xlsTest.php:
--------------------------------------------------------------------------------
1 | setRowLimit(5);
15 | $reader->setColumnLimit(10);
16 |
17 | //$reader->setSheetIndex(1);
18 | });
19 |
20 | foreach ($reader as $row) {
21 | var_dump($row);
22 | }
23 |
24 | $reader->seek(50);
25 |
26 | //$reader->seek(5);
27 | $count = $reader->count();
28 | $current = $reader->current();
29 |
30 | $sheets = $reader->sheets();
31 |
32 | $time = microtime(true) - $start;
33 | $use = memory_get_usage() - $memory;
34 |
35 | var_dump($current, $count, $sheets, $time, $use/1024/1024);
36 |
--------------------------------------------------------------------------------
/tests/xlsxTest.php:
--------------------------------------------------------------------------------
1 | setRowLimit(10);
15 | $reader->setColumnLimit(10);
16 |
17 | $reader->ignoreEmptyRow(true);
18 |
19 | //$reader->setSheetIndex(0);
20 | });
21 |
22 | foreach ($reader as $row) {
23 | var_dump($row);
24 | }
25 |
26 | //$reader->seek(50);
27 |
28 | $count = $reader->count();
29 | $reader->seek(2);
30 | $current = $reader->current();
31 |
32 | $sheets = $reader->sheets();
33 |
34 | $time = microtime(true) - $start;
35 | $use = memory_get_usage() - $memory;
36 |
37 | var_dump($current, $count, $sheets, $time, $use/1024/1024);
--------------------------------------------------------------------------------