├── .gitignore ├── .gitlab-ci.yml ├── LICENSE ├── README.md ├── pom.xml └── src ├── main └── java │ └── com │ └── monitorjbl │ └── xlsx │ ├── StreamingReader.java │ ├── XmlUtils.java │ ├── exceptions │ ├── CloseException.java │ ├── MissingSheetException.java │ ├── NotSupportedException.java │ ├── OpenException.java │ ├── ParseException.java │ └── ReadException.java │ ├── impl │ ├── StreamingCell.java │ ├── StreamingRow.java │ ├── StreamingSheet.java │ ├── StreamingSheetReader.java │ ├── StreamingWorkbook.java │ ├── StreamingWorkbookReader.java │ ├── StringSupplier.java │ ├── Supplier.java │ └── TempFileUtil.java │ └── sst │ ├── BufferedStringsTable.java │ ├── FileBackedList.java │ └── LRUCache.java └── test ├── java └── com │ └── monitorjbl │ └── xlsx │ ├── BufferedStringsTableTest.java │ ├── PerformanceTest.java │ ├── StreamingReaderTest.java │ ├── StreamingSheetTest.java │ ├── StreamingWorkbookTest.java │ └── TestUtils.java └── resources ├── 1904Dates.xlsx ├── blank_cell_StringCellValue.xlsx ├── blank_cells.xlsx ├── blank_sst_reference_doctored.xlsx ├── data_types.xlsx ├── empty_sheet.xlsx ├── encrypted.xlsx ├── entity-expansion-exploit-poc-file.xlsx ├── formula_cell.xlsx ├── formula_outside_cell.xlsx ├── formula_test.xlsx ├── gaps.xlsx ├── has_spreadsheetdrawing.xlsx ├── hidden.xlsx ├── inline.xlsx ├── large.xlsx ├── leadingZeroes.xlsx ├── log4j.properties ├── missing-r-attrs.xlsx ├── no_type_cell.xlsx ├── null_cell.xlsx ├── null_celltype.xlsx ├── shared_styled_string.xlsx ├── sheets.xlsx ├── sparse-columns.xlsx └── special_types.xlsx /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.iml 3 | *.iws 4 | *.ipr 5 | .idea 6 | target 7 | .java-version 8 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | image: maven:3.6.3-openjdk-8-slim 2 | ci_build: 3 | stage: build 4 | 
before_script: 5 | - mkdir -p ~/.m2 && echo "${MVN_SETTINGS}" > ~/.m2/settings.xml 6 | script: 7 | - mvn clean package 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
 We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Run Status](https://gitlab.com/monitorjbl/excel-streaming-reader/badges/master/pipeline.svg) 2 | 3 | Profiled with [![Yourkit](https://www.yourkit.com/images/yklogo.png)](https://www.yourkit.com/java/profiler/) 4 | 5 | # Excel Streaming Reader 6 | 7 | If you've used [Apache POI](http://poi.apache.org) in the past to read in Excel files, you probably noticed that it's not very memory efficient. Reading in an entire workbook will cause a severe memory usage spike, which can wreak havoc on a server. 8 | 9 | There are plenty of good reasons for why Apache has to read in the whole workbook, but most of them have to do with the fact that the library allows you to read and write with random addresses. If (and only if) you just want to read the contents of an Excel file in a fast and memory-efficient way, you probably don't need this ability. 
Unfortunately, the only thing in the POI library for reading a streaming workbook requires your code to use a SAX-like parser. All of the friendly classes like `Row` and `Cell` are missing from that API. 10 | 11 | This library serves as a wrapper around that streaming API while preserving the syntax of the standard POI API. Read on to see if it's right for you. 12 | 13 | **NOTE**: This library only supports reading XLSX files. 14 | 15 | # Important notice about Java 7 support 16 | 17 | The latest versions of this library (2.x) have dropped support for Java 7. This is due to POI 4.0 requiring Java 8; as that is a core dependency of this library, it cannot support older versions of Java. The older 1.x and 0.x versions will no longer be maintained. 18 | 19 | # Include 20 | 21 | This library is available from Maven Central, and you can optionally install it yourself. The Maven installation instructions can be found on the [release](https://github.com/monitorjbl/excel-streaming-reader/releases) page. 22 | 23 | To use it, add this to your POM: 24 | 25 | ``` 26 | <dependencies> 27 | <dependency> 28 | <groupId>com.monitorjbl</groupId> 29 | <artifactId>xlsx-streamer</artifactId> 30 | <version>2.1.0</version> 31 | </dependency> 32 | </dependencies> 33 | ``` 34 | 35 | # Usage 36 | 37 | This library is very specific in how it is meant to be used. 
You should initialize it like so: 38 | 39 | ```java 40 | import com.monitorjbl.xlsx.StreamingReader; 41 | 42 | InputStream is = new FileInputStream(new File("/path/to/workbook.xlsx")); 43 | Workbook workbook = StreamingReader.builder() 44 | .rowCacheSize(100) // number of rows to keep in memory (defaults to 10) 45 | .bufferSize(4096) // buffer size to use when reading InputStream to file (defaults to 1024) 46 | .open(is); // InputStream or File for XLSX file (required) 47 | ``` 48 | 49 | Once you've done this, you can then iterate through the rows and cells like so: 50 | 51 | ```java 52 | for (Sheet sheet : workbook){ 53 | System.out.println(sheet.getSheetName()); 54 | for (Row r : sheet) { 55 | for (Cell c : r) { 56 | System.out.println(c.getStringCellValue()); 57 | } 58 | } 59 | } 60 | ``` 61 | 62 | Or open a sheet by name or index: 63 | 64 | ```java 65 | Sheet sheet = workbook.getSheet("My Sheet"); 66 | ``` 67 | 68 | The StreamingWorkbook is an `AutoCloseable` resource, and it's important that you close it to free the filesystem resource it consumed. With try-with-resources (Java 7 and later), you can do this: 69 | 70 | ```java 71 | try ( 72 | InputStream is = new FileInputStream(new File("/path/to/workbook.xlsx")); 73 | Workbook workbook = StreamingReader.builder() 74 | .rowCacheSize(100) 75 | .bufferSize(4096) 76 | .open(is)) { 77 | for (Sheet sheet : workbook){ 78 | System.out.println(sheet.getSheetName()); 79 | for (Row r : sheet) { 80 | for (Cell c : r) { 81 | System.out.println(c.getStringCellValue()); 82 | } 83 | } 84 | } 85 | } 86 | ``` 87 | 88 | You may access cells randomly within a row, as the entire row is cached. **However**, there is no way to randomly access rows. As this is a streaming implementation, only a small number of rows are kept in memory at any given time. 89 | 90 | # Supported Methods 91 | 92 | Not all POI Cell and Row functions are supported. 
The most basic ones are supported (`Cell.getStringCellValue()`, `Cell.getColumnIndex()`, etc.), but don't be surprised if you get a `NotSupportedException` on the more advanced ones. 93 | 94 | I'll try to add more support as time goes on, but some items simply can't be read in a streaming fashion. Methods that require dependent values will not have said dependencies available at the point in the stream in which they are read. 95 | 96 | This is a brief and very generalized list of things that are not supported for reads: 97 | 98 | * Functions 99 | * Macros 100 | * Styled cells (the styles are kept at the end of the ZIP file) 101 | 102 | # Logging 103 | 104 | This library uses SLF4J logging. This is a rare use case, but you can plug in your logging provider and get some potentially useful output. Below is an example of doing this with log4j: 105 | 106 | **pom.xml** 107 | 108 | ``` 109 | <dependencies> 110 | <dependency> 111 | <groupId>com.monitorjbl</groupId> 112 | <artifactId>xlsx-streamer</artifactId> 113 | <version>2.1.0</version> 114 | </dependency> 115 | <dependency> 116 | <groupId>org.slf4j</groupId> 117 | <artifactId>slf4j-log4j12</artifactId> 118 | <version>1.7.6</version> 119 | </dependency> 120 | <dependency> 121 | <groupId>log4j</groupId> 122 | <artifactId>log4j</artifactId> 123 | <version>1.2.17</version> 124 | </dependency> 125 | </dependencies> 126 | ``` 127 | 128 | **log4j.properties** 129 | 130 | ``` 131 | log4j.rootLogger=DEBUG, A1 132 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 133 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout 134 | log4j.appender.A1.layout.ConversionPattern=%d{ISO8601} [%c] %p: %m%n 135 | 136 | log4j.category.com.monitorjbl=DEBUG 137 | ``` 138 | 139 | # Implementation Details 140 | 141 | This library will take a provided `InputStream` and output it to the file system. The stream is piped safely through a configurable-sized buffer to prevent large usage of memory. Once the file is created, it is then streamed into memory from the file system. 142 | 143 | The reason the stream has to be written out in this manner has to do with how ZIP files work. Because the XLSX file format is basically a ZIP file, it's not possible to find all of the entries without reading the entire InputStream. 
144 | 145 | This is a problem that can't really be gotten around for POI, as it needs a complete list of ZIP entries. The default implementation of reading from an `InputStream` in POI is to read the entire stream directly into memory. This library works by reading out the stream into a temporary file. As part of the auto-close action, the temporary file is deleted. 146 | 147 | If you need more control over how the file is created/disposed of, there is an option to initialize the library with a `java.io.File`. This file will not be written to or removed: 148 | 149 | ```java 150 | File f = new File("/path/to/workbook.xlsx"); 151 | Workbook workbook = StreamingReader.builder() 152 | .rowCacheSize(100) 153 | .bufferSize(4096) 154 | .open(f); 155 | ``` 156 | 157 | This library will ONLY work with XLSX files. The older XLS format is not capable of being streamed. 158 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | com.monitorjbl 7 | xlsx-streamer 8 | 3.0.0 9 | Streaming Excel reader 10 | Streaming Excel reader 11 | https://github.com/monitorjbl/excel-streaming-reader 12 | 13 | 14 | UTF-8 15 | 5.0.0 16 | 1.7.30 17 | 18 | 19 | 20 | 21 | monitorjbl 22 | Taylor Jones 23 | monitorjbl@gmail.com 24 | 25 | 26 | 27 | 28 | 29 | 30 | org.apache.maven.plugins 31 | maven-compiler-plugin 32 | 3.1 33 | 34 | 1.8 35 | 1.8 36 | 37 | 38 | 39 | org.apache.maven.plugins 40 | maven-javadoc-plugin 41 | 2.10.1 42 | 43 | 44 | attach-sources 45 | 46 | jar 47 | 48 | 49 | 50 | 51 | 52 | org.apache.maven.plugins 53 | maven-source-plugin 54 | 2.2.1 55 | 56 | 57 | attach-sources 58 | 59 | jar 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | release 70 | 71 | 72 | 73 | org.apache.maven.plugins 74 | maven-gpg-plugin 75 | 1.6 76 | 77 | 78 | sign-artifacts 79 | verify 80 | 81 | sign 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | Apache 2.0 94 | 
https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/master/LICENSE 95 | repo 96 | 97 | 98 | 99 | 100 | https://github.com/monitorjbl/excel-streaming-reader.git 101 | 102 | 103 | 104 | 105 | ossrh 106 | https://oss.sonatype.org/content/repositories/snapshots 107 | 108 | 109 | ossrh 110 | https://oss.sonatype.org/service/local/staging/deploy/maven2 111 | 112 | 113 | 114 | 115 | 116 | org.apache.poi 117 | poi 118 | ${apache.poi.version} 119 | 120 | 121 | org.apache.poi 122 | poi-ooxml 123 | ${apache.poi.version} 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | xml-apis 132 | xml-apis 133 | 1.4.01 134 | 135 | 136 | 137 | org.slf4j 138 | slf4j-api 139 | ${slf4j.version} 140 | 141 | 142 | 143 | org.slf4j 144 | slf4j-log4j12 145 | ${slf4j.version} 146 | test 147 | 148 | 149 | org.apache.logging.log4j 150 | log4j-core 151 | 2.14.1 152 | test 153 | 154 | 155 | org.hamcrest 156 | hamcrest 157 | 2.2 158 | test 159 | 160 | 161 | org.junit.jupiter 162 | junit-jupiter-engine 163 | 5.7.1 164 | test 165 | 166 | 167 | org.nanohttpd 168 | nanohttpd 169 | 2.3.1 170 | test 171 | 172 | 173 | 174 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/StreamingReader.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx; 2 | 3 | import com.monitorjbl.xlsx.exceptions.MissingSheetException; 4 | import com.monitorjbl.xlsx.exceptions.OpenException; 5 | import com.monitorjbl.xlsx.exceptions.ReadException; 6 | import com.monitorjbl.xlsx.sst.BufferedStringsTable; 7 | import com.monitorjbl.xlsx.impl.StreamingSheetReader; 8 | import com.monitorjbl.xlsx.impl.StreamingWorkbook; 9 | import com.monitorjbl.xlsx.impl.StreamingWorkbookReader; 10 | import org.apache.poi.openxml4j.exceptions.InvalidFormatException; 11 | import org.apache.poi.openxml4j.exceptions.OpenXML4JException; 12 | import org.apache.poi.openxml4j.opc.OPCPackage; 13 | import 
org.apache.poi.poifs.crypt.Decryptor; 14 | import org.apache.poi.poifs.crypt.EncryptionInfo; 15 | import org.apache.poi.poifs.filesystem.POIFSFileSystem; 16 | import org.apache.poi.ss.usermodel.Row; 17 | import org.apache.poi.ss.usermodel.Workbook; 18 | import org.apache.poi.util.StaxHelper; 19 | import org.apache.poi.xssf.eventusermodel.XSSFReader; 20 | import org.apache.poi.xssf.model.SharedStringsTable; 21 | import org.apache.poi.xssf.model.StylesTable; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | import org.w3c.dom.Node; 25 | import org.w3c.dom.NodeList; 26 | 27 | import javax.xml.stream.XMLEventReader; 28 | import javax.xml.stream.XMLStreamException; 29 | import java.io.File; 30 | import java.io.IOException; 31 | import java.io.InputStream; 32 | import java.nio.file.Files; 33 | import java.security.GeneralSecurityException; 34 | import java.util.Iterator; 35 | import java.util.Objects; 36 | 37 | import static com.monitorjbl.xlsx.XmlUtils.document; 38 | import static com.monitorjbl.xlsx.XmlUtils.searchForNodeList; 39 | import static com.monitorjbl.xlsx.impl.TempFileUtil.writeInputStreamToFile; 40 | 41 | /** 42 | * Streaming Excel workbook implementation. Most advanced features of POI are not supported. 43 | * Use this only if your application can handle iterating through an entire workbook, row by 44 | * row. 45 | */ 46 | public class StreamingReader implements Iterable, AutoCloseable { 47 | private static final Logger log = LoggerFactory.getLogger(StreamingReader.class); 48 | 49 | private File tmp; 50 | private final StreamingWorkbookReader workbook; 51 | 52 | public StreamingReader(StreamingWorkbookReader workbook) { 53 | this.workbook = workbook; 54 | } 55 | 56 | /** 57 | * Returns a new streaming iterator to loop through rows. This iterator is not 58 | * guaranteed to have all rows in memory, and any particular iteration may 59 | * trigger a load from disk to read in new data. 
60 | * 61 | * @return the streaming iterator 62 | * @deprecated StreamingReader is equivalent to the POI Workbook object rather 63 | * than the Sheet object. This method will be removed in a future release. 64 | */ 65 | @Override 66 | public Iterator iterator() { 67 | return workbook.first().iterator(); 68 | } 69 | 70 | /** 71 | * Closes the streaming resource, attempting to clean up any temporary files created. 72 | * 73 | * @throws com.monitorjbl.xlsx.exceptions.CloseException if there is an issue closing the stream 74 | */ 75 | @Override 76 | public void close() throws IOException { 77 | try { 78 | workbook.close(); 79 | } finally { 80 | if(tmp != null) { 81 | if (log.isDebugEnabled()) { 82 | log.debug("Deleting tmp file [" + tmp.getAbsolutePath() + "]"); 83 | } 84 | tmp.delete(); 85 | } 86 | } 87 | } 88 | 89 | public static Builder builder() { 90 | return new Builder(); 91 | } 92 | 93 | public static class Builder { 94 | private int rowCacheSize = 10; 95 | private int bufferSize = 1024; 96 | private int sheetIndex = 0; 97 | private int sstCacheSizeBytes = -1; 98 | private String sheetName; 99 | private String password; 100 | 101 | public int getRowCacheSize() { 102 | return rowCacheSize; 103 | } 104 | 105 | public int getBufferSize() { 106 | return bufferSize; 107 | } 108 | 109 | /** 110 | * @return The sheet index 111 | * @deprecated This method will be removed in a future release. 112 | */ 113 | public int getSheetIndex() { 114 | return sheetIndex; 115 | } 116 | 117 | /** 118 | * @return The sheet name 119 | * @deprecated This method will be removed in a future release. 120 | */ 121 | public String getSheetName() { 122 | return sheetName; 123 | } 124 | 125 | /** 126 | * @return The password to use to unlock this workbook 127 | */ 128 | public String getPassword() { 129 | return password; 130 | } 131 | 132 | /** 133 | * @return The size of the shared string table cache. 
If less than 0, no 134 | * cache will be used and the entire table will be loaded into memory. 135 | */ 136 | public int getSstCacheSizeBytes() { 137 | return sstCacheSizeBytes; 138 | } 139 | 140 | /** 141 | * The number of rows to keep in memory at any given point. 142 | *

143 | * Defaults to 10 144 | *

145 | * 146 | * @param rowCacheSize number of rows 147 | * @return reference to current {@code Builder} 148 | */ 149 | public Builder rowCacheSize(int rowCacheSize) { 150 | this.rowCacheSize = rowCacheSize; 151 | return this; 152 | } 153 | 154 | /** 155 | * The number of bytes to read into memory from the input 156 | * resource. 157 | *

158 | * Defaults to 1024 159 | *

160 | * 161 | * @param bufferSize buffer size in bytes 162 | * @return reference to current {@code Builder} 163 | */ 164 | public Builder bufferSize(int bufferSize) { 165 | this.bufferSize = bufferSize; 166 | return this; 167 | } 168 | 169 | /** 170 | * Which sheet to open. There can only be one sheet open 171 | * for a single instance of {@code StreamingReader}. If 172 | * more sheets need to be read, a new instance must be 173 | * created. 174 | *

175 | * Defaults to 0 176 | *

177 | * 178 | * @param sheetIndex index of sheet 179 | * @return reference to current {@code Builder} 180 | * @deprecated This method will be removed in a future release. Use {@link StreamingWorkbook#getSheetAt(int)} instead. 181 | */ 182 | public Builder sheetIndex(int sheetIndex) { 183 | this.sheetIndex = sheetIndex; 184 | return this; 185 | } 186 | 187 | /** 188 | * Which sheet to open. There can only be one sheet open 189 | * for a single instance of {@code StreamingReader}. If 190 | * more sheets need to be read, a new instance must be 191 | * created. 192 | * 193 | * @param sheetName name of sheet 194 | * @return reference to current {@code Builder} 195 | * @deprecated This method will be removed in a future release. Use {@link StreamingWorkbook#getSheet(String)} instead. 196 | */ 197 | public Builder sheetName(String sheetName) { 198 | this.sheetName = sheetName; 199 | return this; 200 | } 201 | 202 | /** 203 | * For password protected files specify password to open file. 204 | * If the password is incorrect a {@code ReadException} is thrown on 205 | * {@code read}. 206 | *

NULL indicates that no password should be used, this is the 207 | * default value.

208 | * 209 | * @param password to use when opening file 210 | * @return reference to current {@code Builder} 211 | */ 212 | public Builder password(String password) { 213 | this.password = password; 214 | return this; 215 | } 216 | 217 | /** 218 | *

!!! This option is experimental !!!

219 | * 220 | * Set the size of the Shared Strings Table cache. This option exists to accommodate 221 | * extremely large workbooks with millions of unique strings. Normally the SST is entirely 222 | * loaded into memory, but with large workbooks with high cardinality (i.e., very few 223 | * duplicate values) the SST may not fit entirely into memory. 224 | *

 225 | * By default, the entire SST *will* be loaded into memory. Setting a value greater than 226 | * 0 for this option will only cache up to this many entries in memory. However, 227 | * enabling this option at all will have some noticeable performance degradation as you are 228 | * trading memory for disk space. 229 | * 230 | * @param sstCacheSizeBytes size of SST cache 231 | * @return reference to current {@code Builder} 232 | */ 233 | public Builder sstCacheSizeBytes(int sstCacheSizeBytes) { 234 | this.sstCacheSizeBytes = sstCacheSizeBytes; 235 | return this; 236 | } 237 | 238 | /** 239 | * Reads a given {@code InputStream} and returns a new 240 | * instance of {@code Workbook}. Due to Apache POI 241 | * limitations, a temporary file must be written in order 242 | * to create a streaming iterator. This process will use 243 | * the same buffer size as specified in {@link #bufferSize(int)}. 244 | * 245 | * @param is input stream to read in 246 | * @return A {@link Workbook} that can be read from 247 | * @throws com.monitorjbl.xlsx.exceptions.ReadException if there is an issue reading the stream 248 | */ 249 | public Workbook open(InputStream is) { 250 | StreamingWorkbookReader workbook = new StreamingWorkbookReader(this); 251 | workbook.init(is); 252 | return new StreamingWorkbook(workbook); 253 | } 254 | 255 | /** 256 | * Reads a given {@code File} and returns a new instance 257 | * of {@code Workbook}. 
258 | * 259 | * @param file file to read in 260 | * @return A {@link Workbook} that can be read from 261 | * @throws com.monitorjbl.xlsx.exceptions.OpenException if there is an issue opening the file 262 | * @throws com.monitorjbl.xlsx.exceptions.ReadException if there is an issue reading the file 263 | */ 264 | public Workbook open(File file) { 265 | StreamingWorkbookReader workbook = new StreamingWorkbookReader(this); 266 | workbook.init(file); 267 | return new StreamingWorkbook(workbook); 268 | } 269 | 270 | /** 271 | * Reads a given {@code InputStream} and returns a new 272 | * instance of {@code StreamingReader}. Due to Apache POI 273 | * limitations, a temporary file must be written in order 274 | * to create a streaming iterator. This process will use 275 | * the same buffer size as specified in {@link #bufferSize(int)}. 276 | * 277 | * @param is input stream to read in 278 | * @return built streaming reader instance 279 | * @throws com.monitorjbl.xlsx.exceptions.ReadException if there is an issue reading the stream 280 | * @deprecated This method will be removed in a future release. Use {@link Builder#open(InputStream)} instead 281 | */ 282 | public StreamingReader read(InputStream is) { 283 | File f = null; 284 | try { 285 | f = writeInputStreamToFile(is, bufferSize); 286 | log.debug("Created temp file [" + f.getAbsolutePath() + "]"); 287 | 288 | StreamingReader r = read(f); 289 | r.tmp = f; 290 | return r; 291 | } catch(IOException e) { 292 | throw new ReadException("Unable to read input stream", e); 293 | } catch(RuntimeException e) { 294 | if(f != null) { 295 | f.delete(); 296 | } 297 | throw e; 298 | } 299 | } 300 | 301 | /** 302 | * Reads a given {@code File} and returns a new instance 303 | * of {@code StreamingReader}. 
304 | * 305 | * @param f file to read in 306 | * @return built streaming reader instance 307 | * @throws com.monitorjbl.xlsx.exceptions.OpenException if there is an issue opening the file 308 | * @throws com.monitorjbl.xlsx.exceptions.ReadException if there is an issue reading the file 309 | * @deprecated This method will be removed in a future release. Use {@link Builder#open(File)} instead 310 | */ 311 | public StreamingReader read(File f) { 312 | try { 313 | OPCPackage pkg; 314 | if(password != null) { 315 | // Based on: https://poi.apache.org/encryption.html 316 | POIFSFileSystem poifs = new POIFSFileSystem(f); 317 | EncryptionInfo info = new EncryptionInfo(poifs); 318 | Decryptor d = Decryptor.getInstance(info); 319 | d.verifyPassword(password); 320 | pkg = OPCPackage.open(d.getDataStream(poifs)); 321 | } else { 322 | pkg = OPCPackage.open(f); 323 | } 324 | 325 | boolean use1904Dates = false; 326 | XSSFReader reader = new XSSFReader(pkg); 327 | 328 | SharedStringsTable sst; 329 | File sstCache = null; 330 | if(sstCacheSizeBytes > 0) { 331 | sstCache = Files.createTempFile("", "").toFile(); 332 | log.debug("Created sst cache file [" + sstCache.getAbsolutePath() + "]"); 333 | sst = BufferedStringsTable.getSharedStringsTable(sstCache, sstCacheSizeBytes, pkg); 334 | } else { 335 | sst = reader.getSharedStringsTable(); 336 | } 337 | 338 | StylesTable styles = reader.getStylesTable(); 339 | NodeList workbookPr = searchForNodeList(document(reader.getWorkbookData()), "/ss:workbook/ss:workbookPr"); 340 | if (workbookPr.getLength() == 1) { 341 | final Node date1904 = workbookPr.item(0).getAttributes().getNamedItem("date1904"); 342 | if (date1904 != null) { 343 | use1904Dates = ("1".equals(date1904.getTextContent())); 344 | } 345 | } 346 | InputStream sheet = findSheet(reader); 347 | if(sheet == null) { 348 | throw new MissingSheetException("Unable to find sheet at index [" + sheetIndex + "]"); 349 | } 350 | 351 | XMLEventReader parser = 
StaxHelper.newXMLInputFactory().createXMLEventReader(sheet); 352 | 353 | return new StreamingReader(new StreamingWorkbookReader(sst, sstCache, pkg, new StreamingSheetReader(sst, styles, parser, use1904Dates, rowCacheSize), 354 | this)); 355 | } catch(IOException e) { 356 | throw new OpenException("Failed to open file", e); 357 | } catch(OpenXML4JException | XMLStreamException e) { 358 | throw new ReadException("Unable to read workbook", e); 359 | } catch(GeneralSecurityException e) { 360 | throw new ReadException("Unable to read workbook - Decryption failed", e); 361 | } 362 | } 363 | 364 | /** Resolves the worksheet stream selected by {@code sheetName} (when set, looked up in the workbook's sheet list) or otherwise by {@code sheetIndex}; returns {@code null} when no sheet matches. 365 | * @deprecated This will be removed when the transition to the 1.x API is complete 366 | */ 367 | private InputStream findSheet(XSSFReader reader) throws IOException, InvalidFormatException { 368 | int index = sheetIndex; 369 | if(sheetName != null) { 370 | index = -1; 371 | /* This file is separate from the worksheet data, and should be fairly small */ 372 | NodeList nl = searchForNodeList(document(reader.getWorkbookData()), "/ss:workbook/ss:sheets/ss:sheet"); 373 | for(int i = 0; i < nl.getLength(); i++) { 374 | if(Objects.equals(nl.item(i).getAttributes().getNamedItem("name").getTextContent(), sheetName)) { 375 | index = i; 376 | } 377 | } 378 | if(index < 0) { 379 | return null; 380 | } 381 | } 382 | Iterator<InputStream> iter = reader.getSheetsData(); 383 | InputStream sheet = null; 384 | 385 | int i = 0; 386 | while(iter.hasNext()) { 387 | InputStream is = iter.next(); 388 | if(i++ == index) { 389 | sheet = is; 390 | /* log the resolved position, which differs from sheetIndex when the sheet was selected by name */ log.debug("Found sheet at index [" + index + "]"); 391 | break; 392 | } else { is.close(); /* release the streams of sheets we skip over */ } 393 | } 394 | return sheet; 395 | } 396 | } 397 | 398 | } 399 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/XmlUtils.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx; 2 | 3 | import com.monitorjbl.xlsx.exceptions.ParseException; 4 | import 
org.apache.poi.ooxml.util.DocumentHelper; 5 | import org.w3c.dom.Document; 6 | import org.w3c.dom.NodeList; 7 | import org.xml.sax.SAXException; 8 | 9 | import javax.xml.XMLConstants; 10 | import javax.xml.namespace.NamespaceContext; 11 | import javax.xml.xpath.XPath; 12 | import javax.xml.xpath.XPathConstants; 13 | import javax.xml.xpath.XPathExpressionException; 14 | import javax.xml.xpath.XPathFactory; 15 | import java.io.IOException; 16 | import java.io.InputStream; 17 | import java.util.*; 18 | 19 | /** XML helpers: parse a stream into a DOM document and run SpreadsheetML-namespaced XPath queries against it. */ public class XmlUtils { 20 | /** Parses the stream into a DOM {@link Document}; SAX and I/O failures are rethrown as {@link ParseException}. */ public static Document document(InputStream is) { 21 | try { 22 | return DocumentHelper.readDocument(is); 23 | } catch(SAXException | IOException e) { 24 | throw new ParseException(e); 25 | } 26 | } 27 | 28 | /** Evaluates {@code xpath} against {@code document} with the prefix {@code ss} bound to the SpreadsheetML main namespace and returns the matching node set; XPath errors are rethrown as {@link ParseException}. */ public static NodeList searchForNodeList(Document document, String xpath) { 29 | try { 30 | XPath xp = XPathFactory.newInstance().newXPath(); 31 | NamespaceContextImpl nc = new NamespaceContextImpl(); 32 | nc.addNamespace("ss", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"); 33 | xp.setNamespaceContext(nc); 34 | return (NodeList)xp.compile(xpath) 35 | .evaluate(document, XPathConstants.NODESET); 36 | } catch(XPathExpressionException e) { 37 | throw new ParseException(e); 38 | } 39 | } 40 | 41 | /** Map-backed {@link NamespaceContext} holding prefix-to-URI bindings and the reverse URI-to-prefixes index. */ private static class NamespaceContextImpl implements NamespaceContext { 42 | private Map<String, String> urisByPrefix = new HashMap<>(); 43 | 44 | private Map<String, Set<String>> prefixesByURI = new HashMap<>(); 45 | 46 | public NamespaceContextImpl() { 47 | addNamespace(XMLConstants.XML_NS_PREFIX, XMLConstants.XML_NS_URI); 48 | addNamespace(XMLConstants.XMLNS_ATTRIBUTE, XMLConstants.XMLNS_ATTRIBUTE_NS_URI); 49 | } 50 | 51 | /** Registers {@code prefix} for {@code namespaceURI} in both lookup directions. */ public void addNamespace(String prefix, String namespaceURI) { 52 | urisByPrefix.put(prefix, namespaceURI); 53 | if (prefixesByURI.containsKey(namespaceURI)) { 54 | (prefixesByURI.get(namespaceURI)).add(prefix); 55 | } else { 56 | Set<String> set = new HashSet<>(); 57 | set.add(prefix); 58 | prefixesByURI.put(namespaceURI, set); 59 | } 60 | } 61 | 62 | 
/** Returns the URI bound to {@code prefix}, or {@link XMLConstants#NULL_NS_URI} when the prefix is unbound; a null prefix is rejected. */ public String getNamespaceURI(String prefix) { 63 | if (prefix == null) 64 | throw new IllegalArgumentException("prefix cannot be null"); 65 | if (urisByPrefix.containsKey(prefix)) 66 | return (String) urisByPrefix.get(prefix); 67 | else 68 | return XMLConstants.NULL_NS_URI; 69 | } 70 | 71 | /** Returns one prefix bound to {@code namespaceURI}, or {@code null} when the URI is unbound (previously this threw NoSuchElementException); a null URI is rejected. */ public String getPrefix(String namespaceURI) { 72 | Iterator prefixes = getPrefixes(namespaceURI); return prefixes.hasNext() ? (String) prefixes.next() : null; 73 | } 74 | 75 | /** Returns an iterator over every prefix bound to {@code namespaceURI}; it is empty when the URI is unbound. */ public Iterator getPrefixes(String namespaceURI) { 76 | if (namespaceURI == null) 77 | throw new IllegalArgumentException("namespaceURI cannot be null"); 78 | if (prefixesByURI.containsKey(namespaceURI)) { 79 | return prefixesByURI.get(namespaceURI).iterator(); 80 | } else { 81 | return Collections.emptyIterator(); 82 | } 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/exceptions/CloseException.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.exceptions; 2 | 3 | public class CloseException extends RuntimeException { 4 | 5 | public CloseException() { 6 | super(); 7 | } 8 | 9 | public CloseException(String msg) { 10 | super(msg); 11 | } 12 | 13 | public CloseException(Exception e) { 14 | super(e); 15 | } 16 | 17 | public CloseException(String msg, Exception e) { 18 | super(msg, e); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/exceptions/MissingSheetException.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.exceptions; 2 | 3 | public class MissingSheetException extends RuntimeException { 4 | 5 | public MissingSheetException() { 6 | super(); 7 | } 8 | 9 | public MissingSheetException(String msg) { 10 | super(msg); 11 | } 12 | 13 | public MissingSheetException(Exception e) { 14 | super(e); 15 | } 16 | 17 | public 
MissingSheetException(String msg, Exception e) { 18 | super(msg, e); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/exceptions/NotSupportedException.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.exceptions; 2 | 3 | public class NotSupportedException extends RuntimeException { 4 | 5 | public NotSupportedException() { 6 | super(); 7 | } 8 | 9 | public NotSupportedException(String msg) { 10 | super(msg); 11 | } 12 | 13 | public NotSupportedException(Exception e) { 14 | super(e); 15 | } 16 | 17 | public NotSupportedException(String msg, Exception e) { 18 | super(msg, e); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/exceptions/OpenException.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.exceptions; 2 | 3 | public class OpenException extends RuntimeException { 4 | 5 | public OpenException() { 6 | super(); 7 | } 8 | 9 | public OpenException(String msg) { 10 | super(msg); 11 | } 12 | 13 | public OpenException(Exception e) { 14 | super(e); 15 | } 16 | 17 | public OpenException(String msg, Exception e) { 18 | super(msg, e); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/exceptions/ParseException.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.exceptions; 2 | 3 | public class ParseException extends RuntimeException { 4 | 5 | public ParseException() { 6 | super(); 7 | } 8 | 9 | public ParseException(String msg) { 10 | super(msg); 11 | } 12 | 13 | public ParseException(Exception e) { 14 | super(e); 15 | } 16 | 17 | public ParseException(String msg, Exception e) { 18 | super(msg, e); 19 | } 20 | } 21 | 
-------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/exceptions/ReadException.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.exceptions; 2 | 3 | public class ReadException extends RuntimeException { 4 | 5 | public ReadException() { 6 | super(); 7 | } 8 | 9 | public ReadException(String msg) { 10 | super(msg); 11 | } 12 | 13 | public ReadException(Exception e) { 14 | super(e); 15 | } 16 | 17 | public ReadException(String msg, Exception e) { 18 | super(msg, e); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/impl/StreamingCell.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.impl; 2 | 3 | import com.monitorjbl.xlsx.exceptions.NotSupportedException; 4 | import org.apache.poi.ss.formula.FormulaParseException; 5 | import org.apache.poi.ss.usermodel.Cell; 6 | import org.apache.poi.ss.usermodel.CellStyle; 7 | import org.apache.poi.ss.usermodel.CellType; 8 | import org.apache.poi.ss.usermodel.Comment; 9 | import org.apache.poi.ss.usermodel.DateUtil; 10 | import org.apache.poi.ss.usermodel.Hyperlink; 11 | import org.apache.poi.ss.usermodel.RichTextString; 12 | import org.apache.poi.ss.usermodel.Row; 13 | import org.apache.poi.ss.usermodel.Sheet; 14 | import org.apache.poi.ss.util.CellAddress; 15 | import org.apache.poi.ss.util.CellRangeAddress; 16 | import org.apache.poi.xssf.usermodel.XSSFRichTextString; 17 | 18 | import java.time.Instant; 19 | import java.time.LocalDateTime; 20 | import java.time.ZoneOffset; 21 | import java.util.Calendar; 22 | import java.util.Date; 23 | 24 | public class StreamingCell implements Cell { 25 | 26 | private static final Supplier NULL_SUPPLIER = () -> null; 27 | private static final String FALSE_AS_STRING = "0"; 28 | private static final String 
TRUE_AS_STRING = "1"; 29 | 30 | private final Sheet sheet; 31 | private int columnIndex; 32 | private int rowIndex; 33 | private final boolean use1904Dates; 34 | 35 | private Supplier contentsSupplier = NULL_SUPPLIER; 36 | private Object rawContents; 37 | private String formula; 38 | private String numericFormat; 39 | private Short numericFormatIndex; 40 | private String type; 41 | private CellStyle cellStyle; 42 | private Row row; 43 | private boolean formulaType; 44 | 45 | public StreamingCell(Sheet sheet, int columnIndex, int rowIndex, boolean use1904Dates) { 46 | this.sheet = sheet; 47 | this.columnIndex = columnIndex; 48 | this.rowIndex = rowIndex; 49 | this.use1904Dates = use1904Dates; 50 | } 51 | 52 | public void setContentSupplier(Supplier contentsSupplier) { 53 | this.contentsSupplier = contentsSupplier; 54 | } 55 | 56 | public void setRawContents(Object rawContents) { 57 | this.rawContents = rawContents; 58 | } 59 | 60 | public String getNumericFormat() { 61 | return numericFormat; 62 | } 63 | 64 | public void setNumericFormat(String numericFormat) { 65 | this.numericFormat = numericFormat; 66 | } 67 | 68 | public Short getNumericFormatIndex() { 69 | return numericFormatIndex; 70 | } 71 | 72 | public void setNumericFormatIndex(Short numericFormatIndex) { 73 | this.numericFormatIndex = numericFormatIndex; 74 | } 75 | 76 | public void setFormula(String formula) { 77 | this.formula = formula; 78 | } 79 | 80 | public String getType() { 81 | return type; 82 | } 83 | 84 | public void setType(String type) { 85 | this.type = type; 86 | } 87 | 88 | public boolean isFormulaType() { 89 | return formulaType; 90 | } 91 | 92 | public void setFormulaType(boolean formulaType) { 93 | this.formulaType = formulaType; 94 | } 95 | 96 | @Override 97 | public void setCellStyle(CellStyle cellStyle) { 98 | this.cellStyle = cellStyle; 99 | } 100 | 101 | /* Supported */ 102 | 103 | /** 104 | * Returns column index of this cell 105 | * 106 | * @return zero-based column index of a 
column in a sheet. 107 | */ 108 | @Override 109 | public int getColumnIndex() { 110 | return columnIndex; 111 | } 112 | 113 | /** 114 | * Returns row index of a row in the sheet that contains this cell 115 | * 116 | * @return zero-based row index of a row in the sheet that contains this cell 117 | */ 118 | @Override 119 | public int getRowIndex() { 120 | return rowIndex; 121 | } 122 | 123 | /** 124 | * Returns the Row this cell belongs to. Note that keeping references to cell 125 | * rows around after the iterator window has passed will preserve them. 126 | * 127 | * @return the Row that owns this cell 128 | */ 129 | @Override 130 | public Row getRow() { 131 | return row; 132 | } 133 | 134 | /** 135 | * Sets the Row this cell belongs to. Note that keeping references to cell 136 | * rows around after the iterator window has passed will preserve them. 137 | *

138 | * The row is not automatically set. 139 | * 140 | * @param row The row 141 | */ 142 | public void setRow(Row row) { 143 | this.row = row; 144 | } 145 | 146 | 147 | /** 148 | * Return the cell type. 149 | * 150 | * @return the cell type 151 | */ 152 | @Override 153 | public CellType getCellType() { 154 | if(formulaType) { 155 | return CellType.FORMULA; 156 | } else if(contentsSupplier.getContent() == null || type == null) { 157 | return CellType.BLANK; 158 | } else if("n".equals(type)) { 159 | return CellType.NUMERIC; 160 | } else if("s".equals(type) || "inlineStr".equals(type) || "str".equals(type)) { 161 | return CellType.STRING; 162 | } else if("str".equals(type)) { 163 | return CellType.FORMULA; 164 | } else if("b".equals(type)) { 165 | return CellType.BOOLEAN; 166 | } else if("e".equals(type)) { 167 | return CellType.ERROR; 168 | } else { 169 | throw new UnsupportedOperationException("Unsupported cell type '" + type + "'"); 170 | } 171 | } 172 | 173 | /** 174 | * Get the value of the cell as a string. 175 | * For blank cells we return an empty string. 176 | * 177 | * @return the value of the cell as a string 178 | */ 179 | @Override 180 | public String getStringCellValue() { 181 | Object c = contentsSupplier.getContent(); 182 | 183 | return c == null ? "" : c.toString(); 184 | } 185 | 186 | /** 187 | * Get the value of the cell as a number. For strings we throw an exception. For 188 | * blank cells we return a 0. 189 | * 190 | * @return the value of the cell as a number 191 | * @throws NumberFormatException if the cell value isn't a parsable double. 192 | */ 193 | @Override 194 | public double getNumericCellValue() { 195 | return rawContents == null ? 0.0 : Double.parseDouble((String) rawContents); 196 | } 197 | 198 | /** 199 | * Get the value of the cell as a date. For strings we throw an exception. For 200 | * blank cells we return a null. 
201 | * 202 | * @return the value of the cell as a date 203 | * @throws IllegalStateException if the cell type returned by {@link #getCellType()} is CELL_TYPE_STRING 204 | * @throws NumberFormatException if the cell value isn't a parsable double. 205 | */ 206 | @Override 207 | public Date getDateCellValue() { 208 | if(getCellType() == CellType.STRING) { 209 | throw new IllegalStateException("Cell type cannot be CELL_TYPE_STRING"); 210 | } 211 | return rawContents == null ? null : DateUtil.getJavaDate(getNumericCellValue(), use1904Dates); 212 | } 213 | /** Get the value of the cell as a {@link LocalDateTime} in the system default time zone, derived from {@link #getDateCellValue()}. Cells with no raw contents yield {@code null} (previously a NullPointerException). Throws the same exceptions as {@link #getDateCellValue()}. */ 214 | @Override 215 | public LocalDateTime getLocalDateTimeCellValue() { 216 | Date date = getDateCellValue(); return date == null ? null : LocalDateTime.ofInstant(Instant.ofEpochMilli(date.getTime()), ZoneOffset.systemDefault()); 217 | } 218 | 219 | /** 220 | * Get the value of the cell as a boolean. For strings we throw an exception. For 221 | * blank cells we return a false. 222 | * 223 | * @return the value of the cell as a boolean 224 | */ 225 | @Override 226 | public boolean getBooleanCellValue() { 227 | CellType cellType = getCellType(); 228 | switch(cellType) { 229 | case BLANK: 230 | return false; 231 | case BOOLEAN: 232 | return rawContents != null && TRUE_AS_STRING.equals(rawContents); 233 | case FORMULA: 234 | throw new NotSupportedException(); 235 | default: 236 | throw typeMismatch(CellType.BOOLEAN, cellType, false); 237 | } 238 | } 239 | 240 | /** 241 | * Get the value of the cell as a XSSFRichTextString 242 | *

243 | * For numeric cells we throw an exception. For blank cells we return an empty string. 244 | * For formula cells we return the pre-calculated value if a string, otherwise an exception 245 | *

246 | * 247 | * @return the value of the cell as a XSSFRichTextString 248 | */ 249 | @Override 250 | public XSSFRichTextString getRichStringCellValue() { 251 | CellType cellType = getCellType(); 252 | XSSFRichTextString rt; 253 | switch(cellType) { 254 | case BLANK: 255 | rt = new XSSFRichTextString(""); 256 | break; 257 | case STRING: 258 | rt = new XSSFRichTextString(getStringCellValue()); 259 | break; 260 | default: 261 | throw new NotSupportedException(); 262 | } 263 | return rt; 264 | } 265 | 266 | @Override 267 | public Sheet getSheet() { 268 | return sheet; 269 | } 270 | 271 | private static RuntimeException typeMismatch(CellType expectedType, CellType actualType, boolean isFormulaCell) { 272 | String msg = "Cannot get a " 273 | + getCellTypeName(expectedType) + " value from a " 274 | + getCellTypeName(actualType) + " " + (isFormulaCell ? "formula " : "") + "cell"; 275 | return new IllegalStateException(msg); 276 | } 277 | 278 | /** 279 | * Used to help format error messages 280 | */ 281 | private static String getCellTypeName(CellType cellType) { 282 | switch(cellType) { 283 | case BLANK: 284 | return "blank"; 285 | case STRING: 286 | return "text"; 287 | case BOOLEAN: 288 | return "boolean"; 289 | case ERROR: 290 | return "error"; 291 | case NUMERIC: 292 | return "numeric"; 293 | case FORMULA: 294 | return "formula"; 295 | } 296 | return "#unknown cell type (" + cellType + ")#"; 297 | } 298 | 299 | /** 300 | * @return the style of the cell 301 | */ 302 | @Override 303 | public CellStyle getCellStyle() { 304 | return this.cellStyle; 305 | } 306 | 307 | /** 308 | * Return a formula for the cell, for example, SUM(C4:E4) 309 | * 310 | * @return a formula for the cell 311 | * @throws IllegalStateException if the cell type returned by {@link #getCellType()} is not CELL_TYPE_FORMULA 312 | */ 313 | @Override 314 | public String getCellFormula() { 315 | if(!formulaType) 316 | throw new IllegalStateException("This cell does not have a formula"); 317 | return 
formula; 318 | } 319 | 320 | /** 321 | * Only valid for formula cells 322 | * 323 | * @return one of ({@link CellType#NUMERIC}, {@link CellType#STRING}, 324 | * {@link CellType#BOOLEAN}, {@link CellType#ERROR}) depending 325 | * on the cached value of the formula 326 | */ 327 | @Override 328 | public CellType getCachedFormulaResultType() { 329 | if(formulaType) { 330 | if(contentsSupplier.getContent() == null || type == null) { 331 | return CellType.BLANK; 332 | } else if("n".equals(type)) { 333 | return CellType.NUMERIC; 334 | } else if("s".equals(type) || "inlineStr".equals(type) || "str".equals(type)) { 335 | return CellType.STRING; 336 | } else if("b".equals(type)) { 337 | return CellType.BOOLEAN; 338 | } else if("e".equals(type)) { 339 | return CellType.ERROR; 340 | } else { 341 | throw new UnsupportedOperationException("Unsupported cell type '" + type + "'"); 342 | } 343 | } else { 344 | throw new IllegalStateException("Only formula cells have cached results"); 345 | } 346 | } 347 | 348 | /* Not supported */ 349 | 350 | /** 351 | * Not supported 352 | */ 353 | @Override 354 | public void setCellType(CellType cellType) { 355 | throw new NotSupportedException(); 356 | } 357 | 358 | /** 359 | * Not supported 360 | */ 361 | @Override 362 | public void setCellValue(double value) { 363 | throw new NotSupportedException(); 364 | } 365 | 366 | /** 367 | * Not supported 368 | */ 369 | @Override 370 | public void setCellValue(Date value) { 371 | throw new NotSupportedException(); 372 | } 373 | 374 | /** 375 | * Not supported 376 | */ 377 | @Override 378 | public void setCellValue(LocalDateTime value) { 379 | throw new NotSupportedException(); 380 | } 381 | 382 | /** 383 | * Not supported 384 | */ 385 | @Override 386 | public void setCellValue(Calendar value) { 387 | throw new NotSupportedException(); 388 | } 389 | 390 | /** 391 | * Not supported 392 | */ 393 | @Override 394 | public void setCellValue(RichTextString value) { 395 | throw new NotSupportedException(); 396 
| } 397 | 398 | /** 399 | * Not supported 400 | */ 401 | @Override 402 | public void setCellValue(String value) { 403 | throw new NotSupportedException(); 404 | } 405 | 406 | /** 407 | * Not supported 408 | */ 409 | @Override 410 | public void setCellFormula(String formula) throws FormulaParseException { 411 | throw new NotSupportedException(); 412 | } 413 | 414 | /** 415 | * Not supported 416 | */ 417 | @Override 418 | public void setCellValue(boolean value) { 419 | throw new NotSupportedException(); 420 | } 421 | 422 | /** 423 | * Not supported 424 | */ 425 | @Override 426 | public void setCellErrorValue(byte value) { 427 | throw new NotSupportedException(); 428 | } 429 | 430 | /** 431 | * Not supported 432 | */ 433 | @Override 434 | public byte getErrorCellValue() { 435 | throw new NotSupportedException(); 436 | } 437 | 438 | /** 439 | * Not supported 440 | */ 441 | @Override 442 | public void setAsActiveCell() { 443 | throw new NotSupportedException(); 444 | } 445 | 446 | /** 447 | * Not supported 448 | */ 449 | @Override 450 | public CellAddress getAddress() { 451 | throw new NotSupportedException(); 452 | } 453 | 454 | /** 455 | * Not supported 456 | */ 457 | @Override 458 | public void setCellComment(Comment comment) { 459 | throw new NotSupportedException(); 460 | } 461 | 462 | /** 463 | * Not supported 464 | */ 465 | @Override 466 | public Comment getCellComment() { 467 | throw new NotSupportedException(); 468 | } 469 | 470 | /** 471 | * Not supported 472 | */ 473 | @Override 474 | public void removeCellComment() { 475 | throw new NotSupportedException(); 476 | } 477 | 478 | /** 479 | * Not supported 480 | */ 481 | @Override 482 | public Hyperlink getHyperlink() { 483 | throw new NotSupportedException(); 484 | } 485 | 486 | /** 487 | * Not supported 488 | */ 489 | @Override 490 | public void setHyperlink(Hyperlink link) { 491 | throw new NotSupportedException(); 492 | } 493 | 494 | /** 495 | * Not supported 496 | */ 497 | @Override 498 | public void 
removeHyperlink() { 499 | throw new NotSupportedException(); 500 | } 501 | 502 | /** 503 | * Not supported 504 | */ 505 | @Override 506 | public CellRangeAddress getArrayFormulaRange() { 507 | throw new NotSupportedException(); 508 | } 509 | 510 | /** 511 | * Not supported 512 | */ 513 | @Override 514 | public boolean isPartOfArrayFormulaGroup() { 515 | throw new NotSupportedException(); 516 | } 517 | 518 | /** 519 | * Not supported 520 | */ 521 | @Override 522 | public void setBlank() { 523 | throw new NotSupportedException(); 524 | } 525 | 526 | /** 527 | * Not supported 528 | */ 529 | @Override 530 | public void removeFormula() throws IllegalStateException { 531 | throw new NotSupportedException(); 532 | } 533 | } -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/impl/StreamingRow.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.impl; 2 | 3 | import com.monitorjbl.xlsx.exceptions.NotSupportedException; 4 | import org.apache.poi.ss.usermodel.Cell; 5 | import org.apache.poi.ss.usermodel.CellStyle; 6 | import org.apache.poi.ss.usermodel.CellType; 7 | import org.apache.poi.ss.usermodel.Row; 8 | import org.apache.poi.ss.usermodel.Sheet; 9 | 10 | import java.util.Iterator; 11 | import java.util.Map; 12 | import java.util.TreeMap; 13 | 14 | public class StreamingRow implements Row { 15 | private final Sheet sheet; 16 | private int rowIndex; 17 | private boolean isHidden; 18 | private TreeMap cellMap = new TreeMap<>(); 19 | 20 | public StreamingRow(Sheet sheet, int rowIndex, boolean isHidden) { 21 | this.sheet = sheet; 22 | this.rowIndex = rowIndex; 23 | this.isHidden = isHidden; 24 | } 25 | 26 | @Override 27 | public Sheet getSheet() { 28 | return sheet; 29 | } 30 | 31 | public Map getCellMap() { 32 | return cellMap; 33 | } 34 | 35 | public void setCellMap(TreeMap cellMap) { 36 | this.cellMap = cellMap; 37 | } 38 | 39 | /* Supported */ 
40 | 41 | /** 42 | * Get row number this row represents 43 | * 44 | * @return the row number (0 based) 45 | */ 46 | @Override 47 | public int getRowNum() { 48 | return rowIndex; 49 | } 50 | 51 | /** 52 | * @return Cell iterator of the physically defined cells for this row. 53 | */ 54 | @Override 55 | public Iterator cellIterator() { 56 | return cellMap.values().iterator(); 57 | } 58 | 59 | /** 60 | * @return Cell iterator of the physically defined cells for this row. 61 | */ 62 | @Override 63 | public Iterator iterator() { 64 | return cellMap.values().iterator(); 65 | } 66 | 67 | /** 68 | * Get the cell representing a given column (logical cell) 0-based. If you 69 | * ask for a cell that is not defined, you get a null. 70 | * 71 | * @param cellnum 0 based column number 72 | * @return Cell representing that column or null if undefined. 73 | */ 74 | @Override 75 | public Cell getCell(int cellnum) { 76 | return cellMap.get(cellnum); 77 | } 78 | 79 | /** 80 | * Gets the index of the last cell contained in this row PLUS ONE. 81 | * 82 | * @return short representing the last logical cell in the row PLUS ONE, 83 | * or -1 if the row does not contain any cells. 84 | */ 85 | @Override 86 | public short getLastCellNum() { 87 | return (short) (cellMap.size() == 0 ? -1 : cellMap.lastEntry().getValue().getColumnIndex() + 1); 88 | } 89 | 90 | /** 91 | * Get whether or not to display this row with 0 height 92 | * 93 | * @return - zHeight height is zero or not. 94 | */ 95 | @Override 96 | public boolean getZeroHeight() { 97 | return isHidden; 98 | } 99 | 100 | /** 101 | * Gets the number of defined cells (NOT number of cells in the actual row!). 102 | * That is to say if only columns 0,4,5 have values then there would be 3. 103 | * 104 | * @return int representing the number of defined cells in the row. 
105 | */ 106 | @Override 107 | public int getPhysicalNumberOfCells() { 108 | return cellMap.size(); 109 | } 110 | 111 | /** 112 | * {@inheritDoc} 113 | */ 114 | @Override 115 | public short getFirstCellNum() { 116 | if(cellMap.size() == 0) { 117 | return -1; 118 | } 119 | return cellMap.firstKey().shortValue(); 120 | } 121 | 122 | /** 123 | * {@inheritDoc} 124 | */ 125 | @Override 126 | public Cell getCell(int cellnum, MissingCellPolicy policy) { 127 | StreamingCell cell = (StreamingCell) cellMap.get(cellnum); 128 | if(policy == MissingCellPolicy.CREATE_NULL_AS_BLANK) { 129 | if(cell == null) { return new StreamingCell(sheet, cellnum, rowIndex, false); } 130 | } else if(policy == MissingCellPolicy.RETURN_BLANK_AS_NULL) { 131 | if(cell == null || cell.getCellType() == CellType.BLANK) { return null; } 132 | } 133 | return cell; 134 | } 135 | 136 | /* Not supported */ 137 | 138 | /** 139 | * Not supported 140 | */ 141 | @Override 142 | public Cell createCell(int column) { 143 | throw new NotSupportedException(); 144 | } 145 | 146 | /** 147 | * Not supported 148 | */ 149 | @Override 150 | public Cell createCell(int i, CellType cellType) { 151 | throw new NotSupportedException(); 152 | } 153 | 154 | /** 155 | * Not supported 156 | */ 157 | @Override 158 | public void removeCell(Cell cell) { 159 | throw new NotSupportedException(); 160 | } 161 | 162 | /** 163 | * Not supported 164 | */ 165 | @Override 166 | public void setRowNum(int rowNum) { 167 | throw new NotSupportedException(); 168 | } 169 | 170 | /** 171 | * Not supported 172 | */ 173 | @Override 174 | public void setHeight(short height) { 175 | throw new NotSupportedException(); 176 | } 177 | 178 | /** 179 | * Not supported 180 | */ 181 | @Override 182 | public void setZeroHeight(boolean zHeight) { 183 | throw new NotSupportedException(); 184 | } 185 | 186 | /** 187 | * Not supported 188 | */ 189 | @Override 190 | public void setHeightInPoints(float height) { 191 | throw new NotSupportedException(); 192 | } 193 | 
194 | /** 195 | * Not supported 196 | */ 197 | @Override 198 | public short getHeight() { 199 | throw new NotSupportedException(); 200 | } 201 | 202 | /** 203 | * Not supported 204 | */ 205 | @Override 206 | public float getHeightInPoints() { 207 | throw new NotSupportedException(); 208 | } 209 | 210 | /** 211 | * Not supported 212 | */ 213 | @Override 214 | public boolean isFormatted() { 215 | throw new NotSupportedException(); 216 | } 217 | 218 | /** 219 | * Not supported 220 | */ 221 | @Override 222 | public CellStyle getRowStyle() { 223 | throw new NotSupportedException(); 224 | } 225 | 226 | /** 227 | * Not supported 228 | */ 229 | @Override 230 | public void setRowStyle(CellStyle style) { 231 | throw new NotSupportedException(); 232 | } 233 | 234 | /** 235 | * Not supported 236 | */ 237 | @Override 238 | public int getOutlineLevel() { 239 | throw new NotSupportedException(); 240 | } 241 | 242 | /** 243 | * Not supported 244 | */ 245 | @Override 246 | public void shiftCellsRight(int firstShiftColumnIndex, int lastShiftColumnIndex, int step) { 247 | throw new NotSupportedException(); 248 | } 249 | 250 | /** 251 | * Not supported 252 | */ 253 | @Override 254 | public void shiftCellsLeft(int firstShiftColumnIndex, int lastShiftColumnIndex, int step) { 255 | throw new NotSupportedException(); 256 | } 257 | 258 | } 259 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/impl/StreamingSheet.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.impl; 2 | 3 | import org.apache.poi.ss.util.PaneInformation; 4 | import org.apache.poi.ss.usermodel.AutoFilter; 5 | import org.apache.poi.ss.usermodel.Cell; 6 | import org.apache.poi.ss.usermodel.CellRange; 7 | import org.apache.poi.ss.usermodel.CellStyle; 8 | import org.apache.poi.ss.usermodel.Comment; 9 | import org.apache.poi.ss.usermodel.DataValidation; 10 | import 
org.apache.poi.ss.usermodel.DataValidationHelper; 11 | import org.apache.poi.ss.usermodel.Drawing; 12 | import org.apache.poi.ss.usermodel.Footer; 13 | import org.apache.poi.ss.usermodel.Header; 14 | import org.apache.poi.ss.usermodel.Hyperlink; 15 | import org.apache.poi.ss.usermodel.PrintSetup; 16 | import org.apache.poi.ss.usermodel.Row; 17 | import org.apache.poi.ss.usermodel.Sheet; 18 | import org.apache.poi.ss.usermodel.SheetConditionalFormatting; 19 | import org.apache.poi.ss.usermodel.Workbook; 20 | import org.apache.poi.ss.util.CellAddress; 21 | import org.apache.poi.ss.util.CellRangeAddress; 22 | 23 | import java.util.Collection; 24 | import java.util.Iterator; 25 | import java.util.List; 26 | import java.util.Map; 27 | 28 | public class StreamingSheet implements Sheet { 29 | 30 | private final String name; 31 | private final StreamingSheetReader reader; 32 | 33 | public StreamingSheet(String name, StreamingSheetReader reader) { 34 | this.name = name; 35 | this.reader = reader; 36 | reader.setSheet(this); 37 | } 38 | 39 | StreamingSheetReader getReader() { 40 | return reader; 41 | } 42 | 43 | /* Supported */ 44 | 45 | /** 46 | * {@inheritDoc} 47 | */ 48 | @Override 49 | public Iterator iterator() { 50 | return reader.iterator(); 51 | } 52 | 53 | /** 54 | * {@inheritDoc} 55 | */ 56 | @Override 57 | public Iterator rowIterator() { 58 | return reader.iterator(); 59 | } 60 | 61 | /** 62 | * {@inheritDoc} 63 | */ 64 | @Override 65 | public String getSheetName() { 66 | return name; 67 | } 68 | 69 | /** 70 | * Get the hidden state for a given column 71 | * 72 | * @param columnIndex - the column to set (0-based) 73 | * @return hidden - false if the column is visible 74 | */ 75 | @Override 76 | public boolean isColumnHidden(int columnIndex) { 77 | return reader.isColumnHidden(columnIndex); 78 | } 79 | 80 | /* Unsupported */ 81 | 82 | /** 83 | * Not supported 84 | */ 85 | @Override 86 | public Row createRow(int rownum) { 87 | throw new 
UnsupportedOperationException(); 88 | } 89 | 90 | /** 91 | * Not supported 92 | */ 93 | @Override 94 | public void removeRow(Row row) { 95 | throw new UnsupportedOperationException(); 96 | } 97 | 98 | /** 99 | * Not supported 100 | */ 101 | @Override 102 | public Row getRow(int rownum) { 103 | throw new UnsupportedOperationException(); 104 | } 105 | 106 | /** 107 | * Not supported 108 | */ 109 | @Override 110 | public int getPhysicalNumberOfRows() { 111 | throw new UnsupportedOperationException(); 112 | } 113 | 114 | /** 115 | * Not supported 116 | */ 117 | @Override 118 | public int getFirstRowNum() { 119 | throw new UnsupportedOperationException(); 120 | } 121 | 122 | /** 123 | * Gets the last row on the sheet 124 | * 125 | * @return last row contained n this sheet (0-based) 126 | */ 127 | @Override 128 | public int getLastRowNum() { 129 | return reader.getLastRowNum(); 130 | } 131 | 132 | /** 133 | * Not supported 134 | */ 135 | @Override 136 | public void setColumnHidden(int columnIndex, boolean hidden) { 137 | throw new UnsupportedOperationException(); 138 | } 139 | 140 | /** 141 | * Not supported 142 | */ 143 | @Override 144 | public void setRightToLeft(boolean value) { 145 | throw new UnsupportedOperationException(); 146 | } 147 | 148 | /** 149 | * Not supported 150 | */ 151 | @Override 152 | public boolean isRightToLeft() { 153 | throw new UnsupportedOperationException(); 154 | } 155 | 156 | /** 157 | * Not supported 158 | */ 159 | @Override 160 | public void setColumnWidth(int columnIndex, int width) { 161 | throw new UnsupportedOperationException(); 162 | } 163 | 164 | /** 165 | * Not supported 166 | */ 167 | @Override 168 | public int getColumnWidth(int columnIndex) { 169 | throw new UnsupportedOperationException(); 170 | } 171 | 172 | /** 173 | * Not supported 174 | */ 175 | @Override 176 | public float getColumnWidthInPixels(int columnIndex) { 177 | throw new UnsupportedOperationException(); 178 | } 179 | 180 | /** 181 | * Not supported 182 | */ 183 | 
@Override 184 | public void setDefaultColumnWidth(int width) { 185 | throw new UnsupportedOperationException(); 186 | } 187 | 188 | /** 189 | * Not supported 190 | */ 191 | @Override 192 | public int getDefaultColumnWidth() { 193 | throw new UnsupportedOperationException(); 194 | } 195 | 196 | /** 197 | * Not supported 198 | */ 199 | @Override 200 | public short getDefaultRowHeight() { 201 | throw new UnsupportedOperationException(); 202 | } 203 | 204 | /** 205 | * Not supported 206 | */ 207 | @Override 208 | public float getDefaultRowHeightInPoints() { 209 | throw new UnsupportedOperationException(); 210 | } 211 | 212 | /** 213 | * Not supported 214 | */ 215 | @Override 216 | public void setDefaultRowHeight(short height) { 217 | throw new UnsupportedOperationException(); 218 | } 219 | 220 | /** 221 | * Not supported 222 | */ 223 | @Override 224 | public void setDefaultRowHeightInPoints(float height) { 225 | throw new UnsupportedOperationException(); 226 | } 227 | 228 | /** 229 | * Not supported 230 | */ 231 | @Override 232 | public CellStyle getColumnStyle(int column) { 233 | throw new UnsupportedOperationException(); 234 | } 235 | 236 | /** 237 | * Not supported 238 | */ 239 | @Override 240 | public int addMergedRegion(CellRangeAddress region) { 241 | throw new UnsupportedOperationException(); 242 | } 243 | 244 | /** 245 | * Not supported 246 | */ 247 | @Override 248 | public int addMergedRegionUnsafe(CellRangeAddress cellRangeAddress) { 249 | throw new UnsupportedOperationException(); 250 | } 251 | 252 | /** 253 | * Not supported 254 | */ 255 | @Override 256 | public void validateMergedRegions() { 257 | throw new UnsupportedOperationException(); 258 | } 259 | 260 | /** 261 | * Not supported 262 | */ 263 | @Override 264 | public void setVerticallyCenter(boolean value) { 265 | throw new UnsupportedOperationException(); 266 | } 267 | 268 | /** 269 | * Not supported 270 | */ 271 | @Override 272 | public void setHorizontallyCenter(boolean value) { 273 | throw new 
UnsupportedOperationException(); 274 | } 275 | 276 | /** 277 | * Not supported 278 | */ 279 | @Override 280 | public boolean getHorizontallyCenter() { 281 | throw new UnsupportedOperationException(); 282 | } 283 | 284 | /** 285 | * Not supported 286 | */ 287 | @Override 288 | public boolean getVerticallyCenter() { 289 | throw new UnsupportedOperationException(); 290 | } 291 | 292 | /** 293 | * Not supported 294 | */ 295 | @Override 296 | public void removeMergedRegion(int index) { 297 | throw new UnsupportedOperationException(); 298 | } 299 | 300 | /** 301 | * Not supported 302 | */ 303 | @Override 304 | public void removeMergedRegions(Collection collection) { 305 | throw new UnsupportedOperationException(); 306 | } 307 | 308 | /** 309 | * Not supported 310 | */ 311 | @Override 312 | public int getNumMergedRegions() { 313 | throw new UnsupportedOperationException(); 314 | } 315 | 316 | /** 317 | * Not supported 318 | */ 319 | @Override 320 | public CellRangeAddress getMergedRegion(int index) { 321 | throw new UnsupportedOperationException(); 322 | } 323 | 324 | /** 325 | * Not supported 326 | */ 327 | @Override 328 | public List getMergedRegions() { 329 | throw new UnsupportedOperationException(); 330 | } 331 | 332 | /** 333 | * Not supported 334 | */ 335 | @Override 336 | public void setForceFormulaRecalculation(boolean value) { 337 | throw new UnsupportedOperationException(); 338 | } 339 | 340 | /** 341 | * Not supported 342 | */ 343 | @Override 344 | public boolean getForceFormulaRecalculation() { 345 | throw new UnsupportedOperationException(); 346 | } 347 | 348 | /** 349 | * Not supported 350 | */ 351 | @Override 352 | public void setAutobreaks(boolean value) { 353 | throw new UnsupportedOperationException(); 354 | } 355 | 356 | /** 357 | * Not supported 358 | */ 359 | @Override 360 | public void setDisplayGuts(boolean value) { 361 | throw new UnsupportedOperationException(); 362 | } 363 | 364 | /** 365 | * Not supported 366 | */ 367 | @Override 368 | public 
void setDisplayZeros(boolean value) { 369 | throw new UnsupportedOperationException(); 370 | } 371 | 372 | /** 373 | * Not supported 374 | */ 375 | @Override 376 | public boolean isDisplayZeros() { 377 | throw new UnsupportedOperationException(); 378 | } 379 | 380 | /** 381 | * Not supported 382 | */ 383 | @Override 384 | public void setFitToPage(boolean value) { 385 | throw new UnsupportedOperationException(); 386 | } 387 | 388 | /** 389 | * Not supported 390 | */ 391 | @Override 392 | public void setRowSumsBelow(boolean value) { 393 | throw new UnsupportedOperationException(); 394 | } 395 | 396 | /** 397 | * Not supported 398 | */ 399 | @Override 400 | public void setRowSumsRight(boolean value) { 401 | throw new UnsupportedOperationException(); 402 | } 403 | 404 | /** 405 | * Not supported 406 | */ 407 | @Override 408 | public boolean getAutobreaks() { 409 | throw new UnsupportedOperationException(); 410 | } 411 | 412 | /** 413 | * Not supported 414 | */ 415 | @Override 416 | public boolean getDisplayGuts() { 417 | throw new UnsupportedOperationException(); 418 | } 419 | 420 | /** 421 | * Not supported 422 | */ 423 | @Override 424 | public boolean getFitToPage() { 425 | throw new UnsupportedOperationException(); 426 | } 427 | 428 | /** 429 | * Not supported 430 | */ 431 | @Override 432 | public boolean getRowSumsBelow() { 433 | throw new UnsupportedOperationException(); 434 | } 435 | 436 | /** 437 | * Not supported 438 | */ 439 | @Override 440 | public boolean getRowSumsRight() { 441 | throw new UnsupportedOperationException(); 442 | } 443 | 444 | /** 445 | * Not supported 446 | */ 447 | @Override 448 | public boolean isPrintGridlines() { 449 | throw new UnsupportedOperationException(); 450 | } 451 | 452 | /** 453 | * Not supported 454 | */ 455 | @Override 456 | public void setPrintGridlines(boolean show) { 457 | throw new UnsupportedOperationException(); 458 | } 459 | 460 | /** 461 | * Not supported 462 | */ 463 | @Override 464 | public boolean 
isPrintRowAndColumnHeadings() { 465 | throw new UnsupportedOperationException(); 466 | } 467 | 468 | /** 469 | * Not supported 470 | */ 471 | @Override 472 | public void setPrintRowAndColumnHeadings(boolean b) { 473 | throw new UnsupportedOperationException(); 474 | } 475 | 476 | /** 477 | * Not supported 478 | */ 479 | @Override 480 | public PrintSetup getPrintSetup() { 481 | throw new UnsupportedOperationException(); 482 | } 483 | 484 | /** 485 | * Not supported 486 | */ 487 | @Override 488 | public Header getHeader() { 489 | throw new UnsupportedOperationException(); 490 | } 491 | 492 | /** 493 | * Not supported 494 | */ 495 | @Override 496 | public Footer getFooter() { 497 | throw new UnsupportedOperationException(); 498 | } 499 | 500 | /** 501 | * Not supported 502 | */ 503 | @Override 504 | public void setSelected(boolean value) { 505 | throw new UnsupportedOperationException(); 506 | } 507 | 508 | /** 509 | * Not supported 510 | */ 511 | @Override 512 | public double getMargin(short margin) { 513 | throw new UnsupportedOperationException(); 514 | } 515 | 516 | /** 517 | * Not supported 518 | */ 519 | @Override 520 | public void setMargin(short margin, double size) { 521 | throw new UnsupportedOperationException(); 522 | } 523 | 524 | /** 525 | * Not supported 526 | */ 527 | @Override 528 | public boolean getProtect() { 529 | throw new UnsupportedOperationException(); 530 | } 531 | 532 | /** 533 | * Not supported 534 | */ 535 | @Override 536 | public void protectSheet(String password) { 537 | throw new UnsupportedOperationException(); 538 | } 539 | 540 | /** 541 | * Not supported 542 | */ 543 | @Override 544 | public boolean getScenarioProtect() { 545 | throw new UnsupportedOperationException(); 546 | } 547 | 548 | /** 549 | * Not supported 550 | */ 551 | @Override 552 | public void setZoom(int i) { 553 | throw new UnsupportedOperationException(); 554 | } 555 | 556 | /** 557 | * Not supported 558 | */ 559 | @Override 560 | public short getTopRow() { 561 | 
throw new UnsupportedOperationException(); 562 | } 563 | 564 | /** 565 | * Not supported 566 | */ 567 | @Override 568 | public short getLeftCol() { 569 | throw new UnsupportedOperationException(); 570 | } 571 | 572 | /** 573 | * Not supported 574 | */ 575 | @Override 576 | public void showInPane(int toprow, int leftcol) { 577 | throw new UnsupportedOperationException(); 578 | } 579 | 580 | /** 581 | * Not supported 582 | */ 583 | @Override 584 | public void shiftRows(int startRow, int endRow, int n) { 585 | throw new UnsupportedOperationException(); 586 | } 587 | 588 | /** 589 | * Not supported 590 | */ 591 | @Override 592 | public void shiftRows(int startRow, int endRow, int n, boolean copyRowHeight, boolean resetOriginalRowHeight) { 593 | throw new UnsupportedOperationException(); 594 | } 595 | 596 | /** 597 | * Not supported 598 | */ 599 | @Override 600 | public void shiftColumns(int startColumn, int endColumn, final int n) { 601 | throw new UnsupportedOperationException(); 602 | } 603 | 604 | /** 605 | * Not supported 606 | */ 607 | @Override 608 | public void createFreezePane(int colSplit, int rowSplit, int leftmostColumn, int topRow) { 609 | throw new UnsupportedOperationException(); 610 | } 611 | 612 | /** 613 | * Not supported 614 | */ 615 | @Override 616 | public void createFreezePane(int colSplit, int rowSplit) { 617 | throw new UnsupportedOperationException(); 618 | } 619 | 620 | /** 621 | * Not supported 622 | */ 623 | @Override 624 | public void createSplitPane(int xSplitPos, int ySplitPos, int leftmostColumn, int topRow, int activePane) { 625 | throw new UnsupportedOperationException(); 626 | } 627 | 628 | /** 629 | * Not supported 630 | */ 631 | @Override 632 | public PaneInformation getPaneInformation() { 633 | throw new UnsupportedOperationException(); 634 | } 635 | 636 | /** 637 | * Not supported 638 | */ 639 | @Override 640 | public void setDisplayGridlines(boolean show) { 641 | throw new UnsupportedOperationException(); 642 | } 643 | 644 | /** 
645 | * Not supported 646 | */ 647 | @Override 648 | public boolean isDisplayGridlines() { 649 | throw new UnsupportedOperationException(); 650 | } 651 | 652 | /** 653 | * Not supported 654 | */ 655 | @Override 656 | public void setDisplayFormulas(boolean show) { 657 | throw new UnsupportedOperationException(); 658 | } 659 | 660 | /** 661 | * Not supported 662 | */ 663 | @Override 664 | public boolean isDisplayFormulas() { 665 | throw new UnsupportedOperationException(); 666 | } 667 | 668 | /** 669 | * Not supported 670 | */ 671 | @Override 672 | public void setDisplayRowColHeadings(boolean show) { 673 | throw new UnsupportedOperationException(); 674 | } 675 | 676 | /** 677 | * Not supported 678 | */ 679 | @Override 680 | public boolean isDisplayRowColHeadings() { 681 | throw new UnsupportedOperationException(); 682 | } 683 | 684 | /** 685 | * Not supported 686 | */ 687 | @Override 688 | public void setRowBreak(int row) { 689 | throw new UnsupportedOperationException(); 690 | } 691 | 692 | /** 693 | * Not supported 694 | */ 695 | @Override 696 | public boolean isRowBroken(int row) { 697 | throw new UnsupportedOperationException(); 698 | } 699 | 700 | /** 701 | * Not supported 702 | */ 703 | @Override 704 | public void removeRowBreak(int row) { 705 | throw new UnsupportedOperationException(); 706 | } 707 | 708 | /** 709 | * Not supported 710 | */ 711 | @Override 712 | public int[] getRowBreaks() { 713 | throw new UnsupportedOperationException(); 714 | } 715 | 716 | /** 717 | * Not supported 718 | */ 719 | @Override 720 | public int[] getColumnBreaks() { 721 | throw new UnsupportedOperationException(); 722 | } 723 | 724 | /** 725 | * Not supported 726 | */ 727 | @Override 728 | public void setColumnBreak(int column) { 729 | throw new UnsupportedOperationException(); 730 | } 731 | 732 | /** 733 | * Not supported 734 | */ 735 | @Override 736 | public boolean isColumnBroken(int column) { 737 | throw new UnsupportedOperationException(); 738 | } 739 | 740 | /** 741 | * 
Not supported 742 | */ 743 | @Override 744 | public void removeColumnBreak(int column) { 745 | throw new UnsupportedOperationException(); 746 | } 747 | 748 | /** 749 | * Not supported 750 | */ 751 | @Override 752 | public void setColumnGroupCollapsed(int columnNumber, boolean collapsed) { 753 | throw new UnsupportedOperationException(); 754 | } 755 | 756 | /** 757 | * Not supported 758 | */ 759 | @Override 760 | public void groupColumn(int fromColumn, int toColumn) { 761 | throw new UnsupportedOperationException(); 762 | } 763 | 764 | /** 765 | * Not supported 766 | */ 767 | @Override 768 | public void ungroupColumn(int fromColumn, int toColumn) { 769 | throw new UnsupportedOperationException(); 770 | } 771 | 772 | /** 773 | * Not supported 774 | */ 775 | @Override 776 | public void groupRow(int fromRow, int toRow) { 777 | throw new UnsupportedOperationException(); 778 | } 779 | 780 | /** 781 | * Not supported 782 | */ 783 | @Override 784 | public void ungroupRow(int fromRow, int toRow) { 785 | throw new UnsupportedOperationException(); 786 | } 787 | 788 | /** 789 | * Not supported 790 | */ 791 | @Override 792 | public void setRowGroupCollapsed(int row, boolean collapse) { 793 | throw new UnsupportedOperationException(); 794 | } 795 | 796 | /** 797 | * Not supported 798 | */ 799 | @Override 800 | public void setDefaultColumnStyle(int column, CellStyle style) { 801 | throw new UnsupportedOperationException(); 802 | } 803 | 804 | /** 805 | * Not supported 806 | */ 807 | @Override 808 | public void autoSizeColumn(int column) { 809 | throw new UnsupportedOperationException(); 810 | } 811 | 812 | /** 813 | * Not supported 814 | */ 815 | @Override 816 | public void autoSizeColumn(int column, boolean useMergedCells) { 817 | throw new UnsupportedOperationException(); 818 | } 819 | 820 | /** 821 | * Not supported 822 | */ 823 | @Override 824 | public Comment getCellComment(CellAddress cellAddress) { 825 | throw new UnsupportedOperationException(); 826 | } 827 | 828 | /** 
829 | * Not supported 830 | */ 831 | @Override 832 | public Map getCellComments() { 833 | throw new UnsupportedOperationException(); 834 | } 835 | 836 | /** 837 | * Not supported 838 | */ 839 | @Override 840 | public Drawing getDrawingPatriarch() { 841 | throw new UnsupportedOperationException(); 842 | } 843 | 844 | /** 845 | * Not supported 846 | */ 847 | @Override 848 | public Drawing createDrawingPatriarch() { 849 | throw new UnsupportedOperationException(); 850 | } 851 | 852 | /** 853 | * Not supported 854 | */ 855 | @Override 856 | public Workbook getWorkbook() { 857 | throw new UnsupportedOperationException(); 858 | } 859 | 860 | /** 861 | * Not supported 862 | */ 863 | @Override 864 | public boolean isSelected() { 865 | throw new UnsupportedOperationException(); 866 | } 867 | 868 | /** 869 | * Not supported 870 | */ 871 | @Override 872 | public CellRange setArrayFormula(String formula, CellRangeAddress range) { 873 | throw new UnsupportedOperationException(); 874 | } 875 | 876 | /** 877 | * Not supported 878 | */ 879 | @Override 880 | public CellRange removeArrayFormula(Cell cell) { 881 | throw new UnsupportedOperationException(); 882 | } 883 | 884 | /** 885 | * Not supported 886 | */ 887 | @Override 888 | public DataValidationHelper getDataValidationHelper() { 889 | throw new UnsupportedOperationException(); 890 | } 891 | 892 | /** 893 | * Not supported 894 | */ 895 | @Override 896 | public List getDataValidations() { 897 | throw new UnsupportedOperationException(); 898 | } 899 | 900 | /** 901 | * Not supported 902 | */ 903 | @Override 904 | public void addValidationData(DataValidation dataValidation) { 905 | throw new UnsupportedOperationException(); 906 | } 907 | 908 | /** 909 | * Not supported 910 | */ 911 | @Override 912 | public AutoFilter setAutoFilter(CellRangeAddress range) { 913 | throw new UnsupportedOperationException(); 914 | } 915 | 916 | /** 917 | * Not supported 918 | */ 919 | @Override 920 | public SheetConditionalFormatting 
getSheetConditionalFormatting() { 921 | throw new UnsupportedOperationException(); 922 | } 923 | 924 | /** 925 | * Not supported 926 | */ 927 | @Override 928 | public CellRangeAddress getRepeatingRows() { 929 | throw new UnsupportedOperationException(); 930 | } 931 | 932 | /** 933 | * Not supported 934 | */ 935 | @Override 936 | public CellRangeAddress getRepeatingColumns() { 937 | throw new UnsupportedOperationException(); 938 | } 939 | 940 | /** 941 | * Not supported 942 | */ 943 | @Override 944 | public void setRepeatingRows(CellRangeAddress rowRangeRef) { 945 | throw new UnsupportedOperationException(); 946 | } 947 | 948 | /** 949 | * Not supported 950 | */ 951 | @Override 952 | public void setRepeatingColumns(CellRangeAddress columnRangeRef) { 953 | throw new UnsupportedOperationException(); 954 | } 955 | 956 | /** 957 | * Not supported 958 | */ 959 | @Override 960 | public int getColumnOutlineLevel(int columnIndex) { 961 | throw new UnsupportedOperationException(); 962 | } 963 | 964 | /** 965 | * Not supported 966 | */ 967 | @Override 968 | public Hyperlink getHyperlink(int i, int i1) { 969 | throw new UnsupportedOperationException(); 970 | } 971 | 972 | /** 973 | * Not supported 974 | */ 975 | @Override 976 | public Hyperlink getHyperlink(CellAddress cellAddress) { 977 | throw new UnsupportedOperationException(); 978 | } 979 | 980 | /** 981 | * Not supported 982 | */ 983 | @Override 984 | public List getHyperlinkList() { 985 | throw new UnsupportedOperationException(); 986 | } 987 | 988 | /** 989 | * Not supported 990 | */ 991 | @Override 992 | public CellAddress getActiveCell() { 993 | throw new UnsupportedOperationException(); 994 | } 995 | 996 | /** 997 | * Not supported 998 | */ 999 | @Override 1000 | public void setActiveCell(CellAddress cellAddress) { 1001 | throw new UnsupportedOperationException(); 1002 | } 1003 | } 1004 | -------------------------------------------------------------------------------- 
/src/main/java/com/monitorjbl/xlsx/impl/StreamingSheetReader.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.impl; 2 | 3 | import com.monitorjbl.xlsx.exceptions.CloseException; 4 | import com.monitorjbl.xlsx.exceptions.ParseException; 5 | import org.apache.poi.ss.usermodel.BuiltinFormats; 6 | import org.apache.poi.ss.usermodel.DataFormatter; 7 | import org.apache.poi.ss.usermodel.Row; 8 | import org.apache.poi.ss.usermodel.Sheet; 9 | import org.apache.poi.ss.util.CellReference; 10 | import org.apache.poi.xssf.model.SharedStringsTable; 11 | import org.apache.poi.xssf.model.StylesTable; 12 | import org.apache.poi.xssf.usermodel.XSSFCellStyle; 13 | import org.apache.poi.xssf.usermodel.XSSFRichTextString; 14 | import org.slf4j.Logger; 15 | import org.slf4j.LoggerFactory; 16 | 17 | import javax.xml.namespace.QName; 18 | import javax.xml.stream.XMLEventReader; 19 | import javax.xml.stream.XMLStreamConstants; 20 | import javax.xml.stream.XMLStreamException; 21 | import javax.xml.stream.events.Attribute; 22 | import javax.xml.stream.events.Characters; 23 | import javax.xml.stream.events.EndElement; 24 | import javax.xml.stream.events.StartElement; 25 | import javax.xml.stream.events.XMLEvent; 26 | import java.util.ArrayList; 27 | import java.util.HashSet; 28 | import java.util.Iterator; 29 | import java.util.List; 30 | import java.util.Set; 31 | 32 | public class StreamingSheetReader implements Iterable { 33 | private static final Logger log = LoggerFactory.getLogger(StreamingSheetReader.class); 34 | 35 | private final SharedStringsTable sst; 36 | private final StylesTable stylesTable; 37 | private final XMLEventReader parser; 38 | private final DataFormatter dataFormatter = new DataFormatter(); 39 | private final Set hiddenColumns = new HashSet<>(); 40 | 41 | private int lastRowNum; 42 | private int currentRowNum; 43 | private int firstColNum = 0; 44 | private int currentColNum; 45 | private int 
rowCacheSize; 46 | private List rowCache = new ArrayList<>(); 47 | private Iterator rowCacheIterator; 48 | 49 | private String lastContents; 50 | private Sheet sheet; 51 | private StreamingRow currentRow; 52 | private StreamingCell currentCell; 53 | private boolean use1904Dates; 54 | 55 | public StreamingSheetReader(SharedStringsTable sst, StylesTable stylesTable, XMLEventReader parser, 56 | final boolean use1904Dates, int rowCacheSize) { 57 | this.sst = sst; 58 | this.stylesTable = stylesTable; 59 | this.parser = parser; 60 | this.use1904Dates = use1904Dates; 61 | this.rowCacheSize = rowCacheSize; 62 | } 63 | 64 | void setSheet(StreamingSheet sheet) { 65 | this.sheet = sheet; 66 | } 67 | 68 | /** 69 | * Read through a number of rows equal to the rowCacheSize field or until there is no more data to read 70 | * 71 | * @return true if data was read 72 | */ 73 | private boolean getRow() { 74 | try { 75 | rowCache.clear(); 76 | while(rowCache.size() < rowCacheSize && parser.hasNext()) { 77 | handleEvent(parser.nextEvent()); 78 | } 79 | rowCacheIterator = rowCache.iterator(); 80 | return rowCacheIterator.hasNext(); 81 | } catch(XMLStreamException e) { 82 | throw new ParseException("Error reading XML stream", e); 83 | } 84 | } 85 | 86 | private String[] splitCellRef(String ref) { 87 | int splitPos = -1; 88 | 89 | // start at pos 1, since the first char is expected to always be a letter 90 | for(int i = 1; i < ref.length(); i++) { 91 | char c = ref.charAt(i); 92 | 93 | if(c >= '0' && c <= '9') { 94 | splitPos = i; 95 | break; 96 | } 97 | } 98 | 99 | return new String[]{ 100 | ref.substring(0, splitPos), 101 | ref.substring(splitPos) 102 | }; 103 | } 104 | 105 | /** 106 | * Handles a SAX event. 
107 | * 108 | * @param event 109 | */ 110 | private void handleEvent(XMLEvent event) { 111 | if(event.getEventType() == XMLStreamConstants.CHARACTERS) { 112 | Characters c = event.asCharacters(); 113 | lastContents += c.getData(); 114 | } else if(event.getEventType() == XMLStreamConstants.START_ELEMENT 115 | && isSpreadsheetTag(event.asStartElement().getName())) { 116 | StartElement startElement = event.asStartElement(); 117 | String tagLocalName = startElement.getName().getLocalPart(); 118 | 119 | if("row".equals(tagLocalName)) { 120 | Attribute rowNumAttr = startElement.getAttributeByName(new QName("r")); 121 | int rowIndex = currentRowNum; 122 | if(rowNumAttr != null) { 123 | rowIndex = Integer.parseInt(rowNumAttr.getValue()) - 1; 124 | currentRowNum = rowIndex; 125 | } 126 | Attribute isHiddenAttr = startElement.getAttributeByName(new QName("hidden")); 127 | boolean isHidden = isHiddenAttr != null && ("1".equals(isHiddenAttr.getValue()) || "true".equals(isHiddenAttr.getValue())); 128 | currentRow = new StreamingRow(sheet, rowIndex, isHidden); 129 | currentColNum = firstColNum; 130 | } else if("col".equals(tagLocalName)) { 131 | Attribute isHiddenAttr = startElement.getAttributeByName(new QName("hidden")); 132 | boolean isHidden = isHiddenAttr != null && ("1".equals(isHiddenAttr.getValue()) || "true".equals(isHiddenAttr.getValue())); 133 | if(isHidden) { 134 | Attribute minAttr = startElement.getAttributeByName(new QName("min")); 135 | Attribute maxAttr = startElement.getAttributeByName(new QName("max")); 136 | int min = Integer.parseInt(minAttr.getValue()) - 1; 137 | int max = Integer.parseInt(maxAttr.getValue()) - 1; 138 | for(int columnIndex = min; columnIndex <= max; columnIndex++) 139 | hiddenColumns.add(columnIndex); 140 | } 141 | } else if("c".equals(tagLocalName)) { 142 | Attribute ref = startElement.getAttributeByName(new QName("r")); 143 | 144 | if(ref != null) { 145 | String[] coord = splitCellRef(ref.getValue()); 146 | currentColNum = 
CellReference.convertColStringToIndex(coord[0]); 147 | currentCell = new StreamingCell(sheet, currentColNum, Integer.parseInt(coord[1]) - 1, use1904Dates); 148 | } else { 149 | currentCell = new StreamingCell(sheet, currentColNum, currentRowNum, use1904Dates); 150 | } 151 | setFormatString(startElement, currentCell); 152 | 153 | Attribute type = startElement.getAttributeByName(new QName("t")); 154 | if(type != null) { 155 | currentCell.setType(type.getValue()); 156 | } else { 157 | currentCell.setType("n"); 158 | } 159 | 160 | Attribute style = startElement.getAttributeByName(new QName("s")); 161 | if(style != null) { 162 | String indexStr = style.getValue(); 163 | try { 164 | int index = Integer.parseInt(indexStr); 165 | currentCell.setCellStyle(stylesTable.getStyleAt(index)); 166 | } catch(NumberFormatException nfe) { 167 | log.warn("Ignoring invalid style index {}", indexStr); 168 | } 169 | } else { 170 | currentCell.setCellStyle(stylesTable.getStyleAt(0)); 171 | } 172 | } else if("dimension".equals(tagLocalName)) { 173 | Attribute refAttr = startElement.getAttributeByName(new QName("ref")); 174 | String ref = refAttr != null ? refAttr.getValue() : null; 175 | if(ref != null) { 176 | // ref is formatted as A1 or A1:F25. 
Take the last numbers of this string and use it as lastRowNum 177 | for(int i = ref.length() - 1; i >= 0; i--) { 178 | if(!Character.isDigit(ref.charAt(i))) { 179 | try { 180 | lastRowNum = Integer.parseInt(ref.substring(i + 1)) - 1; 181 | } catch(NumberFormatException ignore) { } 182 | break; 183 | } 184 | } 185 | for(int i = 0; i < ref.length(); i++) { 186 | if(!Character.isAlphabetic(ref.charAt(i))) { 187 | firstColNum = CellReference.convertColStringToIndex(ref.substring(0, i)); 188 | break; 189 | } 190 | } 191 | } 192 | } else if("f".equals(tagLocalName)) { 193 | if(currentCell != null) { 194 | currentCell.setFormulaType(true); 195 | } 196 | } 197 | 198 | // Clear contents cache 199 | lastContents = ""; 200 | } else if(event.getEventType() == XMLStreamConstants.END_ELEMENT 201 | && isSpreadsheetTag(event.asEndElement().getName())) { 202 | EndElement endElement = event.asEndElement(); 203 | String tagLocalName = endElement.getName().getLocalPart(); 204 | 205 | if("v".equals(tagLocalName) || "t".equals(tagLocalName)) { 206 | currentCell.setRawContents(unformattedContents()); 207 | currentCell.setContentSupplier(formattedContents()); 208 | } else if("row".equals(tagLocalName) && currentRow != null) { 209 | rowCache.add(currentRow); 210 | currentRowNum++; 211 | } else if("c".equals(tagLocalName)) { 212 | currentRow.getCellMap().put(currentCell.getColumnIndex(), currentCell); 213 | currentCell = null; 214 | currentColNum++; 215 | } else if("f".equals(tagLocalName)) { 216 | if(currentCell != null) { 217 | currentCell.setFormula(lastContents); 218 | } 219 | } 220 | 221 | } 222 | } 223 | 224 | /** 225 | * Returns true if a tag is part of the main namespace for SpreadsheetML: 226 | *
    227 | *
  • http://schemas.openxmlformats.org/spreadsheetml/2006/main 228 | *
  • http://purl.oclc.org/ooxml/spreadsheetml/main 229 | *
230 | * As opposed to http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing, etc. 231 | * 232 | * @param name 233 | * @return 234 | */ 235 | private boolean isSpreadsheetTag(QName name) { 236 | return (name.getNamespaceURI() != null 237 | && name.getNamespaceURI().endsWith("/main")); 238 | } 239 | 240 | /** 241 | * Get the hidden state for a given column 242 | * 243 | * @param columnIndex - the column to set (0-based) 244 | * @return hidden - false if the column is visible 245 | */ 246 | boolean isColumnHidden(int columnIndex) { 247 | if(rowCacheIterator == null) { 248 | getRow(); 249 | } 250 | return hiddenColumns.contains(columnIndex); 251 | } 252 | 253 | /** 254 | * Gets the last row on the sheet 255 | * 256 | * @return 257 | */ 258 | int getLastRowNum() { 259 | if(rowCacheIterator == null) { 260 | getRow(); 261 | } 262 | return lastRowNum; 263 | } 264 | 265 | /** 266 | * Read the numeric format string out of the styles table for this cell. Stores 267 | * the result in the Cell. 268 | * 269 | * @param startElement 270 | * @param cell 271 | */ 272 | void setFormatString(StartElement startElement, StreamingCell cell) { 273 | Attribute cellStyle = startElement.getAttributeByName(new QName("s")); 274 | String cellStyleString = (cellStyle != null) ? 
cellStyle.getValue() : null; 275 | XSSFCellStyle style = null; 276 | 277 | if(cellStyleString != null) { 278 | style = stylesTable.getStyleAt(Integer.parseInt(cellStyleString)); 279 | } else if(stylesTable.getNumCellStyles() > 0) { 280 | style = stylesTable.getStyleAt(0); 281 | } 282 | 283 | if(style != null) { 284 | cell.setNumericFormatIndex(style.getDataFormat()); 285 | String formatString = style.getDataFormatString(); 286 | 287 | if(formatString != null) { 288 | cell.setNumericFormat(formatString); 289 | } else { 290 | cell.setNumericFormat(BuiltinFormats.getBuiltinFormat(cell.getNumericFormatIndex())); 291 | } 292 | } else { 293 | cell.setNumericFormatIndex(null); 294 | cell.setNumericFormat(null); 295 | } 296 | } 297 | 298 | /** 299 | * Tries to format the contents of the last contents appropriately based on 300 | * the type of cell and the discovered numeric format. 301 | * 302 | * @return 303 | */ 304 | Supplier formattedContents() { 305 | return getFormatterForType(currentCell.getType()); 306 | } 307 | 308 | /** 309 | * Tries to format the contents of the last contents appropriately based on 310 | * the provided type and the discovered numeric format. 
311 | * 312 | * @return 313 | */ 314 | private Supplier getFormatterForType(String type) { 315 | switch(type) { 316 | case "s": //string stored in shared table 317 | if(!lastContents.isEmpty()) { 318 | int idx = Integer.parseInt(lastContents); 319 | return new StringSupplier(sst.getItemAt(idx).toString()); 320 | } 321 | return new StringSupplier(lastContents); 322 | case "inlineStr": //inline string (not in sst) 323 | case "str": 324 | return new StringSupplier(new XSSFRichTextString(lastContents).toString()); 325 | case "e": //error type 326 | return new StringSupplier("ERROR: " + lastContents); 327 | case "n": //numeric type 328 | if(currentCell.getNumericFormat() != null && lastContents.length() > 0) { 329 | // the formatRawCellContents operation incurs a significant overhead on large sheets, 330 | // and we want to defer the execution of this method until the value is actually needed. 331 | // it is not needed in all cases.. 332 | final String currentLastContents = lastContents; 333 | final int currentNumericFormatIndex = currentCell.getNumericFormatIndex(); 334 | final String currentNumericFormat = currentCell.getNumericFormat(); 335 | 336 | return new Supplier() { 337 | String cachedContent; 338 | 339 | @Override 340 | public Object getContent() { 341 | if(cachedContent == null) { 342 | cachedContent = dataFormatter.formatRawCellContents( 343 | Double.parseDouble(currentLastContents), 344 | currentNumericFormatIndex, 345 | currentNumericFormat); 346 | } 347 | 348 | return cachedContent; 349 | } 350 | }; 351 | } else { 352 | return new StringSupplier(lastContents); 353 | } 354 | default: 355 | return new StringSupplier(lastContents); 356 | } 357 | } 358 | 359 | /** 360 | * Returns the contents of the cell, with no formatting applied 361 | * 362 | * @return 363 | */ 364 | String unformattedContents() { 365 | switch(currentCell.getType()) { 366 | case "s": //string stored in shared table 367 | if(!lastContents.isEmpty()) { 368 | int idx = 
Integer.parseInt(lastContents); 369 | return sst.getItemAt(idx).toString(); 370 | } 371 | return lastContents; 372 | case "inlineStr": //inline string (not in sst) 373 | return new XSSFRichTextString(lastContents).toString(); 374 | default: 375 | return lastContents; 376 | } 377 | } 378 | 379 | /** 380 | * Returns a new streaming iterator to loop through rows. This iterator is not 381 | * guaranteed to have all rows in memory, and any particular iteration may 382 | * trigger a load from disk to read in new data. 383 | * 384 | * @return the streaming iterator 385 | */ 386 | @Override 387 | public Iterator iterator() { 388 | return new StreamingRowIterator(); 389 | } 390 | 391 | public void close() { 392 | try { 393 | parser.close(); 394 | } catch(XMLStreamException e) { 395 | throw new CloseException(e); 396 | } 397 | } 398 | 399 | class StreamingRowIterator implements Iterator { 400 | public StreamingRowIterator() { 401 | if(rowCacheIterator == null) { 402 | hasNext(); 403 | } 404 | } 405 | 406 | @Override 407 | public boolean hasNext() { 408 | return (rowCacheIterator != null && rowCacheIterator.hasNext()) || getRow(); 409 | } 410 | 411 | @Override 412 | public Row next() { 413 | return rowCacheIterator.next(); 414 | } 415 | 416 | @Override 417 | public void remove() { 418 | throw new RuntimeException("NotSupported"); 419 | } 420 | } 421 | } 422 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/impl/StreamingWorkbook.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.impl; 2 | 3 | import com.monitorjbl.xlsx.exceptions.MissingSheetException; 4 | import org.apache.poi.ss.SpreadsheetVersion; 5 | import org.apache.poi.ss.formula.EvaluationWorkbook; 6 | import org.apache.poi.ss.formula.udf.UDFFinder; 7 | import org.apache.poi.ss.usermodel.CellStyle; 8 | import org.apache.poi.ss.usermodel.CreationHelper; 9 | import 
org.apache.poi.ss.usermodel.DataFormat; 10 | import org.apache.poi.ss.usermodel.Font; 11 | import org.apache.poi.ss.usermodel.Name; 12 | import org.apache.poi.ss.usermodel.PictureData; 13 | import org.apache.poi.ss.usermodel.Row.MissingCellPolicy; 14 | import org.apache.poi.ss.usermodel.Sheet; 15 | import org.apache.poi.ss.usermodel.SheetVisibility; 16 | import org.apache.poi.ss.usermodel.Workbook; 17 | 18 | import java.io.IOException; 19 | import java.io.OutputStream; 20 | import java.util.Iterator; 21 | import java.util.List; 22 | 23 | public class StreamingWorkbook implements Workbook, AutoCloseable { 24 | private final StreamingWorkbookReader reader; 25 | 26 | public StreamingWorkbook(StreamingWorkbookReader reader) { 27 | this.reader = reader; 28 | } 29 | 30 | int findSheetByName(String name) { 31 | for(int i = 0; i < reader.getSheetProperties().size(); i++) { 32 | if(reader.getSheetProperties().get(i).get("name").equals(name)) { 33 | return i; 34 | } 35 | } 36 | return -1; 37 | } 38 | 39 | /* Supported */ 40 | 41 | /** 42 | * {@inheritDoc} 43 | */ 44 | @Override 45 | public Iterator iterator() { 46 | return reader.iterator(); 47 | } 48 | 49 | /** 50 | * {@inheritDoc} 51 | */ 52 | @Override 53 | public Iterator sheetIterator() { 54 | return iterator(); 55 | } 56 | 57 | /** 58 | * {@inheritDoc} 59 | */ 60 | @Override 61 | public String getSheetName(int sheet) { 62 | return reader.getSheetProperties().get(sheet).get("name"); 63 | } 64 | 65 | /** 66 | * {@inheritDoc} 67 | */ 68 | @Override 69 | public int getSheetIndex(String name) { 70 | return findSheetByName(name); 71 | } 72 | 73 | /** 74 | * {@inheritDoc} 75 | */ 76 | @Override 77 | public int getSheetIndex(Sheet sheet) { 78 | if(sheet instanceof StreamingSheet) { 79 | return findSheetByName(sheet.getSheetName()); 80 | } else { 81 | throw new UnsupportedOperationException("Cannot use non-StreamingSheet sheets"); 82 | } 83 | } 84 | 85 | /** 86 | * {@inheritDoc} 87 | */ 88 | @Override 89 | public int 
getNumberOfSheets() { 90 | return reader.getSheets().size(); 91 | } 92 | 93 | /** 94 | * {@inheritDoc} 95 | */ 96 | @Override 97 | public Sheet getSheetAt(int index) { 98 | return reader.getSheets().get(index); 99 | } 100 | 101 | /** 102 | * {@inheritDoc} 103 | */ 104 | @Override 105 | public Sheet getSheet(String name) { 106 | int index = getSheetIndex(name); 107 | if(index == -1) { 108 | throw new MissingSheetException("Sheet '" + name + "' does not exist"); 109 | } 110 | return reader.getSheets().get(index); 111 | } 112 | 113 | /** 114 | * {@inheritDoc} 115 | */ 116 | @Override 117 | public boolean isSheetHidden(int sheetIx) { 118 | return "hidden".equals(reader.getSheetProperties().get(sheetIx).get("state")); 119 | } 120 | 121 | /** 122 | * {@inheritDoc} 123 | */ 124 | @Override 125 | public boolean isSheetVeryHidden(int sheetIx) { 126 | return "veryHidden".equals(reader.getSheetProperties().get(sheetIx).get("state")); 127 | } 128 | 129 | /** 130 | * {@inheritDoc} 131 | */ 132 | @Override 133 | public void close() throws IOException { 134 | reader.close(); 135 | } 136 | 137 | /* Not supported */ 138 | 139 | /** 140 | * Not supported 141 | */ 142 | @Override 143 | public int getActiveSheetIndex() { 144 | throw new UnsupportedOperationException(); 145 | } 146 | 147 | /** 148 | * Not supported 149 | */ 150 | @Override 151 | public void setActiveSheet(int sheetIndex) { 152 | throw new UnsupportedOperationException(); 153 | } 154 | 155 | /** 156 | * Not supported 157 | */ 158 | @Override 159 | public int getFirstVisibleTab() { 160 | throw new UnsupportedOperationException(); 161 | } 162 | 163 | /** 164 | * Not supported 165 | */ 166 | @Override 167 | public void setFirstVisibleTab(int sheetIndex) { 168 | throw new UnsupportedOperationException(); 169 | } 170 | 171 | /** 172 | * Not supported 173 | */ 174 | @Override 175 | public void setSheetOrder(String sheetname, int pos) { 176 | throw new UnsupportedOperationException(); 177 | } 178 | 179 | /** 180 | * Not 
supported 181 | */ 182 | @Override 183 | public void setSelectedTab(int index) { 184 | throw new UnsupportedOperationException(); 185 | } 186 | 187 | /** 188 | * Not supported 189 | */ 190 | @Override 191 | public void setSheetName(int sheet, String name) { 192 | throw new UnsupportedOperationException(); 193 | } 194 | 195 | /** 196 | * Not supported 197 | */ 198 | @Override 199 | public Sheet createSheet() { 200 | throw new UnsupportedOperationException(); 201 | } 202 | 203 | /** 204 | * Not supported 205 | */ 206 | @Override 207 | public Sheet createSheet(String sheetname) { 208 | throw new UnsupportedOperationException(); 209 | } 210 | 211 | /** 212 | * Not supported 213 | */ 214 | @Override 215 | public Sheet cloneSheet(int sheetNum) { 216 | throw new UnsupportedOperationException(); 217 | } 218 | 219 | /** 220 | * Not supported 221 | */ 222 | @Override 223 | public void removeSheetAt(int index) { 224 | throw new UnsupportedOperationException(); 225 | } 226 | 227 | /** 228 | * Not supported 229 | */ 230 | @Override 231 | public Font createFont() { 232 | throw new UnsupportedOperationException(); 233 | } 234 | 235 | /** 236 | * Not supported 237 | */ 238 | @Override 239 | public Font findFont(boolean b, short i, short i1, String s, boolean b1, boolean b2, short i2, byte b3) { 240 | throw new UnsupportedOperationException(); 241 | } 242 | 243 | @Override 244 | public int getNumberOfFonts() { 245 | throw new UnsupportedOperationException(); 246 | } 247 | 248 | /** 249 | * Not supported 250 | */ 251 | @Override 252 | public int getNumberOfFontsAsInt() { throw new UnsupportedOperationException(); } 253 | 254 | /** 255 | * Not supported 256 | */ 257 | @Override 258 | public Font getFontAt(int i) { throw new UnsupportedOperationException(); } 259 | 260 | /** 261 | * Not supported 262 | */ 263 | @Override 264 | public CellStyle createCellStyle() { 265 | throw new UnsupportedOperationException(); 266 | } 267 | 268 | /** 269 | * Not supported 270 | */ 271 | @Override 272 
| public int getNumCellStyles() { 273 | throw new UnsupportedOperationException(); 274 | } 275 | 276 | /** 277 | * Not supported 278 | */ 279 | @Override 280 | public CellStyle getCellStyleAt(int i) { 281 | throw new UnsupportedOperationException(); 282 | } 283 | 284 | /** 285 | * Not supported 286 | */ 287 | @Override 288 | public void write(OutputStream stream) throws IOException { 289 | throw new UnsupportedOperationException(); 290 | } 291 | 292 | /** 293 | * Not supported 294 | */ 295 | @Override 296 | public int getNumberOfNames() { 297 | throw new UnsupportedOperationException(); 298 | } 299 | 300 | /** 301 | * Not supported 302 | */ 303 | @Override 304 | public Name getName(String name) { 305 | throw new UnsupportedOperationException(); 306 | } 307 | 308 | /** 309 | * Not supported 310 | */ 311 | @Override 312 | public List getNames(String s) { 313 | throw new UnsupportedOperationException(); 314 | } 315 | 316 | /** 317 | * Not supported 318 | */ 319 | @Override 320 | public List getAllNames() { 321 | throw new UnsupportedOperationException(); 322 | } 323 | 324 | /** 325 | * Not supported 326 | */ 327 | @Override 328 | public Name createName() { 329 | throw new UnsupportedOperationException(); 330 | } 331 | 332 | /** 333 | * Not supported 334 | */ 335 | @Override 336 | public void removeName(Name name) { 337 | throw new UnsupportedOperationException(); 338 | } 339 | 340 | /** 341 | * Not supported 342 | */ 343 | @Override 344 | public int linkExternalWorkbook(String name, Workbook workbook) { 345 | throw new UnsupportedOperationException(); 346 | } 347 | 348 | /** 349 | * Not supported 350 | */ 351 | @Override 352 | public void setPrintArea(int sheetIndex, String reference) { 353 | throw new UnsupportedOperationException(); 354 | } 355 | 356 | /** 357 | * Not supported 358 | */ 359 | @Override 360 | public void setPrintArea(int sheetIndex, int startColumn, int endColumn, int startRow, int endRow) { 361 | throw new UnsupportedOperationException(); 362 | } 
363 | 364 | /** 365 | * Not supported 366 | */ 367 | @Override 368 | public String getPrintArea(int sheetIndex) { 369 | throw new UnsupportedOperationException(); 370 | } 371 | 372 | /** 373 | * Not supported 374 | */ 375 | @Override 376 | public void removePrintArea(int sheetIndex) { 377 | throw new UnsupportedOperationException(); 378 | } 379 | 380 | /** 381 | * Not supported 382 | */ 383 | @Override 384 | public MissingCellPolicy getMissingCellPolicy() { 385 | throw new UnsupportedOperationException(); 386 | } 387 | 388 | /** 389 | * Not supported 390 | */ 391 | @Override 392 | public void setMissingCellPolicy(MissingCellPolicy missingCellPolicy) { 393 | throw new UnsupportedOperationException(); 394 | } 395 | 396 | /** 397 | * Not supported 398 | */ 399 | @Override 400 | public DataFormat createDataFormat() { 401 | throw new UnsupportedOperationException(); 402 | } 403 | 404 | /** 405 | * Not supported 406 | */ 407 | @Override 408 | public int addPicture(byte[] pictureData, int format) { 409 | throw new UnsupportedOperationException(); 410 | } 411 | 412 | /** 413 | * Not supported 414 | */ 415 | @Override 416 | public List getAllPictures() { 417 | throw new UnsupportedOperationException(); 418 | } 419 | 420 | /** 421 | * Not supported 422 | */ 423 | @Override 424 | public CreationHelper getCreationHelper() { 425 | throw new UnsupportedOperationException(); 426 | } 427 | 428 | /** 429 | * Not supported 430 | */ 431 | @Override 432 | public boolean isHidden() { 433 | throw new UnsupportedOperationException(); 434 | } 435 | 436 | /** 437 | * Not supported 438 | */ 439 | @Override 440 | public void setHidden(boolean hiddenFlag) { 441 | throw new UnsupportedOperationException(); 442 | } 443 | 444 | /** 445 | * Not supported 446 | */ 447 | @Override 448 | public void setSheetHidden(int sheetIx, boolean hidden) { 449 | throw new UnsupportedOperationException(); 450 | } 451 | 452 | /** 453 | * Not supported 454 | */ 455 | @Override 456 | public SheetVisibility 
getSheetVisibility(int i) { 457 | throw new UnsupportedOperationException(); 458 | } 459 | 460 | /** 461 | * Not supported 462 | */ 463 | @Override 464 | public void setSheetVisibility(int i, SheetVisibility sheetVisibility) { 465 | throw new UnsupportedOperationException(); 466 | } 467 | 468 | /** 469 | * Not supported 470 | */ 471 | @Override 472 | public void addToolPack(UDFFinder toopack) { 473 | throw new UnsupportedOperationException(); 474 | } 475 | 476 | /** 477 | * Not supported 478 | */ 479 | @Override 480 | public void setForceFormulaRecalculation(boolean value) { 481 | throw new UnsupportedOperationException(); 482 | } 483 | 484 | /** 485 | * Not supported 486 | */ 487 | @Override 488 | public boolean getForceFormulaRecalculation() { 489 | throw new UnsupportedOperationException(); 490 | } 491 | 492 | /** 493 | * Not supported 494 | */ 495 | @Override 496 | public SpreadsheetVersion getSpreadsheetVersion() { 497 | throw new UnsupportedOperationException(); 498 | } 499 | 500 | /** 501 | * Not supported 502 | */ 503 | @Override 504 | public int addOlePackage(byte[] bytes, String s, String s1, String s2) throws IOException { 505 | throw new UnsupportedOperationException(); 506 | } 507 | 508 | @Override 509 | public EvaluationWorkbook createEvaluationWorkbook() { 510 | return null; 511 | } 512 | } 513 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/impl/StreamingWorkbookReader.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.impl; 2 | 3 | import com.monitorjbl.xlsx.StreamingReader.Builder; 4 | import com.monitorjbl.xlsx.exceptions.OpenException; 5 | import com.monitorjbl.xlsx.exceptions.ReadException; 6 | import com.monitorjbl.xlsx.sst.BufferedStringsTable; 7 | import org.apache.poi.openxml4j.exceptions.InvalidFormatException; 8 | import org.apache.poi.openxml4j.exceptions.OpenXML4JException; 9 | import 
org.apache.poi.openxml4j.opc.OPCPackage; 10 | import org.apache.poi.poifs.crypt.Decryptor; 11 | import org.apache.poi.poifs.crypt.EncryptionInfo; 12 | import org.apache.poi.poifs.filesystem.POIFSFileSystem; 13 | import org.apache.poi.ss.usermodel.Sheet; 14 | import org.apache.poi.util.StaxHelper; 15 | import org.apache.poi.xssf.eventusermodel.XSSFReader; 16 | import org.apache.poi.xssf.eventusermodel.XSSFReader.SheetIterator; 17 | import org.apache.poi.xssf.model.SharedStringsTable; 18 | import org.apache.poi.xssf.model.StylesTable; 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | import org.w3c.dom.Node; 22 | import org.w3c.dom.NodeList; 23 | 24 | import javax.xml.stream.XMLEventReader; 25 | import javax.xml.stream.XMLStreamException; 26 | import java.io.File; 27 | import java.io.IOException; 28 | import java.io.InputStream; 29 | import java.net.URI; 30 | import java.nio.file.Files; 31 | import java.security.GeneralSecurityException; 32 | import java.util.ArrayList; 33 | import java.util.HashMap; 34 | import java.util.Iterator; 35 | import java.util.LinkedHashMap; 36 | import java.util.List; 37 | import java.util.Map; 38 | 39 | import static com.monitorjbl.xlsx.XmlUtils.document; 40 | import static com.monitorjbl.xlsx.XmlUtils.searchForNodeList; 41 | import static com.monitorjbl.xlsx.impl.TempFileUtil.writeInputStreamToFile; 42 | import static java.util.Arrays.asList; 43 | 44 | public class StreamingWorkbookReader implements Iterable, AutoCloseable { 45 | private static final Logger log = LoggerFactory.getLogger(StreamingWorkbookReader.class); 46 | 47 | private final List sheets; 48 | private final List> sheetProperties = new ArrayList<>(); 49 | private final Builder builder; 50 | private File tmp; 51 | private File sstCache; 52 | private OPCPackage pkg; 53 | private SharedStringsTable sst; 54 | private boolean use1904Dates = false; 55 | 56 | /** 57 | * This constructor exists only so the StreamingReader can instantiate 58 | * a 
StreamingWorkbook using its own reader implementation. Do not use 59 | * going forward. 60 | * 61 | * @param sst The SST data for this workbook 62 | * @param sstCache The backing cache file for the SST data 63 | * @param pkg The POI package that should be closed when this workbook is closed 64 | * @param reader A single streaming reader instance 65 | * @param builder The builder containing all options 66 | */ 67 | @Deprecated 68 | public StreamingWorkbookReader(SharedStringsTable sst, File sstCache, OPCPackage pkg, StreamingSheetReader reader, Builder builder) { 69 | this.sst = sst; 70 | this.sstCache = sstCache; 71 | this.pkg = pkg; 72 | this.sheets = asList(new StreamingSheet(null, reader)); 73 | this.builder = builder; 74 | } 75 | 76 | public StreamingWorkbookReader(Builder builder) { 77 | this.sheets = new ArrayList<>(); 78 | this.builder = builder; 79 | } 80 | 81 | public StreamingSheetReader first() { 82 | return sheets.get(0).getReader(); 83 | } 84 | 85 | public void init(InputStream is) { 86 | File f = null; 87 | try { 88 | f = writeInputStreamToFile(is, builder.getBufferSize()); 89 | log.debug("Created temp file [" + f.getAbsolutePath() + "]"); 90 | 91 | init(f); 92 | tmp = f; 93 | } catch(IOException e) { 94 | throw new ReadException("Unable to read input stream", e); 95 | } catch(RuntimeException e) { 96 | if(f != null) { 97 | f.delete(); 98 | } 99 | throw e; 100 | } 101 | } 102 | 103 | public void init(File f) { 104 | try { 105 | if(builder.getPassword() != null) { 106 | // Based on: https://poi.apache.org/encryption.html 107 | POIFSFileSystem poifs = new POIFSFileSystem(f); 108 | EncryptionInfo info = new EncryptionInfo(poifs); 109 | Decryptor d = Decryptor.getInstance(info); 110 | d.verifyPassword(builder.getPassword()); 111 | pkg = OPCPackage.open(d.getDataStream(poifs)); 112 | } else { 113 | pkg = OPCPackage.open(f); 114 | } 115 | 116 | XSSFReader reader = new XSSFReader(pkg); 117 | if(builder.getSstCacheSizeBytes() > 0) { 118 | sstCache = 
Files.createTempFile("", "").toFile(); 119 | log.debug("Created sst cache file [" + sstCache.getAbsolutePath() + "]"); 120 | sst = BufferedStringsTable.getSharedStringsTable(sstCache, builder.getSstCacheSizeBytes(), pkg); 121 | } else { 122 | sst = reader.getSharedStringsTable(); 123 | } 124 | 125 | StylesTable styles = reader.getStylesTable(); 126 | NodeList workbookPr = searchForNodeList(document(reader.getWorkbookData()), "/ss:workbook/ss:workbookPr"); 127 | if(workbookPr.getLength() == 1) { 128 | final Node date1904 = workbookPr.item(0).getAttributes().getNamedItem("date1904"); 129 | if(date1904 != null) { 130 | use1904Dates = ("1".equals(date1904.getTextContent())); 131 | } 132 | } 133 | 134 | loadSheets(reader, sst, styles, builder.getRowCacheSize()); 135 | } catch(IOException e) { 136 | throw new OpenException("Failed to open file", e); 137 | } catch(OpenXML4JException | XMLStreamException e) { 138 | throw new ReadException("Unable to read workbook", e); 139 | } catch(GeneralSecurityException e) { 140 | throw new ReadException("Unable to read workbook - Decryption failed", e); 141 | } 142 | } 143 | 144 | void loadSheets(XSSFReader reader, SharedStringsTable sst, StylesTable stylesTable, int rowCacheSize) 145 | throws IOException, InvalidFormatException, XMLStreamException { 146 | lookupSheetNames(reader); 147 | 148 | //Some workbooks have multiple references to the same sheet. Need to filter 149 | //them out before creating the XMLEventReader by keeping track of their URIs. 150 | //The sheets are listed in order, so we must keep track of insertion order. 
151 | SheetIterator iter = (SheetIterator) reader.getSheetsData(); 152 | Map sheetStreams = new LinkedHashMap<>(); 153 | while(iter.hasNext()) { 154 | InputStream is = iter.next(); 155 | sheetStreams.put(iter.getSheetPart().getPartName().getURI(), is); 156 | } 157 | 158 | //Iterate over the loaded streams 159 | int i = 0; 160 | for(URI uri : sheetStreams.keySet()) { 161 | XMLEventReader parser = StaxHelper.newXMLInputFactory().createXMLEventReader(sheetStreams.get(uri)); 162 | sheets.add(new StreamingSheet(sheetProperties.get(i++).get("name"), new StreamingSheetReader(sst, stylesTable, parser, use1904Dates, rowCacheSize))); 163 | } 164 | } 165 | 166 | void lookupSheetNames(XSSFReader reader) throws IOException, InvalidFormatException { 167 | sheetProperties.clear(); 168 | NodeList nl = searchForNodeList(document(reader.getWorkbookData()), "/ss:workbook/ss:sheets/ss:sheet"); 169 | for(int i = 0; i < nl.getLength(); i++) { 170 | Map props = new HashMap<>(); 171 | props.put("name", nl.item(i).getAttributes().getNamedItem("name").getTextContent()); 172 | 173 | Node state = nl.item(i).getAttributes().getNamedItem("state"); 174 | props.put("state", state == null ? 
"visible" : state.getTextContent()); 175 | sheetProperties.add(props); 176 | } 177 | } 178 | 179 | List getSheets() { 180 | return sheets; 181 | } 182 | 183 | public List> getSheetProperties() { 184 | return sheetProperties; 185 | } 186 | 187 | @Override 188 | public Iterator iterator() { 189 | return new StreamingSheetIterator(sheets.iterator()); 190 | } 191 | 192 | @Override 193 | public void close() throws IOException { 194 | try { 195 | for(StreamingSheet sheet : sheets) { 196 | sheet.getReader().close(); 197 | } 198 | pkg.revert(); 199 | } finally { 200 | if(tmp != null) { 201 | if(log.isDebugEnabled()) { 202 | log.debug("Deleting tmp file [" + tmp.getAbsolutePath() + "]"); 203 | } 204 | tmp.delete(); 205 | } 206 | if(sst instanceof BufferedStringsTable) { 207 | if(log.isDebugEnabled()) { 208 | log.debug("Deleting sst cache file [" + this.sstCache.getAbsolutePath() + "]"); 209 | } 210 | ((BufferedStringsTable) sst).close(); 211 | sstCache.delete(); 212 | } 213 | } 214 | } 215 | 216 | static class StreamingSheetIterator implements Iterator { 217 | private final Iterator iterator; 218 | 219 | public StreamingSheetIterator(Iterator iterator) { 220 | this.iterator = iterator; 221 | } 222 | 223 | @Override 224 | public boolean hasNext() { 225 | return iterator.hasNext(); 226 | } 227 | 228 | @Override 229 | public Sheet next() { 230 | return iterator.next(); 231 | } 232 | 233 | @Override 234 | public void remove() { 235 | throw new RuntimeException("NotSupported"); 236 | } 237 | } 238 | } 239 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/impl/StringSupplier.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.impl; 2 | 3 | class StringSupplier implements Supplier { 4 | private final String val; 5 | 6 | StringSupplier(String val) { 7 | this.val = val; 8 | } 9 | 10 | @Override 11 | public Object getContent() { 12 | return val; 13 | 
} 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/impl/Supplier.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.impl; 2 | 3 | interface Supplier { 4 | Object getContent(); 5 | } 6 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/impl/TempFileUtil.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.impl; 2 | 3 | import java.io.File; 4 | import java.io.FileOutputStream; 5 | import java.io.IOException; 6 | import java.io.InputStream; 7 | import java.nio.file.Files; 8 | 9 | public class TempFileUtil { 10 | public static File writeInputStreamToFile(InputStream is, int bufferSize) throws IOException { 11 | File f = Files.createTempFile("tmp-", ".xlsx").toFile(); 12 | try(FileOutputStream fos = new FileOutputStream(f)) { 13 | int read; 14 | byte[] bytes = new byte[bufferSize]; 15 | while((read = is.read(bytes)) != -1) { 16 | fos.write(bytes, 0, read); 17 | } 18 | return f; 19 | } finally { 20 | is.close(); 21 | } 22 | } 23 | } -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/sst/BufferedStringsTable.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.sst; 2 | 3 | import org.apache.poi.openxml4j.opc.OPCPackage; 4 | import org.apache.poi.openxml4j.opc.PackagePart; 5 | import org.apache.poi.ss.usermodel.RichTextString; 6 | import org.apache.poi.util.StaxHelper; 7 | import org.apache.poi.xssf.model.SharedStringsTable; 8 | import org.apache.poi.xssf.usermodel.XSSFRelation; 9 | import org.apache.poi.xssf.usermodel.XSSFRichTextString; 10 | 11 | import javax.xml.stream.XMLEventReader; 12 | import javax.xml.stream.XMLStreamException; 13 | import 
javax.xml.stream.events.XMLEvent; 14 | import java.io.File; 15 | import java.io.IOException; 16 | import java.io.InputStream; 17 | import java.util.List; 18 | 19 | public class BufferedStringsTable extends SharedStringsTable implements AutoCloseable { 20 | private final FileBackedList list; 21 | 22 | public static BufferedStringsTable getSharedStringsTable(File tmp, int cacheSizeBytes, OPCPackage pkg) 23 | throws IOException { 24 | List parts = pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType()); 25 | return parts.size() == 0 ? null : new BufferedStringsTable(parts.get(0), tmp, cacheSizeBytes); 26 | } 27 | 28 | private BufferedStringsTable(PackagePart part, File file, int cacheSizeBytes) throws IOException { 29 | this.list = new FileBackedList(file, cacheSizeBytes); 30 | readFrom(part.getInputStream()); 31 | } 32 | 33 | @Override 34 | public void readFrom(InputStream is) throws IOException { 35 | try { 36 | XMLEventReader xmlEventReader = StaxHelper.newXMLInputFactory().createXMLEventReader(is); 37 | 38 | while(xmlEventReader.hasNext()) { 39 | XMLEvent xmlEvent = xmlEventReader.nextEvent(); 40 | 41 | if(xmlEvent.isStartElement() && xmlEvent.asStartElement().getName().getLocalPart().equals("si")) { 42 | list.add(parseCT_Rst(xmlEventReader)); 43 | } 44 | } 45 | } catch(XMLStreamException e) { 46 | throw new IOException(e); 47 | } 48 | } 49 | 50 | /** 51 | * Parses a {@code } String Item. Returns just the text and drops the formatting. See xmlschema 53 | * type {@code CT_Rst}. 
54 | */ 55 | private String parseCT_Rst(XMLEventReader xmlEventReader) throws XMLStreamException { 56 | // Precondition: pointing to ; Post condition: pointing to 57 | StringBuilder buf = new StringBuilder(); 58 | XMLEvent xmlEvent; 59 | while((xmlEvent = xmlEventReader.nextTag()).isStartElement()) { 60 | switch(xmlEvent.asStartElement().getName().getLocalPart()) { 61 | case "t": // Text 62 | buf.append(xmlEventReader.getElementText()); 63 | break; 64 | case "r": // Rich Text Run 65 | parseCT_RElt(xmlEventReader, buf); 66 | break; 67 | case "rPh": // Phonetic Run 68 | case "phoneticPr": // Phonetic Properties 69 | skipElement(xmlEventReader); 70 | break; 71 | default: 72 | throw new IllegalArgumentException(xmlEvent.asStartElement().getName().getLocalPart()); 73 | } 74 | } 75 | return buf.toString(); 76 | } 77 | 78 | /** 79 | * Parses a {@code } Rich Text Run. Returns just the text and drops the formatting. See xmlschema 81 | * type {@code CT_RElt}. 82 | */ 83 | private void parseCT_RElt(XMLEventReader xmlEventReader, StringBuilder buf) throws XMLStreamException { 84 | // Precondition: pointing to ; Post condition: pointing to 85 | XMLEvent xmlEvent; 86 | while((xmlEvent = xmlEventReader.nextTag()).isStartElement()) { 87 | switch(xmlEvent.asStartElement().getName().getLocalPart()) { 88 | case "t": // Text 89 | buf.append(xmlEventReader.getElementText()); 90 | break; 91 | case "rPr": // Run Properties 92 | skipElement(xmlEventReader); 93 | break; 94 | default: 95 | throw new IllegalArgumentException(xmlEvent.asStartElement().getName().getLocalPart()); 96 | } 97 | } 98 | } 99 | 100 | private void skipElement(XMLEventReader xmlEventReader) throws XMLStreamException { 101 | // Precondition: pointing to start element; Post condition: pointing to end element 102 | while(xmlEventReader.nextTag().isStartElement()) { 103 | skipElement(xmlEventReader); // recursively skip over child 104 | } 105 | } 106 | 107 | @Override 108 | public RichTextString getItemAt(int idx) { 109 | 
return new XSSFRichTextString(list.getAt(idx)); 110 | } 111 | 112 | @Override 113 | public void close() throws IOException { 114 | super.close(); 115 | list.close(); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java/com/monitorjbl/xlsx/sst/FileBackedList.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx.sst; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.RandomAccessFile; 6 | import java.nio.ByteBuffer; 7 | import java.nio.channels.FileChannel; 8 | import java.nio.charset.StandardCharsets; 9 | import java.util.ArrayList; 10 | import java.util.LinkedHashMap; 11 | import java.util.List; 12 | import java.util.Map; 13 | 14 | /** 15 | * File-backed list-like class. Allows addition of arbitrary 16 | * numbers of array entries (serialized to JSON) in a binary 17 | * packed file. Reading of entries is done with an NIO 18 | * channel that seeks to the entry in the file. 19 | *

 20 | * File entry format: 21 | * <ul> 22 | * <li>4 bytes: length of entry</li> 23 | * <li>length bytes: JSON string containing the entry data</li> 24 | * </ul> 25 | * <p>

* Pointers to the offset of each entry are kept in a {@code List}.
* The values loaded from the file are cached up to a maximum of
* {@code cacheSize}. Items are evicted from the cache with an LRU algorithm.
*/
public class FileBackedList implements AutoCloseable {

  /** Size in bytes of the length prefix written in front of every entry. */
  private static final int LENGTH_PREFIX_BYTES = Integer.BYTES;

  /** File offset of the length prefix of each entry, indexed by insertion order. */
  private final List<Long> pointers = new ArrayList<>();
  private final RandomAccessFile raf;
  private final FileChannel channel;
  private final LRUCache cache;

  /** Offset at which the next entry is written. */
  private long filesize;

  /**
   * Opens (creating if necessary) the backing file in read/write mode.
   * New entries are appended after whatever the file already contains.
   *
   * @param file           backing file
   * @param cacheSizeBytes capacity handed to the in-memory {@link LRUCache}
   * @throws IOException if the file cannot be opened or its length cannot be read
   */
  public FileBackedList(File file, final int cacheSizeBytes) throws IOException {
    RandomAccessFile opened = new RandomAccessFile(file, "rw");
    long initialLength;
    try {
      initialLength = opened.length();
    } catch(IOException e) {
      // Do not leak the file handle when construction fails half-way.
      try {
        opened.close();
      } catch(IOException suppressed) {
        e.addSuppressed(suppressed);
      }
      throw e;
    }
    this.raf = opened;
    this.channel = opened.getChannel();
    this.filesize = initialLength;
    this.cache = new LRUCache(cacheSizeBytes);
  }

  /**
   * Appends a string to the end of the list.
   *
   * @param str value to append
   * @throws RuntimeException wrapping the {@link IOException} if the write fails
   */
  public void add(String str) {
    try {
      writeToFile(str);
    } catch(IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Returns the entry at {@code index}, from the cache when present and from
   * the backing file otherwise (the value is then stored in the cache).
   *
   * NOTE(review): only the file access is synchronized; the pointer list and
   * the cache are touched without a lock here - confirm callers are single-threaded.
   *
   * @param index zero-based insertion index
   * @return the stored string
   * @throws RuntimeException wrapping the {@link IOException} if the read fails,
   *                          or raised by the cache when the value exceeds its capacity
   */
  public String getAt(int index) {
    String cached = cache.getIfPresent(index);
    if(cached != null) {
      return cached;
    }

    try {
      String val = readFromFile(pointers.get(index));
      cache.store(index, val);
      return val;
    } catch(IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Writes one record (4-byte length prefix followed by the UTF-8 bytes) at the
   * current end of the data and registers its offset.
   */
  private void writeToFile(String str) throws IOException {
    synchronized(channel) {
      byte[] payload = str.getBytes(StandardCharsets.UTF_8);
      ByteBuffer record = ByteBuffer.allocate(LENGTH_PREFIX_BYTES + payload.length);
      record.putInt(payload.length).put(payload);
      record.flip();

      long start = filesize;
      long position = start;
      // A single write() is not guaranteed to drain the buffer; loop until it has.
      while(record.hasRemaining()) {
        position += channel.write(record, position);
      }

      // Only publish the pointer once the whole record is on disk.
      pointers.add(start);
      filesize = position;
    }
  }

  /**
   * Reads the record whose length prefix starts at {@code pointer}.
   *
   * @throws IOException on I/O failure, premature end of file, or a negative length prefix
   */
  private String readFromFile(long pointer) throws IOException {
    synchronized(channel) {
      //get length of entry
      ByteBuffer header = ByteBuffer.allocate(LENGTH_PREFIX_BYTES);
      readFully(header, pointer);
      header.flip();
      int length = header.getInt();
      if(length < 0) {
        throw new IOException("Corrupt entry at offset " + pointer + ": negative length " + length);
      }

      //read entry
      ByteBuffer body = ByteBuffer.allocate(length);
      readFully(body, pointer + LENGTH_PREFIX_BYTES);
      return new String(body.array(), StandardCharsets.UTF_8);
    }
  }

  /** Fills {@code target} from the channel starting at {@code position}, looping over short reads. */
  private void readFully(ByteBuffer target, long position) throws IOException {
    long offset = position;
    while(target.hasRemaining()) {
      int read = channel.read(target, offset);
      if(read < 0) {
        throw new java.io.EOFException("Unexpected end of file at offset " + offset);
      }
      offset += read;
    }
  }

  /**
   * Closes the backing file (and with it the channel).
   *
   * @throws RuntimeException wrapping the {@link IOException} if closing fails
   */
  @Override
  public void close() {
    try {
      raf.close();
    } catch(IOException e) {
      throw new RuntimeException(e);
    }
  }
}

--------------------------------------------------------------------------------
/src/main/java/com/monitorjbl/xlsx/sst/LRUCache.java:
--------------------------------------------------------------------------------
package com.monitorjbl.xlsx.sst;

import java.util.Iterator;
import java.util.LinkedHashMap;

/**
 * Size-bounded cache of strings keyed by integer index. When storing a value
 * would exceed {@code capacityBytes} (using the estimate of {@link #strSize}),
 * the least recently used entries are dropped first.
 */
class LRUCache {

  /** Sum of the estimated sizes of all values currently held. */
  private long sizeBytes;
  private final long capacityBytes;
  /** Access-ordered, so iteration starts at the least recently used entry. */
  private final LinkedHashMap<Integer, String> map = new LinkedHashMap<>(16, 0.75f, true);

  LRUCache(long capacityBytes) {
    this.capacityBytes = capacityBytes;
  }

  /**
   * Returns the cached value for {@code key} and marks it most recently used,
   * or {@code null} when the key is not cached.
   */
  String getIfPresent(int key) {
    // In an access-ordered LinkedHashMap a successful get() moves the entry to the end.
    return map.get(key);
  }

  /**
   * Caches {@code val} under {@code key}, evicting least recently used entries
   * until it fits. Re-storing an existing key replaces its value.
   *
   * @throws RuntimeException if the value alone is larger than the cache capacity
   */
  void store(int key, String val) {
    long valSize = strSize(val);
    if(valSize > capacityBytes) {
      throw new RuntimeException("Insufficient cache space.");
    }

    // Release the space of a value being replaced; otherwise it is counted twice.
    String previous = map.remove(key);
    if(previous != null) {
      sizeBytes -= strSize(previous);
    }

    Iterator<String> it = map.values().iterator();
    while(valSize + sizeBytes > capacityBytes && it.hasNext()) {
      sizeBytes -= strSize(it.next());
      it.remove();
    }
    map.put(key, val);
    sizeBytes += valSize;
  }

  // just an estimation
  private static long strSize(String str) {
    long size = Integer.BYTES; // hashCode
    size += Character.BYTES * str.length(); // characters
    return size;
  }

}

--------------------------------------------------------------------------------
/src/test/java/com/monitorjbl/xlsx/BufferedStringsTableTest.java:
--------------------------------------------------------------------------------
package com.monitorjbl.xlsx;

import com.monitorjbl.xlsx.sst.BufferedStringsTable;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.junit.jupiter.api.Test;

import java.io.File;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;


/**
 * Regression tests for the text extracted by {@link BufferedStringsTable}
 * from the shared strings part of a workbook.
 */
public class BufferedStringsTableTest {

  /**
   * Verifies a bug where BufferedStringsTable was only looking at the first Characters xml element
   * in a text sequence.
   */
  @Test
  public void testStringsWithMultipleXmlElements() throws Exception {
    assertFirstSharedString("src/test/resources/blank_cells.xlsx", "B1 is Blank --->");
  }

  /**
   * Verifies a bug where BufferedStringsTable was dropping text enclosed in formatting
   * instructions.
   */
  @Test
  public void testStringsWrappedInFormatting() throws Exception {
    assertFirstSharedString("src/test/resources/shared_styled_string.xlsx", "shared styled string");
  }

  /**
   * Opens {@code workbookPath} read-only, loads its shared strings table into a
   * temporary cache file and asserts that the entry at index 0 equals {@code expected}.
   */
  private static void assertFirstSharedString(String workbookPath, String expected) throws Exception {
    File file = new File(workbookPath);
    File sstCache = File.createTempFile("cache", ".sst");
    sstCache.deleteOnExit();
    try (OPCPackage pkg = OPCPackage.open(file, PackageAccess.READ);
         BufferedStringsTable sst = BufferedStringsTable.getSharedStringsTable(sstCache, 1000, pkg)) {
      assertNotNull(sst);
      assertEquals(expected, sst.getItemAt(0).getString());
    }
  }
}

--------------------------------------------------------------------------------
/src/test/java/com/monitorjbl/xlsx/PerformanceTest.java:
--------------------------------------------------------------------------------
package com.monitorjbl.xlsx;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Workbook;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Manual timing harness: streams every cell of the sheet named "test" ten
 * times and prints the wall-clock time of each pass.
 */
public class PerformanceTest {

  /** Workbook used when no path is given on the command line. */
  private static final String DEFAULT_WORKBOOK = "/Users/thundermoose/Downloads/SampleXLSFile_6800kb.xlsx";

  /**
   * @param args optional; {@code args[0]} overrides the workbook path
   * @throws IOException if the workbook cannot be opened or read
   */
  public static void main(String[] args) throws IOException {
    File input = new File(args.length > 0 ? args[0] : DEFAULT_WORKBOOK);

    for(int i = 0; i < 10; i++) {
      long start = System.currentTimeMillis();
      // The stream is a managed resource too, so it is closed even when open() fails.
      try(InputStream is = new FileInputStream(input);
          Workbook workbook = StreamingReader.builder()
              .rowCacheSize(100)    // number of rows to keep in memory (defaults to 10)
              .bufferSize(4096)     // buffer size to use when reading InputStream to file (defaults to 1024)
              .open(is)) {

        for(Row r : workbook.getSheet("test")) {
          for(Cell c : r) {
            //do nothing
          }
        }
      }

      long end = System.currentTimeMillis();
      System.out.println("Time: " + (end - start) + "ms");
    }
  }
}

--------------------------------------------------------------------------------
/src/test/java/com/monitorjbl/xlsx/StreamingReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx; 2 | 3 | import com.monitorjbl.xlsx.exceptions.MissingSheetException; 4 | import org.apache.poi.openxml4j.opc.OPCPackage; 5 | import org.apache.poi.openxml4j.opc.PackageAccess; 6 | import org.apache.poi.ss.usermodel.Cell; 7 | import org.apache.poi.ss.usermodel.CellType; 8 | import org.apache.poi.ss.usermodel.DateUtil; 9 | import org.apache.poi.ss.usermodel.Row; 10 | import org.apache.poi.ss.usermodel.Sheet; 11 | import org.apache.poi.ss.usermodel.Workbook; 12 | import org.junit.jupiter.api.BeforeAll; 13 | import org.junit.jupiter.api.Test; 14 | 15 | import java.io.File; 16 | import java.io.FileInputStream; 17 | import java.io.InputStream; 18 | import java.text.SimpleDateFormat; 19 | import java.time.LocalDateTime; 20 | import java.util.ArrayList; 21 | import java.util.Calendar; 22 | import java.util.Date; 23 | import java.util.GregorianCalendar; 24 | import java.util.HashMap; 25 | import java.util.Iterator; 26 | import java.util.List; 27 | import java.util.Locale; 28 | import java.util.Map; 29 | import java.util.Spliterator; 30 | import java.util.Spliterators; 31 | import java.util.stream.Collectors; 32 | import java.util.stream.StreamSupport; 33 | 34 | import static org.apache.poi.ss.usermodel.CellType.BOOLEAN; 35 | import static org.apache.poi.ss.usermodel.CellType.NUMERIC; 36 | import static org.apache.poi.ss.usermodel.CellType.STRING; 37 | import static org.apache.poi.ss.usermodel.Row.MissingCellPolicy.CREATE_NULL_AS_BLANK; 38 | import static org.apache.poi.ss.usermodel.Row.MissingCellPolicy.RETURN_BLANK_AS_NULL; 39 | import static org.hamcrest.CoreMatchers.equalTo; 40 | import static org.hamcrest.CoreMatchers.nullValue; 41 | import static org.hamcrest.MatcherAssert.assertThat; 42 | import static org.hamcrest.core.Is.is; 43 | import static org.junit.jupiter.api.Assertions.assertEquals; 44 | 
import static org.junit.jupiter.api.Assertions.assertFalse; 45 | import static org.junit.jupiter.api.Assertions.assertNotNull; 46 | import static org.junit.jupiter.api.Assertions.assertNull; 47 | import static org.junit.jupiter.api.Assertions.assertThrows; 48 | import static org.junit.jupiter.api.Assertions.assertTrue; 49 | import static org.junit.jupiter.api.Assertions.fail; 50 | 51 | public class StreamingReaderTest { 52 | @BeforeAll 53 | public static void init() { 54 | Locale.setDefault(Locale.ENGLISH); 55 | } 56 | 57 | @Test 58 | public void testTypes() throws Exception { 59 | SimpleDateFormat df = new SimpleDateFormat("MM/dd/yyyy"); 60 | try( 61 | InputStream is = new FileInputStream(new File("src/test/resources/data_types.xlsx")); 62 | Workbook wb = StreamingReader.builder().open(is); 63 | ) { 64 | 65 | List> obj = new ArrayList<>(); 66 | 67 | for(Row r : wb.getSheetAt(0)) { 68 | List o = new ArrayList<>(); 69 | for(Cell c : r) { 70 | o.add(c); 71 | } 72 | obj.add(o); 73 | } 74 | 75 | assertEquals(7, obj.size()); 76 | List row; 77 | 78 | row = obj.get(0); 79 | assertEquals(2, row.size()); 80 | assertEquals(STRING, row.get(0).getCellType()); 81 | assertEquals(STRING, row.get(1).getCellType()); 82 | assertEquals("Type", row.get(0).getStringCellValue()); 83 | assertEquals("Type", row.get(0).getRichStringCellValue().getString()); 84 | assertEquals("Value", row.get(1).getStringCellValue()); 85 | assertEquals("Value", row.get(1).getRichStringCellValue().getString()); 86 | 87 | row = obj.get(1); 88 | assertEquals(2, row.size()); 89 | assertEquals(STRING, row.get(0).getCellType()); 90 | assertEquals(STRING, row.get(1).getCellType()); 91 | assertEquals("string", row.get(0).getStringCellValue()); 92 | assertEquals("string", row.get(0).getRichStringCellValue().getString()); 93 | assertEquals("jib-jab", row.get(1).getStringCellValue()); 94 | assertEquals("jib-jab", row.get(1).getRichStringCellValue().getString()); 95 | 96 | row = obj.get(2); 97 | assertEquals(2, 
row.size()); 98 | assertEquals(STRING, row.get(0).getCellType()); 99 | assertEquals(NUMERIC, row.get(1).getCellType()); 100 | assertEquals("int", row.get(0).getStringCellValue()); 101 | assertEquals("int", row.get(0).getRichStringCellValue().getString()); 102 | assertEquals(10, row.get(1).getNumericCellValue(), 0); 103 | 104 | row = obj.get(3); 105 | assertEquals(2, row.size()); 106 | assertEquals(STRING, row.get(0).getCellType()); 107 | assertEquals(NUMERIC, row.get(1).getCellType()); 108 | assertEquals("double", row.get(0).getStringCellValue()); 109 | assertEquals("double", row.get(0).getRichStringCellValue().getString()); 110 | assertEquals(3.14, row.get(1).getNumericCellValue(), 0); 111 | 112 | row = obj.get(4); 113 | assertEquals(2, row.size()); 114 | assertEquals(STRING, row.get(0).getCellType()); 115 | assertEquals(NUMERIC, row.get(1).getCellType()); 116 | assertEquals("date", row.get(0).getStringCellValue()); 117 | assertEquals("date", row.get(0).getRichStringCellValue().getString()); 118 | assertEquals(df.parse("1/1/2014"), row.get(1).getDateCellValue()); 119 | assertTrue(DateUtil.isCellDateFormatted(row.get(1))); 120 | 121 | row = obj.get(5); 122 | assertEquals(7, row.size()); 123 | assertEquals(STRING, row.get(0).getCellType()); 124 | assertEquals(STRING, row.get(1).getCellType()); 125 | assertEquals(STRING, row.get(2).getCellType()); 126 | assertEquals(STRING, row.get(3).getCellType()); 127 | assertEquals(STRING, row.get(4).getCellType()); 128 | assertEquals(STRING, row.get(5).getCellType()); 129 | assertEquals(STRING, row.get(6).getCellType()); 130 | assertEquals("long", row.get(0).getStringCellValue()); 131 | assertEquals("long", row.get(0).getRichStringCellValue().getString()); 132 | assertEquals("ass", row.get(1).getStringCellValue()); 133 | assertEquals("ass", row.get(1).getRichStringCellValue().getString()); 134 | assertEquals("row", row.get(2).getStringCellValue()); 135 | assertEquals("row", row.get(2).getRichStringCellValue().getString()); 136 | 
assertEquals("look", row.get(3).getStringCellValue()); 137 | assertEquals("look", row.get(3).getRichStringCellValue().getString()); 138 | assertEquals("at", row.get(4).getStringCellValue()); 139 | assertEquals("at", row.get(4).getRichStringCellValue().getString()); 140 | assertEquals("it", row.get(5).getStringCellValue()); 141 | assertEquals("it", row.get(5).getRichStringCellValue().getString()); 142 | assertEquals("go", row.get(6).getStringCellValue()); 143 | assertEquals("go", row.get(6).getRichStringCellValue().getString()); 144 | 145 | row = obj.get(6); 146 | assertEquals(3, row.size()); 147 | assertEquals(STRING, row.get(0).getCellType()); 148 | assertEquals(BOOLEAN, row.get(1).getCellType()); 149 | assertEquals(BOOLEAN, row.get(2).getCellType()); 150 | assertEquals("boolean", row.get(0).getStringCellValue()); 151 | assertEquals("boolean", row.get(0).getRichStringCellValue().getString()); 152 | assertEquals(true, row.get(1).getBooleanCellValue()); 153 | assertEquals(false, row.get(2).getBooleanCellValue()); 154 | } 155 | } 156 | 157 | @Test 158 | public void testGetDateCellValue() throws Exception { 159 | try( 160 | InputStream is = new FileInputStream("src/test/resources/data_types.xlsx"); 161 | Workbook wb = StreamingReader.builder().open(is); 162 | ) { 163 | 164 | List> obj = new ArrayList<>(); 165 | 166 | for(Row r : wb.getSheetAt(0)) { 167 | List o = new ArrayList<>(); 168 | for(Cell c : r) { 169 | o.add(c); 170 | } 171 | obj.add(o); 172 | } 173 | 174 | Date dt = obj.get(4).get(1).getDateCellValue(); 175 | assertNotNull(dt); 176 | final GregorianCalendar cal = new GregorianCalendar(); 177 | cal.setTime(dt); 178 | assertEquals(cal.get(Calendar.YEAR), 2014); 179 | 180 | // Verify LocalDateTime version is correct as well 181 | LocalDateTime localDateTime = obj.get(4).get(1).getLocalDateTimeCellValue(); 182 | assertEquals(2014, localDateTime.getYear()); 183 | 184 | try { 185 | obj.get(0).get(0).getDateCellValue(); 186 | fail("Should have thrown 
IllegalStateException"); 187 | } catch(IllegalStateException e) { } 188 | } 189 | } 190 | 191 | @Test 192 | public void testGetDateCellValue1904() throws Exception { 193 | try( 194 | InputStream is = new FileInputStream(new File("src/test/resources/1904Dates.xlsx")); 195 | Workbook wb = StreamingReader.builder().open(is); 196 | ) { 197 | 198 | List> obj = new ArrayList<>(); 199 | 200 | for(Row r : wb.getSheetAt(0)) { 201 | List o = new ArrayList<>(); 202 | for(Cell c : r) { 203 | o.add(c); 204 | } 205 | obj.add(o); 206 | } 207 | 208 | Date dt = obj.get(1).get(5).getDateCellValue(); 209 | assertNotNull(dt); 210 | final GregorianCalendar cal = new GregorianCalendar(); 211 | cal.setTime(dt); 212 | assertEquals(cal.get(Calendar.YEAR), 1991); 213 | 214 | try { 215 | obj.get(0).get(0).getDateCellValue(); 216 | fail("Should have thrown IllegalStateException"); 217 | } catch(IllegalStateException e) { } 218 | } 219 | } 220 | 221 | @Test 222 | public void testGetFirstCellNum() throws Exception { 223 | try( 224 | InputStream is = new FileInputStream(new File("src/test/resources/gaps.xlsx")); 225 | Workbook wb = StreamingReader.builder().open(is); 226 | ) { 227 | 228 | List> obj = new ArrayList<>(); 229 | List rows = new ArrayList<>(); 230 | for(Row r : wb.getSheetAt(0)) { 231 | rows.add(r); 232 | List o = new ArrayList<>(); 233 | for(Cell c : r) { 234 | o.add(c); 235 | } 236 | obj.add(o); 237 | } 238 | 239 | assertEquals(3, rows.size()); 240 | assertEquals(3, rows.get(2).getFirstCellNum()); 241 | } 242 | } 243 | 244 | @Test 245 | public void testGaps() throws Exception { 246 | try( 247 | InputStream is = new FileInputStream(new File("src/test/resources/gaps.xlsx")); 248 | Workbook wb = StreamingReader.builder().open(is); 249 | ) { 250 | List> obj = new ArrayList<>(); 251 | 252 | for(Row r : wb.getSheetAt(0)) { 253 | List o = new ArrayList<>(); 254 | for(Cell c : r) { 255 | o.add(c); 256 | } 257 | obj.add(o); 258 | } 259 | 260 | assertEquals(3, obj.size()); 261 | List row; 
262 | 263 | row = obj.get(0); 264 | assertEquals(2, row.size()); 265 | assertEquals(STRING, row.get(0).getCellType()); 266 | assertEquals(STRING, row.get(1).getCellType()); 267 | assertEquals("Dat", row.get(0).getStringCellValue()); 268 | assertEquals("Dat", row.get(0).getRichStringCellValue().getString()); 269 | assertEquals(0, row.get(0).getColumnIndex()); 270 | assertEquals(0, row.get(0).getRowIndex()); 271 | assertEquals("gap", row.get(1).getStringCellValue()); 272 | assertEquals("gap", row.get(1).getRichStringCellValue().getString()); 273 | assertEquals(2, row.get(1).getColumnIndex()); 274 | assertEquals(0, row.get(1).getRowIndex()); 275 | 276 | row = obj.get(1); 277 | assertEquals(2, row.size()); 278 | assertEquals(STRING, row.get(0).getCellType()); 279 | assertEquals(STRING, row.get(1).getCellType()); 280 | assertEquals("guuurrrrrl", row.get(0).getStringCellValue()); 281 | assertEquals("guuurrrrrl", row.get(0).getRichStringCellValue().getString()); 282 | assertEquals(0, row.get(0).getColumnIndex()); 283 | assertEquals(6, row.get(0).getRowIndex()); 284 | assertEquals("!", row.get(1).getStringCellValue()); 285 | assertEquals("!", row.get(1).getRichStringCellValue().getString()); 286 | assertEquals(6, row.get(1).getColumnIndex()); 287 | assertEquals(6, row.get(1).getRowIndex()); 288 | } 289 | } 290 | 291 | @Test 292 | public void testMultipleSheets_alpha() throws Exception { 293 | try( 294 | InputStream is = new FileInputStream(new File("src/test/resources/sheets.xlsx")); 295 | Workbook wb = StreamingReader.builder().open(is); 296 | ) { 297 | List> obj = new ArrayList<>(); 298 | 299 | for(Row r : wb.getSheetAt(0)) { 300 | List o = new ArrayList<>(); 301 | for(Cell c : r) { 302 | o.add(c); 303 | } 304 | obj.add(o); 305 | } 306 | 307 | assertEquals(1, obj.size()); 308 | List row; 309 | 310 | row = obj.get(0); 311 | assertEquals(1, row.size()); 312 | assertEquals("stuff", row.get(0).getStringCellValue()); 313 | assertEquals("stuff", 
row.get(0).getRichStringCellValue().getString()); 314 | } 315 | } 316 | 317 | @Test 318 | public void testMultipleSheets_zulu() throws Exception { 319 | try( 320 | InputStream is = new FileInputStream(new File("src/test/resources/sheets.xlsx")); 321 | Workbook wb = StreamingReader.builder().open(is); 322 | ) { 323 | 324 | List> obj = new ArrayList<>(); 325 | 326 | for(Row r : wb.getSheetAt(1)) { 327 | List o = new ArrayList<>(); 328 | for(Cell c : r) { 329 | o.add(c); 330 | } 331 | obj.add(o); 332 | } 333 | 334 | assertEquals(1, obj.size()); 335 | List row; 336 | 337 | row = obj.get(0); 338 | assertEquals(1, row.size()); 339 | assertEquals("yeah", row.get(0).getStringCellValue()); 340 | assertEquals("yeah", row.get(0).getRichStringCellValue().getString()); 341 | } 342 | } 343 | 344 | @Test 345 | public void testSheetName_zulu() throws Exception { 346 | try( 347 | InputStream is = new FileInputStream(new File("src/test/resources/sheets.xlsx")); 348 | Workbook wb = StreamingReader.builder().open(is); 349 | ) { 350 | 351 | List> obj = new ArrayList<>(); 352 | 353 | for(Row r : wb.getSheet("SheetZulu")) { 354 | List o = new ArrayList<>(); 355 | for(Cell c : r) { 356 | o.add(c); 357 | } 358 | obj.add(o); 359 | } 360 | 361 | assertEquals(1, obj.size()); 362 | List row; 363 | 364 | row = obj.get(0); 365 | assertEquals(1, row.size()); 366 | assertEquals("yeah", row.get(0).getStringCellValue()); 367 | assertEquals("yeah", row.get(0).getRichStringCellValue().getString()); 368 | } 369 | } 370 | 371 | @Test 372 | public void testSheetName_alpha() throws Exception { 373 | try( 374 | InputStream is = new FileInputStream(new File("src/test/resources/sheets.xlsx")); 375 | Workbook wb = StreamingReader.builder().open(is); 376 | ) { 377 | List> obj = new ArrayList<>(); 378 | 379 | for(Row r : wb.getSheet("SheetAlpha")) { 380 | List o = new ArrayList<>(); 381 | for(Cell c : r) { 382 | o.add(c); 383 | } 384 | obj.add(o); 385 | } 386 | 387 | assertEquals(1, obj.size()); 388 | List row; 
389 | 390 | row = obj.get(0); 391 | assertEquals(1, row.size()); 392 | assertEquals("stuff", row.get(0).getStringCellValue()); 393 | assertEquals("stuff", row.get(0).getRichStringCellValue().getString()); 394 | } 395 | } 396 | 397 | @Test 398 | public void testSheetName_missingInStream() throws Exception { 399 | try( 400 | InputStream is = new FileInputStream(new File("src/test/resources/sheets.xlsx")); 401 | Workbook wb = StreamingReader.builder().open(is); 402 | ) { 403 | assertThrows(MissingSheetException.class, ()->wb.getSheet("asdfasdfasdf")); 404 | } 405 | } 406 | 407 | @Test 408 | public void testSheetName_missingInFile() throws Exception { 409 | File f = new File("src/test/resources/sheets.xlsx"); 410 | try(Workbook wb = StreamingReader.builder().open(f)) { 411 | wb.getSheet("asdfasdfasdf"); 412 | fail("Should have failed"); 413 | } catch(MissingSheetException e) { 414 | assertTrue(f.exists()); 415 | } 416 | } 417 | 418 | @Test 419 | public void testIteration() throws Exception { 420 | File f = new File("src/test/resources/large.xlsx"); 421 | try( 422 | Workbook wb = StreamingReader.builder() 423 | .rowCacheSize(5) 424 | .open(f)) { 425 | int i = 1; 426 | for(Row r : wb.getSheetAt(0)) { 427 | assertEquals(i, r.getCell(0).getNumericCellValue(), 0); 428 | assertEquals("#" + i, r.getCell(1).getStringCellValue()); 429 | assertEquals("#" + i, r.getCell(1).getRichStringCellValue().getString()); 430 | i++; 431 | } 432 | } 433 | } 434 | 435 | @Test 436 | public void testLeadingZeroes() throws Exception { 437 | File f = new File("src/test/resources/leadingZeroes.xlsx"); 438 | 439 | try(Workbook wb = StreamingReader.builder().open(f)) { 440 | Iterator iter = wb.getSheetAt(0).iterator(); 441 | iter.hasNext(); 442 | 443 | Row r1 = iter.next(); 444 | assertEquals(1, r1.getCell(0).getNumericCellValue(), 0); 445 | assertEquals("1", r1.getCell(0).getStringCellValue()); 446 | assertEquals(NUMERIC, r1.getCell(0).getCellType()); 447 | 448 | Row r2 = iter.next(); 449 | 
assertEquals(2, r2.getCell(0).getNumericCellValue(), 0); 450 | assertEquals("0002", r2.getCell(0).getStringCellValue()); 451 | assertEquals("0002", r2.getCell(0).getRichStringCellValue().getString()); 452 | assertEquals(STRING, r2.getCell(0).getCellType()); 453 | } 454 | } 455 | 456 | @Test 457 | public void testReadingEmptyFile() throws Exception { 458 | File f = new File("src/test/resources/empty_sheet.xlsx"); 459 | 460 | try(Workbook wb = StreamingReader.builder().open(f)) { 461 | Iterator iter = wb.getSheetAt(0).iterator(); 462 | assertThat(iter.hasNext(), is(false)); 463 | } 464 | } 465 | 466 | @Test 467 | public void testSpecialStyles() throws Exception { 468 | File f = new File("src/test/resources/special_types.xlsx"); 469 | 470 | Map> contents = new HashMap<>(); 471 | try(Workbook wb = StreamingReader.builder().open(f)) { 472 | for(Row row : wb.getSheetAt(0)) { 473 | contents.put(row.getRowNum(), new ArrayList()); 474 | for(Cell c : row) { 475 | if(c.getColumnIndex() > 0) { 476 | contents.get(row.getRowNum()).add(c); 477 | } 478 | } 479 | } 480 | } 481 | 482 | SimpleDateFormat df = new SimpleDateFormat("dd/MM/yyyy"); 483 | 484 | assertThat(contents.size(), equalTo(2)); 485 | assertThat(contents.get(0).size(), equalTo(4)); 486 | assertThat(contents.get(0).get(0).getStringCellValue(), equalTo("Thu\", \"Dec 25\", \"14")); 487 | assertThat(contents.get(0).get(0).getDateCellValue(), equalTo(df.parse("25/12/2014"))); 488 | assertThat(contents.get(0).get(1).getStringCellValue(), equalTo("02/04/15")); 489 | assertThat(contents.get(0).get(1).getDateCellValue(), equalTo(df.parse("04/02/2015"))); 490 | assertThat(contents.get(0).get(2).getStringCellValue(), equalTo("14\". \"Mar\". 
\"2015")); 491 | assertThat(contents.get(0).get(2).getDateCellValue(), equalTo(df.parse("14/03/2015"))); 492 | assertThat(contents.get(0).get(3).getStringCellValue(), equalTo("2015-05-05")); 493 | assertThat(contents.get(0).get(3).getDateCellValue(), equalTo(df.parse("05/05/2015"))); 494 | 495 | assertThat(contents.get(1).size(), equalTo(4)); 496 | assertThat(contents.get(1).get(0).getStringCellValue(), equalTo("3.12")); 497 | assertThat(contents.get(1).get(0).getNumericCellValue(), equalTo(3.12312312312)); 498 | assertThat(contents.get(1).get(1).getStringCellValue(), equalTo("1,023,042")); 499 | assertThat(contents.get(1).get(1).getNumericCellValue(), equalTo(1023042.0)); 500 | assertThat(contents.get(1).get(2).getStringCellValue(), equalTo("-312,231.12")); 501 | assertThat(contents.get(1).get(2).getNumericCellValue(), equalTo(-312231.12123145)); 502 | assertThat(contents.get(1).get(3).getStringCellValue(), equalTo("(132)")); 503 | assertThat(contents.get(1).get(3).getNumericCellValue(), equalTo(-132.0)); 504 | } 505 | 506 | @Test 507 | public void testBlankNumerics() throws Exception { 508 | File f = new File("src/test/resources/blank_cells.xlsx"); 509 | try(Workbook wb = StreamingReader.builder().open(f)) { 510 | Row row = wb.getSheetAt(0).iterator().next(); 511 | assertThat(row.getCell(1).getStringCellValue(), equalTo("")); 512 | assertThat(row.getCell(1).getRichStringCellValue().getString(), equalTo("")); 513 | assertThat(row.getCell(1).getDateCellValue(), is(nullValue())); 514 | assertThat(row.getCell(1).getNumericCellValue(), equalTo(0.0)); 515 | } 516 | } 517 | 518 | @Test 519 | public void testFirstRowNumIs0() throws Exception { 520 | File f = new File("src/test/resources/data_types.xlsx"); 521 | try(Workbook wb = StreamingReader.builder().open(f)) { 522 | Row row = wb.getSheetAt(0).iterator().next(); 523 | assertThat(row.getRowNum(), equalTo(0)); 524 | } 525 | } 526 | 527 | @Test 528 | public void testNoTypeCell() throws Exception { 529 | try( 530 | 
InputStream is = new FileInputStream(new File("src/test/resources/no_type_cell.xlsx")); 531 | Workbook wb = StreamingReader.builder().open(is)) { 532 | for(Row r : wb.getSheetAt(0)) { 533 | for(Cell c : r) { 534 | assertEquals("1", c.getStringCellValue()); 535 | } 536 | } 537 | } 538 | } 539 | 540 | @Test 541 | public void testEncryption() throws Exception { 542 | try( 543 | InputStream is = new FileInputStream(new File("src/test/resources/encrypted.xlsx")); 544 | Workbook wb = StreamingReader.builder().password("test").open(is)) { 545 | OUTER: 546 | for(Row r : wb.getSheetAt(0)) { 547 | for(Cell c : r) { 548 | assertEquals("Demo", c.getStringCellValue()); 549 | assertEquals("Demo", c.getRichStringCellValue().getString()); 550 | break OUTER; 551 | } 552 | } 553 | } 554 | } 555 | 556 | @Test 557 | public void testStringCellValue() throws Exception { 558 | try( 559 | InputStream is = new FileInputStream(new File("src/test/resources/blank_cell_StringCellValue.xlsx")); 560 | Workbook wb = StreamingReader.builder().open(is); 561 | ) { 562 | for(Row r : wb.getSheetAt(0)) { 563 | if(r.getRowNum() == 1) { 564 | assertEquals("", r.getCell(1).getStringCellValue()); 565 | assertEquals("", r.getCell(1).getRichStringCellValue().getString()); 566 | } 567 | } 568 | } 569 | } 570 | 571 | @Test 572 | public void testNullValueType() throws Exception { 573 | try( 574 | InputStream is = new FileInputStream(new File("src/test/resources/null_celltype.xlsx")); 575 | Workbook wb = StreamingReader.builder().open(is); 576 | ) { 577 | for(Row r : wb.getSheetAt(0)) { 578 | for(Cell cell : r) { 579 | if(r.getRowNum() == 0 && cell.getColumnIndex() == 8) { 580 | assertEquals(NUMERIC, cell.getCellType()); 581 | assertEquals("8:00:00", cell.getStringCellValue()); 582 | } 583 | } 584 | } 585 | } 586 | } 587 | 588 | @Test 589 | public void testInlineCells() throws Exception { 590 | try( 591 | InputStream is = new FileInputStream(new File("src/test/resources/inline.xlsx")); 592 | Workbook wb = 
StreamingReader.builder().open(is); 593 | ) { 594 | Row row = wb.getSheetAt(0).iterator().next(); 595 | assertEquals("First inline cell", row.getCell(0).getStringCellValue()); 596 | assertEquals("First inline cell", row.getCell(0).getRichStringCellValue().getString()); 597 | assertEquals("Second inline cell", row.getCell(1).getStringCellValue()); 598 | assertEquals("Second inline cell", row.getCell(1).getRichStringCellValue().getString()); 599 | } 600 | } 601 | 602 | @Test 603 | public void testMissingRattrs() throws Exception { 604 | try( 605 | InputStream is = new FileInputStream(new File("src/test/resources/missing-r-attrs.xlsx")); 606 | StreamingReader reader = StreamingReader.builder().read(is); 607 | ) { 608 | Row row = reader.iterator().next(); 609 | assertEquals(0, row.getRowNum()); 610 | assertEquals("1", row.getCell(0).getStringCellValue()); 611 | assertEquals("5", row.getCell(4).getStringCellValue()); 612 | row = reader.iterator().next(); 613 | assertEquals(1, row.getRowNum()); 614 | assertEquals("6", row.getCell(0).getStringCellValue()); 615 | assertEquals("10", row.getCell(4).getStringCellValue()); 616 | row = reader.iterator().next(); 617 | assertEquals(6, row.getRowNum()); 618 | assertEquals("11", row.getCell(0).getStringCellValue()); 619 | assertEquals("15", row.getCell(4).getStringCellValue()); 620 | 621 | assertFalse(reader.iterator().hasNext()); 622 | } 623 | } 624 | 625 | @Test 626 | public void testClosingFiles() throws Exception { 627 | OPCPackage o = OPCPackage.open(new File("src/test/resources/blank_cell_StringCellValue.xlsx"), PackageAccess.READ); 628 | o.close(); 629 | } 630 | 631 | @Test 632 | public void shouldIgnoreSpreadsheetDrawingRows() throws Exception { 633 | try( 634 | InputStream is = new FileInputStream(new File("src/test/resources/has_spreadsheetdrawing.xlsx")); 635 | Workbook wb = StreamingReader.builder().open(is); 636 | ) { 637 | Iterator iterator = wb.getSheetAt(0).iterator(); 638 | while(iterator.hasNext()) { 639 | 
iterator.next(); 640 | } 641 | } 642 | } 643 | 644 | @Test 645 | public void testShouldReturnNullForMissingCellPolicy_RETURN_BLANK_AS_NULL() throws Exception { 646 | try( 647 | InputStream is = new FileInputStream(new File("src/test/resources/blank_cells.xlsx")); 648 | Workbook wb = StreamingReader.builder().open(is); 649 | ) { 650 | Row row = wb.getSheetAt(0).iterator().next(); 651 | assertNotNull(row.getCell(0, RETURN_BLANK_AS_NULL)); //Remain unchanged 652 | assertNull(row.getCell(1, RETURN_BLANK_AS_NULL)); 653 | } 654 | } 655 | 656 | @Test 657 | public void testShouldReturnBlankForMissingCellPolicy_CREATE_NULL_AS_BLANK() throws Exception { 658 | try( 659 | InputStream is = new FileInputStream(new File("src/test/resources/null_cell.xlsx")); 660 | Workbook wb = StreamingReader.builder().open(is); 661 | ) { 662 | Row row = wb.getSheetAt(0).iterator().next(); 663 | assertEquals("B1 is Null ->", row.getCell(0, CREATE_NULL_AS_BLANK).getStringCellValue()); //Remain unchanged 664 | assertEquals("B1 is Null ->", row.getCell(0, CREATE_NULL_AS_BLANK).getRichStringCellValue().getString()); //Remain unchanged 665 | assertThat(row.getCell(1), is(nullValue())); 666 | assertNotNull(row.getCell(1, CREATE_NULL_AS_BLANK)); 667 | } 668 | } 669 | 670 | 671 | // Handle a file with a blank SST reference, like 672 | // Normally, if Excel saves the file, that whole wouldn't even be there. 673 | @Test 674 | public void testShouldHandleBlankSSTReference() throws Exception { 675 | try( 676 | InputStream is = new FileInputStream(new File("src/test/resources/blank_sst_reference_doctored.xlsx")); 677 | Workbook wb = StreamingReader.builder().open(is); 678 | ) { 679 | Iterator iterator = wb.getSheetAt(0).iterator(); 680 | while(iterator.hasNext()) { 681 | iterator.next(); 682 | } 683 | } 684 | } 685 | 686 | // The last cell on this sheet should be a NUMERIC but there is a lingering "f" 687 | // tag that was getting attached to the last cell causing it to be a FORUMLA. 
688 | @Test 689 | public void testForumulaOutsideCellIgnored() throws Exception { 690 | try( 691 | InputStream is = new FileInputStream(new File("src/test/resources/formula_outside_cell.xlsx")); 692 | Workbook wb = StreamingReader.builder().open(is); 693 | ) { 694 | Iterator rows = wb.getSheetAt(0).iterator(); 695 | Cell cell = null; 696 | while(rows.hasNext()) { 697 | Iterator cells = rows.next().iterator(); 698 | while(cells.hasNext()) { 699 | cell = cells.next(); 700 | } 701 | } 702 | assertNotNull(cell); 703 | assertThat(cell.getCellType(), is(CellType.NUMERIC)); 704 | } 705 | } 706 | 707 | @Test 708 | public void testFormulaWithDifferentTypes() throws Exception { 709 | try( 710 | InputStream is = new FileInputStream(new File("src/test/resources/formula_test.xlsx")); 711 | Workbook wb = StreamingReader.builder().open(is) 712 | ) { 713 | Sheet sheet = wb.getSheetAt(0); 714 | Iterator rowIterator = sheet.rowIterator(); 715 | 716 | Row next = rowIterator.next(); 717 | Cell cell = next.getCell(0); 718 | 719 | assertThat(cell.getCellType(), is(CellType.STRING)); 720 | 721 | next = rowIterator.next(); 722 | cell = next.getCell(0); 723 | 724 | assertThat(cell.getCellType(), is(CellType.FORMULA)); 725 | assertThat(cell.getCachedFormulaResultType(), is(CellType.STRING)); 726 | 727 | next = rowIterator.next(); 728 | cell = next.getCell(0); 729 | 730 | assertThat(cell.getCellType(), is(CellType.FORMULA)); 731 | assertThat(cell.getCachedFormulaResultType(), is(CellType.BOOLEAN)); 732 | 733 | next = rowIterator.next(); 734 | cell = next.getCell(0); 735 | 736 | assertThat(cell.getCellType(), is(CellType.FORMULA)); 737 | assertThat(cell.getCachedFormulaResultType(), is(CellType.NUMERIC)); 738 | } 739 | } 740 | 741 | @Test 742 | public void testShouldIncrementColumnNumberIfExplicitCellAddressMissing() throws Exception { 743 | // On consecutive columns the element might miss an "r" attribute, which indicate the cell position. 
744 | // This might be an optimization triggered by file size and specific to a particular Excel version. 745 | // Excel itself reads such a file without complaining. 746 | try( 747 | InputStream is = new FileInputStream(new File("src/test/resources/sparse-columns.xlsx")); 748 | Workbook wb = StreamingReader.builder().open(is); 749 | ) { 750 | Sheet sheet = wb.getSheetAt(0); 751 | 752 | Iterator<Row> rowIterator = sheet.rowIterator(); 753 | Row row = rowIterator.next(); // only the first row is inspected 754 | 755 | assertThat(row.getCell(0).getStringCellValue(), is("sparse")); 756 | assertThat(row.getCell(3).getStringCellValue(), is("columns")); 757 | assertThat(row.getCell(4).getNumericCellValue(), is(0.0)); 758 | assertThat(row.getCell(5).getNumericCellValue(), is(1.0)); 759 | 760 | } 761 | } 762 | } 763 | -------------------------------------------------------------------------------- /src/test/java/com/monitorjbl/xlsx/StreamingSheetTest.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx; 2 | 3 | import java.io.File; 4 | import java.io.FileInputStream; 5 | import java.io.InputStream; 6 | import java.util.Locale; 7 | 8 | import org.apache.poi.ss.usermodel.CellType; 9 | import org.apache.poi.ss.usermodel.Row; 10 | import org.apache.poi.ss.usermodel.Sheet; 11 | import org.apache.poi.ss.usermodel.Workbook; 12 | import org.junit.jupiter.api.BeforeAll; 13 | import org.junit.jupiter.api.Test; 14 | 15 | import static org.junit.jupiter.api.Assertions.assertEquals; 16 | import static org.junit.jupiter.api.Assertions.assertNotNull; 17 | 18 | public class StreamingSheetTest { 19 | @BeforeAll 20 | public static void init() { 21 | Locale.setDefault(Locale.ENGLISH); 22 | } 23 | 24 | @Test 25 | public void testLastRowNum() throws Exception { 26 | try( 27 | InputStream is = new FileInputStream(new File("src/test/resources/large.xlsx")); 28 | Workbook workbook = StreamingReader.builder().open(is); 29 | ) { 30 | assertEquals(1,
workbook.getNumberOfSheets()); 31 | Sheet sheet = workbook.getSheetAt(0); 32 | assertEquals(24, sheet.getLastRowNum()); 33 | } 34 | 35 | try( 36 | InputStream is = new FileInputStream(new File("src/test/resources/empty_sheet.xlsx")); 37 | Workbook workbook = StreamingReader.builder().open(is); 38 | ) { 39 | assertEquals(1, workbook.getNumberOfSheets()); 40 | Sheet sheet = workbook.getSheetAt(0); 41 | assertEquals(0, sheet.getLastRowNum()); 42 | } 43 | } 44 | 45 | @Test 46 | public void testEmptyCellShouldHaveGeneralStyle() throws Exception { 47 | try( 48 | InputStream is = new FileInputStream(new File("src/test/resources/large.xlsx")); 49 | Workbook workbook = StreamingReader.builder().open(is); 50 | ) { 51 | assertEquals(1, workbook.getNumberOfSheets()); 52 | Sheet sheet = workbook.getSheetAt(0); 53 | Row row = sheet.iterator().next(); 54 | assertEquals(CellType.NUMERIC, row.getCell(0).getCellType()); 55 | assertNotNull(row.getCell(0).getCellStyle()); 56 | } 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/test/java/com/monitorjbl/xlsx/StreamingWorkbookTest.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx; 2 | 3 | import com.monitorjbl.xlsx.exceptions.ParseException; 4 | import fi.iki.elonen.NanoHTTPD; 5 | import org.apache.poi.ss.usermodel.Cell; 6 | import org.apache.poi.ss.usermodel.Row; 7 | import org.apache.poi.ss.usermodel.Sheet; 8 | import org.apache.poi.ss.usermodel.Workbook; 9 | import org.junit.jupiter.api.Assertions; 10 | import org.junit.jupiter.api.BeforeAll; 11 | import org.junit.jupiter.api.Test; 12 | 13 | import java.io.File; 14 | import java.io.FileInputStream; 15 | import java.io.IOException; 16 | import java.io.InputStream; 17 | import java.io.UncheckedIOException; 18 | import java.util.Iterator; 19 | import java.util.Locale; 20 | import java.util.function.Consumer; 21 | 22 | import static 
com.monitorjbl.xlsx.TestUtils.expectCachedType; 23 | import static com.monitorjbl.xlsx.TestUtils.expectFormula; 24 | import static com.monitorjbl.xlsx.TestUtils.expectSameStringContent; 25 | import static com.monitorjbl.xlsx.TestUtils.expectStringContent; 26 | import static com.monitorjbl.xlsx.TestUtils.expectType; 27 | import static com.monitorjbl.xlsx.TestUtils.getCellFromNextRow; 28 | import static com.monitorjbl.xlsx.TestUtils.nextRow; 29 | import static com.monitorjbl.xlsx.TestUtils.openWorkbook; 30 | import static org.apache.poi.ss.usermodel.CellType.FORMULA; 31 | import static org.apache.poi.ss.usermodel.CellType.NUMERIC; 32 | import static org.junit.jupiter.api.Assertions.assertEquals; 33 | import static org.junit.jupiter.api.Assertions.assertFalse; 34 | import static org.junit.jupiter.api.Assertions.assertThrows; 35 | import static org.junit.jupiter.api.Assertions.assertTrue; 36 | import static org.junit.jupiter.api.Assertions.fail; 37 | 38 | public class StreamingWorkbookTest { 39 | @BeforeAll 40 | public static void init() { 41 | Locale.setDefault(Locale.ENGLISH); 42 | } 43 | 44 | @Test 45 | public void testIterateSheets() throws Exception { 46 | try( 47 | InputStream is = new FileInputStream(new File("src/test/resources/sheets.xlsx")); 48 | Workbook workbook = StreamingReader.builder().open(is); 49 | ) { 50 | 51 | assertEquals(2, workbook.getNumberOfSheets()); 52 | 53 | Sheet alpha = workbook.getSheetAt(0); 54 | Sheet zulu = workbook.getSheetAt(1); 55 | assertEquals("SheetAlpha", alpha.getSheetName()); 56 | assertEquals("SheetZulu", zulu.getSheetName()); 57 | 58 | Row rowA = alpha.rowIterator().next(); 59 | Row rowZ = zulu.rowIterator().next(); 60 | 61 | assertEquals("stuff", rowA.getCell(0).getStringCellValue()); 62 | assertEquals("yeah", rowZ.getCell(0).getStringCellValue()); 63 | } 64 | } 65 | 66 | @Test 67 | public void testHiddenCells() throws Exception { 68 | try( 69 | InputStream is = new FileInputStream(new 
File("src/test/resources/hidden.xlsx")); 70 | Workbook workbook = StreamingReader.builder().open(is) 71 | ) { 72 | assertEquals(3, workbook.getNumberOfSheets()); 73 | Sheet sheet = workbook.getSheetAt(0); 74 | 75 | assertFalse(sheet.isColumnHidden(0), "Column 0 should not be hidden"); 76 | assertTrue(sheet.isColumnHidden(1), "Column 1 should be hidden"); 77 | assertFalse(sheet.isColumnHidden(2), "Column 2 should not be hidden"); 78 | 79 | assertFalse(sheet.rowIterator().next().getZeroHeight(), "Row 0 should not be hidden"); 80 | assertTrue(sheet.rowIterator().next().getZeroHeight(), "Row 1 should be hidden"); 81 | assertFalse(sheet.rowIterator().next().getZeroHeight(), "Row 2 should not be hidden"); 82 | } 83 | } 84 | 85 | @Test 86 | public void testHiddenSheets() throws Exception { 87 | try( 88 | InputStream is = new FileInputStream(new File("src/test/resources/hidden.xlsx")); 89 | Workbook workbook = StreamingReader.builder().open(is) 90 | ) { 91 | assertEquals(3, workbook.getNumberOfSheets()); 92 | assertFalse(workbook.isSheetHidden(0)); 93 | 94 | assertTrue(workbook.isSheetHidden(1)); 95 | assertFalse(workbook.isSheetVeryHidden(1)); 96 | 97 | assertFalse(workbook.isSheetHidden(2)); 98 | assertTrue(workbook.isSheetVeryHidden(2)); 99 | } 100 | } 101 | 102 | @Test 103 | public void testFormulaCells() throws Exception { 104 | try(Workbook workbook = openWorkbook("formula_cell.xlsx")) { 105 | assertEquals(1, workbook.getNumberOfSheets()); 106 | Sheet sheet = workbook.getSheetAt(0); 107 | 108 | Iterator rowIterator = sheet.rowIterator(); 109 | Cell A1 = getCellFromNextRow(rowIterator, 0); 110 | Cell A2 = getCellFromNextRow(rowIterator, 0); 111 | Cell A3 = getCellFromNextRow(rowIterator, 0); 112 | 113 | expectType(A3, FORMULA); 114 | expectCachedType(A3, NUMERIC); 115 | expectFormula(A3, "SUM(A1:A2)"); 116 | 117 | expectStringContent(A1, "1"); 118 | expectStringContent(A2, "2"); 119 | expectStringContent(A3, "3"); 120 | } 121 | } 122 | 123 | @Test 124 | public void 
testNumericFormattedFormulaCell() throws Exception { 125 | try(Workbook workbook = openWorkbook("formula_cell.xlsx")) { 126 | Sheet sheet = workbook.getSheetAt(0); 127 | Iterator rowIterator = sheet.rowIterator(); 128 | 129 | Cell C1 = getCellFromNextRow(rowIterator, 2); 130 | Cell C2 = getCellFromNextRow(rowIterator, 2); 131 | 132 | expectType(C2, FORMULA); 133 | expectCachedType(C2, NUMERIC); 134 | expectFormula(C2, "C1"); 135 | expectSameStringContent(C2, C1); 136 | expectStringContent(C2, "May 11 2018"); 137 | } 138 | } 139 | 140 | @Test 141 | public void testStringFormattedFormulaCell() throws Exception { 142 | try(Workbook workbook = openWorkbook("formula_cell.xlsx")) { 143 | Sheet sheet = workbook.getSheetAt(0); 144 | Iterator rowIterator = sheet.rowIterator(); 145 | 146 | Cell B1 = getCellFromNextRow(rowIterator, 1); 147 | nextRow(rowIterator); 148 | Cell B3 = getCellFromNextRow(rowIterator, 1); 149 | 150 | expectType(B3, FORMULA); 151 | // expectCachedType(B3, STRING); // this can't return FUNCTION as cached type as per javadoc ! fix in future work 152 | expectFormula(B3, "B1"); 153 | expectSameStringContent(B1, B3); 154 | expectStringContent(B3, "a"); 155 | } 156 | } 157 | 158 | @Test 159 | public void testQuotedStringFormattedFormulaCell() throws Exception { 160 | try(Workbook workbook = openWorkbook("formula_cell.xlsx")) { 161 | Sheet sheet = workbook.getSheetAt(0); 162 | Iterator rowIterator = sheet.rowIterator(); 163 | 164 | nextRow(rowIterator); 165 | Cell B2 = getCellFromNextRow(rowIterator, 1); 166 | nextRow(rowIterator); 167 | Cell B4 = getCellFromNextRow(rowIterator, 1); 168 | 169 | expectType(B4, FORMULA); 170 | // expectCachedType(B4, STRING); // this can't return FUNCTION as cached type as per javadoc ! fix in future work 171 | // expectFormula(B4, "B2"); // returning wrong forumla type? 
this needs to be fixed in future work 172 | expectSameStringContent(B2, B4); 173 | expectStringContent(B4, "\"a\""); 174 | } 175 | } 176 | 177 | @Test 178 | public void testEntityExpansion() { 179 | assertThrows(ParseException.class, () -> ExploitServer.withServer(s -> fail("Should not have made request"), () -> { 180 | try(Workbook workbook = openWorkbook("entity-expansion-exploit-poc-file.xlsx")) { 181 | Sheet sheet = workbook.getSheetAt(0); 182 | for(Row row : sheet) { 183 | for(Cell cell : row) { 184 | System.out.println(cell.getStringCellValue()); 185 | } 186 | } 187 | } catch(IOException e) { 188 | throw new UncheckedIOException(e); 189 | } 190 | })); 191 | } 192 | 193 | private static class ExploitServer extends NanoHTTPD implements AutoCloseable { 194 | private final Consumer onRequest; 195 | 196 | public ExploitServer(Consumer onRequest) throws IOException { 197 | super(61932); 198 | this.onRequest = onRequest; 199 | } 200 | 201 | @Override 202 | public Response serve(IHTTPSession session) { 203 | onRequest.accept(session); 204 | return newFixedLengthResponse("\n"); 205 | } 206 | 207 | public static void withServer(Consumer onRequest, Runnable func) { 208 | try(ExploitServer server = new ExploitServer(onRequest)) { 209 | server.start(NanoHTTPD.SOCKET_READ_TIMEOUT, false); 210 | func.run(); 211 | } catch(IOException e) { 212 | throw new UncheckedIOException(e); 213 | } 214 | } 215 | 216 | @Override 217 | public void close() { 218 | this.stop(); 219 | } 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /src/test/java/com/monitorjbl/xlsx/TestUtils.java: -------------------------------------------------------------------------------- 1 | package com.monitorjbl.xlsx; 2 | 3 | import org.apache.poi.ss.usermodel.Cell; 4 | import org.apache.poi.ss.usermodel.CellType; 5 | import org.apache.poi.ss.usermodel.Row; 6 | import org.apache.poi.ss.usermodel.Workbook; 7 | import org.apache.poi.ss.util.CellReference; 8 | 9 
| import java.io.IOException; 10 | import java.io.InputStream; 11 | import java.util.Iterator; 12 | 13 | import static org.junit.jupiter.api.Assertions.assertEquals; 14 | 15 | final class TestUtils { 16 | 17 | static Workbook openWorkbook(String fileName) throws IOException { 18 | try(InputStream stream = TestUtils.class.getResourceAsStream("/" + fileName)) { 19 | return StreamingReader.builder() 20 | .open(stream); // NOTE(review): the stream is closed on return; presumably open() has fully consumed it by then - confirm 21 | } 22 | } 23 | 24 | static void expectSameStringContent(Cell cell1, Cell cell2) { 25 | assertEquals(cell1.getStringCellValue(), cell2.getStringCellValue(), 26 | "Cell " + ref(cell1) + " should equal cell " + ref(cell2) + " string value."); 27 | } 28 | 29 | static void expectStringContent(Cell cell, String value) { 30 | assertEquals(value, cell.getStringCellValue(), "Cell " + ref(cell) + " has wrong string content."); 31 | } 32 | 33 | static void expectCachedType(Cell cell, CellType cellType) { 34 | assertEquals(cellType, cell.getCachedFormulaResultType(), "Cell " + ref(cell) + " has wrong cached type, expected "
+ cellType + "."); 35 | } 36 | 37 | static void expectType(Cell cell, CellType cellType) { 38 | assertEquals(cellType, cell.getCellType(), "Cell " + ref(cell) + " has wrong type."); 39 | } 40 | 41 | static void expectFormula(Cell cell, String formula) { 42 | assertEquals(formula, cell.getCellFormula(), "Cell " + ref(cell) + " has wrong formula."); 43 | } 44 | 45 | private static String ref(Cell cell) { // cell address as text, used in failure messages 46 | return new CellReference(cell).formatAsString(); 47 | } 48 | 49 | static Cell getCellFromNextRow(Iterator<Row> rowIterator, int index) { // advances the iterator by one row 50 | return nextRow(rowIterator) 51 | .getCell(index); 52 | } 53 | 54 | static Row nextRow(Iterator<Row> rowIterator) { 55 | return rowIterator.next(); 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/test/resources/1904Dates.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/1904Dates.xlsx -------------------------------------------------------------------------------- /src/test/resources/blank_cell_StringCellValue.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/blank_cell_StringCellValue.xlsx -------------------------------------------------------------------------------- /src/test/resources/blank_cells.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/blank_cells.xlsx -------------------------------------------------------------------------------- /src/test/resources/blank_sst_reference_doctored.xlsx: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/blank_sst_reference_doctored.xlsx -------------------------------------------------------------------------------- /src/test/resources/data_types.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/data_types.xlsx -------------------------------------------------------------------------------- /src/test/resources/empty_sheet.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/empty_sheet.xlsx -------------------------------------------------------------------------------- /src/test/resources/encrypted.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/encrypted.xlsx -------------------------------------------------------------------------------- /src/test/resources/entity-expansion-exploit-poc-file.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/entity-expansion-exploit-poc-file.xlsx -------------------------------------------------------------------------------- /src/test/resources/formula_cell.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/formula_cell.xlsx 
-------------------------------------------------------------------------------- /src/test/resources/formula_outside_cell.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/formula_outside_cell.xlsx -------------------------------------------------------------------------------- /src/test/resources/formula_test.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/formula_test.xlsx -------------------------------------------------------------------------------- /src/test/resources/gaps.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/gaps.xlsx -------------------------------------------------------------------------------- /src/test/resources/has_spreadsheetdrawing.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/has_spreadsheetdrawing.xlsx -------------------------------------------------------------------------------- /src/test/resources/hidden.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/hidden.xlsx -------------------------------------------------------------------------------- /src/test/resources/inline.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/inline.xlsx -------------------------------------------------------------------------------- /src/test/resources/large.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/large.xlsx -------------------------------------------------------------------------------- /src/test/resources/leadingZeroes.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/leadingZeroes.xlsx -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=DEBUG, A1 2 | log4j.appender.A1=org.apache.log4j.ConsoleAppender 3 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.A1.layout.ConversionPattern=%d{ISO8601} [%c] %p: %m%n 5 | 6 | log4j.category.com.monitorjbl=DEBUG -------------------------------------------------------------------------------- /src/test/resources/missing-r-attrs.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/missing-r-attrs.xlsx -------------------------------------------------------------------------------- /src/test/resources/no_type_cell.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/no_type_cell.xlsx 
-------------------------------------------------------------------------------- /src/test/resources/null_cell.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/null_cell.xlsx -------------------------------------------------------------------------------- /src/test/resources/null_celltype.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/null_celltype.xlsx -------------------------------------------------------------------------------- /src/test/resources/shared_styled_string.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/shared_styled_string.xlsx -------------------------------------------------------------------------------- /src/test/resources/sheets.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/sheets.xlsx -------------------------------------------------------------------------------- /src/test/resources/sparse-columns.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/sparse-columns.xlsx -------------------------------------------------------------------------------- /src/test/resources/special_types.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/monitorjbl/excel-streaming-reader/af775bab85e873e839c0e36f71c9dfcb830b54b2/src/test/resources/special_types.xlsx --------------------------------------------------------------------------------