├── .classpath ├── .gitattributes ├── .gitignore ├── .project ├── .settings ├── org.eclipse.jdt.core.prefs └── org.eclipse.m2e.core.prefs ├── LICENSE ├── README.md ├── commit.bat ├── doc └── readme.txt ├── libs ├── commons-cli-1.1.jar ├── commons-io-1.4.jar ├── jodconverter-core-3.0-beta-4-sources.jar ├── jodconverter-core-3.0-beta-4.jar ├── json-20090211.jar ├── juh-3.2.1.jar ├── jurt-3.2.1.jar ├── ridl-3.2.1.jar └── unoil-3.2.1.jar ├── pom.xml ├── src ├── main │ ├── java │ │ └── com │ │ │ └── suncht │ │ │ ├── convert │ │ │ ├── CommonDocumentConverter.java │ │ │ ├── FileUtils.java │ │ │ ├── IOfficeDocumentConverter.java │ │ │ ├── OfficeDocumentConvertServer.java │ │ │ ├── TxtDocumentConverter.java │ │ │ └── demo │ │ │ │ ├── Doc2DocxUtil.java │ │ │ │ └── OfficePDFConverter.java │ │ │ └── wordread │ │ │ ├── exceptions │ │ │ └── ParseException.java │ │ │ ├── format │ │ │ ├── DefaultCellFormater.java │ │ │ ├── DefaultWordTableFormater.java │ │ │ ├── ICellFormater.java │ │ │ └── IWordTableFormater.java │ │ │ ├── model │ │ │ ├── ContentTypeEnum.java │ │ │ ├── TTCPr.java │ │ │ ├── WordTable.java │ │ │ ├── WordTableCell.java │ │ │ ├── WordTableCellContent.java │ │ │ ├── WordTableCellContentFormula.java │ │ │ ├── WordTableCellContentImage.java │ │ │ ├── WordTableCellContentOleObject.java │ │ │ ├── WordTableCellContentText.java │ │ │ ├── WordTableCellContents.java │ │ │ ├── WordTableComplexCell.java │ │ │ ├── WordTableHeader.java │ │ │ ├── WordTableMap.java │ │ │ ├── WordTableRow.java │ │ │ └── WordTableSimpleCell.java │ │ │ ├── output │ │ │ ├── DefaultWordTableOutputStrategy.java │ │ │ └── IWordTableOutputStrategy.java │ │ │ ├── parser │ │ │ ├── ISingleWordTableParser.java │ │ │ ├── IWordTableParser.java │ │ │ ├── WordTableParser.java │ │ │ ├── WordTableTransferContext.java │ │ │ ├── mapping │ │ │ │ ├── IWordTableMemoryMappingVisitor.java │ │ │ │ └── WordTableMemoryMapping.java │ │ │ ├── strategy │ │ │ │ ├── DefaultTableStrategy.java │ │ │ │ ├── ITableTransferStrategy.java 
│ │ │ │ └── LogicalTableStrategy.java │ │ │ ├── wordh │ │ │ │ ├── SingleWordHTableParser.java │ │ │ │ └── WordHTableParser.java │ │ │ └── wordx │ │ │ │ ├── SingleWordXTableParser.java │ │ │ │ └── WordXTableParser.java │ │ │ └── utils │ │ │ └── MathmlUtils.java │ └── resources │ │ ├── 1.doc │ │ ├── FMEA信息导入-客户实例.doc │ │ ├── FMEA信息导入-客户实例.docx │ │ ├── conventer │ │ ├── MML2OMML.XSL │ │ ├── OMML2MML.XSL │ │ └── mml2tex │ │ │ ├── README │ │ │ ├── cmarkup.xsl │ │ │ ├── entities.xsl │ │ │ ├── glayout.xsl │ │ │ ├── mmltex.xsl │ │ │ ├── scripts.xsl │ │ │ ├── tables.xsl │ │ │ └── tokens.xsl │ │ ├── logback.xml │ │ ├── 故障模式分析表格样例.docx │ │ └── 故障模式分析表格样例_处理模型.docx └── test │ ├── java │ └── com │ │ └── test │ │ ├── Doc2DocxTest.java │ │ ├── MemoryMappingVisitorTest.java │ │ ├── MuliHeaderXTableParserTest.java │ │ ├── MultiTextCellTest.java │ │ ├── NestedFormulaTest.java │ │ ├── NestedImageCellTest.java │ │ ├── OfficeConverterTest.java │ │ ├── OleObjectCellTest.java │ │ ├── WordCellDataTest.java │ │ ├── WordEmbedsTest.java │ │ ├── WordHTableParserTest.java │ │ └── WordXTableParserTest.java │ └── resources │ ├── 1.doc │ ├── 1.docx │ ├── 2.doc │ ├── conventer │ ├── MML2OMML.XSL │ ├── OMML2MML.XSL │ └── mml2tex │ │ ├── README │ │ ├── cmarkup.xsl │ │ ├── entities.xsl │ │ ├── glayout.xsl │ │ ├── mmltex.xsl │ │ ├── scripts.xsl │ │ ├── tables.xsl │ │ └── tokens.xsl │ ├── 复杂表格.docx │ ├── 嵌套公式.doc │ ├── 嵌套公式.docx │ ├── 嵌套图片.docx │ ├── 嵌套图片01.docx │ ├── 嵌套图片02.docx │ ├── 嵌套多文本.docx │ ├── 嵌套附件01.docx │ ├── 嵌套附件02.docx │ ├── 故障模式分析表格样例01.docx │ └── 标准表格1.doc └── target └── .gitignore /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /.gitattributes: 
-------------------------------------------------------------------------------- 1 | *.java linguist-language=Java 2 | *.jsp linguist-language=Java 3 | *.css linguist-language=Java 4 | *.js linguist-language=Java 5 | *.html linguist-language=Java 6 | *.doc linguist-language=Java 7 | *.docx linguist-language=Java 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | #*.jar 15 | *.war 16 | *.ear 17 | *.zip 18 | *.tar.gz 19 | *.rar 20 | 21 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 22 | hs_err_pid* 23 | /target/ 24 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | sun-wordtable-read 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.springframework.ide.eclipse.core.springbuilder 15 | 16 | 17 | 18 | 19 | org.eclipse.m2e.core.maven2Builder 20 | 21 | 22 | 23 | 24 | 25 | org.springframework.ide.eclipse.core.springnature 26 | org.eclipse.jdt.core.javanature 27 | org.eclipse.m2e.core.maven2Nature 28 | 29 | 30 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 6 | org.eclipse.jdt.core.compiler.compliance=1.7 
7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 12 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 13 | org.eclipse.jdt.core.compiler.source=1.7 14 | -------------------------------------------------------------------------------- /.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # sun-wordtable-read 3 | 4 | 读取Word文档的各种复杂表格内容,支持2007以上的Docx文档(暂不支持2007以下的Doc类型文档) 5 | 6 | ## 开发背景: 7 | 工作中遇到了如何读取Word文档中表格内容的问题:表格具有业务数据意义,并且遵循一定的规则,因此不能直接读取表格文本,而是需要遍历表格单元格,逐行逐列读取。 8 | 9 | 表格规则: 10 | 1. 表格可以有表头,表头也有业务含义 11 | 2. 一行为一条业务数据,可能会跨行 12 | 3. 列可能会有跨列、跨行 13 | 4. 单元格中可能包含图片、数学公式、嵌套表格、文件等 14 | 15 | 比如,以下表格 16 | [![](https://img-blog.csdn.net/20180414152908387?watermark/2/text/aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3N1bmN0/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70)](https://img-blog.csdn.net/20180414152908387?watermark/2/text/aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3N1bmN0/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70) 17 | 18 | ## 设计理念: 19 | 1. 读取Word文档中表格数据到内存映射表,再通过自定义读取策略,将内存映射表转换成实际业务表格数据。 20 | 2. 使用统一的内存映射表,屏蔽了实际Word文档读取方式,开发者只关心如何转换为业务数据。 21 | 22 | ## 功能现状: 23 | 1. 
目前只支持2007以上的Word文档(Docx):可读取表格单元格中的文本、图片、数学公式、嵌套表格、附件内嵌对象(PPT、WORD、EXCEL类型的OLE内嵌对象除外)。 24 | 2. 支持一般性的有规则的复杂表格。 25 | 3. 暂不支持2007以下的Doc类型文档,因为POI中暂未找到关于表格单元格合并信息的API。(目前已有解决方案,正在积极处理中……) 26 | 目前折中解决方案:为了兼容2007以下的Doc类型文档,利用jodconverter3.0 + LibreOffice 5.3,“先将Doc类型文档转换为Docx类型文档,再读取表格内容”。 27 | 注意:LibreOffice直接支持Docx类型文档,而OpenOffice不能直接支持Docx类型文档,需要AccessODF插件 28 | 29 | ## 后续要增加的功能: 30 | 1. 处理PPT、WORD、EXCEL类型的OLE内嵌对象 31 | 2. 正在处理2007以下的Doc类型文档的读取。(Docx文档、Doc文档解析读取单元格时有区别,区别在于Docx有行合并、列合并、列宽,而Doc只有行合并、列宽,而没有列合并) 32 | 3. 直接导入到目标(比如:数据库表、Excel等)的公共功能 33 | 4. 大文件Word文档的读取及性能优化策略 -------------------------------------------------------------------------------- /commit.bat: -------------------------------------------------------------------------------- 1 | git add . 2 | git commit -m "更新README.MD" 3 | git push -u origin master -------------------------------------------------------------------------------- /doc/readme.txt: -------------------------------------------------------------------------------- 1 | 在网页中显示latex公式的方法: 2 | 直接在html源文件中插入 3 | 4 | 5 | Office中数学公式用Java解析: 6 | http://www.jianshu.com/p/ea7f62e3b23a -------------------------------------------------------------------------------- /libs/commons-cli-1.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/commons-cli-1.1.jar -------------------------------------------------------------------------------- /libs/commons-io-1.4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/commons-io-1.4.jar -------------------------------------------------------------------------------- /libs/jodconverter-core-3.0-beta-4-sources.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/jodconverter-core-3.0-beta-4-sources.jar -------------------------------------------------------------------------------- /libs/jodconverter-core-3.0-beta-4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/jodconverter-core-3.0-beta-4.jar -------------------------------------------------------------------------------- /libs/json-20090211.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/json-20090211.jar -------------------------------------------------------------------------------- /libs/juh-3.2.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/juh-3.2.1.jar -------------------------------------------------------------------------------- /libs/jurt-3.2.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/jurt-3.2.1.jar -------------------------------------------------------------------------------- /libs/ridl-3.2.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/ridl-3.2.1.jar -------------------------------------------------------------------------------- /libs/unoil-3.2.1.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/unoil-3.2.1.jar -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.suncht 5 | wordtable-read 6 | 0.0.1-SNAPSHOT 7 | 8 | 9 | 3.9 10 | 11 | 12 | 13 | org.springframework 14 | spring-beans 15 | 3.1.1.RELEASE 16 | 17 | 18 | 19 | 20 | org.apache.poi 21 | poi 22 | ${apache-poi} 23 | 24 | 25 | 26 | 27 | org.apache.poi 28 | poi-ooxml 29 | ${apache-poi} 30 | 31 | 32 | 33 | 34 | org.apache.poi 35 | poi-scratchpad 36 | ${apache-poi} 37 | 38 | 39 | 40 | com.google.guava 41 | guava 42 | 19.0 43 | 44 | 45 | org.apache.commons 46 | commons-lang3 47 | 3.3.2 48 | 49 | 50 | 51 | commons-io 52 | commons-io 53 | 1.4 54 | 55 | 56 | 57 | dom4j 58 | dom4j 59 | 1.6.1 60 | 61 | 62 | 63 | jaxen 64 | jaxen 65 | 1.1.6 66 | 67 | 68 | 69 | org.slf4j 70 | slf4j-api 71 | 1.7.7 72 | 73 | 74 | ch.qos.logback 75 | logback-core 76 | 1.1.7 77 | 78 | 79 | ch.qos.logback 80 | logback-access 81 | 1.1.7 82 | 83 | 84 | ch.qos.logback 85 | logback-classic 86 | 1.1.7 87 | 88 | 89 | 90 | junit 91 | junit 92 | 4.12 93 | test 94 | 95 | 96 | 97 | 98 | 99 | 100 | maven-assembly-plugin 101 | 102 | 103 | 104 | 105 | 106 | 107 | jar-with-dependencies 108 | 109 | 110 | 111 | 112 | make-assembly 113 | package 114 | 115 | single 116 | 117 | 118 | 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/convert/CommonDocumentConverter.java: -------------------------------------------------------------------------------- 1 | package com.suncht.convert; 2 | 3 | import java.io.File; 4 | import java.util.Collections; 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | 8 | import org.apache.commons.lang3.StringUtils; 9 | import org.artofsolving.jodconverter.OfficeDocumentConverter; 10 | import 
org.artofsolving.jodconverter.document.DocumentFamily; 11 | import org.artofsolving.jodconverter.document.DocumentFormat; 12 | import org.artofsolving.jodconverter.office.OfficeManager; 13 | 14 | /** 15 | * 通用文档转换,可支持docx文档 16 | * 支持docx文档,需要LibreOffice直接支持, 而OpenOffice不能直接支持Docx文档,需要AccessODF插件 17 | * LibreOffice/OpenOffice能支持哪些文档转换,那么该程序能支持哪些转换 18 | * @author changtan.sun 19 | * 20 | */ 21 | public class CommonDocumentConverter implements IOfficeDocumentConverter { 22 | protected OfficeManager officeManager; 23 | protected String inputFile; 24 | protected String outputFile; 25 | 26 | protected boolean needTempFile = false; //是否删除中间文件 27 | protected String tempFile; 28 | protected boolean needDeleteInputFile = false; //是否需要删除输入文件 29 | protected String extraOutputFormatToNeed; 30 | 31 | //额外的输出文档格式 32 | private static Map extraOutputFormatMap = new HashMap(); 33 | 34 | static { 35 | //增加Docx文档格式 36 | extraOutputFormatMap.put("docx", "MS Word 2007 XML"); 37 | } 38 | 39 | public CommonDocumentConverter(OfficeManager officeManager, String inputFile, String outputFile, boolean needDeleteInputFile) { 40 | this.officeManager = officeManager; 41 | this.inputFile = inputFile; 42 | this.outputFile = outputFile; 43 | this.needDeleteInputFile = needDeleteInputFile; 44 | } 45 | 46 | public void before() { 47 | tempFile = null; 48 | needTempFile = false; 49 | extraOutputFormatToNeed = null; 50 | 51 | String sufix = FileUtils.getFileSufix(outputFile); 52 | this.judgeFormat(sufix); 53 | } 54 | 55 | @Override 56 | public void convert() { 57 | OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager); 58 | 59 | System.out.println("转换前处理..."); 60 | before(); 61 | 62 | //开始转换 63 | System.out.println("转换开始执行,["+inputFile+"]转换为["+outputFile+"]..."); 64 | if(StringUtils.isNotBlank(extraOutputFormatToNeed)) { 65 | DocumentFormat extraFormat = converter.getFormatRegistry().getFormatByExtension(extraOutputFormatToNeed); 66 | 
extraFormat.setStoreProperties(DocumentFamily.TEXT, Collections.singletonMap("FilterName", extraOutputFormatMap.get(extraOutputFormatToNeed))); 67 | 68 | if(needTempFile) { 69 | converter.convert(new File(tempFile),new File(outputFile), extraFormat); 70 | } else { 71 | converter.convert(new File(inputFile),new File(outputFile), extraFormat); 72 | } 73 | 74 | } else { 75 | if(needTempFile) { 76 | converter.convert(new File(tempFile),new File(outputFile)); 77 | } else { 78 | converter.convert(new File(inputFile),new File(outputFile)); 79 | } 80 | } 81 | 82 | System.out.println("转换后处理..."); 83 | after(); 84 | 85 | System.out.println("转换完成"); 86 | } 87 | 88 | public void after() { 89 | if(needTempFile) { 90 | FileUtils.deleteFile(tempFile); 91 | } 92 | 93 | if(needDeleteInputFile) { 94 | FileUtils.deleteFile(inputFile); 95 | } 96 | } 97 | 98 | private void judgeFormat(String sufix) { 99 | if(extraOutputFormatMap.containsKey(sufix)) { 100 | extraOutputFormatToNeed = sufix; 101 | } 102 | } 103 | 104 | 105 | } 106 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/convert/FileUtils.java: -------------------------------------------------------------------------------- 1 | package com.suncht.convert; 2 | 3 | import java.io.File; 4 | import java.io.FileInputStream; 5 | import java.io.FileNotFoundException; 6 | import java.io.FileOutputStream; 7 | import java.io.IOException; 8 | 9 | 10 | 11 | public class FileUtils { 12 | 13 | public static String getFilePrefix(String fileName){ 14 | int splitIndex = fileName.lastIndexOf("."); 15 | return fileName.substring(0, splitIndex); 16 | } 17 | 18 | public static String getFileSufix(String fileName){ 19 | int splitIndex = fileName.lastIndexOf("."); 20 | return fileName.substring(splitIndex + 1); 21 | } 22 | 23 | public static void copyFile(String inputFile,String outputFile) throws FileNotFoundException{ 24 | File sFile = new File(inputFile); 25 | File tFile = new 
File(outputFile); 26 | FileInputStream fis = new FileInputStream(sFile); 27 | FileOutputStream fos = new FileOutputStream(tFile); 28 | int temp = 0; 29 | try { 30 | while ((temp = fis.read()) != -1) { 31 | fos.write(temp); 32 | } 33 | } catch (IOException e) { 34 | e.printStackTrace(); 35 | } finally{ 36 | try { 37 | fis.close(); 38 | fos.close(); 39 | } catch (IOException e) { 40 | e.printStackTrace(); 41 | } 42 | } 43 | } 44 | 45 | public static void deleteFile(String fileToDelete) { 46 | File file = new File(fileToDelete); 47 | if(file.exists()) { 48 | file.delete(); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/convert/IOfficeDocumentConverter.java: -------------------------------------------------------------------------------- 1 | package com.suncht.convert; 2 | 3 | /** 4 | * 文档转换接口 5 | * @author changtan.sun 6 | * 7 | */ 8 | public interface IOfficeDocumentConverter { 9 | /** 10 | * 转换 11 | */ 12 | void convert(); 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/convert/OfficeDocumentConvertServer.java: -------------------------------------------------------------------------------- 1 | package com.suncht.convert; 2 | 3 | import java.io.Closeable; 4 | import java.io.IOException; 5 | 6 | import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration; 7 | import org.artofsolving.jodconverter.office.OfficeManager; 8 | 9 | /** 10 | * 文档转换服务 11 | * @author changtan.sun 12 | * 13 | */ 14 | public class OfficeDocumentConvertServer implements Closeable { 15 | private OfficeManager officeManager; 16 | 17 | public OfficeDocumentConvertServer(String officeHome, int... ports) { 18 | this.startService(officeHome, ports); 19 | } 20 | 21 | private void startService(String officeHome, int... 
ports) { 22 | DefaultOfficeManagerConfiguration configuration = new DefaultOfficeManagerConfiguration(); 23 | try { 24 | System.out.println("准备启动office服务...."); 25 | configuration.setOfficeHome(officeHome);// 设置安装目录 26 | configuration.setPortNumbers(ports); // 设置端口 27 | configuration.setTaskExecutionTimeout(1000 * 60 * 5L); 28 | configuration.setTaskQueueTimeout(1000 * 60 * 60 * 24L); 29 | officeManager = configuration.buildOfficeManager(); 30 | officeManager.start(); // 启动服务 31 | System.out.println("office转换服务启动成功!"); 32 | } catch (Exception ce) { 33 | System.out.println("office转换服务启动失败!详细信息:" + ce); 34 | } 35 | } 36 | 37 | public void convert(String inputFile, String outputFile, boolean needDeleteInputFile) { 38 | IOfficeDocumentConverter converter = null; 39 | if(inputFile.endsWith(".txt")) { 40 | converter = new TxtDocumentConverter(officeManager, inputFile, outputFile, needDeleteInputFile); 41 | } else { 42 | converter = new CommonDocumentConverter(officeManager, inputFile, outputFile, needDeleteInputFile); 43 | } 44 | 45 | converter.convert(); 46 | } 47 | 48 | public void convert(String inputFile, String outputFile) { 49 | this.convert(inputFile, outputFile, false); 50 | } 51 | 52 | @Override 53 | public void close() throws IOException { 54 | if (officeManager != null) { 55 | officeManager.stop(); 56 | System.out.println("关闭office服务"); 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/convert/TxtDocumentConverter.java: -------------------------------------------------------------------------------- 1 | package com.suncht.convert; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | 6 | import org.artofsolving.jodconverter.office.OfficeManager; 7 | 8 | /** 9 | * 支持Txt类型文档转换 10 | * txt需要先转为odt类型文档,才能进行下一步转换 11 | * @author changtan.sun 12 | * 13 | */ 14 | public class TxtDocumentConverter extends CommonDocumentConverter { 15 | 16 | public 
TxtDocumentConverter(OfficeManager officeManager, String inputFile, String outputFile, boolean needDeleteInputFile) { 17 | super(officeManager, inputFile, outputFile, needDeleteInputFile); 18 | } 19 | 20 | @Override 21 | public void before() { 22 | super.before(); 23 | 24 | if(inputFile.endsWith(".txt")){ //如果是Txt文件,需要转换为odt文件 25 | needTempFile = true; 26 | tempFile = FileUtils.getFilePrefix(inputFile)+".odt"; 27 | if(new File(tempFile).exists()){ 28 | System.out.println("odt文件已存在!"); 29 | inputFile = tempFile; 30 | }else{ 31 | try { 32 | FileUtils.copyFile(inputFile, tempFile); 33 | inputFile = tempFile; 34 | } catch (FileNotFoundException e) { 35 | System.out.println("文档不存在!"); 36 | e.printStackTrace(); 37 | } 38 | } 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/convert/demo/Doc2DocxUtil.java: -------------------------------------------------------------------------------- 1 | package com.suncht.convert.demo; 2 | import java.io.File; 3 | import java.util.Collections; 4 | 5 | import org.artofsolving.jodconverter.OfficeDocumentConverter; 6 | import org.artofsolving.jodconverter.document.DocumentFamily; 7 | import org.artofsolving.jodconverter.document.DocumentFormat; 8 | import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration; 9 | import org.artofsolving.jodconverter.office.OfficeManager; 10 | 11 | public class Doc2DocxUtil{ 12 | 13 | private static Doc2DocxUtil doc2DocxUtil = new Doc2DocxUtil(); 14 | private static OfficeManager officeManager; 15 | //openOffice安装路径 16 | private static String OPEN_OFFICE_HOME = "D:\\Program Files\\LibreOffice 5\\"; 17 | //服务端口 18 | private static int OPEN_OFFICE_PORT[] = {8101}; 19 | 20 | public static Doc2DocxUtil getOffice2PdfUtil() { 21 | return doc2DocxUtil; 22 | } 23 | 24 | 25 | public static void doc2Docx(String inputFile,String outputFile) { 26 | File pdfFile = new File(outputFile); 27 | if (pdfFile.exists()) { 28 | 
pdfFile.delete(); 29 | } 30 | try{ 31 | //打开服务 32 | startService(); 33 | OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager); 34 | DocumentFormat docx = converter.getFormatRegistry().getFormatByExtension("docx"); 35 | docx.setStoreProperties(DocumentFamily.TEXT, Collections.singletonMap("FilterName", "MS Word 2007 XML")); 36 | //开始转换 37 | converter.convert(new File(inputFile),new File(outputFile), docx); 38 | //关闭 39 | stopService(); 40 | System.out.println("运行结束"); 41 | }catch (Exception e) { 42 | // TODO: handle exception 43 | e.printStackTrace(); 44 | } 45 | } 46 | 47 | private static void transformBinaryWordDocToDocX(File in, File out) { 48 | OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager); 49 | DocumentFormat docx = converter.getFormatRegistry().getFormatByExtension("docx"); 50 | docx.setStoreProperties(DocumentFamily.TEXT, Collections.singletonMap("FilterName", "MS Word 2007 XML")); 51 | 52 | converter.convert(in, out, docx); 53 | } 54 | 55 | private static void transformBinaryWordDocToW2003Xml(File in, File out) { 56 | OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager);; 57 | DocumentFormat w2003xml = new DocumentFormat("Microsoft Word 2003 XML", "xml", "text/xml"); 58 | w2003xml.setInputFamily(DocumentFamily.TEXT); 59 | w2003xml.setStoreProperties(DocumentFamily.TEXT, Collections.singletonMap("FilterName", "MS Word 2003 XML")); 60 | converter.convert(in, out, w2003xml); 61 | } 62 | 63 | 64 | public static void stopService(){ 65 | if (officeManager != null) { 66 | officeManager.stop(); 67 | } 68 | } 69 | 70 | public static void startService(){ 71 | DefaultOfficeManagerConfiguration configuration = new DefaultOfficeManagerConfiguration(); 72 | try { 73 | configuration.setOfficeHome(OPEN_OFFICE_HOME);//设置安装目录 74 | configuration.setPortNumbers(OPEN_OFFICE_PORT); //设置端口 75 | configuration.setTaskExecutionTimeout(1000 * 60 * 5L); 76 | configuration.setTaskQueueTimeout(1000 * 60 
* 60 * 24L); 77 | officeManager = configuration.buildOfficeManager(); 78 | officeManager.start(); //启动服务 79 | } catch (Exception ce) { 80 | System.out.println("office转换服务启动失败!详细信息:" + ce); 81 | } 82 | } 83 | } -------------------------------------------------------------------------------- /src/main/java/com/suncht/convert/demo/OfficePDFConverter.java: -------------------------------------------------------------------------------- 1 | package com.suncht.convert.demo; 2 | import java.io.File; 3 | import java.io.FileNotFoundException; 4 | import java.util.Collections; 5 | 6 | import org.artofsolving.jodconverter.OfficeDocumentConverter; 7 | import org.artofsolving.jodconverter.document.DocumentFamily; 8 | import org.artofsolving.jodconverter.document.DocumentFormat; 9 | import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration; 10 | import org.artofsolving.jodconverter.office.OfficeManager; 11 | 12 | import com.suncht.convert.FileUtils; 13 | 14 | public class OfficePDFConverter{ 15 | 16 | private static OfficePDFConverter converter = new OfficePDFConverter(); 17 | private static OfficeManager officeManager; 18 | //openOffice安装路径 19 | //private static String OPEN_OFFICE_HOME = "D:\\Program Files (x86)\\OpenOffice 4\\"; 20 | private static String OPEN_OFFICE_HOME = "D:\\Program Files\\LibreOffice 5\\"; 21 | //服务端口 22 | private static int OPEN_OFFICE_PORT[] = {8100}; 23 | 24 | public static OfficePDFConverter getConverter() { 25 | return converter; 26 | } 27 | 28 | /** 29 | * 30 | * office2Pdf 方法 31 | * @descript:TODO 32 | * @param inputFile 文件全路径 33 | * @param outputFile pdf文件全路径 34 | * @return void 35 | * @author lxz 36 | * @return 37 | */ 38 | public void convert2PDF(String inputFile,String outputFile) { 39 | 40 | if(inputFile.endsWith(".txt")){ 41 | String odtFile = FileUtils.getFilePrefix(inputFile)+".odt"; 42 | if(new File(odtFile).exists()){ 43 | System.out.println("odt文件已存在!"); 44 | inputFile = odtFile; 45 | }else{ 46 | try { 47 | 
FileUtils.copyFile(inputFile,odtFile); 48 | inputFile = odtFile; 49 | } catch (FileNotFoundException e) { 50 | System.out.println("文档不存在!"); 51 | e.printStackTrace(); 52 | } 53 | } 54 | } 55 | 56 | File pdfFile = new File(outputFile); 57 | if (pdfFile.exists()) { 58 | pdfFile.delete(); 59 | } 60 | try{ 61 | long startTime = System.currentTimeMillis(); 62 | //打开服务 63 | startService(); 64 | OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager); 65 | //开始转换 66 | converter.convert(new File(inputFile),new File(outputFile)); 67 | //关闭 68 | stopService(); 69 | System.out.println("运行结束"); 70 | }catch (Exception e) { 71 | // TODO: handle exception 72 | e.printStackTrace(); 73 | } 74 | } 75 | 76 | public static void doc2Docx(String inputFile,String outputFile) { 77 | File pdfFile = new File(outputFile); 78 | if (pdfFile.exists()) { 79 | pdfFile.delete(); 80 | } 81 | try{ 82 | long startTime = System.currentTimeMillis(); 83 | //打开服务 84 | startService(); 85 | OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager); 86 | DocumentFormat docx = converter.getFormatRegistry().getFormatByExtension("docx"); 87 | docx.setStoreProperties(DocumentFamily.TEXT, Collections.singletonMap("FilterName", "MS Word 2007 XML")); 88 | //开始转换 89 | converter.convert(new File(inputFile),new File(outputFile), docx); 90 | //关闭 91 | stopService(); 92 | System.out.println("运行结束"); 93 | }catch (Exception e) { 94 | // TODO: handle exception 95 | e.printStackTrace(); 96 | } 97 | } 98 | 99 | public static void stopService(){ 100 | if (officeManager != null) { 101 | officeManager.stop(); 102 | } 103 | } 104 | 105 | public static void startService(){ 106 | DefaultOfficeManagerConfiguration configuration = new DefaultOfficeManagerConfiguration(); 107 | try { 108 | System.out.println("准备启动服务...."); 109 | configuration.setOfficeHome(OPEN_OFFICE_HOME);//设置安装目录 110 | configuration.setPortNumbers(OPEN_OFFICE_PORT); //设置端口 111 | 
/**
 * Checked exception signalling that a Word table could not be parsed.
 */
public class ParseException extends Exception {

    private static final long serialVersionUID = 939204100093323412L;

    /**
     * Wraps {@code e}, reusing its message.
     *
     * @param e the cause; may be {@code null}, in which case both message and
     *          cause are {@code null}
     */
    public ParseException(Throwable e) {
        super(e == null ? null : e.getMessage(), e);
    }

    /**
     * @param message the detail message, used verbatim (no formatting)
     */
    public ParseException(String message) {
        super(message);
    }

    /**
     * @param messageTemplate {@link String#format} template for the detail message
     * @param params          arguments for the template
     */
    public ParseException(String messageTemplate, Object... params) {
        super(safeFormat(messageTemplate, params));
    }

    /**
     * @param message   the detail message, used verbatim
     * @param throwable the cause
     */
    public ParseException(String message, Throwable throwable) {
        super(message, throwable);
    }

    /**
     * @param throwable       the cause
     * @param messageTemplate {@link String#format} template for the detail message
     * @param params          arguments for the template
     */
    public ParseException(Throwable throwable, String messageTemplate, Object... params) {
        super(safeFormat(messageTemplate, params), throwable);
    }

    /**
     * Formats the template, falling back to the raw template when it is
     * malformed, so that a bad format string never replaces the error being
     * reported with an IllegalFormatException.
     */
    private static String safeFormat(String messageTemplate, Object... params) {
        if (messageTemplate == null) {
            return null;
        }
        try {
            return String.format(messageTemplate, params);
        } catch (java.util.IllegalFormatException badTemplate) {
            return messageTemplate;
        }
    }
}
formatText(WordTableCellContentText cellContent) { 38 | String text = cellContent.getData().toString(); 39 | return text; 40 | } 41 | 42 | public Object formatImage(WordTableCellContentImage cellContent) { 43 | WcImage imageContent = (WcImage)cellContent.getData(); 44 | return imageContent!=null ? imageContent.getFileName(): ""; 45 | } 46 | 47 | public Object formatFormula(WordTableCellContentFormula cellContent) { 48 | String formula = cellContent.getData().getLatex(); 49 | return formula; 50 | } 51 | 52 | private Object formatOleObject(WordTableCellContentOleObject cellContent) { 53 | WcOleObject oleObject = cellContent.getData(); 54 | return oleObject!=null ? oleObject.getFileName(): ""; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/format/DefaultWordTableFormater.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.format; 2 | 3 | import java.util.List; 4 | 5 | import com.suncht.wordread.model.WordTableCell; 6 | import com.suncht.wordread.model.WordTableCellContent; 7 | import com.suncht.wordread.model.WordTableComplexCell; 8 | import com.suncht.wordread.model.WordTableRow; 9 | import com.suncht.wordread.model.WordTableSimpleCell; 10 | 11 | /** 12 | * 默认的单元格内容Formatter 13 | * 14 | * @author suncht 15 | * 16 | */ 17 | public class DefaultWordTableFormater implements IWordTableFormater { 18 | private ICellFormater cellFormater; 19 | 20 | private StringBuilder builder; 21 | 22 | public DefaultWordTableFormater() { 23 | this.cellFormater = new DefaultCellFormater(); 24 | } 25 | 26 | public DefaultWordTableFormater(ICellFormater cellFormater) { 27 | this.cellFormater = cellFormater; 28 | } 29 | 30 | public void format(WordTableCell tableCell, StringBuilder builder) { 31 | this.builder = builder!=null ? 
builder : new StringBuilder(); 32 | 33 | if (tableCell instanceof WordTableSimpleCell) { 34 | printCell(tableCell.getContent()); 35 | } else if (tableCell instanceof WordTableComplexCell) { 36 | WordTableComplexCell cell = (WordTableComplexCell) tableCell; 37 | 38 | List rows = cell.getInnerTable().getRows(); 39 | for (WordTableRow row : rows) { 40 | for (WordTableCell wtcell : row.getCells()) { 41 | printCell(wtcell.getContent()); 42 | } 43 | } 44 | } 45 | } 46 | 47 | private void printCell(WordTableCellContent cellContent) { 48 | Object data = cellFormater.format(cellContent); 49 | builder.append(data.toString()); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/format/ICellFormater.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.format; 2 | 3 | import com.suncht.wordread.model.WordTableCellContent; 4 | 5 | /** 6 | * 单元格数据格式化接口 7 | * @author changtan.sun 8 | * 9 | */ 10 | public interface ICellFormater { 11 | public Object format(WordTableCellContent cellContent); 12 | 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/format/IWordTableFormater.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.format; 2 | 3 | import com.suncht.wordread.model.WordTableCell; 4 | 5 | /** 6 | * 表格数据格式化接口 7 | * @author changtan.sun 8 | * 9 | */ 10 | public interface IWordTableFormater { 11 | public void format(WordTableCell tableCell, StringBuilder builder); 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/ContentTypeEnum.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | /** 4 | * 单元格内容的类型 5 | * @author suncht 6 | * 7 | 
*/ 8 | public enum ContentTypeEnum { 9 | /** 10 | * 一般性文本 11 | */ 12 | Text, 13 | /** 14 | * 图片 15 | */ 16 | Image, 17 | /** 18 | * 公式 19 | */ 20 | Formula, 21 | /** 22 | * OLE对象 23 | */ 24 | OleObject 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/TTCPr.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | import java.io.Serializable; 4 | import java.math.BigInteger; 5 | 6 | import com.suncht.wordread.parser.mapping.IWordTableMemoryMappingVisitor; 7 | 8 | /** 9 | * word的单元格 10 | * @author changtan.sun 11 | * 12 | */ 13 | public class TTCPr implements Serializable, Cloneable { 14 | private static final long serialVersionUID = 463466191913957614L; 15 | /** 16 | * 单元格的类型 17 | */ 18 | private TTCPrEnum type; 19 | /** 20 | * 在word中实际行号 21 | */ 22 | private int realRowIndex; 23 | /** 24 | * 在word中实际列号 25 | */ 26 | private int realColumnIndex; 27 | 28 | /** 29 | * 逻辑行号 30 | */ 31 | private int logicRowIndex; 32 | 33 | /** 34 | * 逻辑列号 35 | */ 36 | private int logicColumnIndex; 37 | /** 38 | * 根单元格(如果是被合并的单元格(包括行合并、列合并), 则指向合并开始的单元格) 39 | * * 例子: 40 | * |-------|----------------------------- | 41 | | 任务 | 故障影响 | 42 | | | ---------------------------- | 43 | | 阶段 | 局部影响 | 高一层次影响 | 最终影响 | 44 | |-------|----------------------------- | 45 | 46 | 说明:“任务阶段”所在单元格进行2行合并,其中第一个单元格是root,其他单元格的Root是第一个单元格 47 | */ 48 | private TTCPr root; 49 | /** 50 | * 单元格的数据内容 51 | */ 52 | private WordTableCellContent content; 53 | 54 | /** 55 | * 单元格的宽度 56 | */ 57 | private BigInteger width; 58 | /** 59 | * 合并了多少行 60 | */ 61 | private int rowSpan = 0; 62 | /** 63 | * 合并了多少列 64 | */ 65 | private int colSpan = 0; 66 | 67 | /** 68 | * 父单元格,当列合并时有效 69 | * 例子: 70 | * |-------|----------------------------- | 71 | | 任务 | 故障影响 | 72 | | | ---------------------------- | 73 | | 阶段 | 局部影响 | 高一层次影响 | 最终影响 | 74 | 
|-------|----------------------------- | 75 | 说明:“局部影响”所在单元格的父单元格parent是“故障影响”所在单元格 76 | */ 77 | //private TTCPr parent; 78 | 79 | /** 80 | * 是否有效单元格。 被合并的单元格不属于有效单元格 81 | * @return 82 | */ 83 | public boolean isValid() { 84 | return type == TTCPrEnum.VM_S || type == TTCPrEnum.NONE || type == TTCPrEnum.HM_S || type == TTCPrEnum.HVM_S; 85 | } 86 | 87 | /** 88 | * 是否进行了列合并 89 | * @return 90 | */ 91 | public boolean isDoneColSpan() { 92 | return type == TTCPrEnum.HM_S || type == TTCPrEnum.HM || type == TTCPrEnum.HVM_S; 93 | } 94 | 95 | /** 96 | * 是否进行了行合并 97 | * @return 98 | */ 99 | public boolean isDoneRowSpan() { 100 | return type == TTCPrEnum.VM_S || type == TTCPrEnum.VM || type == TTCPrEnum.HVM_S; 101 | } 102 | 103 | /** 104 | * 单元格坐标 105 | * @return 106 | */ 107 | public String getCellPosition() { 108 | return realRowIndex + "-" + realColumnIndex; 109 | } 110 | 111 | public TTCPrEnum getType() { 112 | return type; 113 | } 114 | 115 | public void setType(TTCPrEnum type) { 116 | this.type = type; 117 | } 118 | 119 | public int getRealRowIndex() { 120 | return realRowIndex; 121 | } 122 | 123 | public void setRealRowIndex(int realRowIndex) { 124 | this.realRowIndex = realRowIndex; 125 | } 126 | 127 | public int getRealColumnIndex() { 128 | return realColumnIndex; 129 | } 130 | 131 | public void setRealColumnIndex(int realColumnIndex) { 132 | this.realColumnIndex = realColumnIndex; 133 | } 134 | 135 | public int getLogicRowIndex() { 136 | return logicRowIndex; 137 | } 138 | 139 | public void setLogicRowIndex(int logicRowIndex) { 140 | this.logicRowIndex = logicRowIndex; 141 | } 142 | 143 | public int getLogicColumnIndex() { 144 | return logicColumnIndex; 145 | } 146 | 147 | public void setLogicColumnIndex(int logicColumnIndex) { 148 | this.logicColumnIndex = logicColumnIndex; 149 | } 150 | 151 | public int getRowSpan() { 152 | return rowSpan; 153 | } 154 | 155 | public void setRowSpan(int rowSpan) { 156 | this.rowSpan = rowSpan; 157 | } 158 | 159 | public int 
getColSpan() { 160 | return colSpan; 161 | } 162 | 163 | public void setColSpan(int colSpan) { 164 | this.colSpan = colSpan; 165 | } 166 | 167 | public WordTableCellContent getContent() { 168 | if (root != null) { 169 | return root.getContent(); 170 | } 171 | return content; 172 | } 173 | 174 | public void setContent(WordTableCellContent content) { 175 | this.content = content; 176 | } 177 | 178 | public TTCPr getRoot() { 179 | return root; 180 | } 181 | 182 | public void setRoot(TTCPr root) { 183 | this.root = root; 184 | } 185 | 186 | // public TTCPr getParent() { 187 | // return parent; 188 | // } 189 | // 190 | // public void setParent(TTCPr parent) { 191 | // this.parent = parent; 192 | // } 193 | 194 | public BigInteger getWidth() { 195 | return width; 196 | } 197 | 198 | public void setWidth(BigInteger width) { 199 | this.width = width; 200 | } 201 | 202 | @Override 203 | public String toString() { 204 | if (root != null) { 205 | return "TTCPr [root=" + root + "]"; 206 | } 207 | return "TTCPr [content=" + content.getData() + "]"; 208 | } 209 | 210 | public void accept(IWordTableMemoryMappingVisitor visitor, int realRowIndex, int realColumnIndex) { 211 | visitor.visit(this, realRowIndex, realColumnIndex); 212 | } 213 | 214 | public static enum TTCPrEnum { 215 | /** 216 | * 无任何格式 217 | */ 218 | NONE, 219 | /** 220 | * 行合并的开始 221 | */ 222 | VM_S, 223 | /** 224 | * 被行合并 225 | */ 226 | VM, 227 | /** 228 | * 列合并的开始 229 | */ 230 | HM_S, 231 | /** 232 | * 被列合并 233 | */ 234 | HM, 235 | /** 236 | * 行合并的开始,又是列合并的开始 237 | */ 238 | HVM_S 239 | } 240 | } 241 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/WordTable.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | import java.util.List; 4 | 5 | import com.google.common.collect.Lists; 6 | import com.suncht.wordread.format.DefaultWordTableFormater; 7 | import 
com.suncht.wordread.format.IWordTableFormater; 8 | import com.suncht.wordread.output.IWordTableOutputStrategy; 9 | 10 | /** 11 | * 一个表格对象 每个表格有多个行 12 | * 13 | * @author changtan.sun 14 | * 15 | */ 16 | public class WordTable { 17 | private List rows = Lists.newArrayList(); 18 | 19 | public List getRows() { 20 | return rows; 21 | } 22 | 23 | @Override 24 | public String toString() { 25 | return rows.toString(); 26 | } 27 | 28 | public void output(IWordTableOutputStrategy outputStrategy) { 29 | for (WordTableRow row : rows) { 30 | for (WordTableCell cell : row.getCells()) { 31 | outputStrategy.output(cell); 32 | } 33 | } 34 | } 35 | 36 | public String format(IWordTableFormater tableFormater) { 37 | if (tableFormater == null) { 38 | tableFormater = new DefaultWordTableFormater(); 39 | } 40 | 41 | StringBuilder builder = new StringBuilder(); 42 | for (WordTableRow row : rows) { 43 | for (WordTableCell cell : row.getCells()) { 44 | tableFormater.format(cell, builder); 45 | builder.append('\t'); 46 | } 47 | builder.append(this.newline()); 48 | } 49 | 50 | return builder.toString(); 51 | } 52 | 53 | public String format() { 54 | return this.format(null); 55 | } 56 | 57 | private String newline() { 58 | return System.getProperty("line.separator"); 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/WordTableCell.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | import com.suncht.wordread.format.IWordTableFormater; 4 | 5 | /** 6 | * 表格的单元格 7 | * @author changtan.sun 8 | * 9 | */ 10 | public class WordTableCell { 11 | // /** 12 | // * 行号 13 | // */ 14 | // private int rowIndex; 15 | // /** 16 | // * 列号 17 | // */ 18 | // private int columnIndex; 19 | // /** 20 | // * 在word中实际行号 21 | // */ 22 | // private int realRowIndex; 23 | // /** 24 | // * 在word中实际列号 25 | // */ 26 | // private int 
realColumnIndex; 27 | /** 28 | * 单元格的内容 29 | */ 30 | private WordTableCellContent content; 31 | 32 | /** 33 | * 在word中合并了多少行 34 | */ 35 | private int rowSpan = 1; 36 | /** 37 | * 在word中合并了多少列 38 | */ 39 | private int columnSpan = 1; 40 | 41 | // public String getCellPosition() { 42 | // return rowIndex + "-" + columnIndex; 43 | // } 44 | 45 | // public int getRealRowIndex() { 46 | // return realRowIndex; 47 | // } 48 | // 49 | // public void setRealRowIndex(int realRowIndex) { 50 | // this.realRowIndex = realRowIndex; 51 | // } 52 | // 53 | // public int getRealColumnIndex() { 54 | // return realColumnIndex; 55 | // } 56 | // 57 | // public void setRealColumnIndex(int realColumnIndex) { 58 | // this.realColumnIndex = realColumnIndex; 59 | // } 60 | 61 | public int getRowSpan() { 62 | return rowSpan; 63 | } 64 | 65 | public WordTableCellContent getContent() { 66 | return content; 67 | } 68 | 69 | public void setContent(WordTableCellContent content) { 70 | this.content = content; 71 | } 72 | 73 | public void setRowSpan(int rowSpan) { 74 | this.rowSpan = rowSpan; 75 | } 76 | 77 | public int getColumnSpan() { 78 | return columnSpan; 79 | } 80 | 81 | public void setColumnSpan(int columnSpan) { 82 | this.columnSpan = columnSpan; 83 | } 84 | 85 | // public int getRowIndex() { 86 | // return rowIndex; 87 | // } 88 | // 89 | // public void setRowIndex(int rowIndex) { 90 | // this.rowIndex = rowIndex; 91 | // } 92 | 93 | // public int getColumnIndex() { 94 | // return columnIndex; 95 | // } 96 | 97 | // public void setColumnIndex(int columnIndex) { 98 | // this.columnIndex = columnIndex; 99 | // } 100 | 101 | // @Override 102 | // public String toString() { 103 | // return "PeraWordTableCell [rowIndex=" + rowIndex + ", columnIndex=" + columnIndex + ", text=" + text + "]"; 104 | // } 105 | 106 | /** 107 | * 单元格数据格式化成字符串 108 | * @param formater 109 | * @return 110 | */ 111 | public String format(IWordTableFormater formater) { 112 | if (formater == null) { 113 | return 
this.toString(); 114 | } 115 | 116 | StringBuilder stringBuilder = new StringBuilder(); 117 | formater.format(this, stringBuilder); 118 | return stringBuilder.toString(); 119 | } 120 | 121 | } 122 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/WordTableCellContent.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 4 | 5 | /** 6 | *

<p>标题: 单元格内容对象</p>

7 | *

<p>描述: </p>

8 | * @author changtan.sun 9 | * @date 2018年4月22日 10 | */ 11 | public abstract class WordTableCellContent { 12 | protected WordDocType docType; 13 | 14 | protected ContentTypeEnum contentType; 15 | protected T data; 16 | 17 | public T getData() { 18 | return data; 19 | } 20 | 21 | public void setData(T data) { 22 | this.data = data; 23 | } 24 | 25 | public ContentTypeEnum getContentType() { 26 | return contentType; 27 | } 28 | 29 | public void setContentType(ContentTypeEnum contentType) { 30 | this.contentType = contentType; 31 | } 32 | 33 | /** 34 | * 拷贝对象,具体实现由子类实现 35 | * @return 36 | */ 37 | public abstract WordTableCellContent copy(); 38 | 39 | public abstract void load(Object cellObj); 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/WordTableCellContentFormula.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | import java.io.StringReader; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | import org.apache.poi.xwpf.usermodel.XWPFTableCell; 8 | import org.dom4j.Document; 9 | import org.dom4j.DocumentException; 10 | import org.dom4j.DocumentFactory; 11 | import org.dom4j.Element; 12 | import org.dom4j.io.SAXReader; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | import org.xml.sax.InputSource; 16 | 17 | import com.suncht.wordread.model.WordTableCellContentFormula.WcFormula; 18 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 19 | import com.suncht.wordread.utils.MathmlUtils; 20 | 21 | 22 | public class WordTableCellContentFormula extends WordTableCellContent { 23 | private final static Logger logger = LoggerFactory.getLogger(WordTableCellContentFormula.class); 24 | 25 | public WordTableCellContentFormula(WordDocType docType) { 26 | this.docType = docType; 27 | } 28 | 29 | @Override 30 | public void load(Object cellObj) { 31 | 
this.setContentType(ContentTypeEnum.Formula); 32 | 33 | if(docType == WordDocType.DOCX) { 34 | XWPFTableCell cell = (XWPFTableCell) cellObj; 35 | 36 | String xml = cell.getCTTc().xmlText(); 37 | String omml = this.extractOml(xml); 38 | 39 | String mml = MathmlUtils.convertOMML2MML(omml); 40 | String latex = MathmlUtils.convertMML2Latex(mml); 41 | 42 | WcFormula formulaContent = new WcFormula(); 43 | formulaContent.setMml(mml); 44 | formulaContent.setLatex(latex); 45 | this.setData(formulaContent); 46 | } else if(docType == WordDocType.DOC) { 47 | 48 | } 49 | 50 | } 51 | 52 | 53 | @Override 54 | public WordTableCellContent copy() { 55 | WordTableCellContentFormula newContent = new WordTableCellContentFormula(this.docType); 56 | newContent.setData(this.data); 57 | newContent.setContentType(ContentTypeEnum.Formula); 58 | return newContent; 59 | } 60 | 61 | private String extractOml(String xml) { 62 | //dom4j解析器的初始化 63 | SAXReader reader = new SAXReader(new DocumentFactory()); 64 | Map map = new HashMap(); 65 | map.put("xdr", "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"); 66 | map.put("m", "http://schemas.openxmlformats.org/officeDocument/2006/math"); 67 | reader.getDocumentFactory().setXPathNamespaceURIs(map); //xml文档的namespace设置 68 | 69 | InputSource source = new InputSource(new StringReader(xml)); 70 | source.setEncoding("utf-8"); 71 | try { 72 | Document doc = reader.read(source); 73 | Element root = doc.getRootElement(); 74 | Element e = (Element) root.selectSingleNode("//m:oMathPara"); //用xpath得到OMML节点 75 | String omml = e.asXML(); //转为xml 76 | return omml; 77 | } catch (DocumentException e) { 78 | e.printStackTrace(); 79 | } 80 | return null; 81 | } 82 | 83 | public static class WcFormula { 84 | private String mml; 85 | private String latex; 86 | public String getMml() { 87 | return mml; 88 | } 89 | public void setMml(String mml) { 90 | this.mml = mml; 91 | } 92 | public String getLatex() { 93 | return latex; 94 | } 95 | public void 
setLatex(String latex) { 96 | this.latex = latex; 97 | } 98 | } 99 | 100 | 101 | } 102 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/WordTableCellContentImage.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | import java.io.StringReader; 4 | import java.util.Arrays; 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | 8 | import org.apache.commons.lang3.StringUtils; 9 | import org.apache.poi.openxml4j.opc.PackageRelationship; 10 | import org.apache.poi.xwpf.usermodel.XWPFDocument; 11 | import org.apache.poi.xwpf.usermodel.XWPFPictureData; 12 | import org.apache.poi.xwpf.usermodel.XWPFTableCell; 13 | import org.dom4j.Attribute; 14 | import org.dom4j.Document; 15 | import org.dom4j.DocumentException; 16 | import org.dom4j.DocumentFactory; 17 | import org.dom4j.Element; 18 | import org.dom4j.io.SAXReader; 19 | import org.dom4j.tree.DefaultElement; 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | import org.xml.sax.InputSource; 23 | 24 | import com.suncht.wordread.model.WordTableCellContentImage.WcImage; 25 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 26 | 27 | public class WordTableCellContentImage extends WordTableCellContent { 28 | private final static Logger logger = LoggerFactory.getLogger(WordTableCellContentImage.class); 29 | 30 | public WordTableCellContentImage(WordDocType docType) { 31 | this.docType = docType; 32 | } 33 | 34 | @Override 35 | public void load(Object cellObj) { 36 | this.setContentType(ContentTypeEnum.Image); 37 | 38 | if(docType == WordDocType.DOCX) { 39 | XWPFTableCell cell = (XWPFTableCell) cellObj; 40 | String xml = cell.getCTTc().xmlText(); 41 | String embedId = extractEmbedId(xml); 42 | this.setData(this.readImage(embedId, cell.getXWPFDocument())); 43 | } else if(docType == WordDocType.DOC) { 44 | 45 | } 46 | } 47 | 48 | 49 | 
@Override 50 | public WordTableCellContent copy() { 51 | WordTableCellContentImage newContent = new WordTableCellContentImage(this.docType); 52 | newContent.setData(data); 53 | newContent.setContentType(contentType); 54 | return newContent; 55 | } 56 | 57 | private String extractEmbedId(String xml) { 58 | // dom4j解析器的初始化 59 | SAXReader reader = new SAXReader(new DocumentFactory()); 60 | Map map = new HashMap(); 61 | map.put("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"); 62 | map.put("a", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"); 63 | map.put("xdr", "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"); 64 | map.put("wp", "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"); 65 | map.put("pic", "http://schemas.openxmlformats.org/drawingml/2006/picture"); 66 | map.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships"); 67 | reader.getDocumentFactory().setXPathNamespaceURIs(map); // xml文档的namespace设置 68 | 69 | InputSource source = new InputSource(new StringReader(xml)); 70 | source.setEncoding("utf-8"); 71 | try { 72 | Document doc = reader.read(source); 73 | Element root = doc.getRootElement(); 74 | Element e = (Element) root.selectSingleNode("//pic:blipFill"); 75 | Element blip = (DefaultElement) e.content().get(0); 76 | String embedId = ((Attribute) (blip.attributes().get(0))).getValue(); 77 | return embedId; 78 | } catch (DocumentException e) { 79 | e.printStackTrace(); 80 | } 81 | return null; 82 | } 83 | 84 | private WcImage readImage(String embedId, final XWPFDocument xdoc) { 85 | if (StringUtils.isBlank(embedId)) { 86 | return null; 87 | } 88 | WcImage imageContent = null; 89 | for (XWPFPictureData pictureData : xdoc.getAllPictures()) { 90 | PackageRelationship relationship = pictureData.getPackageRelationship(); 91 | if (embedId.equals(relationship.getId())) { 92 | imageContent = new WcImage(); 93 | imageContent.setData(pictureData.getData()); 94 | 
imageContent.setFileName(pictureData.getFileName()); 95 | imageContent.setImageType(pictureData.getPictureType()); 96 | break; 97 | } 98 | } 99 | 100 | return imageContent; 101 | } 102 | 103 | /** 104 | * 图片内容 105 | *

<p>Title: ImageContent</p>
     * <p>Description:</p>

107 | * @author changtan.sun 108 | * @date 2018年4月22日 109 | */ 110 | public static class WcImage { 111 | private String fileName; 112 | private byte[] data; 113 | 114 | /** 115 | * 图片类型,参考org.apache.poi.xwpf.usermodel 116 | */ 117 | private int imageType; 118 | 119 | public String getFileName() { 120 | return fileName; 121 | } 122 | 123 | public void setFileName(String fileName) { 124 | this.fileName = fileName; 125 | } 126 | 127 | public byte[] getData() { 128 | if (data == null) { 129 | return new byte[0]; 130 | } 131 | return Arrays.copyOf(data, data.length); 132 | } 133 | 134 | public void setData(byte[] data) { 135 | if (data == null) { 136 | return; 137 | } 138 | this.data = Arrays.copyOf(data, data.length); 139 | } 140 | 141 | public int getImageType() { 142 | return imageType; 143 | } 144 | 145 | public void setImageType(int imageType) { 146 | this.imageType = imageType; 147 | } 148 | 149 | } 150 | 151 | 152 | } 153 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/WordTableCellContentOleObject.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | import java.io.InputStream; 4 | import java.io.StringReader; 5 | import java.util.Arrays; 6 | import java.util.HashMap; 7 | import java.util.Iterator; 8 | import java.util.List; 9 | import java.util.Map; 10 | 11 | import org.apache.commons.io.FilenameUtils; 12 | import org.apache.commons.lang3.StringUtils; 13 | import org.apache.poi.POIXMLDocumentPart; 14 | import org.apache.poi.hwpf.usermodel.TableCell; 15 | import org.apache.poi.openxml4j.opc.PackagePart; 16 | import org.apache.poi.poifs.dev.POIFSViewEngine; 17 | import org.apache.poi.poifs.filesystem.DirectoryEntry; 18 | import org.apache.poi.poifs.filesystem.DirectoryNode; 19 | import org.apache.poi.poifs.filesystem.DocumentEntry; 20 | import org.apache.poi.poifs.filesystem.DocumentNode; 21 | import 
org.apache.poi.poifs.filesystem.Entry; 22 | import org.apache.poi.poifs.filesystem.Ole10Native; 23 | import org.apache.poi.poifs.filesystem.POIFSFileSystem; 24 | import org.apache.poi.ss.formula.eval.NotImplementedException; 25 | import org.apache.poi.xwpf.usermodel.XWPFDocument; 26 | import org.apache.poi.xwpf.usermodel.XWPFTableCell; 27 | import org.dom4j.Attribute; 28 | import org.dom4j.Document; 29 | import org.dom4j.DocumentException; 30 | import org.dom4j.DocumentFactory; 31 | import org.dom4j.Element; 32 | import org.dom4j.io.SAXReader; 33 | import org.dom4j.tree.DefaultElement; 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | import org.xml.sax.InputSource; 37 | 38 | import com.suncht.wordread.model.WordTableCellContentOleObject.WcOleObject; 39 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 40 | 41 | /** 42 | *

 * Title: OLE object nested in a table cell
 * <p>
 * Description: an OLE object, e.g. an attachment

*
 * @author changtan.sun
 * @date 2018年4月22日
 */
public class WordTableCellContentOleObject extends WordTableCellContent {
    private final static Logger logger = LoggerFactory.getLogger(WordTableCellContentOleObject.class);

    public WordTableCellContentOleObject(WordDocType docType) {
        this.docType = docType;
    }

    /**
     * Marks this content as an OLE object and, for DOCX cells, reads the
     * embedded object referenced by the cell XML. DOC cells are not handled yet.
     *
     * @param cellObj the cell; cast to {@code XWPFTableCell} for DOCX
     */
    @Override
    public void load(Object cellObj) {
        this.setContentType(ContentTypeEnum.OleObject);

        if (docType == WordDocType.DOCX) {
            XWPFTableCell cell = (XWPFTableCell) cellObj;
            Document doc = this.buildDocument(cell.getCTTc().xmlText());
            if (doc == null) {
                // Parsing failed and was already logged; leave the data unset
                // instead of failing with a NullPointerException.
                return;
            }
            String embedId = extractOleObjectEmbedId(doc);
            this.setData(this.readOleObject(embedId, cell.getXWPFDocument()));
        } else if (docType == WordDocType.DOC) {
            // Not implemented for the binary .doc format.
        }
    }

    @Override
    public WordTableCellContent copy() {
        // The copy shares the data reference; only the wrapper object is new.
        WordTableCellContentOleObject duplicate = new WordTableCellContentOleObject(this.docType);
        duplicate.setData(data);
        duplicate.setContentType(contentType);
        return duplicate;
    }

    /**
     * Parses the cell XML into a dom4j document with the XPath prefixes registered.
     *
     * @param xml XML text of the table cell
     * @return the parsed document, or {@code null} when {@code xml} is
     *         {@code null} or parsing fails (the failure is logged)
     */
    private Document buildDocument(String xml) {
        if (xml == null) {
            return null;
        }

        SAXReader reader = new SAXReader(new DocumentFactory());
        Map<String, String> namespaces = new HashMap<String, String>();
        namespaces.put("o", "urn:schemas-microsoft-com:office:office");
        namespaces.put("v", "urn:schemas-microsoft-com:vml");
        namespaces.put("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
        // "a" is the DrawingML main namespace; it was previously mapped to the
        // WordprocessingML URI (the prefix is not used by the XPaths in this class).
        namespaces.put("a", "http://schemas.openxmlformats.org/drawingml/2006/main");
        namespaces.put("xdr", "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing");
        namespaces.put("wp", "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing");
        namespaces.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships");
        reader.getDocumentFactory().setXPathNamespaceURIs(namespaces);

        InputSource source = new InputSource(new StringReader(xml));
        source.setEncoding("utf-8");

        try {
            return reader.read(source);
        } catch (DocumentException e) {
            logger.error(e.getMessage(), e);
        }
        return null;
    }

    /**
     * Reads the relationship id of the OLE object from the cell document.
     *
     * @param doc parsed cell XML
     * @return the {@code id} attribute of the first {@code o:OLEObject} inside a
     *         {@code w:object}, or {@code null} when it is absent
     */
    private String extractOleObjectEmbedId(Document doc) {
        if (doc == null) {
            return null;
        }
        // Select the element by name: picking the second content node of
        // w:object returns the wrong node when a v:shapetype or whitespace
        // text precedes the shape.
        Element oleObject = (Element) doc.getRootElement().selectSingleNode("//w:object/o:OLEObject");
        return oleObject == null ? null : oleObject.attributeValue("id");
    }

    /**
     * Reads the relationship id of the preview image shown for the attachment.
     *
     * @param doc parsed cell XML
     * @return the {@code id} attribute of the first {@code v:imagedata} inside a
     *         {@code w:object}, or {@code null} when it is absent
     */
    private String extractImageEmbedId(Document doc) {
        if (doc == null) {
            return null;
        }
        // Scope the search to w:object; a bare "//v:imagedata" evaluated on a
        // child element still searches the whole document.
        Element imageData = (Element) doc.getRootElement().selectSingleNode("//w:object//v:imagedata");
        return imageData == null ? null : imageData.attributeValue("id");
    }

    /**
     * Reads the OLE object stored in the document part whose relationship id
     * equals {@code embedId}.
     *
     * @param embedId relationship id taken from the cell XML
     * @param xdoc    owning document
     * @return the object; an empty {@link WcOleObject} when the part was found
     *         but could not be read; {@code null} when the id is blank, the
     *         document is {@code null}, no part matches, or the part is a
     *         compound document without a "WordDocument" entry
     */
    private WcOleObject readOleObject(String embedId, final XWPFDocument xdoc) {
        if (StringUtils.isBlank(embedId) || xdoc == null) {
            return null;
        }
        for (POIXMLDocumentPart part : xdoc.getRelations()) {
            if (!embedId.equals(part.getPackageRelationship().getId())) {
                continue;
            }
            PackagePart packagePart = part.getPackagePart();

            // Placeholder returned when reading fails, as before.
            WcOleObject oleObject = new WcOleObject();

            // Parse the file inside the OLE object, see http://poi.apache.org/poifs/how-to.html
            try (InputStream is = packagePart.getInputStream()) {
                POIFSFileSystem poifs = new POIFSFileSystem(is);
                if (isOle10NativeObject(poifs.getRoot())) {
                    oleObject = readOle10Native(poifs);
                } else {
                    oleObject = readDocumentOle(poifs, is);
                }
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
            // The first matching part is the answer; stop scanning the rest.
            return oleObject;
        }

        return null;
    }

    private boolean isOle10NativeObject(DirectoryNode directory) {
        return directory.hasEntry(Ole10Native.OLE10_NATIVE);
    }

    /**
     * Reads a non-document OLE object (an Ole10Native stream).
     *
     * @param poifs file system of the embedded object
     * @return the object; fields stay unset when extraction fails (logged)
     */
    private WcOleObject readOle10Native(POIFSFileSystem poifs) {
        WcOleObject oleObject = new WcOleObject();
        try {
            Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(poifs);
            oleObject.setFileName(FilenameUtils.getName(ole10.getFileName()));
            oleObject.setDataSize(ole10.getDataSize());
            oleObject.setData(ole10.getDataBuffer());
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }

        return oleObject;
    }

    /**
     * Reads a document-type OLE object (docx, doc, xlsx, xls, ppt, pptx, ...).
     * Not implemented yet: it only dumps the structure of the embedded file
     * system at debug level and then throws.
     *
     * @param poifs file system of the embedded object
     * @param is    stream the file system was read from (currently unused)
     * @return {@code null} when the root has no "WordDocument" entry
     * @throws NotImplementedException for every embedded Word document
     */
    private WcOleObject readDocumentOle(POIFSFileSystem poifs, InputStream is) {
        DirectoryNode directory = poifs.getRoot();
        if (!directory.hasEntry("WordDocument")) {
            return null;
        }

        if (logger.isDebugEnabled()) {
            // Diagnostic dump; it used to be written to System.out.
            for (Object line : POIFSViewEngine.inspectViewable(poifs, true, 0, " ")) {
                logger.debug("{}", line);
            }
        }
        // TODO: extract the embedded document. An earlier disabled draft read the
        // "WpsCustomData" entry into a byte array.
        throw new NotImplementedException("暂未实现");
    }

    /**
     * OLE object payload: file name, raw bytes and declared data size.
     */
    public static class WcOleObject {
        private String fileName;
        private byte[] data;
        private int dataSize;

        public String getFileName() {
            return fileName;
        }

        public void setFileName(String fileName) {
            this.fileName = fileName;
        }

        /**
         * @return a copy of the bytes; an empty array when none were set
         */
        public byte[] getData() {
            return data == null ? new byte[0] : Arrays.copyOf(data, data.length);
        }

        /**
         * Stores a copy of the given bytes; a {@code null} argument is ignored.
         */
        public void setData(byte[] data) {
            if (data != null) {
                this.data = Arrays.copyOf(data, data.length);
            }
        }

        public int getDataSize() {
            return dataSize;
        }

        public void setDataSize(int dataSize) {
            this.dataSize = dataSize;
        }
    }

}
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableCellContentText.java:
--------------------------------------------------------------------------------
package com.suncht.wordread.model;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.lang3.StringUtils; 8 | import org.apache.poi.hwpf.usermodel.Paragraph; 9 | import org.apache.poi.hwpf.usermodel.TableCell; 10 | import org.apache.poi.xwpf.usermodel.XWPFParagraph; 11 | import org.apache.poi.xwpf.usermodel.XWPFRun; 12 | import org.apache.poi.xwpf.usermodel.XWPFTableCell; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | import com.suncht.wordread.model.WordTableCellContentText.WcText; 17 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 18 | 19 | public class WordTableCellContentText extends WordTableCellContent { 20 | private final static Logger logger = LoggerFactory.getLogger(WordTableCellContentText.class); 21 | 22 | public WordTableCellContentText(WordDocType docType) { 23 | this.docType = docType; 24 | } 25 | 26 | @Override 27 | public void load(Object cellObj) { 28 | this.setContentType(ContentTypeEnum.Text); 29 | 30 | if(docType == WordDocType.DOCX) { 31 | XWPFTableCell cell = (XWPFTableCell) cellObj; 32 | List texts = new ArrayList(); 33 | 34 | List paragraphs = cell.getParagraphs(); 35 | if(paragraphs!=null && paragraphs.size()>0) { 36 | for (XWPFParagraph paragraph : paragraphs) { 37 | texts.add(this.runsToLine(paragraph.getRuns())); 38 | } 39 | } 40 | 41 | WcText text = new WcText(); 42 | text.setParagraphs(texts); 43 | 44 | this.setData(text); 45 | } else if(docType == WordDocType.DOC) { 46 | TableCell cell = (TableCell) cellObj; 47 | 48 | List texts = new ArrayList(); 49 | 50 | for (int i = 0, num = cell.numParagraphs(); i < num; i++) { 51 | Paragraph paragraph = cell.getParagraph(i); 52 | texts.add(paragraph.text().trim()); 53 | } 54 | 55 | WcText text = new WcText(); 56 | text.setParagraphs(texts); 57 | 58 | this.setData(text); 59 | } 60 | } 61 | 62 | private String runsToLine(List runs) { 63 | StringBuilder builder = new StringBuilder(); 64 | for (XWPFRun run : runs) { 65 | builder.append(run.toString()); 66 | } 67 | 68 | return builder.toString(); 69 | } 70 | 71 | 
public WordTableCellContent copy() { 72 | WordTableCellContentText newContent = new WordTableCellContentText(this.docType); 73 | newContent.setData(data); 74 | newContent.setContentType(contentType); 75 | return newContent; 76 | } 77 | 78 | /** 79 | * 文本 80 | *

<p>Title: WcText</p>
     * <p>Description:</p>

* @author changtan.sun
     * @date 2018年4月23日
     */
    public static class WcText {
        private List<String> paragraphs = Collections.emptyList();

        /**
         * @return the paragraphs as an unmodifiable list, never {@code null}
         */
        public List<String> getParagraphs() {
            return paragraphs;
        }

        /**
         * Stores an unmodifiable snapshot of the given paragraphs.
         *
         * @param paragraphs paragraph texts; {@code null} is treated as empty
         */
        public void setParagraphs(List<String> paragraphs) {
            if (paragraphs == null) {
                this.paragraphs = Collections.emptyList();
            } else {
                this.paragraphs = Collections.unmodifiableList(new ArrayList<String>(paragraphs));
            }
        }

        /**
         * @return the paragraphs joined with line feeds
         */
        @Override
        public String toString() {
            return StringUtils.join(paragraphs, '\n');
        }


    }
}
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableCellContents.java:
--------------------------------------------------------------------------------
package com.suncht.wordread.model;

import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;

import com.suncht.wordread.parser.WordTableParser.WordDocType;

/**
 * Factory that picks the content implementation for a table cell (formula,
 * image, OLE object or plain text) and loads it.
 */
public final class WordTableCellContents {
    // NOTE(review): in the reviewed source the marker literals of every isXxx
    // check were empty strings, which made each check always true (so every
    // cell was classified as a formula). The element names below mirror the
    // XPath expressions used by the corresponding loaders ("//m:oMathPara",
    // "//pic:blipFill", "//w:object") - confirm against the original literals.
    private static final String FORMULA_ELEMENT = "m:oMathPara";
    private static final String IMAGE_ELEMENT = "pic:blipFill";
    private static final String OLE_ELEMENT = "w:object";

    /**
     * Builds and loads the content object for a DOCX cell.
     */
    public static WordTableCellContent getCellContent(XWPFTableCell cell) {
        WordTableCellContent content;
        if (isFormula(cell)) { // formula
            content = new WordTableCellContentFormula(WordDocType.DOCX);
        } else if (isImage(cell)) { // image
            content = new WordTableCellContentImage(WordDocType.DOCX);
        } else if (isOleObject(cell)) { // OLE object
            content = new WordTableCellContentOleObject(WordDocType.DOCX);
        } else { // plain text
            content = new WordTableCellContentText(WordDocType.DOCX);
        }

        content.load(cell);
        return content;
    }

    public static boolean isFormula(XWPFTableCell cell) {
        return containsElement(cell.getCTTc().xmlText(), FORMULA_ELEMENT);
    }

    public static boolean isImage(XWPFTableCell cell) {
        return containsElement(cell.getCTTc().xmlText(), IMAGE_ELEMENT);
    }

    public static boolean isOleObject(XWPFTableCell cell) {
        return containsElement(cell.getCTTc().xmlText(), OLE_ELEMENT);
    }

    /**
     * Builds and loads the content object for a DOC cell.
     */
    public static WordTableCellContent getCellContent(TableCell cell) {
        WordTableCellContent content;
        if (isFormula(cell)) { // formula
            content = new WordTableCellContentFormula(WordDocType.DOC);
        } else if (isImage(cell)) { // image
            content = new WordTableCellContentImage(WordDocType.DOC);
        } else if (isOleObject(cell)) { // OLE object
            content = new WordTableCellContentOleObject(WordDocType.DOC);
        } else { // plain text
            content = new WordTableCellContentText(WordDocType.DOC);
        }

        content.load(cell);
        return content;
    }

    // The DOC checks run on cell.text(), as before. NOTE(review): that is the
    // cell's text rather than markup, so these presumably only match when the
    // text itself contains the tags - verify the intended detection for .doc.
    public static boolean isFormula(TableCell cell) {
        return containsElement(cell.text(), FORMULA_ELEMENT);
    }

    public static boolean isImage(TableCell cell) {
        return containsElement(cell.text(), IMAGE_ELEMENT);
    }

    public static boolean isOleObject(TableCell cell) {
        return containsElement(cell.text(), OLE_ELEMENT);
    }

    /**
     * Tells whether {@code text} contains both the start and the end tag of
     * the given qualified element name.
     */
    private static boolean containsElement(String text, String qualifiedName) {
        return text != null
                && text.contains("<" + qualifiedName)
                && text.contains("</" + qualifiedName + ">");
    }

}
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableComplexCell.java:
--------------------------------------------------------------------------------
package com.suncht.wordread.model;

/**
 * Complex cell: a cell that nests sub-cells and can therefore be seen as
 * containing an embedded table.
 * @author suncht
 *
 */
public class WordTableComplexCell extends WordTableCell {
    /**
     * The nested sub-cells, modelled as an inner table.
     */
    private WordTable innerTable;

    public WordTable getInnerTable() {
        return innerTable;
    }

    public void setInnerTable(WordTable innerTable) {
        this.innerTable = innerTable;
    }

    /**
     * @return the inner table's string form, or "null" when none is set
     */
    @Override
    public String toString() {
        return String.valueOf(innerTable);
    }

}
-------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/WordTableHeader.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | import java.util.List; 4 | 5 | import com.google.common.collect.Lists; 6 | 7 | /** 8 | * 表格的列头 9 | * @author changtan.sun 10 | * 11 | */ 12 | public class WordTableHeader { 13 | private List columnHeader = Lists.newArrayList(); 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/WordTableMap.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | /** 4 | * 表格数据映射 5 | * @author changtan.sun 6 | * 7 | */ 8 | public class WordTableMap { 9 | private TTCPr[][] tableMemoryMap; 10 | 11 | public TTCPr[][] getTableMemoryMap() { 12 | return tableMemoryMap; 13 | } 14 | 15 | public void setTableMemoryMap(TTCPr[][] tableMemoryMap) { 16 | this.tableMemoryMap = tableMemoryMap; 17 | } 18 | 19 | public void clear() { 20 | tableMemoryMap = null; 21 | } 22 | 23 | /** 24 | * 获取在docx中实际行数(word中表格都处理成二维表格,忽略合并) 25 | * @return 26 | */ 27 | public int getRealMaxRowCount() { 28 | return tableMemoryMap.length; 29 | } 30 | 31 | /** 32 | * 获取行数(在表格映射对象中的行数) 33 | * @return 34 | */ 35 | public int getRowCount() { 36 | int rowCount = 0; 37 | for (int i = 0; i < tableMemoryMap.length; i++) { 38 | if (tableMemoryMap[i][0].isValid()) { 39 | rowCount++; 40 | } 41 | } 42 | return rowCount; 43 | } 44 | 45 | /** 46 | * 获取行对象 47 | * @param currentRowIndex 48 | * @return 49 | */ 50 | public WordTableRow getRow(int currentRowIndex) { 51 | TTCPr[] _rows = null; 52 | TTCPr _first_column_in_row = null; 53 | int rowCount = 0; 54 | for (int i = 0; i < tableMemoryMap.length; i++) { 55 | if (tableMemoryMap[i][0].isValid()) { 56 | if (currentRowIndex == rowCount++) { 57 | _rows = 
tableMemoryMap[i]; 58 | _first_column_in_row = tableMemoryMap[i][0]; 59 | break; 60 | } 61 | } 62 | } 63 | 64 | if (_rows == null) { 65 | return null; 66 | } 67 | 68 | int real_row_index = _first_column_in_row.getRealRowIndex(); 69 | //int _end_row_index = _first_column_in_row.getRowSpan() + _first_column_in_row.getRealRowIndex() - 1; 70 | int _row_span = _first_column_in_row.getRowSpan(); 71 | int _real_column_count = _rows.length; 72 | 73 | WordTableRow pwtr = new WordTableRow(); 74 | 75 | WordTableCell cell = null; 76 | // WordTableCell pwtc = null; 77 | for (int i = 0; i < _real_column_count; i++) { 78 | cell = getCellInRow(real_row_index, _row_span, i, currentRowIndex); 79 | if (cell == null) { 80 | continue; 81 | } 82 | pwtr.getCells().add(cell); 83 | // if (cells.size() == 1) { 84 | // pwtr.getCells().add(cells.get(0)); 85 | // } else { 86 | // pwtc = new WordTableCell(); 87 | // pwtc.getSubCells().addAll(cells); 88 | // pwtr.getCells().add(pwtc); 89 | // } 90 | } 91 | 92 | return pwtr; 93 | } 94 | 95 | /** 96 | * 获取一行中的单元格集合,将实际单元格转换成逻辑单元格 97 | * @param realRowIndex word中的实际开始行号 98 | * @param endRealRowIndex word中的实际结束行号 99 | * @param realColumnIndex word中的实际列 100 | * @param currentRowIndex 在表格映射对象中的行号 101 | * @return 102 | */ 103 | private WordTableCell getCellInRow(int realRowIndex, int realRowSpan, int realColumnIndex, int currentRowIndex) { 104 | WordTableCell cell = null; 105 | TTCPr currentRealCell = tableMemoryMap[realRowIndex][realColumnIndex]; 106 | 107 | boolean needHandleRowSpan = realRowSpan > 1 || currentRealCell.isDoneRowSpan(); //是否需要处理跨行的情况 108 | boolean needHandleColSpan = currentRealCell.isDoneColSpan();//是否需要处理跨列的情况 109 | 110 | boolean satisfyConditionOfComplexCell = false; //是否满足复杂单元格的条件 111 | 112 | satisfyConditionOfComplexCell = needHandleRowSpan && needHandleColSpan; 113 | if (!satisfyConditionOfComplexCell) { 114 | satisfyConditionOfComplexCell = currentRealCell.getRowSpan() < realRowSpan; 115 | } 116 | 117 | if 
(currentRealCell.isValid()) { //有效单元格 118 | if (satisfyConditionOfComplexCell) {//跨行又跨列 119 | WordTableComplexCell pwtc = new WordTableComplexCell(); //属于复杂单元格 120 | 121 | WordTable innerTable = new WordTable(); 122 | int _realColSpan = currentRealCell.getColSpan(); 123 | for (int i = 0; i < realRowSpan;) { 124 | WordTableRow innerRow = new WordTableRow(); 125 | int _rowSpan = 1; 126 | for (int j = 0; j < _realColSpan; j++) { 127 | TTCPr _ttcpr = tableMemoryMap[realRowIndex + i][realColumnIndex + j]; 128 | if (_ttcpr.isValid()) { 129 | WordTableCell _cell = new WordTableSimpleCell(); 130 | _cell.setRowSpan(_ttcpr.getRowSpan()); 131 | _cell.setColumnSpan(_ttcpr.getColSpan()); 132 | _cell.setContent(_ttcpr.getContent().copy()); 133 | innerRow.getCells().add(_cell); 134 | 135 | if (_ttcpr.getRowSpan() > _rowSpan) { 136 | _rowSpan = _ttcpr.getRowSpan(); 137 | } 138 | } 139 | } 140 | innerTable.getRows().add(innerRow); 141 | 142 | i = i + _rowSpan; 143 | } 144 | pwtc.setInnerTable(innerTable); 145 | cell = pwtc; 146 | } else { 147 | //跨列不跨行,不需要处理 148 | //跨行不跨列,不需要处理 149 | WordTableSimpleCell pwtc = new WordTableSimpleCell(); //属于简单单元格 150 | pwtc.setRowSpan(currentRealCell.getRowSpan()); 151 | pwtc.setColumnSpan(currentRealCell.getColSpan()); 152 | pwtc.setContent(currentRealCell.getContent().copy()); 153 | 154 | cell = pwtc; 155 | } 156 | } 157 | 158 | return cell; 159 | 160 | // if (currentCell.isValid()) { //有效单元格 161 | // pwtc = new WordTableCell(); 162 | // // pwtc.setRealColumnIndex(realColumnIndex); 163 | // // pwtc.setRealRowIndex(i); 164 | // // pwtc.setColumnSpan(pttcpr.getColSpan()); 165 | // // pwtc.setRowSpan(pttcpr.getRowSpan()); 166 | // // pwtc.setRowIndex(currentRowIndex); 167 | // // pwtc.setColumnIndex(realColumnIndex); 168 | // pwtc.setText(currentCell.getText()); 169 | // 170 | // if (currentCell.getType() == TTCPrEnum.VM_S) { 171 | // 172 | // } else if (currentCell.getType() == TTCPrEnum.HM_S) { 173 | // 174 | // } else if (currentCell.getType() == 
TTCPrEnum.HVM_S) { 175 | // 176 | // } 177 | // 178 | // cells.put(currentCell.getCellPosition(), pwtc); 179 | // } else { //无效单元格 180 | // if (i == realRowIndex) { //如果第一个单元格就是无效单元格, 当行合并时 181 | // if (currentCell.getType() == TTCPrEnum.VM && currentCell.getRoot() != null) { 182 | // TTCPr root = currentCell.getRoot(); 183 | // pwtc = new WordTableCell(); 184 | // // pwtc.setRealColumnIndex(root.getRealColumnIndex()); 185 | // // pwtc.setColumnSpan(root.getColSpan()); 186 | // // pwtc.setRealRowIndex(i); 187 | // // pwtc.setRowSpan(root.getRowSpan()); 188 | // // pwtc.setRowIndex(currentRowIndex); 189 | // // pwtc.setColumnIndex(realColumnIndex); 190 | // pwtc.setText(currentCell.getText()); 191 | // 192 | // cells.put(pwtc.getCellPosition(), pwtc); 193 | // } 194 | // } 195 | // 196 | // if (currentCell.getType() == TTCPrEnum.HM && currentCell.getRoot() != null) { //被行合并 197 | // pwtc = new WordTableCell(); 198 | // // pwtc.setRealColumnIndex(realColumnIndex); 199 | // // pwtc.setColumnSpan(pttcpr.getColSpan()); 200 | // // pwtc.setRealRowIndex(i); 201 | // // pwtc.setRowSpan(pttcpr.getRowSpan()); 202 | // // pwtc.setRowIndex(currentRowIndex); 203 | // // pwtc.setColumnIndex(realColumnIndex); 204 | // pwtc.setText(currentCell.getText()); 205 | // 206 | // cells.add(pwtc); 207 | // } 208 | // } 209 | // 210 | // for (int i = realRowIndex; i <= endRealRowIndex; i++) { 211 | // currentCell = tableMemoryMap[i][realColumnIndex]; 212 | // 213 | // } 214 | // 215 | // return cells; 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/WordTableRow.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | import java.util.List; 4 | 5 | import com.google.common.collect.Lists; 6 | 7 | /** 8 | * 表格行 9 | * 行中包含多个单元格 10 | * @author changtan.sun 11 | * 12 | */ 13 | public class WordTableRow { 14 | /** 15 | * 
行中单元格集合 16 | */ 17 | private List cells = Lists.newArrayList(); 18 | 19 | public List getCells() { 20 | return cells; 21 | } 22 | 23 | @Override 24 | public String toString() { 25 | return cells.toString(); 26 | } 27 | 28 | public void clear() { 29 | this.cells.clear(); 30 | this.cells = null; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/model/WordTableSimpleCell.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.model; 2 | 3 | /** 4 | * 简单单元格 5 | * 比如:文字、公式、附件等 6 | * @author suncht 7 | * 8 | */ 9 | public class WordTableSimpleCell extends WordTableCell { 10 | 11 | @Override 12 | public String toString() { 13 | return getContent().getData().toString(); 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/output/DefaultWordTableOutputStrategy.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.output; 2 | 3 | import com.suncht.wordread.format.DefaultCellFormater; 4 | import com.suncht.wordread.format.ICellFormater; 5 | import com.suncht.wordread.model.WordTableCell; 6 | import com.suncht.wordread.model.WordTableCellContentImage.WcImage; 7 | import com.suncht.wordread.model.WordTableComplexCell; 8 | import com.suncht.wordread.model.WordTableSimpleCell; 9 | 10 | public class DefaultWordTableOutputStrategy implements IWordTableOutputStrategy { 11 | private ICellFormater cellFormater; 12 | 13 | public DefaultWordTableOutputStrategy() { 14 | cellFormater = new DefaultCellFormater(); 15 | } 16 | 17 | public DefaultWordTableOutputStrategy(ICellFormater cellFormater) { 18 | this.cellFormater = cellFormater; 19 | } 20 | 21 | @Override 22 | public void output(WordTableCell tableCell) { 23 | if (tableCell instanceof WordTableSimpleCell) { 24 | 
outputCell(tableCell.getContent().getData()); 25 | } else if (tableCell instanceof WordTableComplexCell) { 26 | // WordTableComplexCell cell = (WordTableComplexCell) tableCell; 27 | // 28 | // StringBuilder builder = new StringBuilder(); 29 | // 30 | // List rows = cell.getInnerTable().getRows(); 31 | // for (WordTableRow row : rows) { 32 | // for (WordTableCell wtcell : row.getCells()) { 33 | // builder.append(printCell(wtcell.getContent().getData()) + '\t'); 34 | // } 35 | // } 36 | // return builder.toString() + "" + '\t'; 37 | } 38 | } 39 | 40 | 41 | private void outputCell(Object cellContent) { 42 | // if (cellContent instanceof ImageContent) { 43 | // this.cellFormater.formatImage((ImageContent)cellContent); 44 | // } else { 45 | // this.cellFormater.formatText(cellContent); 46 | // } 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/output/IWordTableOutputStrategy.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.output; 2 | 3 | import com.suncht.wordread.model.WordTableCell; 4 | 5 | /** 6 | * 表格单元格内容输出策略 7 | * @author suncht 8 | * 9 | */ 10 | public interface IWordTableOutputStrategy { 11 | public void output(WordTableCell tableCell); 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/ISingleWordTableParser.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.parser; 2 | 3 | import com.suncht.wordread.model.WordTable; 4 | 5 | public interface ISingleWordTableParser { 6 | public WordTable parse(); 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/IWordTableParser.java: -------------------------------------------------------------------------------- 1 | package 
com.suncht.wordread.parser; 2 | 3 | import java.io.InputStream; 4 | import java.util.List; 5 | 6 | import com.suncht.wordread.model.WordTable; 7 | 8 | public interface IWordTableParser { 9 | public List parse(InputStream inputStream); 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/WordTableParser.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.parser; 2 | 3 | import java.io.File; 4 | import java.io.FileInputStream; 5 | import java.io.FileNotFoundException; 6 | import java.io.InputStream; 7 | import java.util.List; 8 | 9 | import org.springframework.util.StringUtils; 10 | 11 | import com.google.common.base.Preconditions; 12 | import com.suncht.wordread.model.WordTable; 13 | import com.suncht.wordread.parser.mapping.IWordTableMemoryMappingVisitor; 14 | import com.suncht.wordread.parser.strategy.ITableTransferStrategy; 15 | import com.suncht.wordread.parser.wordh.WordHTableParser; 16 | import com.suncht.wordread.parser.wordx.WordXTableParser; 17 | 18 | /** 19 | * Word文档解析器 20 | * 支持2007以上的docx、2007以下的doc文档 21 | * @author changtan.sun 22 | * 23 | */ 24 | public class WordTableParser { 25 | private static final String DOCX_WORD_DOCUMENT = ".docx"; 26 | private static final String DOC_WORD_DOCUMENT = ".doc"; 27 | 28 | private WordTableTransferContext context; 29 | private IWordTableParser wordTableParser; 30 | 31 | private WordTableParser() { 32 | this.context = WordTableTransferContext.create(); 33 | } 34 | 35 | public static WordTableParser create() { 36 | return new WordTableParser(); 37 | } 38 | 39 | public WordTableParser transferStrategy(ITableTransferStrategy tableTransferStrategy) { 40 | context.transferStrategy(tableTransferStrategy); 41 | return this; 42 | } 43 | 44 | public WordTableParser memoryMappingVisitor(IWordTableMemoryMappingVisitor visitor) { 45 | context.visitor(visitor); 46 | return this; 47 | 
} 48 | 49 | public List parse(File wordFile) { 50 | Preconditions.checkArgument(wordFile.exists(), "文件不存在"); 51 | 52 | String fileName = wordFile.getName(); 53 | WordDocType docType = WordDocType.DOCX; 54 | if (StringUtils.endsWithIgnoreCase(fileName, DOCX_WORD_DOCUMENT)) { 55 | docType = WordDocType.DOCX; 56 | } else if (StringUtils.endsWithIgnoreCase(fileName, DOC_WORD_DOCUMENT)) { 57 | docType = WordDocType.DOC; 58 | } else { 59 | throw new IllegalArgumentException("不支持该文件类型"); 60 | } 61 | 62 | try(FileInputStream inputStream = new FileInputStream(wordFile);) { 63 | return this.parse(inputStream, docType); 64 | } catch (Exception e) { 65 | e.printStackTrace(); 66 | } 67 | return null; 68 | } 69 | 70 | 71 | public List parse(InputStream inputStream, WordDocType docType) { 72 | if (docType == WordDocType.DOCX) { 73 | wordTableParser = new WordXTableParser(this.context); 74 | } else if (docType == WordDocType.DOC) { 75 | wordTableParser = new WordHTableParser(this.context); 76 | } else { 77 | throw new IllegalArgumentException("不支持该文件类型"); 78 | } 79 | return wordTableParser.parse(inputStream); 80 | } 81 | 82 | /** 83 | * Word文档类型 84 | * @author changtan.sun 85 | * 86 | */ 87 | public static enum WordDocType { 88 | DOCX, DOC, UNKOWN 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/WordTableTransferContext.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.parser; 2 | 3 | import com.suncht.wordread.model.WordTable; 4 | import com.suncht.wordread.parser.mapping.IWordTableMemoryMappingVisitor; 5 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping; 6 | import com.suncht.wordread.parser.strategy.ITableTransferStrategy; 7 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy; 8 | 9 | /** 10 | * Word表格转换上下文 11 | * @author changtan.sun 12 | * 13 | */ 14 | public class 
WordTableTransferContext { 15 | private ITableTransferStrategy strategy; 16 | private IWordTableMemoryMappingVisitor visitor; 17 | 18 | public static WordTableTransferContext create() { 19 | return new WordTableTransferContext(); 20 | } 21 | 22 | public WordTableTransferContext transferStrategy(ITableTransferStrategy strategy) { 23 | this.strategy = strategy; 24 | return this; 25 | } 26 | 27 | public WordTableTransferContext visitor(IWordTableMemoryMappingVisitor visitor) { 28 | this.visitor = visitor; 29 | return this; 30 | } 31 | 32 | public WordTable transfer(final WordTableMemoryMapping tableMemoryMapping) { 33 | if (strategy == null) { 34 | strategy = new LogicalTableStrategy(); 35 | } 36 | return strategy.transfer(tableMemoryMapping); 37 | } 38 | 39 | public ITableTransferStrategy getStrategy() { 40 | return strategy; 41 | } 42 | 43 | public IWordTableMemoryMappingVisitor getVisitor() { 44 | return visitor; 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/mapping/IWordTableMemoryMappingVisitor.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.parser.mapping; 2 | 3 | import com.suncht.wordread.model.TTCPr; 4 | 5 | /** 6 | * Word表格内存映射表的单元格访问者接口 7 | * 用于修改内存映射表的单元格的数据 8 | * @author changtan.sun 9 | * 10 | */ 11 | public interface IWordTableMemoryMappingVisitor { 12 | public void visit(TTCPr cell, int realRowIndex, int realColumnIndex); 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/mapping/WordTableMemoryMapping.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.parser.mapping; 2 | 3 | import java.util.Arrays; 4 | 5 | import com.google.common.base.Preconditions; 6 | import com.suncht.wordread.model.TTCPr; 7 | 8 | /** 9 | * Word表格内存映射 10 | * 
@author changtan.sun 11 | * 12 | */ 13 | public class WordTableMemoryMapping { 14 | private TTCPr[][] _tableMemoryMap; 15 | private int rowCount; 16 | private int columnCount; 17 | private IWordTableMemoryMappingVisitor visitor; 18 | 19 | public WordTableMemoryMapping(int row, int column) { 20 | _tableMemoryMap = new TTCPr[row][column]; 21 | this.rowCount = row; 22 | this.columnCount = column; 23 | } 24 | 25 | public void setTTCPr(final TTCPr data, int rowIndex, int columnIndex) { 26 | Preconditions.checkArgument(rowIndex < rowCount); 27 | Preconditions.checkArgument(columnIndex < columnCount); 28 | 29 | _tableMemoryMap[rowIndex][columnIndex] = data; 30 | 31 | if (visitor != null) { 32 | data.accept(visitor, rowIndex, columnIndex); 33 | } 34 | } 35 | 36 | public final TTCPr getTTCPr(int rowIndex, int columnIndex) { 37 | Preconditions.checkArgument(rowIndex < rowCount); 38 | Preconditions.checkArgument(columnIndex < columnCount); 39 | 40 | return _tableMemoryMap[rowIndex][columnIndex]; 41 | } 42 | 43 | public TTCPr[] getRow(int rowIndex) { 44 | Preconditions.checkArgument(rowIndex < rowCount); 45 | 46 | return Arrays.copyOf(_tableMemoryMap[rowIndex], columnCount); 47 | } 48 | 49 | public int getRowCount() { 50 | return rowCount; 51 | } 52 | 53 | public void setRowCount(int rowCount) { 54 | this.rowCount = rowCount; 55 | } 56 | 57 | public int getColumnCount() { 58 | return columnCount; 59 | } 60 | 61 | public void setColumnCount(int columnCount) { 62 | this.columnCount = columnCount; 63 | } 64 | 65 | public IWordTableMemoryMappingVisitor getVisitor() { 66 | return visitor; 67 | } 68 | 69 | public void setVisitor(IWordTableMemoryMappingVisitor visitor) { 70 | this.visitor = visitor; 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/strategy/DefaultTableStrategy.java: -------------------------------------------------------------------------------- 1 | package 
com.suncht.wordread.parser.strategy; 2 | 3 | import com.suncht.wordread.model.WordTable; 4 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping; 5 | 6 | public class DefaultTableStrategy implements ITableTransferStrategy { 7 | 8 | public WordTable transfer(WordTableMemoryMapping tableMemoryMapping) { 9 | return null; 10 | } 11 | 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/strategy/ITableTransferStrategy.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.parser.strategy; 2 | 3 | import com.suncht.wordread.model.WordTable; 4 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping; 5 | 6 | /** 7 | * 表格转换策略 8 | * 将表格内存映射转换成实际的表格模式 9 | * @author changtan.sun 10 | * 11 | */ 12 | public interface ITableTransferStrategy { 13 | public WordTable transfer(WordTableMemoryMapping tableMemoryMapping); 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/strategy/LogicalTableStrategy.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.parser.strategy; 2 | 3 | import com.suncht.wordread.model.TTCPr; 4 | import com.suncht.wordread.model.WordTable; 5 | import com.suncht.wordread.model.WordTableCell; 6 | import com.suncht.wordread.model.WordTableComplexCell; 7 | import com.suncht.wordread.model.WordTableRow; 8 | import com.suncht.wordread.model.WordTableSimpleCell; 9 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping; 10 | 11 | public class LogicalTableStrategy implements ITableTransferStrategy { 12 | 13 | private WordTableMemoryMapping tableMemoryMapping; 14 | 15 | // /** 16 | // * 获取在docx中实际行数(word中表格都处理成二维表格,忽略合并) 17 | // * @return 18 | // */ 19 | // private int getRealMaxRowCount() { 20 | // return tableMemoryMap.length; 21 | // } 22 | 
23 | /** 24 | * 获取行数(在表格映射对象中的行数) 25 | * @return 26 | */ 27 | private int getRowCount() { 28 | int rowCount = 0; 29 | for (int i = 0; i < tableMemoryMapping.getRowCount(); i++) { 30 | if (tableMemoryMapping.getTTCPr(i, 0).isValid()) { 31 | rowCount++; 32 | } 33 | } 34 | return rowCount; 35 | } 36 | 37 | public WordTable transfer(WordTableMemoryMapping tableMemoryMapping) { 38 | this.tableMemoryMapping = tableMemoryMapping; 39 | 40 | WordTable wordTable = new WordTable(); 41 | int rowCount = getRowCount(); 42 | WordTableRow tableRow = null; 43 | for (int i = 0; i < rowCount; i++) { 44 | tableRow = this.getTableRow(i); 45 | wordTable.getRows().add(tableRow); 46 | } 47 | return wordTable; 48 | } 49 | 50 | /** 51 | * 获取行对象 52 | * @param currentRowIndex 53 | * @return 54 | */ 55 | private WordTableRow getTableRow(int currentRowIndex) { 56 | TTCPr[] _rows = null; 57 | TTCPr _first_column_in_row = null; 58 | int rowCount = 0; 59 | for (int i = 0; i < tableMemoryMapping.getRowCount(); i++) { 60 | if (tableMemoryMapping.getTTCPr(i, 0).isValid()) { 61 | if (currentRowIndex == rowCount++) { 62 | _rows = tableMemoryMapping.getRow(i); 63 | _first_column_in_row = tableMemoryMapping.getTTCPr(i, 0); 64 | break; 65 | } 66 | } 67 | } 68 | 69 | if (_rows == null) { 70 | return null; 71 | } 72 | 73 | int _logic_row_index = _first_column_in_row.getLogicRowIndex(); 74 | //int _end_row_index = _first_column_in_row.getRowSpan() + _first_column_in_row.getRealRowIndex() - 1; 75 | int _row_span = _first_column_in_row.getRowSpan(); 76 | int _logic_column_count = _rows.length; 77 | 78 | WordTableRow pwtr = new WordTableRow(); 79 | 80 | WordTableCell cell = null; 81 | for (int i = 0; i < _logic_column_count; i++) { 82 | cell = getCellInRow(_logic_row_index, _row_span, i, currentRowIndex); 83 | if (cell == null) { 84 | continue; 85 | } 86 | pwtr.getCells().add(cell); 87 | } 88 | 89 | return pwtr; 90 | } 91 | 92 | /** 93 | * 获取一行中的单元格集合,将实际单元格转换成逻辑单元格 94 | * @param logicRowIndex 逻辑行号 95 | * 
@param endRealRowIndex 逻辑行号 96 | * @param logicColumnIndex word中的实际列 97 | * @param currentRowIndex 在表格映射对象中的行号 98 | * @return 99 | */ 100 | private WordTableCell getCellInRow(int logicRowIndex, int logicRowSpan, int logicColumnIndex, int currentRowIndex) { 101 | WordTableCell cell = null; 102 | TTCPr currentRealCell = tableMemoryMapping.getTTCPr(logicRowIndex, logicColumnIndex); 103 | 104 | boolean needHandleRowSpan = logicRowSpan > 0 || currentRealCell.isDoneRowSpan(); //是否需要处理跨行的情况 105 | boolean needHandleColSpan = currentRealCell.isDoneColSpan();//是否需要处理跨列的情况 106 | 107 | boolean satisfyConditionOfComplexCell = false; //是否满足复杂单元格的条件 108 | 109 | satisfyConditionOfComplexCell = needHandleRowSpan && needHandleColSpan; 110 | if (!satisfyConditionOfComplexCell) { 111 | satisfyConditionOfComplexCell = currentRealCell.getRowSpan() < logicRowSpan; 112 | } 113 | 114 | if (currentRealCell.isValid()) { //有效单元格 115 | if (satisfyConditionOfComplexCell) {//跨行又跨列 116 | WordTableComplexCell pwtc = new WordTableComplexCell(); //属于复杂单元格 117 | 118 | WordTable innerTable = new WordTable(); 119 | int _realColSpan = currentRealCell.getColSpan(); 120 | for (int i = 0; i < logicRowSpan;) { 121 | WordTableRow innerRow = new WordTableRow(); 122 | int _rowSpan = 1; 123 | for (int j = 0; j < _realColSpan; j++) { 124 | TTCPr _ttcpr = tableMemoryMapping.getTTCPr(logicRowIndex + i, logicColumnIndex + j); 125 | if (_ttcpr.isValid()) { 126 | WordTableCell _cell = new WordTableSimpleCell(); 127 | _cell.setRowSpan(_ttcpr.getRowSpan()); 128 | _cell.setColumnSpan(_ttcpr.getColSpan()); 129 | _cell.setContent(_ttcpr.getContent().copy()); 130 | innerRow.getCells().add(_cell); 131 | 132 | if (_ttcpr.getRowSpan() > _rowSpan) { 133 | _rowSpan = _ttcpr.getRowSpan(); 134 | } 135 | } 136 | } 137 | innerTable.getRows().add(innerRow); 138 | 139 | i = i + _rowSpan; 140 | } 141 | pwtc.setInnerTable(innerTable); 142 | cell = pwtc; 143 | } else { 144 | //跨列不跨行,不需要处理 145 | //跨行不跨列,不需要处理 146 | WordTableSimpleCell 
pwtc = new WordTableSimpleCell(); //属于简单单元格 147 | pwtc.setRowSpan(currentRealCell.getRowSpan()); 148 | pwtc.setColumnSpan(currentRealCell.getColSpan()); 149 | pwtc.setContent(currentRealCell.getContent().copy()); 150 | 151 | cell = pwtc; 152 | } 153 | } 154 | 155 | return cell; 156 | 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/wordh/SingleWordHTableParser.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.parser.wordh; 2 | 3 | import java.math.BigInteger; 4 | 5 | import org.apache.poi.hwpf.usermodel.Table; 6 | import org.apache.poi.hwpf.usermodel.TableCell; 7 | import org.apache.poi.hwpf.usermodel.TableRow; 8 | 9 | import com.google.common.base.Preconditions; 10 | import com.suncht.wordread.model.TTCPr; 11 | import com.suncht.wordread.model.TTCPr.TTCPrEnum; 12 | import com.suncht.wordread.model.WordTable; 13 | import com.suncht.wordread.model.WordTableCellContents; 14 | import com.suncht.wordread.parser.ISingleWordTableParser; 15 | import com.suncht.wordread.parser.WordTableTransferContext; 16 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping; 17 | 18 | /** 19 | * Doc文档解析 20 | * 21 | *

22 | * 标题: SingleWordHTableParser 23 | *

24 | *

25 | * 描述: 对POI API进行调试发现,解析Doc单元格的方式与Docx方式不同:没有列合并,只有行合并,有列宽 26 | *

27 | * 28 | * @author changtan.sun 29 | * @date 2018年4月27日 30 | */ 31 | public class SingleWordHTableParser implements ISingleWordTableParser { 32 | private Table hwpfTable; 33 | 34 | private WordTableMemoryMapping _tableMemoryMapping; 35 | private WordTableTransferContext context; 36 | 37 | /** 38 | * 最大列数 39 | */ 40 | private int realMaxColumnCount = 0; 41 | /** 42 | * 最大列数所占的行Index 43 | */ 44 | private int rowIndexOfMaxColumnCount = 0; 45 | 46 | public SingleWordHTableParser(Table hwpfTable, WordTableTransferContext context) { 47 | this.hwpfTable = hwpfTable; 48 | this.context = context; 49 | } 50 | 51 | public WordTable parse() { 52 | int realMaxRowCount = this.hwpfTable.numRows(); 53 | 54 | realMaxColumnCount = 0; 55 | for (int i = 0; i < realMaxRowCount; i++) { 56 | TableRow tr = this.hwpfTable.getRow(i); 57 | int numCell = tr.numCells(); 58 | if (numCell > realMaxColumnCount) { 59 | realMaxColumnCount = numCell; 60 | rowIndexOfMaxColumnCount = i; 61 | } 62 | } 63 | 64 | _tableMemoryMapping = new WordTableMemoryMapping(realMaxRowCount, realMaxColumnCount); 65 | 66 | for (int i = 0; i < realMaxRowCount; i++) { 67 | TableRow preRow = i - 1 >= 0 ? 
this.hwpfTable.getRow(i - 1) : null; // 上一行 68 | parseRow(this.hwpfTable.getRow(i), i, preRow); 69 | } 70 | 71 | return context.transfer(_tableMemoryMapping); 72 | } 73 | 74 | private void parseRow(TableRow row, int realRowIndex, TableRow preRow) { 75 | int numCells = row.numCells(); 76 | //boolean existColMergedCells = realMaxColumnCount > numCells; // 该行中是否存在被列合并,如果存在,做逻辑列合并处理 77 | int logicColumnIndex = 0; 78 | int logicRowIndex = realRowIndex; //逻辑行号和实际行号一样的 79 | for (int realColumnIndex = 0; realColumnIndex < numCells; realColumnIndex++) { 80 | TableCell cell = row.getCell(realColumnIndex);// 取得单元格 81 | int skipColumn = parseCell(row, cell, realRowIndex, realColumnIndex, logicRowIndex, logicColumnIndex); 82 | logicColumnIndex = logicColumnIndex + skipColumn + 1; 83 | } 84 | } 85 | 86 | /** 87 | * 参考:https://blog.csdn.net/www1056481167/article/details/56835043 88 | * 解析Doc单元格的方式与Docx方式不同:没有列合并概念,只有行合并 89 | * 90 | * @param cell 91 | * @param realRowIndex 92 | * @param realColumnIndex 93 | * @return 94 | */ 95 | private int parseCell(TableRow row, TableCell cell, int realRowIndex, int realColumnIndex, int logicRowIndex, int logicColumnIndex) { 96 | // -----列合并----- 97 | int numOfCellHMerged = computeNumOfCellHMerged(row, cell, realColumnIndex); //就是该单元格合并了多少列 98 | 99 | // -----行合并----- 100 | if (cell.isFirstVerticallyMerged() && cell.isVerticallyMerged()) { // 行合并开始 101 | TTCPr ttc = new TTCPr(); 102 | if(numOfCellHMerged>0) { 103 | ttc.setType(TTCPrEnum.HVM_S); 104 | } else { 105 | ttc.setType(TTCPrEnum.VM_S); 106 | } 107 | ttc.setRealRowIndex(realRowIndex); 108 | ttc.setRealColumnIndex(realColumnIndex); 109 | ttc.setLogicRowIndex(logicRowIndex); 110 | ttc.setLogicColumnIndex(logicColumnIndex); 111 | ttc.setWidth(BigInteger.valueOf(cell.getWidth())); 112 | ttc.setColSpan(numOfCellHMerged); 113 | ttc.setRoot(null); 114 | // ttc.setText(cell.getText()); 115 | ttc.setContent(WordTableCellContents.getCellContent(cell)); 116 | 117 | _tableMemoryMapping.setTTCPr(ttc, 
logicRowIndex, logicColumnIndex); 118 | 119 | //处理其他被合并的列 120 | if(numOfCellHMerged>0) { 121 | for (int i = 0; i < numOfCellHMerged; i++) { 122 | TTCPr ttc_merged = new TTCPr(); 123 | ttc_merged.setType(TTCPrEnum.HM); 124 | ttc_merged.setRealRowIndex(realRowIndex); 125 | ttc_merged.setRealColumnIndex(realColumnIndex); 126 | ttc_merged.setLogicRowIndex(logicRowIndex); 127 | ttc_merged.setLogicColumnIndex(logicColumnIndex + i + 1); 128 | //ttc_merged.setWidth(BigInteger.valueOf(cell.getWidth())); 129 | //ttc_merged.setColSpan(numOfCellHMerged); 130 | ttc_merged.setRoot(ttc); 131 | 132 | _tableMemoryMapping.setTTCPr(ttc_merged, logicRowIndex, ttc_merged.getLogicColumnIndex()); 133 | } 134 | } 135 | } else if (!cell.isFirstVerticallyMerged() && cell.isVerticallyMerged()) { // 行被合并 136 | int _start = logicRowIndex, _end = 0; 137 | TTCPr root = null; 138 | for (int i = logicRowIndex - 1; i >= 0; i--) { 139 | TTCPr ttcpr = _tableMemoryMapping.getTTCPr(i, logicColumnIndex); 140 | if (ttcpr != null && (ttcpr.getType() == TTCPrEnum.VM_S || ttcpr.getType() == TTCPrEnum.HVM_S)) { 141 | _end = i; 142 | root = ttcpr; 143 | break; 144 | } else if (ttcpr != null && ttcpr.getRoot() != null) { 145 | _end = i; 146 | root = ttcpr.getRoot(); 147 | break; 148 | } 149 | } 150 | 151 | Preconditions.checkNotNull(root, "父单元格不能为空"); 152 | 153 | TTCPr ttc = new TTCPr(); 154 | ttc.setType(TTCPrEnum.VM); 155 | ttc.setRealRowIndex(realRowIndex); 156 | ttc.setRealColumnIndex(realColumnIndex); 157 | ttc.setLogicRowIndex(logicRowIndex); 158 | ttc.setLogicColumnIndex(logicColumnIndex); 159 | ttc.setWidth(BigInteger.valueOf(cell.getWidth())); 160 | ttc.setRoot(root); 161 | root.setRowSpan(_start - _end + 1); 162 | 163 | _tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex); 164 | } else { // 没有行合并 165 | TTCPr ttc = new TTCPr(); 166 | if(numOfCellHMerged>0) { 167 | ttc.setType(TTCPrEnum.HM_S); 168 | } else { 169 | ttc.setType(TTCPrEnum.NONE); 170 | } 171 | 
ttc.setRealRowIndex(realRowIndex); 172 | ttc.setRealColumnIndex(realColumnIndex); 173 | ttc.setLogicRowIndex(logicRowIndex); 174 | ttc.setLogicColumnIndex(logicColumnIndex); 175 | ttc.setWidth(BigInteger.valueOf(cell.getWidth())); 176 | ttc.setColSpan(numOfCellHMerged); 177 | ttc.setRoot(null); 178 | // ttc.setText(cell.getText()); 179 | ttc.setContent(WordTableCellContents.getCellContent(cell)); 180 | 181 | _tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex); 182 | 183 | //处理其他被合并的列 184 | if(numOfCellHMerged>0) { 185 | for (int i = 0; i < numOfCellHMerged; i++) { 186 | TTCPr ttc_merged = new TTCPr(); 187 | ttc_merged.setType(TTCPrEnum.HM); 188 | ttc_merged.setRealRowIndex(realRowIndex); 189 | ttc_merged.setRealColumnIndex(realColumnIndex); 190 | ttc_merged.setLogicRowIndex(logicRowIndex); 191 | ttc_merged.setLogicColumnIndex(logicColumnIndex + i + 1); 192 | //ttc_merged.setWidth(BigInteger.valueOf(cell.getWidth())); 193 | //ttc_merged.setColSpan(numOfCellHMerged); 194 | ttc_merged.setRoot(ttc); 195 | 196 | _tableMemoryMapping.setTTCPr(ttc_merged, logicRowIndex, ttc_merged.getLogicColumnIndex()); 197 | } 198 | } 199 | } 200 | 201 | return numOfCellHMerged; 202 | } 203 | 204 | /** 205 | * 计算合并了多少个单元格 206 | * 表格中其他行根据标准行进行列合并,属于标准表格 标准表格,比如 207 | * ——————————————— 208 | * | | | | 209 | * ——————————————— 210 | * | | | | | ---->该行为标准行 211 | * ——————————————— 212 | * | | | 213 | * ——————————————— 214 | * | | | 215 | * ——————————————— 216 | * 217 | * @param cell 218 | * @param realRowIndex 219 | * @param realColumnIndex 220 | * @return 221 | */ 222 | private int computeNumOfCellHMerged(TableRow currentRow, TableCell currentCell, int realColumnIndex) { 223 | TableRow standardRow = this.hwpfTable.getRow(this.rowIndexOfMaxColumnCount); 224 | 225 | if (currentRow.numCells() >= standardRow.numCells()) { 226 | return 0; 227 | } 228 | 229 | long totalWidth = 0; 230 | for (int i = 0; i <= realColumnIndex; i++) { 231 | totalWidth += 
currentRow.getCell(i).getWidth(); 232 | } 233 | 234 | int tempRowIndex = -1; 235 | long tempWidth = 0; 236 | for (int i = 0, size = standardRow.numCells(); i < size; i++) { 237 | tempWidth += standardRow.getCell(i).getWidth(); 238 | if (this.widthEqual(tempWidth, totalWidth)) { 239 | tempRowIndex = i; 240 | break; 241 | } 242 | } 243 | 244 | int currentCellWidth = currentCell.getWidth(); 245 | tempWidth = 0; 246 | int columnMerged = 0; 247 | for (int i = tempRowIndex; i >= 0; i--) { 248 | tempWidth += standardRow.getCell(i).getWidth(); 249 | if(this.widthEqual(tempWidth, currentCellWidth)) { 250 | break; 251 | } else { 252 | columnMerged++; 253 | } 254 | } 255 | 256 | return columnMerged; 257 | } 258 | 259 | private boolean widthEqual(long tempWidth, long totalWidth) { 260 | return tempWidth <= (totalWidth + 10) && tempWidth >= (totalWidth - 10); 261 | } 262 | 263 | } 264 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/wordh/WordHTableParser.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.parser.wordh; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.util.List; 6 | 7 | import org.apache.poi.hwpf.HWPFDocument; 8 | import org.apache.poi.hwpf.usermodel.Range; 9 | import org.apache.poi.hwpf.usermodel.Table; 10 | import org.apache.poi.hwpf.usermodel.TableIterator; 11 | import org.apache.poi.poifs.filesystem.POIFSFileSystem; 12 | 13 | import com.google.common.collect.Lists; 14 | import com.suncht.wordread.model.WordTable; 15 | import com.suncht.wordread.parser.ISingleWordTableParser; 16 | import com.suncht.wordread.parser.IWordTableParser; 17 | import com.suncht.wordread.parser.WordTableTransferContext; 18 | 19 | public class WordHTableParser implements IWordTableParser { 20 | private WordTableTransferContext context; 21 | 22 | public WordHTableParser(WordTableTransferContext context) { 23 | 
this.context = context; 24 | } 25 | 26 | public List parse(InputStream inputStream) { 27 | 28 | List wordTables = Lists.newArrayList(); 29 | 30 | try { 31 | POIFSFileSystem pfs = new POIFSFileSystem(inputStream); // 载入文档 32 | HWPFDocument hwpf = new HWPFDocument(pfs); 33 | 34 | Range range = hwpf.getRange();//得到文档的读取范围 35 | TableIterator it = new TableIterator(range); 36 | //迭代文档中的表格 37 | while (it.hasNext()) { 38 | Table table = (Table) it.next(); 39 | ISingleWordTableParser parser = new SingleWordHTableParser(table, context); 40 | WordTable wordTable = parser.parse(); 41 | wordTables.add(wordTable); 42 | } 43 | } catch (Exception e) { 44 | e.printStackTrace(); 45 | } finally { 46 | if (inputStream != null) { 47 | try { 48 | inputStream.close(); 49 | } catch (IOException e) { 50 | e.printStackTrace(); 51 | } 52 | } 53 | } 54 | 55 | return wordTables; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/wordx/SingleWordXTableParser.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.parser.wordx; 2 | 3 | import java.util.List; 4 | 5 | import org.apache.poi.xwpf.usermodel.XWPFTable; 6 | import org.apache.poi.xwpf.usermodel.XWPFTableCell; 7 | import org.apache.poi.xwpf.usermodel.XWPFTableRow; 8 | import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcPr; 9 | 10 | import com.google.common.base.Preconditions; 11 | import com.suncht.wordread.model.TTCPr; 12 | import com.suncht.wordread.model.TTCPr.TTCPrEnum; 13 | import com.suncht.wordread.model.WordTable; 14 | import com.suncht.wordread.model.WordTableCellContents; 15 | import com.suncht.wordread.parser.ISingleWordTableParser; 16 | import com.suncht.wordread.parser.WordTableTransferContext; 17 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping; 18 | 19 | /** 20 | * 21 | * @author changtan.sun 22 | * 23 | */ 24 | 25 | /** 26 | * 
解析Docx中一张复杂表格内容 27 | * Docx不仅有列合并,而且有行合并,没有列宽 28 | *

标题: SingleWordXTableParser

29 | *

描述:

30 | * @author changtan.sun 31 | * @date 2018年4月27日 32 | */
public class SingleWordXTableParser implements ISingleWordTableParser {
	private XWPFTable xwpfTable;

	/** Grid (logical row x logical column) that every physical cell of the table is mapped into. */
	private WordTableMemoryMapping _tableMemoryMapping;
	private WordTableTransferContext context;

	public SingleWordXTableParser(XWPFTable xwpfTable, WordTableTransferContext context) {
		this.xwpfTable = xwpfTable;
		this.context = context;
	}

	/**
	 * Parses the Docx table: maps every cell into the table memory mapping and then hands the
	 * mapping to the transfer context.
	 *
	 * @return the result of {@code context.transfer(...)} for the filled mapping
	 */
	public WordTable parse() {
		List<XWPFTableRow> rows = xwpfTable.getRows();
		int realMaxRowCount = rows.size();

		// The logical column count is the widest row once every gridSpan is expanded.
		int realMaxColumnCount = 0;
		for (XWPFTableRow row : rows) {
			int columnCountOnRow = 0;
			for (XWPFTableCell cell : row.getTableCells()) {
				columnCountOnRow += gridSpanOf(cell.getCTTc().getTcPr());
			}
			realMaxColumnCount = Math.max(realMaxColumnCount, columnCountOnRow);
		}

		_tableMemoryMapping = new WordTableMemoryMapping(realMaxRowCount, realMaxColumnCount);
		_tableMemoryMapping.setVisitor(context.getVisitor());
		for (int i = 0; i < realMaxRowCount; i++) {
			parseRow(rows.get(i), i);
		}

		return context.transfer(_tableMemoryMapping);
	}

	/** Drops the references to the mapping and to the POI table. */
	public void dispose() {
		_tableMemoryMapping = null;
		xwpfTable = null;
	}

	/**
	 * Parses one table row.
	 *
	 * @param row the POI row
	 * @param realRowIndex index of the row inside the table
	 */
	private void parseRow(XWPFTableRow row, int realRowIndex) {
		List<XWPFTableCell> cells = row.getTableCells();

		int logicColumnIndex = 0;
		int logicRowIndex = realRowIndex; // the logical row number equals the real row number
		for (int realColumnIndex = 0; realColumnIndex < cells.size(); realColumnIndex++) {
			XWPFTableCell cell = cells.get(realColumnIndex);
			// A horizontally merged cell occupies several logical columns; skip the ones it covers.
			int skipColumn = parseCell(cell, realRowIndex, realColumnIndex, logicRowIndex, logicColumnIndex);
			logicColumnIndex += skipColumn + 1;
		}
	}

	/**
	 * Maps one physical cell (and, for a horizontal merge, the logical columns it covers) into
	 * the memory mapping.
	 *
	 * @param cell the POI cell
	 * @param realRowIndex physical row index
	 * @param realColumnIndex physical index of the cell inside its row
	 * @param logicRowIndex logical row index
	 * @param logicColumnIndex logical column index at which the cell starts
	 * @return the number of additional logical columns covered by the cell (gridSpan - 1), 0 when
	 *         the cell has no gridSpan
	 */
	private int parseCell(XWPFTableCell cell, int realRowIndex, int realColumnIndex, int logicRowIndex, int logicColumnIndex) {
		// tcPr is optional in the document, so every access below is null-guarded.
		CTTcPr tt = cell.getCTTc().getTcPr();

		//------- vertical merge -------
		if (tt != null && tt.getVMerge() != null) {
			if (tt.getVMerge().getVal() != null && "restart".equals(tt.getVMerge().getVal().toString())) {
				// First cell of a vertical merge (the merge root).
				TTCPr ttc = newMappedCell(TTCPrEnum.VM_S, tt, realRowIndex, realColumnIndex, logicRowIndex, logicColumnIndex);
				ttc.setRoot(null);
				ttc.setContent(WordTableCellContents.getCellContent(cell));

				_tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
			} else {
				// Continuation cell: walk up the SAME logical column to find the merge root.
				// (The lookup previously passed the row index as the column.)
				TTCPr root = null;
				int rootRowIndex = -1;
				for (int i = logicRowIndex - 1; i >= 0; i--) {
					TTCPr above = _tableMemoryMapping.getTTCPr(i, logicColumnIndex);
					if (above == null) {
						continue;
					}
					if (above.getType() == TTCPrEnum.VM_S || above.getType() == TTCPrEnum.HVM_S) {
						root = above;
						rootRowIndex = i;
						break;
					}
					if (above.getRoot() != null) {
						root = above.getRoot();
						// The span must be measured from the root's own row, not from this
						// intermediate row; otherwise merges of 3+ rows report a rowSpan of 2.
						rootRowIndex = findRowOf(root, i, logicColumnIndex);
						break;
					}
				}

				Preconditions.checkNotNull(root, "父单元格不能为空");

				TTCPr ttc = newMappedCell(TTCPrEnum.VM, tt, realRowIndex, realColumnIndex, logicRowIndex, logicColumnIndex);
				ttc.setRoot(root);
				root.setRowSpan(logicRowIndex - rootRowIndex + 1);
				_tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
			}
		} else {
			// Cell that takes no part in a vertical merge.
			TTCPr currentCell = _tableMemoryMapping.getTTCPr(logicRowIndex, logicColumnIndex);
			// A slot already marked HM is covered by a horizontal merge and is left untouched.
			if (currentCell == null || currentCell.getType() != TTCPrEnum.HM) {
				currentCell = newMappedCell(TTCPrEnum.NONE, tt, realRowIndex, realColumnIndex, logicRowIndex, logicColumnIndex);
				currentCell.setContent(WordTableCellContents.getCellContent(cell));
				currentCell.setRoot(null);
				// Attach to the cell directly above when that cell reports isDoneColSpan().
				if (logicRowIndex > 0) {
					TTCPr parent = _tableMemoryMapping.getTTCPr(logicRowIndex - 1, logicColumnIndex);
					// The row above may have no cell in this column.
					if (parent != null && parent.isDoneColSpan()) {
						currentCell.setRoot(parent);
					}
				}

				_tableMemoryMapping.setTTCPr(currentCell, logicRowIndex, logicColumnIndex);
			}
		}

		//------- horizontal merge -------
		int skipColumn = 0;
		if (tt != null && tt.getGridSpan() != null) {
			int colSpan = tt.getGridSpan().getVal().intValue();
			TTCPr root = _tableMemoryMapping.getTTCPr(logicRowIndex, logicColumnIndex);
			root.setColSpan(colSpan);
			// NOTE(review): any type other than VM_S (including a VM continuation cell) becomes
			// HM_S here, as before - confirm that this is intended for VM cells.
			if (root.getType() == TTCPrEnum.VM_S) {
				root.setType(TTCPrEnum.HVM_S);
			} else {
				root.setType(TTCPrEnum.HM_S);
			}

			// Initialise the logical columns covered by the merge. They are addressed by LOGICAL
			// column; using the real column index overwrote earlier cells whenever a preceding
			// cell in the row also had a gridSpan.
			for (int i = 1; i < colSpan; i++) {
				int coveredColumnIndex = logicColumnIndex + i;
				TTCPr covered = _tableMemoryMapping.getTTCPr(logicRowIndex, coveredColumnIndex);
				if (covered == null) {
					covered = new TTCPr();
					applyWidth(covered, tt);
				}
				covered.setRealRowIndex(realRowIndex);
				covered.setRealColumnIndex(realColumnIndex);
				covered.setLogicRowIndex(logicRowIndex);
				covered.setLogicColumnIndex(coveredColumnIndex);
				covered.setType(TTCPrEnum.HM);
				covered.setRoot(root);

				_tableMemoryMapping.setTTCPr(covered, logicRowIndex, coveredColumnIndex);
			}

			skipColumn = colSpan - 1;
		}

		return skipColumn;
	}

	/** Returns the gridSpan declared in {@code tcPr}, or 1 when {@code tcPr} or its gridSpan is absent. */
	private static int gridSpanOf(CTTcPr tcPr) {
		if (tcPr == null || tcPr.getGridSpan() == null) {
			return 1;
		}
		return tcPr.getGridSpan().getVal().intValue();
	}

	/** Copies the cell width from {@code tcPr} to {@code target} when the document declares one. */
	private static void applyWidth(TTCPr target, CTTcPr tcPr) {
		if (tcPr != null && tcPr.getTcW() != null) {
			target.setWidth(tcPr.getTcW().getW());
		}
	}

	/** Creates a mapping cell of the given type with its real/logical position and width filled in. */
	private static TTCPr newMappedCell(TTCPrEnum type, CTTcPr tcPr, int realRowIndex, int realColumnIndex, int logicRowIndex, int logicColumnIndex) {
		TTCPr mapped = new TTCPr();
		mapped.setType(type);
		mapped.setRealRowIndex(realRowIndex);
		mapped.setRealColumnIndex(realColumnIndex);
		mapped.setLogicRowIndex(logicRowIndex);
		mapped.setLogicColumnIndex(logicColumnIndex);
		applyWidth(mapped, tcPr);
		return mapped;
	}

	/**
	 * Finds the row above {@code belowRowIndex} that holds {@code root} in the given logical
	 * column; falls back to {@code belowRowIndex} when the root is not stored in that column.
	 */
	private int findRowOf(TTCPr root, int belowRowIndex, int logicColumnIndex) {
		for (int r = belowRowIndex - 1; r >= 0; r--) {
			if (_tableMemoryMapping.getTTCPr(r, logicColumnIndex) == root) {
				return r;
			}
		}
		return belowRowIndex;
	}
}
248 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/parser/wordx/WordXTableParser.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.parser.wordx; 2 | 3 | import java.io.InputStream; 4 | import java.util.List; 5 | 6 | import org.apache.commons.io.IOUtils; 7 | import org.apache.poi.xwpf.usermodel.XWPFDocument; 8 | import org.apache.poi.xwpf.usermodel.XWPFTable; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import com.google.common.collect.Lists; 13 | import com.suncht.wordread.model.WordTable; 14 | import com.suncht.wordread.parser.ISingleWordTableParser; 15 | import 
com.suncht.wordread.parser.IWordTableParser; 16 | import com.suncht.wordread.parser.WordTableTransferContext; 17 |
/**
 * Parser for the (possibly complex) tables of a Docx document.
 * @author changtan.sun
 *
 */
public class WordXTableParser implements IWordTableParser {
	private final static Logger logger = LoggerFactory.getLogger(WordXTableParser.class);

	private WordTableTransferContext context;

	public WordXTableParser(WordTableTransferContext context) {
		this.context = context;
	}

	/**
	 * Loads the document from {@code inputStream} and parses each of its tables with a
	 * {@link SingleWordXTableParser}.
	 *
	 * Any exception is logged and not rethrown; the tables parsed up to that point are returned.
	 * The stream is always closed quietly before returning.
	 *
	 * @param inputStream stream holding the Docx document
	 * @return the parsed tables, in document order
	 */
	public List<WordTable> parse(InputStream inputStream) {
		List<WordTable> parsedTables = Lists.newArrayList();

		try {
			// Load the document, then walk over every table it contains.
			XWPFDocument document = new XWPFDocument(inputStream);
			for (XWPFTable docTable : document.getTables()) {
				ISingleWordTableParser tableParser = new SingleWordXTableParser(docTable, this.context);
				parsedTables.add(tableParser.parse());
			}
		} catch (Exception ex) {
			logger.error(ex.getMessage(), ex);
		} finally {
			IOUtils.closeQuietly(inputStream);
		}

		return parsedTables;
	}
}
54 | -------------------------------------------------------------------------------- /src/main/java/com/suncht/wordread/utils/MathmlUtils.java: -------------------------------------------------------------------------------- 1 | package com.suncht.wordread.utils; 2 | 3 | import java.io.InputStream; 4 | import java.io.StringReader; 5 | import java.io.StringWriter; 6 | 7 | import javax.xml.transform.Result; 8 | import javax.xml.transform.Source; 9 | import javax.xml.transform.Transformer; 10 | import javax.xml.transform.TransformerException; 11 | import javax.xml.transform.TransformerFactory; 12 | import javax.xml.transform.URIResolver; 13 | import javax.xml.transform.stream.StreamResult; 14 | import javax.xml.transform.stream.StreamSource; 15 | 16 | public class MathmlUtils { 17 | /** 18 | *

Description: XSL converter

19 | */ 20 | public static String xslConvert(String s, String xslpath, URIResolver uriResolver) { 21 | TransformerFactory tFac = TransformerFactory.newInstance(); 22 | if (uriResolver != null) 23 | tFac.setURIResolver(uriResolver); 24 | StreamSource xslSource = new StreamSource(MathmlUtils.class.getResourceAsStream(xslpath)); 25 | StringWriter writer = new StringWriter(); 26 | try { 27 | Transformer t = tFac.newTransformer(xslSource); 28 | Source source = new StreamSource(new StringReader(s)); 29 | Result result = new StreamResult(writer); 30 | t.transform(source, result); 31 | } catch (TransformerException e) { 32 | System.out.println(e.getMessage()); 33 | } 34 | return writer.getBuffer().toString(); 35 | } 36 | 37 | /** 38 | *

Description: convert MathML to LaTeX

39 | * @param mml 40 | * @return 41 | */ 42 | public static String convertMML2Latex(String mml) { 43 | mml = mml.substring(mml.indexOf("?>") + 2, mml.length()); //去掉xml的头节点 44 | URIResolver r = new URIResolver() { //设置xls依赖文件的路径 45 | @Override 46 | public Source resolve(String href, String base) throws TransformerException { 47 | InputStream inputStream = MathmlUtils.class.getResourceAsStream("/conventer/mml2tex/" + href); 48 | return new StreamSource(inputStream); 49 | } 50 | }; 51 | String latex = xslConvert(mml, "/conventer/mml2tex/mmltex.xsl", r); 52 | if (latex != null && latex.length() > 1) { 53 | latex = latex.substring(1, latex.length() - 1); 54 | } 55 | return latex; 56 | } 57 | 58 | /** 59 | *

Description: convert Office MathML (OMML) to MathML

60 | * @param xml 61 | * @return 62 | */ 63 | public static String convertOMML2MML(String xml) { 64 | String result = xslConvert(xml, "/conventer/OMML2MML.XSL", null); 65 | return result; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/resources/1.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/1.doc -------------------------------------------------------------------------------- /src/main/resources/FMEA信息导入-客户实例.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/FMEA信息导入-客户实例.doc -------------------------------------------------------------------------------- /src/main/resources/FMEA信息导入-客户实例.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/FMEA信息导入-客户实例.docx -------------------------------------------------------------------------------- /src/main/resources/conventer/mml2tex/README: -------------------------------------------------------------------------------- 1 | README for the XSLT MathML Library 2.1.2 2 | 3 | XSLT MathML Library is a set of XSLT stylesheets to transform 4 | MathML 2.0 to LaTeX. 5 | 6 | For more information, see 7 | http://www.raleigh.ru/MathML/mmltex/index.php?lang=en 8 | 9 | Manifest 10 | -------- 11 | 12 | README this file 13 | mmltex.xsl 14 | tokens.xsl 15 | glayout.xsl 16 | scripts.xsl 17 | tables.xsl 18 | entities.xsl 19 | cmarkup.xsl 20 | 21 | Use 22 | --- 23 | 24 | There are two ways of using the library: 25 | 26 | * Use a local copy of the library. 27 | 28 | 1. Download the distribution (see below). 
29 | 30 | 2. Unpack the distribution, using unzip. 31 | 32 | 3. In your stylesheet import or include either the main 33 | stylesheet, mmltex.xsl, or the stylesheet module you 34 | wish to use, such as tokens.xsl. This example assumes 35 | that the distribution has been extracted into the same 36 | directory as your own stylesheet: 37 | 38 | <xsl:import href="mmltex.xsl"/> 39 | 40 | * Import or include either the main stylesheet, or the 41 | stylesheet module you wish to use, directly from the library 42 | website; http://www.raleigh.ru/MathML/mmltex/. For example: 43 | 44 | <xsl:import href="http://www.raleigh.ru/MathML/mmltex/tokens.xsl"/> 45 | 46 | Obtaining The Library 47 | --------------------- 48 | 49 | The XSLT MathML Library is available for download as: 50 | 51 | * Zip file: http://www.raleigh.ru/MathML/mmltex/xsltml_2.1.2.zip 52 | 53 | Copyright 54 | --------- 55 | 56 | Copyright (C) 2001-2003 Vasil Yaroshevich 57 | 58 | Permission is hereby granted, free of charge, to any person 59 | obtaining a copy of this software and associated documentation 60 | files (the ``Software''), to deal in the Software without 61 | restriction, including without limitation the rights to use, 62 | copy, modify, merge, publish, distribute, sublicense, and/or 63 | sell copies of the Software, and to permit persons to whom the 64 | Software is furnished to do so, subject to the following 65 | conditions: 66 | 67 | The above copyright notice and this permission notice shall be 68 | included in all copies or substantial portions of the Software. 69 | 70 | Except as contained in this notice, the names of individuals 71 | credited with contribution to this software shall not be used in 72 | advertising or otherwise to promote the sale, use or other 73 | dealings in this Software without prior written authorization 74 | from the individuals in question.
75 | 76 | Any stylesheet derived from this Software that is publically 77 | distributed will be identified with a different name and the 78 | version strings in any derived Software will be changed so that 79 | no possibility of confusion between the derived package and this 80 | Software will exist. 81 | 82 | Warranty 83 | -------- 84 | 85 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 86 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 87 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 88 | NONINFRINGEMENT. IN NO EVENT SHALL NORMAN WALSH OR ANY OTHER 89 | CONTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 90 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 91 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 92 | OTHER DEALINGS IN THE SOFTWARE. 93 | 94 | Contacting the Author 95 | --------------------- 96 | 97 | These stylesheets are maintained by Vasil Yaroshevich, . 98 | -------------------------------------------------------------------------------- /src/main/resources/conventer/mml2tex/glayout.xsl: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | \genfrac{}{}{ 18 | 19 | 20 | 21 | ex 22 | 23 | 24 | 0ex 25 | 26 | 27 | .05ex 28 | 29 | 30 | 31 | .2ex 32 | 33 | 34 | 35 | 36 | 37 | }{}{ 38 | 39 | 40 | \frac{ 41 | 42 | 43 | 44 | \hfill 45 | 46 | 47 | 48 | \hfill 49 | 50 | }{ 51 | 52 | \hfill 53 | 54 | 55 | 56 | \hfill 57 | 58 | } 59 | 60 | 61 | 62 | \raisebox{1ex}{$ 63 | 64 | $}\!\left/ \!\raisebox{-1ex}{$ 65 | 66 | $}\right. 67 | 68 | 69 | 70 | 71 | 72 | 73 | \sqrt[ 74 | 75 | ]{ 76 | 77 | } 78 | 79 | 80 | 81 | exception 25: 82 | \text{exception 25:} 83 | 84 | 85 | 86 | 87 | 88 | \sqrt{ 89 | 90 | } 91 | 92 | 93 | 94 | 95 | 96 | 97 | \left 98 | 99 | 100 | \ 101 | 102 | 103 | \left. 
104 | 105 | 106 | 107 | \left( 108 | 109 | 110 | 111 | 112 | 113 | 114 | , 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | \right 134 | 135 | 136 | \ 137 | 138 | 139 | \right. 140 | 141 | 142 | 143 | \right) 144 | 145 | 146 | 147 | 148 | \phantom{ 149 | 150 | } 151 | 152 | 153 | 154 | 155 | 156 | \overline{ 157 | 158 | \hspace{.2em}|} 159 | 160 | 161 | \sqrt{ 162 | 163 | } 164 | 165 | 166 | \overline{) 167 | 168 | } 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | {\displaystyle 180 | 181 | 182 | { 183 | 184 | \textstyle 185 | \scriptstyle 186 | \scriptscriptstyle 187 | 188 | 189 | 190 | \colorbox[rgb]{ 191 | 192 | 193 | 194 | }{$ 195 | 196 | 197 | \textcolor[rgb]{ 198 | 199 | 200 | 201 | }{ 202 | 203 | 204 | 205 | } 206 | 207 | 208 | $} 209 | 210 | 211 | } 212 | 213 | 214 | } 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | -------------------------------------------------------------------------------- /src/main/resources/conventer/mml2tex/mmltex.xsl: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | $ 26 | 27 | $ 28 | 29 | 30 | 31 | \[ 32 | 33 | \] 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/main/resources/conventer/mml2tex/tables.xsl: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 11 | 12 | 13 | 14 | \multicolumn{ 15 | 16 | }{c}{ 17 | 18 | } 19 | 20 | & 21 | 22 | 23 | 24 | 25 | 26 | 27 | \hfill 28 | 29 | 30 | 31 | \hfill 32 | 33 | 34 | 36 | & 37 | 38 | 39 | 40 | 41 | 42 | 43 | \\ 44 | 45 | 46 | 47 | 48 | \begin{array}{ 49 | 50 | | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | | 85 | 86 | } 87 | 88 | \hline 89 | 90 | 91 | 92 | \\ 
\hline 93 | 94 | \end{array} 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /src/main/resources/conventer/mml2tex/tokens.xsl: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | \textcolor{red}{ 20 | 21 | } 22 | 23 | 24 | 25 | 26 | 27 | \mathrm{ 28 | 29 | } 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | \mathrm{ 41 | 42 | } 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | \left 56 | 57 | 58 | \right 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | \text{ 72 | 73 | } 74 | 75 | 76 | 77 | \phantom{\rule 78 | 79 | [- 80 | 81 | ] 82 | 83 | { 84 | 85 | 0ex 86 | 87 | 88 | }{ 89 | 90 | 0ex 91 | 92 | 93 | }} 94 | 95 | 96 | 97 | 98 | 99 | '' 100 | 101 | 102 | '' 103 | 104 | 105 | 106 | 107 | 108 | \colorbox[rgb]{ 109 | 110 | 111 | 112 | }{$ 113 | 114 | 115 | \textcolor[rgb]{ 116 | 117 | 118 | 119 | }{ 120 | 121 | 122 | 123 | 124 | \mathrm{ 125 | 126 | 127 | \mathbf{ 128 | 129 | 130 | \mathit{ 131 | 132 | 133 | \mathit{ 134 | The value bold-italic for mathvariant is not supported 135 | 136 | 137 | \mathbb{ 138 | 139 | 140 | \mathfrak{ 141 | The value bold-fraktur for mathvariant is not supported 142 | 143 | 144 | \mathcal{ 145 | 146 | 147 | \mathcal{ 148 | The value bold-script for mathvariant is not supported 149 | 150 | 151 | \mathfrak{ 152 | 153 | 154 | \mathsf{ 155 | 156 | 157 | \mathsf{ 158 | The value bold-sans-serif for mathvariant is not supported 159 | 160 | 161 | \mathsf{ 162 | The value sans-serif-italic for mathvariant is not supported 163 | 164 | 165 | \mathsf{ 166 | The value sans-serif-bold-italic for mathvariant is not supported 167 | 168 | 169 | \mathtt{ 170 | 171 | 172 | { 173 | Error 
at mathvariant attribute 174 | 175 | 176 | 177 | 178 | 179 | } 180 | 181 | 182 | } 183 | 184 | 185 | $} 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | , 221 | 222 | 223 | 224 | 225 | 226 | , 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | , 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | , 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 0,1,1 271 | 0,0,0 272 | 0,0,1 273 | 1,0,1 274 | .5,.5,.5 275 | 0,.5,0 276 | 0,1,0 277 | .5,0,0 278 | 0,0,.5 279 | .5,.5,0 280 | .5,0,.5 281 | 1,0,0 282 | .75,.75,.75 283 | 0,.5,.5 284 | 1,1,1 285 | 1,1,0 286 | 287 | Exception at color template 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | Exception at Hex2Decimal template 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | -------------------------------------------------------------------------------- /src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | ERROR 34 | 35 | ACCEPT 36 | 37 | DENY 38 | 39 | 40 | 41 | 42 | 43 | ${log_dir}/error/%d{yyyy-MM-dd}/error-log.log 44 | 45 | 46 | ${maxHistory} 47 | 48 | 49 | 50 | 51 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | WARN 63 | 64 | ACCEPT 65 | 66 | DENY 67 | 68 | 69 | 70 | ${log_dir}/warn/%d{yyyy-MM-dd}/warn-log.log 71 | ${maxHistory} 72 | 73 | 74 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level 
%logger - %msg%n 75 | 76 | 77 | 78 | 79 | 80 | 81 | INFO 82 | ACCEPT 83 | DENY 84 | 85 | 86 | ${log_dir}/info/%d{yyyy-MM-dd}/info-log.log 87 | ${maxHistory} 88 | 89 | 90 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n 91 | 92 | 93 | 94 | 95 | 96 | 97 | DEBUG 98 | ACCEPT 99 | DENY 100 | 101 | 102 | ${log_dir}/debug/%d{yyyy-MM-dd}/debug-log.log 103 | ${maxHistory} 104 | 105 | 106 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n 107 | 108 | 109 | 110 | 111 | 112 | 113 | TRACE 114 | ACCEPT 115 | DENY 116 | 117 | 118 | ${log_dir}/trace/%d{yyyy-MM-dd}/trace-log.log 119 | ${maxHistory} 120 | 121 | 122 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /src/main/resources/故障模式分析表格样例.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/故障模式分析表格样例.docx -------------------------------------------------------------------------------- /src/main/resources/故障模式分析表格样例_处理模型.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/故障模式分析表格样例_处理模型.docx -------------------------------------------------------------------------------- /src/test/java/com/test/Doc2DocxTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import com.suncht.convert.OfficeDocumentConvertServer; 4 | 5 | public class Doc2DocxTest { 6 | 7 | public static void main(String[] args) throws Exception { 8 | String inputFile = "D:\\FMEA信息导入-客户实例.doc"; 9 | String outputFile = "D:\\FMEA信息导入-客户实例.docx"; 10 | //Doc2DocxUtil.doc2Docx(outputFile, inputFile); 
11 | 12 | // Thread.sleep(2000); 13 | String pdfFile = "D:\\FMEA信息导入-客户实例.pdf"; 14 | // OfficePDFConverter.getConverter().convert2PDF(outputFile, pdfFile); 15 | 16 | String OPEN_OFFICE_HOME = "D:\\Program Files\\LibreOffice 5\\"; 17 | // 服务端口 18 | int OPEN_OFFICE_PORT[] = { 8101 }; 19 | try (OfficeDocumentConvertServer server = new OfficeDocumentConvertServer(OPEN_OFFICE_HOME, OPEN_OFFICE_PORT);) { 20 | server.convert(inputFile, outputFile, false); 21 | server.convert(outputFile, pdfFile, true); 22 | } 23 | 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/test/java/com/test/MemoryMappingVisitorTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import com.suncht.wordread.model.TTCPr; 4 | import com.suncht.wordread.parser.mapping.IWordTableMemoryMappingVisitor; 5 |
/**
 * Visitor stub handed to the table parser by the tests in this package; it performs no action.
 */
public class MemoryMappingVisitorTest implements IWordTableMemoryMappingVisitor {

	/**
	 * Does nothing for any cell. The only statement ever written for the cell at real row 0 /
	 * real column 0 is commented out.
	 */
	@Override
	public void visit(final TTCPr cell, int realRowIndex, int realColumnIndex) {
		boolean isFirstCell = realRowIndex == 0 && realColumnIndex == 0;
		if (!isFirstCell) {
			return;
		}
		//cell.getContent()("测试成功");
	}

}
16 | -------------------------------------------------------------------------------- /src/test/java/com/test/MuliHeaderXTableParserTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import java.io.InputStream; 4 | import java.util.List; 5 | 6 | import org.junit.Test; 7 | 8 | import com.suncht.wordread.model.WordTable; 9 | import com.suncht.wordread.parser.WordTableParser; 10 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 11 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy; 12 | 13 | public class MuliHeaderXTableParserTest { 14 | 15 | @Test 16 | public void test01() { 17 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/FMEA信息导入-客户实例.docx");) { 18 |
//InputStream inputStream = new FileInputStream(new File(doc2)); 19 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()).memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX); 20 | for (WordTable wordTable : tables) { 21 | System.out.println(wordTable.format()); 22 | } 23 | } catch(Exception e) { 24 | e.printStackTrace(); 25 | } 26 | 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/test/java/com/test/MultiTextCellTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.util.List; 6 | 7 | import org.junit.Test; 8 | 9 | import com.suncht.wordread.model.WordTable; 10 | import com.suncht.wordread.parser.WordTableParser; 11 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy; 13 | 14 | public class MultiTextCellTest { 15 | @Test 16 | public void testFormulaInCell() throws IOException { 17 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套多文本.docx");) { 18 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()) 19 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX); 20 | for (WordTable wordTable : tables) { 21 | System.out.println(wordTable.format()); 22 | } 23 | } catch(Exception e) { 24 | e.printStackTrace(); 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/test/java/com/test/NestedFormulaTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.util.List; 6 | 7 | import org.junit.Test; 8 | 9 | import 
com.suncht.wordread.model.WordTable; 10 | import com.suncht.wordread.parser.WordTableParser; 11 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy; 13 | 14 | public class NestedFormulaTest { 15 | @Test 16 | public void testFormulaInCell_docx() throws IOException { 17 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套公式.docx");) { 18 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()) 19 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX); 20 | for (WordTable wordTable : tables) { 21 | System.out.println(wordTable.format()); 22 | } 23 | } catch(Exception e) { 24 | e.printStackTrace(); 25 | } 26 | } 27 | 28 | @Test 29 | public void testFormulaInCell_doc() throws IOException { 30 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套公式.doc");) { 31 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()) 32 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOC); 33 | for (WordTable wordTable : tables) { 34 | System.out.println(wordTable.format()); 35 | } 36 | } catch(Exception e) { 37 | e.printStackTrace(); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/com/test/NestedImageCellTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import java.io.InputStream; 4 | import java.util.List; 5 | 6 | import org.junit.Test; 7 | 8 | import com.suncht.wordread.format.DefaultCellFormater; 9 | import com.suncht.wordread.format.DefaultWordTableFormater; 10 | import com.suncht.wordread.format.IWordTableFormater; 11 | import com.suncht.wordread.model.WordTable; 12 | import com.suncht.wordread.output.DefaultWordTableOutputStrategy; 13 | import 
com.suncht.wordread.output.IWordTableOutputStrategy; 14 | import com.suncht.wordread.parser.WordTableParser; 15 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 16 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy; 17 | 18 | /** 19 | * 嵌套图片单元格测试 20 | * @author suncht 21 | * 22 | */ 23 | public class NestedImageCellTest { 24 | @Test 25 | public void test01() { 26 | IWordTableFormater tableFormater = new DefaultWordTableFormater(new DefaultCellFormater()); 27 | IWordTableOutputStrategy outputStrategy = new DefaultWordTableOutputStrategy(); 28 | 29 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套图片02.docx");) { 30 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()).parse(inputStream, WordDocType.DOCX); 31 | 32 | for (WordTable wordTable : tables) { 33 | System.out.println(wordTable.format(tableFormater)); 34 | wordTable.output(outputStrategy); 35 | } 36 | } catch (Exception e) { 37 | e.printStackTrace(); 38 | } 39 | 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/com/test/OfficeConverterTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import org.junit.Test; 4 | 5 | import com.suncht.convert.OfficeDocumentConvertServer; 6 | 7 | public class OfficeConverterTest { 8 | private static String OPEN_OFFICE_HOME = "D:\\Program Files\\LibreOffice 5\\"; 9 | private static int OPEN_OFFICE_PORT[] = { 8101 }; 10 | 11 | @Test 12 | public void txt2docx() { 13 | String inputFile = "D:\\dic.txt"; 14 | String outputFile = "D:\\dic.docx"; 15 | 16 | // 服务端口 17 | try (OfficeDocumentConvertServer server = new OfficeDocumentConvertServer(OPEN_OFFICE_HOME, OPEN_OFFICE_PORT);) { 18 | server.convert(inputFile, outputFile, false); 19 | } catch(Exception e) { 20 | e.printStackTrace(); 21 | } 22 | } 23 | 24 | @Test 25 | public void docx2pdf() { 26 | String 
inputFile = "D:\\故障模式分析表格样例 - 副本.docx"; 27 | String outputFile = "D:\\故障模式分析表格样例 - 副本.pdf"; 28 | 29 | // 服务端口 30 | try (OfficeDocumentConvertServer server = new OfficeDocumentConvertServer(OPEN_OFFICE_HOME, OPEN_OFFICE_PORT);) { 31 | server.convert(inputFile, outputFile, false); 32 | } catch(Exception e) { 33 | e.printStackTrace(); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/test/java/com/test/OleObjectCellTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.util.List; 6 | 7 | import org.junit.Test; 8 | 9 | import com.suncht.wordread.model.WordTable; 10 | import com.suncht.wordread.parser.WordTableParser; 11 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy; 13 | 14 | public class OleObjectCellTest { 15 | // @Test 16 | public void testOleInCell() throws IOException { 17 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套附件01.docx");) { 18 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()) 19 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX); 20 | for (WordTable wordTable : tables) { 21 | System.out.println(wordTable.format()); 22 | } 23 | } catch(Exception e) { 24 | e.printStackTrace(); 25 | } 26 | } 27 | 28 | @Test 29 | public void testEmbedDocxInCell() throws IOException { 30 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套附件02.docx");) { 31 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()) 32 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX); 33 | for (WordTable wordTable : tables) { 34 | System.out.println(wordTable.format()); 35 | } 36 | } 
catch(Exception e) { 37 | e.printStackTrace(); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/com/test/WordCellDataTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.util.List; 6 | 7 | import org.junit.Test; 8 | 9 | import com.suncht.wordread.model.WordTable; 10 | import com.suncht.wordread.parser.WordTableParser; 11 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy; 13 | 14 | public class WordCellDataTest { 15 | @Test 16 | public void testFormulaInCell() throws IOException { 17 | InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套公式.docx"); 18 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()).memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX); 19 | for (WordTable wordTable : tables) { 20 | System.out.println(wordTable.format()); 21 | } 22 | 23 | inputStream.close(); 24 | } 25 | 26 | @Test 27 | public void testImageInCell() throws IOException { 28 | InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套图片.docx"); 29 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()).memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX); 30 | for (WordTable wordTable : tables) { 31 | System.out.println(wordTable.format()); 32 | } 33 | 34 | inputStream.close(); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/test/java/com/test/WordEmbedsTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import java.io.InputStream; 4 | import java.util.Iterator; 5 | import 
java.util.List; 6 | 7 | import org.apache.poi.hssf.usermodel.HSSFCell; 8 | import org.apache.poi.hssf.usermodel.HSSFRow; 9 | import org.apache.poi.hssf.usermodel.HSSFSheet; 10 | import org.apache.poi.hssf.usermodel.HSSFWorkbook; 11 | import org.apache.poi.openxml4j.exceptions.OpenXML4JException; 12 | import org.apache.poi.openxml4j.opc.PackagePart; 13 | import org.apache.poi.poifs.dev.POIFSViewEngine; 14 | import org.apache.poi.poifs.filesystem.POIFSFileSystem; 15 | import org.apache.poi.ss.usermodel.Cell; 16 | import org.apache.poi.xwpf.usermodel.XWPFDocument; 17 | import org.junit.Test; 18 | 19 | public class WordEmbedsTest { 20 | @Test 21 | public void listAllEmbeds() { 22 | try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套附件01.docx");) { 23 | XWPFDocument document = new XWPFDocument(inputStream); 24 | listEmbeds(document); 25 | //listEmbeds2(document); 26 | } catch (Exception e) { 27 | e.printStackTrace(); 28 | } 29 | } 30 | 31 | private static void listEmbeds(XWPFDocument doc) throws OpenXML4JException { 32 | List embeddedDocs = doc.getAllEmbedds(); 33 | if (embeddedDocs != null && !embeddedDocs.isEmpty()) { 34 | Iterator pIter = embeddedDocs.iterator(); 35 | while (pIter.hasNext()) { 36 | PackagePart pPart = pIter.next(); 37 | System.out.print(pPart.getPartName() + ", "); 38 | 39 | System.out.print(pPart.getContentType() + ", "); 40 | System.out.println(); 41 | } 42 | } 43 | } 44 | 45 | private static void listEmbeds2(XWPFDocument doc) throws Exception { 46 | for (final PackagePart pPart : doc.getAllEmbedds()) { 47 | final String contentType = pPart.getContentType(); 48 | System.out.println(contentType + "\n"); 49 | if (contentType.equals("application/vnd.ms-excel")) { 50 | final HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(pPart.getInputStream()); 51 | 52 | for (int sheet = 0; sheet < embeddedWorkbook.getNumberOfSheets(); sheet++) { 53 | final HSSFSheet activeSheet = embeddedWorkbook.getSheetAt(sheet); 54 | if 
(activeSheet.getSheetName().equalsIgnoreCase("Sheet1")) { 55 | for (int rowIndex = activeSheet.getFirstRowNum(); rowIndex <= activeSheet 56 | .getLastRowNum(); rowIndex++) { 57 | final HSSFRow row = activeSheet.getRow(rowIndex); 58 | for (int cellIndex = row.getFirstCellNum(); cellIndex <= row 59 | .getLastCellNum(); cellIndex++) { 60 | final HSSFCell cell = row.getCell(cellIndex); 61 | if (cell != null) { 62 | if (cell.getCellType() == Cell.CELL_TYPE_STRING) 63 | System.out.println("Row:" + rowIndex + " Cell:" + cellIndex + " = " 64 | + cell.getStringCellValue()); 65 | if (cell.getCellType() == Cell.CELL_TYPE_NUMERIC) { 66 | System.out.println("Row:" + rowIndex + " Cell:" + cellIndex + " = " 67 | + cell.getNumericCellValue()); 68 | 69 | cell.setCellValue(cell.getNumericCellValue() * 2); // update 70 | // the 71 | // value 72 | } 73 | } 74 | } 75 | } 76 | } 77 | } 78 | } 79 | } 80 | } 81 | 82 | 83 | @Test 84 | public void viewFile() { 85 | POIFSFileSystem fs = null; 86 | List strings = POIFSViewEngine.inspectViewable(fs, true, 0, " "); 87 | Iterator iter = strings.iterator(); 88 | 89 | while (iter.hasNext()) { 90 | //os.write( ((String)iter.next()).getBytes()); 91 | System.out.println(iter.next()); 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/test/java/com/test/WordHTableParserTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import java.io.InputStream; 4 | import java.util.List; 5 | 6 | import org.junit.Test; 7 | 8 | import com.suncht.wordread.model.WordTable; 9 | import com.suncht.wordread.parser.WordTableParser; 10 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 11 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy; 12 | 13 | public class WordHTableParserTest { 14 | @Test 15 | public void test01() { 16 | InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/标准表格1.doc"); 17 
| //InputStream inputStream = new FileInputStream(new File(doc2)); 18 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()).parse(inputStream, WordDocType.DOC); 19 | for (WordTable wordTable : tables) { 20 | System.out.println(wordTable.format()); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/test/java/com/test/WordXTableParserTest.java: -------------------------------------------------------------------------------- 1 | package com.test; 2 | 3 | import java.io.InputStream; 4 | import java.util.List; 5 | 6 | import org.junit.Test; 7 | 8 | import com.suncht.wordread.model.WordTable; 9 | import com.suncht.wordread.parser.WordTableParser; 10 | import com.suncht.wordread.parser.WordTableParser.WordDocType; 11 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy; 12 | 13 | public class WordXTableParserTest { 14 | String doc1 = "D:\\故障模式分析表格样例01.docx"; 15 | String doc2 = "D:\\故障模式分析表格样例.docx"; 16 | 17 | @Test 18 | public void test01() { 19 | try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/1.docx");) { 20 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()) 21 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX); 22 | for (WordTable wordTable : tables) { 23 | System.out.println(wordTable.format()); 24 | } 25 | } catch (Exception e) { 26 | e.printStackTrace(); 27 | } 28 | } 29 | 30 | @Test 31 | public void test02() { 32 | InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/故障模式分析表格样例.docx"); 33 | // InputStream inputStream = new FileInputStream(new File(doc2)); 34 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()) 35 | .parse(inputStream, WordDocType.DOCX); 36 | for (WordTable wordTable : tables) { 37 | System.out.println(wordTable.format()); 38 | } 39 | } 40 | 41 | @Test 42 | public void test03() { 43 | 
InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/故障模式分析表格样例01.docx"); 44 | // InputStream inputStream = new FileInputStream(new File(doc2)); 45 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()) 46 | .parse(inputStream, WordDocType.DOCX); 47 | for (WordTable wordTable : tables) { 48 | System.out.println(wordTable.format()); 49 | } 50 | } 51 | 52 | @Test 53 | public void test04() { 54 | InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/复杂表格.docx"); 55 | // InputStream inputStream = new FileInputStream(new File(doc2)); 56 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()) 57 | .parse(inputStream, WordDocType.DOCX); 58 | for (WordTable wordTable : tables) { 59 | System.out.println(wordTable.format()); 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/test/resources/1.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/1.doc -------------------------------------------------------------------------------- /src/test/resources/1.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/1.docx -------------------------------------------------------------------------------- /src/test/resources/2.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/2.doc -------------------------------------------------------------------------------- /src/test/resources/conventer/mml2tex/README: 
-------------------------------------------------------------------------------- 1 | README for the XSLT MathML Library 2.1.2 2 | 3 | XSLT MathML Library is a set of XSLT stylesheets to transform 4 | MathML 2.0 to LaTeX. 5 | 6 | For more information, see 7 | http://www.raleigh.ru/MathML/mmltex/index.php?lang=en 8 | 9 | Manifest 10 | -------- 11 | 12 | README this file 13 | mmltex.xsl 14 | tokens.xsl 15 | glayout.xsl 16 | scripts.xsl 17 | tables.xsl 18 | entities.xsl 19 | cmarkup.xsl 20 | 21 | Use 22 | --- 23 | 24 | There are two ways of using the library: 25 | 26 | * Use a local copy of the library. 27 | 28 | 1. Download the distribution (see below). 29 | 30 | 2. Unpack the distribution, using unzip. 31 | 32 | 3. In your stylesheet import or include either the main 33 | stylesheet, mmltex.xsl, or the stylesheet module you 34 | wish to use, such as tokens.xsl. This example assumes 35 | that the distribution has been extracted into the same 36 | directory as your own stylesheet: 37 | 38 | <xsl:import href="mmltex.xsl"/> 39 | 40 | * Import or include either the main stylesheet, or the 41 | stylesheet module you wish to use, directly from the library 42 | website; http://www.raleigh.ru/MathML/mmltex/. 
For example: 43 | 44 | <xsl:import href="http://www.raleigh.ru/MathML/mmltex/mmltex.xsl"/> 45 | 46 | Obtaining The Library 47 | --------------------- 48 | 49 | The XSLT MathML Library is available for download as: 50 | 51 | * Zip file: http://www.raleigh.ru/MathML/mmltex/xsltml_2.1.2.zip 52 | 53 | Copyright 54 | --------- 55 | 56 | Copyright (C) 2001-2003 Vasil Yaroshevich 57 | 58 | Permission is hereby granted, free of charge, to any person 59 | obtaining a copy of this software and associated documentation 60 | files (the ``Software''), to deal in the Software without 61 | restriction, including without limitation the rights to use, 62 | copy, modify, merge, publish, distribute, sublicense, and/or 63 | sell copies of the Software, and to permit persons to whom the 64 | Software is furnished to do so, subject to the following 65 | conditions: 66 | 67 | The above copyright notice and this permission notice shall be 68 | included in all copies or substantial portions of the Software. 69 | 70 | Except as contained in this notice, the names of individuals 71 | credited with contribution to this software shall not be used in 72 | advertising or otherwise to promote the sale, use or other 73 | dealings in this Software without prior written authorization 74 | from the individuals in question. 75 | 76 | Any stylesheet derived from this Software that is publically 77 | distributed will be identified with a different name and the 78 | version strings in any derived Software will be changed so that 79 | no possibility of confusion between the derived package and this 80 | Software will exist. 81 | 82 | Warranty 83 | -------- 84 | 85 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 86 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 87 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 88 | NONINFRINGEMENT. 
IN NO EVENT SHALL NORMAN WALSH OR ANY OTHER 89 | CONTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 90 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 91 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 92 | OTHER DEALINGS IN THE SOFTWARE. 93 | 94 | Contacting the Author 95 | --------------------- 96 | 97 | These stylesheets are maintained by Vasil Yaroshevich, . 98 | -------------------------------------------------------------------------------- /src/test/resources/conventer/mml2tex/glayout.xsl: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | \genfrac{}{}{ 18 | 19 | 20 | 21 | ex 22 | 23 | 24 | 0ex 25 | 26 | 27 | .05ex 28 | 29 | 30 | 31 | .2ex 32 | 33 | 34 | 35 | 36 | 37 | }{}{ 38 | 39 | 40 | \frac{ 41 | 42 | 43 | 44 | \hfill 45 | 46 | 47 | 48 | \hfill 49 | 50 | }{ 51 | 52 | \hfill 53 | 54 | 55 | 56 | \hfill 57 | 58 | } 59 | 60 | 61 | 62 | \raisebox{1ex}{$ 63 | 64 | $}\!\left/ \!\raisebox{-1ex}{$ 65 | 66 | $}\right. 67 | 68 | 69 | 70 | 71 | 72 | 73 | \sqrt[ 74 | 75 | ]{ 76 | 77 | } 78 | 79 | 80 | 81 | exception 25: 82 | \text{exception 25:} 83 | 84 | 85 | 86 | 87 | 88 | \sqrt{ 89 | 90 | } 91 | 92 | 93 | 94 | 95 | 96 | 97 | \left 98 | 99 | 100 | \ 101 | 102 | 103 | \left. 104 | 105 | 106 | 107 | \left( 108 | 109 | 110 | 111 | 112 | 113 | 114 | , 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | \right 134 | 135 | 136 | \ 137 | 138 | 139 | \right. 
140 | 141 | 142 | 143 | \right) 144 | 145 | 146 | 147 | 148 | \phantom{ 149 | 150 | } 151 | 152 | 153 | 154 | 155 | 156 | \overline{ 157 | 158 | \hspace{.2em}|} 159 | 160 | 161 | \sqrt{ 162 | 163 | } 164 | 165 | 166 | \overline{) 167 | 168 | } 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | {\displaystyle 180 | 181 | 182 | { 183 | 184 | \textstyle 185 | \scriptstyle 186 | \scriptscriptstyle 187 | 188 | 189 | 190 | \colorbox[rgb]{ 191 | 192 | 193 | 194 | }{$ 195 | 196 | 197 | \textcolor[rgb]{ 198 | 199 | 200 | 201 | }{ 202 | 203 | 204 | 205 | } 206 | 207 | 208 | $} 209 | 210 | 211 | } 212 | 213 | 214 | } 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | -------------------------------------------------------------------------------- /src/test/resources/conventer/mml2tex/mmltex.xsl: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | $ 26 | 27 | $ 28 | 29 | 30 | 31 | \[ 32 | 33 | \] 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/test/resources/conventer/mml2tex/tables.xsl: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 11 | 12 | 13 | 14 | \multicolumn{ 15 | 16 | }{c}{ 17 | 18 | } 19 | 20 | & 21 | 22 | 23 | 24 | 25 | 26 | 27 | \hfill 28 | 29 | 30 | 31 | \hfill 32 | 33 | 34 | 36 | & 37 | 38 | 39 | 40 | 41 | 42 | 43 | \\ 44 | 45 | 46 | 47 | 48 | \begin{array}{ 49 | 50 | | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | | 85 | 86 | } 87 | 88 | \hline 89 | 90 | 91 | 92 | \\ \hline 93 | 94 | \end{array} 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 
-------------------------------------------------------------------------------- /src/test/resources/复杂表格.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/复杂表格.docx -------------------------------------------------------------------------------- /src/test/resources/嵌套公式.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套公式.doc -------------------------------------------------------------------------------- /src/test/resources/嵌套公式.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套公式.docx -------------------------------------------------------------------------------- /src/test/resources/嵌套图片.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套图片.docx -------------------------------------------------------------------------------- /src/test/resources/嵌套图片01.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套图片01.docx -------------------------------------------------------------------------------- /src/test/resources/嵌套图片02.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套图片02.docx 
-------------------------------------------------------------------------------- /src/test/resources/嵌套多文本.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套多文本.docx -------------------------------------------------------------------------------- /src/test/resources/嵌套附件01.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套附件01.docx -------------------------------------------------------------------------------- /src/test/resources/嵌套附件02.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套附件02.docx -------------------------------------------------------------------------------- /src/test/resources/故障模式分析表格样例01.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/故障模式分析表格样例01.docx -------------------------------------------------------------------------------- /src/test/resources/标准表格1.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/标准表格1.doc -------------------------------------------------------------------------------- /target/.gitignore: -------------------------------------------------------------------------------- 1 | /classes 2 | /test-classes 3 | --------------------------------------------------------------------------------