├── .classpath
├── .gitattributes
├── .gitignore
├── .project
├── .settings
    ├── org.eclipse.jdt.core.prefs
    └── org.eclipse.m2e.core.prefs
├── LICENSE
├── README.md
├── commit.bat
├── doc
    └── readme.txt
├── libs
    ├── commons-cli-1.1.jar
    ├── commons-io-1.4.jar
    ├── jodconverter-core-3.0-beta-4-sources.jar
    ├── jodconverter-core-3.0-beta-4.jar
    ├── json-20090211.jar
    ├── juh-3.2.1.jar
    ├── jurt-3.2.1.jar
    ├── ridl-3.2.1.jar
    └── unoil-3.2.1.jar
├── pom.xml
├── src
    ├── main
    │   ├── java
    │   │   └── com
    │   │   │   └── suncht
    │   │   │       ├── convert
    │   │   │           ├── CommonDocumentConverter.java
    │   │   │           ├── FileUtils.java
    │   │   │           ├── IOfficeDocumentConverter.java
    │   │   │           ├── OfficeDocumentConvertServer.java
    │   │   │           ├── TxtDocumentConverter.java
    │   │   │           └── demo
    │   │   │           │   ├── Doc2DocxUtil.java
    │   │   │           │   └── OfficePDFConverter.java
    │   │   │       └── wordread
    │   │   │           ├── exceptions
    │   │   │               └── ParseException.java
    │   │   │           ├── format
    │   │   │               ├── DefaultCellFormater.java
    │   │   │               ├── DefaultWordTableFormater.java
    │   │   │               ├── ICellFormater.java
    │   │   │               └── IWordTableFormater.java
    │   │   │           ├── model
    │   │   │               ├── ContentTypeEnum.java
    │   │   │               ├── TTCPr.java
    │   │   │               ├── WordTable.java
    │   │   │               ├── WordTableCell.java
    │   │   │               ├── WordTableCellContent.java
    │   │   │               ├── WordTableCellContentFormula.java
    │   │   │               ├── WordTableCellContentImage.java
    │   │   │               ├── WordTableCellContentOleObject.java
    │   │   │               ├── WordTableCellContentText.java
    │   │   │               ├── WordTableCellContents.java
    │   │   │               ├── WordTableComplexCell.java
    │   │   │               ├── WordTableHeader.java
    │   │   │               ├── WordTableMap.java
    │   │   │               ├── WordTableRow.java
    │   │   │               └── WordTableSimpleCell.java
    │   │   │           ├── output
    │   │   │               ├── DefaultWordTableOutputStrategy.java
    │   │   │               └── IWordTableOutputStrategy.java
    │   │   │           ├── parser
    │   │   │               ├── ISingleWordTableParser.java
    │   │   │               ├── IWordTableParser.java
    │   │   │               ├── WordTableParser.java
    │   │   │               ├── WordTableTransferContext.java
    │   │   │               ├── mapping
    │   │   │               │   ├── IWordTableMemoryMappingVisitor.java
    │   │   │               │   └── WordTableMemoryMapping.java
    │   │   │               ├── strategy
    │   │   │               │   ├── DefaultTableStrategy.java
    │   │   │               │   ├── ITableTransferStrategy.java
    │   │   │               │   └── LogicalTableStrategy.java
    │   │   │               ├── wordh
    │   │   │               │   ├── SingleWordHTableParser.java
    │   │   │               │   └── WordHTableParser.java
    │   │   │               └── wordx
    │   │   │               │   ├── SingleWordXTableParser.java
    │   │   │               │   └── WordXTableParser.java
    │   │   │           └── utils
    │   │   │               └── MathmlUtils.java
    │   └── resources
    │   │   ├── 1.doc
    │   │   ├── FMEA信息导入-客户实例.doc
    │   │   ├── FMEA信息导入-客户实例.docx
    │   │   ├── conventer
    │   │       ├── MML2OMML.XSL
    │   │       ├── OMML2MML.XSL
    │   │       └── mml2tex
    │   │       │   ├── README
    │   │       │   ├── cmarkup.xsl
    │   │       │   ├── entities.xsl
    │   │       │   ├── glayout.xsl
    │   │       │   ├── mmltex.xsl
    │   │       │   ├── scripts.xsl
    │   │       │   ├── tables.xsl
    │   │       │   └── tokens.xsl
    │   │   ├── logback.xml
    │   │   ├── 故障模式分析表格样例.docx
    │   │   └── 故障模式分析表格样例_处理模型.docx
    └── test
    │   ├── java
    │       └── com
    │       │   └── test
    │       │       ├── Doc2DocxTest.java
    │       │       ├── MemoryMappingVisitorTest.java
    │       │       ├── MuliHeaderXTableParserTest.java
    │       │       ├── MultiTextCellTest.java
    │       │       ├── NestedFormulaTest.java
    │       │       ├── NestedImageCellTest.java
    │       │       ├── OfficeConverterTest.java
    │       │       ├── OleObjectCellTest.java
    │       │       ├── WordCellDataTest.java
    │       │       ├── WordEmbedsTest.java
    │       │       ├── WordHTableParserTest.java
    │       │       └── WordXTableParserTest.java
    │   └── resources
    │       ├── 1.doc
    │       ├── 1.docx
    │       ├── 2.doc
    │       ├── conventer
    │           ├── MML2OMML.XSL
    │           ├── OMML2MML.XSL
    │           └── mml2tex
    │           │   ├── README
    │           │   ├── cmarkup.xsl
    │           │   ├── entities.xsl
    │           │   ├── glayout.xsl
    │           │   ├── mmltex.xsl
    │           │   ├── scripts.xsl
    │           │   ├── tables.xsl
    │           │   └── tokens.xsl
    │       ├── 复杂表格.docx
    │       ├── 嵌套公式.doc
    │       ├── 嵌套公式.docx
    │       ├── 嵌套图片.docx
    │       ├── 嵌套图片01.docx
    │       ├── 嵌套图片02.docx
    │       ├── 嵌套多文本.docx
    │       ├── 嵌套附件01.docx
    │       ├── 嵌套附件02.docx
    │       ├── 故障模式分析表格样例01.docx
    │       └── 标准表格1.doc
└── target
    └── .gitignore


/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/java">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
10 | 		<attributes>
11 | 			<attribute name="maven.pomderived" value="true"/>
12 | 		</attributes>
13 | 	</classpathentry>
14 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
15 | 		<attributes>
16 | 			<attribute name="optional" value="true"/>
17 | 			<attribute name="maven.pomderived" value="true"/>
18 | 		</attributes>
19 | 	</classpathentry>
20 | 	<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
21 | 		<attributes>
22 | 			<attribute name="maven.pomderived" value="true"/>
23 | 		</attributes>
24 | 	</classpathentry>
25 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/jdk1.7.0_25">
26 | 		<attributes>
27 | 			<attribute name="maven.pomderived" value="true"/>
28 | 		</attributes>
29 | 	</classpathentry>
30 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
31 | 		<attributes>
32 | 			<attribute name="maven.pomderived" value="true"/>
33 | 		</attributes>
34 | 	</classpathentry>
35 | 	<classpathentry kind="lib" path="libs/commons-cli-1.1.jar"/>
36 | 	<classpathentry kind="lib" path="libs/json-20090211.jar"/>
37 | 	<classpathentry kind="lib" path="libs/juh-3.2.1.jar"/>
38 | 	<classpathentry kind="lib" path="libs/jurt-3.2.1.jar"/>
39 | 	<classpathentry kind="lib" path="libs/ridl-3.2.1.jar"/>
40 | 	<classpathentry kind="lib" path="libs/unoil-3.2.1.jar"/>
41 | 	<classpathentry kind="lib" path="libs/jodconverter-core-3.0-beta-4.jar" sourcepath="D:/开发/wordtable-read/wordtable-read/libs/jodconverter-core-3.0-beta-4-sources.jar"/>
42 | 	<classpathentry kind="output" path="target/classes"/>
43 | </classpath>
44 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.java linguist-language=Java
2 | *.jsp linguist-language=Java
3 | *.css linguist-language=Java
4 | *.js linguist-language=Java
5 | *.html linguist-language=Java
6 | *.doc linguist-language=Java
7 | *.docx linguist-language=Java
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled class file
 2 | *.class
 3 | 
 4 | # Log file
 5 | *.log
 6 | 
 7 | # BlueJ files
 8 | *.ctxt
 9 | 
10 | # Mobile Tools for Java (J2ME)
11 | .mtj.tmp/
12 | 
13 | # Package Files #
14 | #*.jar
15 | *.war
16 | *.ear
17 | *.zip
18 | *.tar.gz
19 | *.rar
20 | 
21 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
22 | hs_err_pid*
23 | /target/
24 | 


--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>sun-wordtable-read</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.springframework.ide.eclipse.core.springbuilder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 		<buildCommand>
19 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
20 | 			<arguments>
21 | 			</arguments>
22 | 		</buildCommand>
23 | 	</buildSpec>
24 | 	<natures>
25 | 		<nature>org.springframework.ide.eclipse.core.springnature</nature>
26 | 		<nature>org.eclipse.jdt.core.javanature</nature>
27 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
28 | 	</natures>
29 | </projectDescription>
30 | 


--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
 1 | eclipse.preferences.version=1
 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
 3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
 6 | org.eclipse.jdt.core.compiler.compliance=1.7
 7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
 8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
13 | org.eclipse.jdt.core.compiler.source=1.7
14 | 


--------------------------------------------------------------------------------
/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # sun-wordtable-read
 3 | 
 4 | 读取Word文档的各种复杂表格内容，支持2007以上的Docx文档（暂不支持2007以下的Doc类型文档）
 5 | 
 6 | ## 开发背景：
 7 | 工作上遇到如何读取Word文档中的表格内容，表格是有业务数据意义的，而且有一定规则的，因此不能直接读取表格文本，而是遍历表格单元格进行一行一列读取。
 8 | 
 9 | 表格规则：
10 |  1. 表格可以有表头，表头也有业务含义
11 |  2. 一行为一个业务数据，可能会跨行
12 |  3. 列可能会有跨列、跨行
13 |  4. 单元格中可能包含图片、数学公式、嵌套表格、文件等
14 | 
15 | 比如，以下表格
16 | [![](https://img-blog.csdn.net/20180414152908387?watermark/2/text/aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3N1bmN0/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70)](https://img-blog.csdn.net/20180414152908387?watermark/2/text/aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3N1bmN0/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70)
17 | 
18 | ## 设计理念：
19 |  1. 读取Word文档中表格数据到内存映射表，再通过自定义读取策略，将内存映射表转换成实际业务表格数据。
20 |  2. 使用统一的内存映射表，屏蔽了实际Word文档读取方式，开发者只关心如何转换为业务数据。
21 | 
22 | ## 功能现状：
23 |  1. 目前只支持读取2007及以上版本Word文档（Docx）中的表格，可读取单元格中的文本、图片、数学公式、嵌套表格、附件内嵌对象（PPT、WORD、EXCEL类型的OLE内嵌对象除外）。
24 |  2. 支持一般性的有规则的复杂表格。
25 |  3. 暂不支持2007以下的Doc类型文档，因为POI中暂未找到关于表格单元格合并信息的API。（目前已有解决方案，正在积极处理中……）
26 |  	目前折中解决方案：为了兼容2007以下的Doc类型文档，利用jodconverter3.0 + LibreOffice 5.3，“先将Doc类型文档转换为Docx类型文档，再进行读取表格内容”。
27 | 	注意：LibreOffice直接支持Docx类型文档，而OpenOffice不能直接支持Docx类型文档，需要AccessODF插件
28 | 
29 | ## 后续要增加的功能：
30 |  1. 处理PPT、WORD、EXCEL类型的OLE内嵌对象
31 |  2. 直接读取2007以下的Doc类型文档（正在处理中）。（Docx文档与Doc文档在解析单元格时有区别：Docx有行合并、列合并、列宽信息，而Doc只有行合并、列宽信息，没有列合并信息）
32 |  3. 直接导入到目标（比如：数据库表、Excel等）的公共功能
33 |  4. 读取大文件的Word、性能优化策略


--------------------------------------------------------------------------------
/commit.bat:
--------------------------------------------------------------------------------
1 | git add .
2 | git commit -m "更新README.MD"
3 | git push -u origin master


--------------------------------------------------------------------------------
/doc/readme.txt:
--------------------------------------------------------------------------------
1 | 在网页中显示latex公式的方法：
2 | 直接在html源文件中插入 <img src="http://latex.codecogs.com/gif.latex?在这里填写你的latex代码" /> 
3 | 
4 | 
5 | Office中数学公式用Java解析：
6 | http://www.jianshu.com/p/ea7f62e3b23a


--------------------------------------------------------------------------------
/libs/commons-cli-1.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/commons-cli-1.1.jar


--------------------------------------------------------------------------------
/libs/commons-io-1.4.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/commons-io-1.4.jar


--------------------------------------------------------------------------------
/libs/jodconverter-core-3.0-beta-4-sources.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/jodconverter-core-3.0-beta-4-sources.jar


--------------------------------------------------------------------------------
/libs/jodconverter-core-3.0-beta-4.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/jodconverter-core-3.0-beta-4.jar


--------------------------------------------------------------------------------
/libs/json-20090211.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/json-20090211.jar


--------------------------------------------------------------------------------
/libs/juh-3.2.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/juh-3.2.1.jar


--------------------------------------------------------------------------------
/libs/jurt-3.2.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/jurt-3.2.1.jar


--------------------------------------------------------------------------------
/libs/ridl-3.2.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/ridl-3.2.1.jar


--------------------------------------------------------------------------------
/libs/unoil-3.2.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/libs/unoil-3.2.1.jar


--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
  1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  2 | 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  3 | 	<modelVersion>4.0.0</modelVersion>
  4 | 	<groupId>com.suncht</groupId>
  5 | 	<artifactId>wordtable-read</artifactId>
  6 | 	<version>0.0.1-SNAPSHOT</version>
  7 | 
  8 | 	<properties>
  9 | 		<apache-poi>3.9</apache-poi>
 10 | 	</properties>
 11 | 	<dependencies>
 12 | 		<dependency>
 13 | 			<groupId>org.springframework</groupId>
 14 | 			<artifactId>spring-beans</artifactId>
 15 | 			<version>3.1.1.RELEASE</version>
 16 | 		</dependency>
 17 | 
 18 | 		<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
 19 | 		<dependency>
 20 | 			<groupId>org.apache.poi</groupId>
 21 | 			<artifactId>poi</artifactId>
 22 | 			<version>${apache-poi}</version>
 23 | 		</dependency>
 24 | 
 25 | 		<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
 26 | 		<dependency>
 27 | 			<groupId>org.apache.poi</groupId>
 28 | 			<artifactId>poi-ooxml</artifactId>
 29 | 			<version>${apache-poi}</version>
 30 | 		</dependency>
 31 | 
 32 | 		<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
 33 | 		<dependency>
 34 | 			<groupId>org.apache.poi</groupId>
 35 | 			<artifactId>poi-scratchpad</artifactId>
 36 | 			<version>${apache-poi}</version>
 37 | 		</dependency>
 38 | 
 39 | 		<dependency>
 40 | 			<groupId>com.google.guava</groupId>
 41 | 			<artifactId>guava</artifactId>
 42 | 			<version>19.0</version>
 43 | 		</dependency>
 44 | 		<dependency>
 45 | 			<groupId>org.apache.commons</groupId>
 46 | 			<artifactId>commons-lang3</artifactId>
 47 | 			<version>3.3.2</version>
 48 | 		</dependency>
 49 | 		
 50 | 		<dependency>
 51 | 		    <groupId>commons-io</groupId>
 52 | 		    <artifactId>commons-io</artifactId>
 53 | 		    <version>1.4</version>
 54 | 		</dependency>
 55 | 		
 56 | 		<dependency>
 57 | 		    <groupId>dom4j</groupId>
 58 | 		    <artifactId>dom4j</artifactId>
 59 | 		    <version>1.6.1</version>
 60 | 		</dependency>
 61 | 		
 62 | 		<dependency>
 63 | 		    <groupId>jaxen</groupId>
 64 | 		    <artifactId>jaxen</artifactId>
 65 | 		    <version>1.1.6</version>
 66 | 		</dependency>
 67 | 		
 68 | 		<dependency>
 69 | 		    <groupId>org.slf4j</groupId>
 70 | 		    <artifactId>slf4j-api</artifactId>
 71 | 		    <version>1.7.7</version>
 72 | 		</dependency>
 73 | 		<dependency>
 74 | 		    <groupId>ch.qos.logback</groupId>
 75 | 		    <artifactId>logback-core</artifactId>
 76 | 		    <version>1.1.7</version>
 77 | 		</dependency>
 78 | 		<dependency>
 79 | 		    <groupId>ch.qos.logback</groupId>
 80 | 		    <artifactId>logback-access</artifactId>
 81 | 		    <version>1.1.7</version>
 82 | 		</dependency>
 83 | 		<dependency>
 84 | 		    <groupId>ch.qos.logback</groupId>
 85 | 		    <artifactId>logback-classic</artifactId>
 86 | 		    <version>1.1.7</version>
 87 | 		</dependency>
 88 | 		
 89 | 		<dependency>
 90 | 		    <groupId>junit</groupId>
 91 | 		    <artifactId>junit</artifactId>
 92 | 		    <version>4.12</version>
 93 | 		    <scope>test</scope>
 94 | 		</dependency>
 95 | 	</dependencies>
 96 | 
 97 | 	<build>
 98 | 		<plugins>
 99 | 			<plugin>
100 | 				<artifactId>maven-assembly-plugin</artifactId>
101 | 				<configuration>
102 | 					<archive>
103 | 						<manifest>
104 | 						</manifest>
105 | 					</archive>
106 | 					<descriptorRefs>
107 | 						<descriptorRef>jar-with-dependencies</descriptorRef>
108 | 					</descriptorRefs>
109 | 				</configuration>
110 | 				<executions>
111 | 					<execution>
112 | 						<id>make-assembly</id> <!-- this is used for inheritance merges -->
113 | 						<phase>package</phase> <!-- 指定在打包节点执行jar包合并操作 -->
114 | 						<goals>
115 | 							<goal>single</goal>
116 | 						</goals>
117 | 					</execution>
118 | 				</executions>
119 | 			</plugin>
120 | 		</plugins>
121 | 	</build>
122 | </project>


--------------------------------------------------------------------------------
/src/main/java/com/suncht/convert/CommonDocumentConverter.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.convert;
  2 | 
  3 | import java.io.File;
  4 | import java.util.Collections;
  5 | import java.util.HashMap;
  6 | import java.util.Map;
  7 | 
  8 | import org.apache.commons.lang3.StringUtils;
  9 | import org.artofsolving.jodconverter.OfficeDocumentConverter;
 10 | import org.artofsolving.jodconverter.document.DocumentFamily;
 11 | import org.artofsolving.jodconverter.document.DocumentFormat;
 12 | import org.artofsolving.jodconverter.office.OfficeManager;
 13 | 
 14 | /**
 15 |  * 通用文档转换，可支持docx文档
 16 |  * 支持docx文档，需要LibreOffice直接支持， 而OpenOffice不能直接支持Docx文档，需要AccessODF插件
 17 |  * LibreOffice/OpenOffice能支持哪些文档转换，那么该程序能支持哪些转换
 18 |  * @author changtan.sun
 19 |  *
 20 |  */
 21 | public class CommonDocumentConverter implements IOfficeDocumentConverter {
 22 | 	protected OfficeManager officeManager;
 23 | 	protected String inputFile;
 24 | 	protected String outputFile;
 25 | 	
 26 | 	protected boolean needTempFile = false; //是否删除中间文件
 27 | 	protected String tempFile;
 28 | 	protected boolean needDeleteInputFile = false; //是否需要删除输入文件
 29 | 	protected String extraOutputFormatToNeed;
 30 | 	
 31 | 	//额外的输出文档格式
 32 | 	private static Map<String, String> extraOutputFormatMap = new HashMap<String, String>();
 33 | 	
 34 | 	static {
 35 | 		//增加Docx文档格式
 36 | 		extraOutputFormatMap.put("docx", "MS Word 2007 XML");
 37 | 	}
 38 | 	
 39 | 	public CommonDocumentConverter(OfficeManager officeManager, String inputFile, String outputFile, boolean needDeleteInputFile) {
 40 | 		this.officeManager = officeManager;
 41 | 		this.inputFile = inputFile;
 42 | 		this.outputFile = outputFile;
 43 | 		this.needDeleteInputFile = needDeleteInputFile;
 44 | 	}
 45 | 	
 46 | 	public void before() {
 47 | 		tempFile = null;
 48 | 		needTempFile = false;
 49 | 		extraOutputFormatToNeed = null;
 50 | 		
 51 | 		String sufix = FileUtils.getFileSufix(outputFile);
 52 | 		this.judgeFormat(sufix);
 53 | 	}
 54 | 
 55 | 	@Override
 56 | 	public void convert() {
 57 | 		OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager);  
 58 | 		
 59 | 		System.out.println("转换前处理...");  
 60 | 		before();
 61 | 		
 62 |         //开始转换 
 63 | 		System.out.println("转换开始执行，["+inputFile+"]转换为["+outputFile+"]...");  
 64 | 		if(StringUtils.isNotBlank(extraOutputFormatToNeed)) {
 65 | 			DocumentFormat extraFormat = converter.getFormatRegistry().getFormatByExtension(extraOutputFormatToNeed);
 66 | 			extraFormat.setStoreProperties(DocumentFamily.TEXT, Collections.singletonMap("FilterName", extraOutputFormatMap.get(extraOutputFormatToNeed)));
 67 | 		
 68 |             if(needTempFile) {
 69 | 				converter.convert(new File(tempFile),new File(outputFile), extraFormat);  
 70 | 			} else {
 71 | 				converter.convert(new File(inputFile),new File(outputFile), extraFormat);  
 72 | 			}
 73 |             
 74 | 		} else {
 75 | 			if(needTempFile) {
 76 | 				converter.convert(new File(tempFile),new File(outputFile));  
 77 | 			} else {
 78 | 				converter.convert(new File(inputFile),new File(outputFile));  
 79 | 			}
 80 | 		}
 81 | 		
 82 | 		System.out.println("转换后处理...");  
 83 | 		after();
 84 | 		
 85 | 		System.out.println("转换完成");  
 86 | 	}
 87 | 
 88 | 	public void after() {
 89 | 		if(needTempFile) {
 90 | 			FileUtils.deleteFile(tempFile);
 91 | 		}
 92 | 		
 93 | 		if(needDeleteInputFile) {
 94 | 			FileUtils.deleteFile(inputFile);
 95 | 		}
 96 | 	}
 97 | 	
 98 | 	private void judgeFormat(String sufix) {
 99 | 		if(extraOutputFormatMap.containsKey(sufix)) {
100 | 			extraOutputFormatToNeed = sufix;
101 | 		}
102 | 	}
103 | 	
104 | 
105 | }
106 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/convert/FileUtils.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.convert;
 2 | 
 3 | import java.io.File;
 4 | import java.io.FileInputStream;
 5 | import java.io.FileNotFoundException;
 6 | import java.io.FileOutputStream;
 7 | import java.io.IOException;
 8 | 
 9 | 
10 | 
/**
 * Small file-name and file-system helpers used by the document converters.
 */
public class FileUtils {
	
	/**
	 * Returns the file name without its extension.
	 * @param fileName file name or path
	 * @return everything before the extension dot, or the unchanged name when it has no extension
	 */
	public static String getFilePrefix(String fileName){
		int splitIndex = extensionDotIndex(fileName);
		return splitIndex < 0 ? fileName : fileName.substring(0, splitIndex);
	}
	
	/**
	 * Returns the extension of the file name, without the dot.
	 * @param fileName file name or path
	 * @return the text after the extension dot, or an empty string when there is no extension
	 */
	public static String getFileSufix(String fileName){
		int splitIndex = extensionDotIndex(fileName);
		return splitIndex < 0 ? "" : fileName.substring(splitIndex + 1);
	}
	
	/**
	 * Finds the dot that starts the extension. A dot that belongs to a
	 * directory name (one followed by '/' or '\') does not count.
	 * @return index of the extension dot, or -1 when the last path segment has none
	 */
	private static int extensionDotIndex(String fileName) {
		int dot = fileName.lastIndexOf('.');
		int separator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
		return dot > separator ? dot : -1;
	}
	
	/**
	 * Copies the content of one file to another, overwriting the target.
	 * I/O errors during the copy are printed and otherwise ignored (best effort).
	 * @param inputFile path of the file to read
	 * @param outputFile path of the file to write
	 * @throws FileNotFoundException when either file cannot be opened
	 */
	public static void copyFile(String inputFile,String outputFile) throws FileNotFoundException{
		FileInputStream fis = new FileInputStream(new File(inputFile));
		try {
			FileOutputStream fos = new FileOutputStream(new File(outputFile));
			try {
				// Copy in chunks instead of one byte per read/write call.
				byte[] buffer = new byte[8192];
				int count;
				while ((count = fis.read(buffer)) != -1) {
					fos.write(buffer, 0, count);
				}
			} catch (IOException e) {
				e.printStackTrace();
			} finally {
				closeQuietly(fos);
			}
		} finally {
			// Runs even when opening the output stream failed, so the input stream never leaks.
			closeQuietly(fis);
		}
	}
	
	/**
	 * Closes the stream, printing (not propagating) any failure so that one
	 * failed close cannot prevent another stream from being closed.
	 */
	private static void closeQuietly(java.io.Closeable stream) {
		try {
			stream.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Deletes the file if it exists; does nothing otherwise.
	 * @param fileToDelete path of the file to delete
	 */
	public static void deleteFile(String fileToDelete) {
		File file = new File(fileToDelete);
		if(file.exists()) {
			file.delete();
		}
	}
}
52 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/convert/IOfficeDocumentConverter.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.convert;
 2 | 
/**
 * Document conversion contract.
 * @author changtan.sun
 *
 */
public interface IOfficeDocumentConverter {
	/**
	 * Performs the conversion. Takes no arguments and returns nothing.
	 */
	void convert();
}
14 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/convert/OfficeDocumentConvertServer.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.convert;
 2 | 
 3 | import java.io.Closeable;
 4 | import java.io.IOException;
 5 | 
 6 | import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration;
 7 | import org.artofsolving.jodconverter.office.OfficeManager;
 8 | 
 9 | /**
10 |  * 文档转换服务
11 |  * @author changtan.sun
12 |  *
13 |  */
14 | public class OfficeDocumentConvertServer implements Closeable {
15 | 	private OfficeManager officeManager;
16 | 
17 | 	public OfficeDocumentConvertServer(String officeHome, int... ports) {
18 | 		this.startService(officeHome, ports);
19 | 	}
20 | 	
21 | 	private void startService(String officeHome, int... ports) {
22 | 		DefaultOfficeManagerConfiguration configuration = new DefaultOfficeManagerConfiguration();
23 | 		try {
24 | 			System.out.println("准备启动office服务....");
25 | 			configuration.setOfficeHome(officeHome);// 设置安装目录
26 | 			configuration.setPortNumbers(ports); // 设置端口
27 | 			configuration.setTaskExecutionTimeout(1000 * 60 * 5L);
28 | 			configuration.setTaskQueueTimeout(1000 * 60 * 60 * 24L);
29 | 			officeManager = configuration.buildOfficeManager();
30 | 			officeManager.start(); // 启动服务
31 | 			System.out.println("office转换服务启动成功!");
32 | 		} catch (Exception ce) {
33 | 			System.out.println("office转换服务启动失败!详细信息:" + ce);
34 | 		}
35 | 	}
36 | 
37 | 	public void convert(String inputFile, String outputFile, boolean needDeleteInputFile) {
38 | 		IOfficeDocumentConverter converter = null;
39 | 		if(inputFile.endsWith(".txt")) {
40 | 			converter = new TxtDocumentConverter(officeManager, inputFile, outputFile, needDeleteInputFile);
41 | 		} else {
42 | 			converter = new CommonDocumentConverter(officeManager, inputFile, outputFile, needDeleteInputFile);
43 | 		}
44 | 		
45 | 		converter.convert();
46 | 	}
47 | 	
48 | 	public void convert(String inputFile, String outputFile) {
49 | 		this.convert(inputFile, outputFile, false);
50 | 	}
51 | 
52 | 	@Override
53 | 	public void close() throws IOException {
54 | 		if (officeManager != null) {
55 | 			officeManager.stop();
56 | 			System.out.println("关闭office服务");  
57 | 		}
58 | 	}
59 | }
60 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/convert/TxtDocumentConverter.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.convert;
 2 | 
 3 | import java.io.File;
 4 | import java.io.FileNotFoundException;
 5 | 
 6 | import org.artofsolving.jodconverter.office.OfficeManager;
 7 | 
 8 | /**
 9 |  * 支持Txt类型文档转换
10 |  * txt需要先转为odt类型文档，才能进行下一步转换
11 |  * @author changtan.sun
12 |  *
13 |  */
14 | public class TxtDocumentConverter extends CommonDocumentConverter {
15 | 
16 | 	public TxtDocumentConverter(OfficeManager officeManager, String inputFile, String outputFile, boolean needDeleteInputFile) {
17 | 		super(officeManager, inputFile, outputFile, needDeleteInputFile);
18 | 	}
19 | 
20 | 	@Override
21 | 	public void before() {
22 | 		super.before();
23 | 		
24 | 		if(inputFile.endsWith(".txt")){ //如果是Txt文件，需要转换为odt文件
25 | 			needTempFile = true;
26 | 			tempFile = FileUtils.getFilePrefix(inputFile)+".odt";
27 |             if(new File(tempFile).exists()){
28 |                 System.out.println("odt文件已存在！");
29 |                 inputFile = tempFile;
30 |             }else{
31 |                 try {
32 |                     FileUtils.copyFile(inputFile, tempFile);
33 |                     inputFile = tempFile;
34 |                 } catch (FileNotFoundException e) {
35 |                     System.out.println("文档不存在！");
36 |                     e.printStackTrace();
37 |                 }
38 |             }
39 |         }
40 | 	}
41 | }
42 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/convert/demo/Doc2DocxUtil.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.convert.demo;
 2 | import java.io.File;
 3 | import java.util.Collections;
 4 | 
 5 | import org.artofsolving.jodconverter.OfficeDocumentConverter;
 6 | import org.artofsolving.jodconverter.document.DocumentFamily;
 7 | import org.artofsolving.jodconverter.document.DocumentFormat;
 8 | import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration;
 9 | import org.artofsolving.jodconverter.office.OfficeManager;
10 |   
11 | public class Doc2DocxUtil{  
12 |       
13 |     private static Doc2DocxUtil doc2DocxUtil = new Doc2DocxUtil();  
14 |     private static  OfficeManager officeManager;  
15 |     //openOffice安装路径  
16 |     private static String OPEN_OFFICE_HOME = "D:\\Program Files\\LibreOffice 5\\";  
17 |     //服务端口  
18 |     private static int OPEN_OFFICE_PORT[] = {8101};  
19 |       
20 |     public static Doc2DocxUtil getOffice2PdfUtil() {  
21 |         return doc2DocxUtil;  
22 |     }  
23 |       
24 |     
25 |     public static void doc2Docx(String inputFile,String outputFile) {
26 |     	File pdfFile = new File(outputFile);  
27 |         if (pdfFile.exists()) {  
28 |             pdfFile.delete();  
29 |         }  
30 |         try{  
31 |             //打开服务  
32 |             startService();          
33 |             OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager);  
34 |             DocumentFormat docx = converter.getFormatRegistry().getFormatByExtension("docx");
35 |             docx.setStoreProperties(DocumentFamily.TEXT, Collections.singletonMap("FilterName", "MS Word 2007 XML"));
36 |             //开始转换  
37 |             converter.convert(new File(inputFile),new File(outputFile), docx);  
38 |             //关闭  
39 |             stopService();  
40 |             System.out.println("运行结束");  
41 |         }catch (Exception e) {  
42 |             // TODO: handle exception  
43 |             e.printStackTrace();  
44 |         }  
45 |     }
46 |     
47 |     private static void transformBinaryWordDocToDocX(File in, File out) {
48 |         OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager);
49 |         DocumentFormat docx = converter.getFormatRegistry().getFormatByExtension("docx");
50 |         docx.setStoreProperties(DocumentFamily.TEXT, Collections.singletonMap("FilterName", "MS Word 2007 XML"));
51 | 
52 |         converter.convert(in, out, docx);
53 |     }
54 |     
55 |     private static void transformBinaryWordDocToW2003Xml(File in, File out) {
56 |         OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager);;
57 |         DocumentFormat w2003xml = new DocumentFormat("Microsoft Word 2003 XML", "xml", "text/xml");
58 |         w2003xml.setInputFamily(DocumentFamily.TEXT);
59 |         w2003xml.setStoreProperties(DocumentFamily.TEXT, Collections.singletonMap("FilterName", "MS Word 2003 XML"));
60 |         converter.convert(in, out, w2003xml);
61 |     }
62 | 
63 |       
64 |     public static void stopService(){  
65 |         if (officeManager != null) {  
66 |             officeManager.stop();  
67 |         }  
68 |     }  
69 |       
70 |     public static void startService(){  
71 |         DefaultOfficeManagerConfiguration configuration = new DefaultOfficeManagerConfiguration();  
72 |         try {  
73 |             configuration.setOfficeHome(OPEN_OFFICE_HOME);//设置安装目录  
74 |             configuration.setPortNumbers(OPEN_OFFICE_PORT); //设置端口  
75 |             configuration.setTaskExecutionTimeout(1000 * 60 * 5L);  
76 |             configuration.setTaskQueueTimeout(1000 * 60 * 60 * 24L);  
77 |             officeManager = configuration.buildOfficeManager();  
78 |             officeManager.start();    //启动服务  
79 |         } catch (Exception ce) {  
80 |             System.out.println("office转换服务启动失败!详细信息:" + ce);  
81 |         }  
82 |     }  
83 | }  


--------------------------------------------------------------------------------
/src/main/java/com/suncht/convert/demo/OfficePDFConverter.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.convert.demo;
  2 | import java.io.File;
  3 | import java.io.FileNotFoundException;
  4 | import java.util.Collections;
  5 | 
  6 | import org.artofsolving.jodconverter.OfficeDocumentConverter;
  7 | import org.artofsolving.jodconverter.document.DocumentFamily;
  8 | import org.artofsolving.jodconverter.document.DocumentFormat;
  9 | import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration;
 10 | import org.artofsolving.jodconverter.office.OfficeManager;
 11 | 
 12 | import com.suncht.convert.FileUtils;
 13 |   
 14 | public class OfficePDFConverter{  
 15 |       
 16 |     private static OfficePDFConverter converter = new OfficePDFConverter();  
 17 |     private static  OfficeManager officeManager;  
 18 |     //openOffice安装路径  
 19 |     //private static String OPEN_OFFICE_HOME = "D:\\Program Files (x86)\\OpenOffice 4\\";  
 20 |     private static String OPEN_OFFICE_HOME = "D:\\Program Files\\LibreOffice 5\\";  
 21 |     //服务端口  
 22 |     private static int OPEN_OFFICE_PORT[] = {8100};  
 23 |       
 24 |     public static OfficePDFConverter getConverter() {  
 25 |         return converter;  
 26 |     }  
 27 |       
 28 |     /** 
 29 |      *  
 30 |      * office2Pdf 方法 
 31 |      * @descript：TODO 
 32 |      * @param inputFile 文件全路径 
 33 |      * @param outputFile pdf文件全路径 
 34 |      * @return void 
 35 |      * @author lxz 
 36 |      * @return  
 37 |      */      
 38 |     public void convert2PDF(String inputFile,String outputFile) {  
 39 |           
 40 |     	if(inputFile.endsWith(".txt")){
 41 |             String odtFile = FileUtils.getFilePrefix(inputFile)+".odt";
 42 |             if(new File(odtFile).exists()){
 43 |                 System.out.println("odt文件已存在！");
 44 |                 inputFile = odtFile;
 45 |             }else{
 46 |                 try {
 47 |                     FileUtils.copyFile(inputFile,odtFile);
 48 |                     inputFile = odtFile;
 49 |                 } catch (FileNotFoundException e) {
 50 |                     System.out.println("文档不存在！");
 51 |                     e.printStackTrace();
 52 |                 }
 53 |             }
 54 |         }
 55 |     	
 56 |         File pdfFile = new File(outputFile);  
 57 |         if (pdfFile.exists()) {  
 58 |             pdfFile.delete();  
 59 |         }  
 60 |         try{  
 61 |             long startTime = System.currentTimeMillis();  
 62 |             //打开服务  
 63 |             startService();          
 64 |             OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager);  
 65 |             //开始转换  
 66 |             converter.convert(new File(inputFile),new File(outputFile));  
 67 |             //关闭  
 68 |             stopService();  
 69 |             System.out.println("运行结束");  
 70 |         }catch (Exception e) {  
 71 |             // TODO: handle exception  
 72 |             e.printStackTrace();  
 73 |         }  
 74 |     }  
 75 |     
 76 |     public static void doc2Docx(String inputFile,String outputFile) {
 77 |     	File pdfFile = new File(outputFile);  
 78 |         if (pdfFile.exists()) {  
 79 |             pdfFile.delete();  
 80 |         }  
 81 |         try{  
 82 |             long startTime = System.currentTimeMillis();  
 83 |             //打开服务  
 84 |             startService();          
 85 |             OfficeDocumentConverter converter = new OfficeDocumentConverter(officeManager);  
 86 |             DocumentFormat docx = converter.getFormatRegistry().getFormatByExtension("docx");
 87 |             docx.setStoreProperties(DocumentFamily.TEXT, Collections.singletonMap("FilterName", "MS Word 2007 XML"));
 88 |             //开始转换  
 89 |             converter.convert(new File(inputFile),new File(outputFile), docx);  
 90 |             //关闭  
 91 |             stopService();  
 92 |             System.out.println("运行结束");  
 93 |         }catch (Exception e) {  
 94 |             // TODO: handle exception  
 95 |             e.printStackTrace();  
 96 |         }  
 97 |     }
 98 |       
 99 |     public static void stopService(){  
100 |         if (officeManager != null) {  
101 |             officeManager.stop();  
102 |         }  
103 |     }  
104 |       
105 |     public static void startService(){  
106 |         DefaultOfficeManagerConfiguration configuration = new DefaultOfficeManagerConfiguration();  
107 |         try {
108 |         	System.out.println("准备启动服务....");
109 |             configuration.setOfficeHome(OPEN_OFFICE_HOME);//设置安装目录  
110 |             configuration.setPortNumbers(OPEN_OFFICE_PORT); //设置端口  
111 |             configuration.setTaskExecutionTimeout(1000 * 60 * 5L);  
112 |             configuration.setTaskQueueTimeout(1000 * 60 * 60 * 24L);  
113 |             officeManager = configuration.buildOfficeManager();  
114 |             officeManager.start();    //启动服务  
115 |             System.out.println("office转换服务启动成功!");
116 |         } catch (Exception ce) {  
117 |             System.out.println("office转换服务启动失败!详细信息:" + ce);  
118 |         }  
119 |     }  
120 | }  


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/exceptions/ParseException.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.exceptions;
 2 | 
 3 | public class ParseException extends Exception {
 4 | 
 5 | 	private static final long serialVersionUID = 939204100093323412L;
 6 | 
 7 | 	public ParseException(Throwable e) {
 8 | 		super(e.getMessage(), e);
 9 | 	}
10 | 
11 | 	public ParseException(String message) {
12 | 		super(message);
13 | 	}
14 | 
15 | 	public ParseException(String messageTemplate, Object... params) {
16 | 		super(String.format(messageTemplate, params));
17 | 	}
18 | 
19 | 	public ParseException(String message, Throwable throwable) {
20 | 		super(message, throwable);
21 | 	}
22 | 
23 | 	public ParseException(Throwable throwable, String messageTemplate, Object... params) {
24 | 		super(String.format(messageTemplate, params), throwable);
25 | 	}
26 | }
27 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/format/DefaultCellFormater.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.format;
 2 | 
 3 | import com.suncht.wordread.model.ContentTypeEnum;
 4 | import com.suncht.wordread.model.WordTableCellContent;
 5 | import com.suncht.wordread.model.WordTableCellContentFormula;
 6 | import com.suncht.wordread.model.WordTableCellContentImage;
 7 | import com.suncht.wordread.model.WordTableCellContentImage.WcImage;
 8 | import com.suncht.wordread.model.WordTableCellContentOleObject;
 9 | import com.suncht.wordread.model.WordTableCellContentOleObject.WcOleObject;
10 | import com.suncht.wordread.model.WordTableCellContentText;
11 | 
12 | /**
13 |  * 单元格内容格式化的默认实现
14 |  * @author suncht
15 |  *
16 |  */
17 | public class DefaultCellFormater implements ICellFormater {
18 | 	@Override
19 | 	public Object format(WordTableCellContent<?> cellContent) {
20 | 		if(cellContent.getContentType() == ContentTypeEnum.Text) {
21 | 			WordTableCellContentText _cellContent = (WordTableCellContentText)cellContent;
22 | 			return this.formatText(_cellContent);
23 | 		} else if(cellContent.getContentType() == ContentTypeEnum.Image) {
24 | 			WordTableCellContentImage _cellContent = (WordTableCellContentImage)cellContent;
25 | 			return this.formatImage(_cellContent);
26 | 		} else if(cellContent.getContentType() == ContentTypeEnum.Formula) {
27 | 			WordTableCellContentFormula _cellContent = (WordTableCellContentFormula)cellContent;
28 | 			return this.formatFormula(_cellContent);
29 | 		} else if(cellContent.getContentType() == ContentTypeEnum.OleObject) {
30 | 			WordTableCellContentOleObject _cellContent = (WordTableCellContentOleObject)cellContent;
31 | 			return this.formatOleObject(_cellContent);
32 | 		}
33 | 		return "";
34 | 	}
35 | 
36 | 
37 | 	public Object formatText(WordTableCellContentText cellContent) {
38 | 		String text = cellContent.getData().toString();
39 | 		return text;
40 | 	}
41 | 	
42 | 	public Object formatImage(WordTableCellContentImage cellContent) {
43 | 		WcImage imageContent = (WcImage)cellContent.getData();
44 | 		return imageContent!=null ? imageContent.getFileName(): "";
45 | 	}
46 | 	
47 | 	public Object formatFormula(WordTableCellContentFormula cellContent) {
48 | 		String formula = cellContent.getData().getLatex();
49 | 		return formula;
50 | 	}
51 | 	
52 | 	private Object formatOleObject(WordTableCellContentOleObject cellContent) {
53 | 		WcOleObject oleObject = cellContent.getData();
54 | 		return oleObject!=null ? oleObject.getFileName(): "";
55 | 	}
56 | }
57 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/format/DefaultWordTableFormater.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.format;
 2 | 
 3 | import java.util.List;
 4 | 
 5 | import com.suncht.wordread.model.WordTableCell;
 6 | import com.suncht.wordread.model.WordTableCellContent;
 7 | import com.suncht.wordread.model.WordTableComplexCell;
 8 | import com.suncht.wordread.model.WordTableRow;
 9 | import com.suncht.wordread.model.WordTableSimpleCell;
10 | 
11 | /**
12 |  * 默认的单元格内容Formatter
13 |  * 
14 |  * @author suncht
15 |  *
16 |  */
17 | public class DefaultWordTableFormater implements IWordTableFormater {
18 | 	private ICellFormater cellFormater;
19 | 
20 | 	private StringBuilder builder;
21 | 
22 | 	public DefaultWordTableFormater() {
23 | 		this.cellFormater = new DefaultCellFormater();
24 | 	}
25 | 
26 | 	public DefaultWordTableFormater(ICellFormater cellFormater) {
27 | 		this.cellFormater = cellFormater;
28 | 	}
29 | 
30 | 	public void format(WordTableCell tableCell, StringBuilder builder) {
31 | 		this.builder = builder!=null ? builder : new StringBuilder();
32 | 		
33 | 		if (tableCell instanceof WordTableSimpleCell) {
34 | 			printCell(tableCell.getContent());
35 | 		} else if (tableCell instanceof WordTableComplexCell) {
36 | 			WordTableComplexCell cell = (WordTableComplexCell) tableCell;
37 | 
38 | 			List<WordTableRow> rows = cell.getInnerTable().getRows();
39 | 			for (WordTableRow row : rows) {
40 | 				for (WordTableCell wtcell : row.getCells()) {
41 | 					printCell(wtcell.getContent());
42 | 				}
43 | 			}
44 | 		}
45 | 	}
46 | 
47 | 	private void printCell(WordTableCellContent cellContent) {
48 | 		Object data = cellFormater.format(cellContent);
49 | 		builder.append(data.toString());
50 | 	}
51 | }
52 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/format/ICellFormater.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.format;
 2 | 
 3 | import com.suncht.wordread.model.WordTableCellContent;
 4 | 
/**
 * Contract for formatting the data of a table cell.
 * @author changtan.sun
 *
 */
public interface ICellFormater {
	/**
	 * Formats one cell content.
	 * @param cellContent the cell content to format
	 * @return the formatted value
	 */
	public Object format(WordTableCellContent<?> cellContent);
	
}
14 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/format/IWordTableFormater.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.format;
 2 | 
 3 | import com.suncht.wordread.model.WordTableCell;
 4 | 
/**
 * Contract for formatting table data.
 * @author changtan.sun
 *
 */
public interface IWordTableFormater {
	/**
	 * Formats a table cell into the given builder.
	 * @param tableCell the cell to format
	 * @param builder the builder passed in to receive the output
	 */
	public void format(WordTableCell tableCell, StringBuilder builder);
}
13 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/ContentTypeEnum.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.model;
 2 | 
/**
 * Type of a cell's content.
 * @author suncht
 *
 */
public enum ContentTypeEnum {
	/**
	 * Plain text
	 */
	Text, 
	/**
	 * Image
	 */
	Image,
	/**
	 * Formula
	 */
	Formula,
	/**
	 * OLE object
	 */
	OleObject
}
26 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/TTCPr.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.wordread.model;
  2 | 
  3 | import java.io.Serializable;
  4 | import java.math.BigInteger;
  5 | 
  6 | import com.suncht.wordread.parser.mapping.IWordTableMemoryMappingVisitor;
  7 | 
  8 | /**
  9 |  * word的单元格
 10 |  * @author changtan.sun
 11 |  *
 12 |  */
 13 | public class TTCPr implements Serializable, Cloneable {
 14 | 	private static final long serialVersionUID = 463466191913957614L;
 15 | 	/**
 16 | 	 * 单元格的类型
 17 | 	 */
 18 | 	private TTCPrEnum type;
 19 | 	/**
 20 | 	 * 在word中实际行号
 21 | 	 */
 22 | 	private int realRowIndex;
 23 | 	/**
 24 | 	 * 在word中实际列号
 25 | 	 */
 26 | 	private int realColumnIndex;
 27 | 	
 28 | 	/**
 29 | 	 * 逻辑行号
 30 | 	 */
 31 | 	private int logicRowIndex;
 32 | 	
 33 | 	/**
 34 | 	 * 逻辑列号
 35 | 	 */
 36 | 	private int logicColumnIndex;
 37 | 	/**
 38 | 	 * 根单元格（如果是被合并的单元格(包括行合并、列合并)， 则指向合并开始的单元格）
 39 | 	 *  * 例子：
 40 | 	 *	|-------|-----------------------------	|
 41 | 	 	| 任务   	|    	   	故障影响                              	|	
 42 | 	 	|      	| ----------------------------	|
 43 | 	 	| 阶段   	| 局部影响 | 高一层次影响 | 最终影响		|
 44 | 	 	|-------|-----------------------------	|
 45 | 	 	
 46 | 	 	说明：“任务阶段”所在单元格进行2行合并，其中第一个单元格是root，其他单元格的Root是第一个单元格
 47 | 	 */
 48 | 	private TTCPr root;
 49 | 	/**
 50 | 	 * 单元格的数据内容
 51 | 	 */
 52 | 	private WordTableCellContent<?> content;
 53 | 	
 54 | 	/**
 55 | 	 * 单元格的宽度
 56 | 	 */
 57 | 	private BigInteger width;
 58 | 	/**
 59 | 	 * 合并了多少行
 60 | 	 */
 61 | 	private int rowSpan = 0;
 62 | 	/**
 63 | 	 * 合并了多少列
 64 | 	 */
 65 | 	private int colSpan = 0;
 66 | 
 67 | 	/**
 68 | 	 * 父单元格，当列合并时有效
 69 | 	 * 例子：
 70 | 	 *	|-------|-----------------------------	|
 71 | 	 	| 任务   	|    	   	故障影响                              	|	
 72 | 	 	|      	| ----------------------------	|
 73 | 	 	| 阶段   	| 局部影响 | 高一层次影响 | 最终影响		|
 74 | 	 	|-------|-----------------------------	|
 75 | 	 	说明：“局部影响”所在单元格的父单元格parent是“故障影响”所在单元格
 76 | 	 */
 77 | 	//private TTCPr parent;
 78 | 
 79 | 	/**
 80 | 	 * 是否有效单元格。 被合并的单元格不属于有效单元格
 81 | 	 * @return
 82 | 	 */
 83 | 	public boolean isValid() {
 84 | 		return type == TTCPrEnum.VM_S || type == TTCPrEnum.NONE || type == TTCPrEnum.HM_S || type == TTCPrEnum.HVM_S;
 85 | 	}
 86 | 
 87 | 	/**
 88 | 	 * 是否进行了列合并
 89 | 	 * @return
 90 | 	 */
 91 | 	public boolean isDoneColSpan() {
 92 | 		return type == TTCPrEnum.HM_S || type == TTCPrEnum.HM || type == TTCPrEnum.HVM_S;
 93 | 	}
 94 | 
 95 | 	/**
 96 | 	 * 是否进行了行合并
 97 | 	 * @return
 98 | 	 */
 99 | 	public boolean isDoneRowSpan() {
100 | 		return type == TTCPrEnum.VM_S || type == TTCPrEnum.VM || type == TTCPrEnum.HVM_S;
101 | 	}
102 | 
103 | 	/**
104 | 	 * 单元格坐标
105 | 	 * @return
106 | 	 */
107 | 	public String getCellPosition() {
108 | 		return realRowIndex + "-" + realColumnIndex;
109 | 	}
110 | 
	/**
	 * @return the type of this cell
	 */
	public TTCPrEnum getType() {
		return type;
	}

	/**
	 * @param type the type of this cell
	 */
	public void setType(TTCPrEnum type) {
		this.type = type;
	}
118 | 
	/**
	 * @return the actual row number of the cell in the Word document
	 */
	public int getRealRowIndex() {
		return realRowIndex;
	}

	/**
	 * @param realRowIndex the actual row number of the cell in the Word document
	 */
	public void setRealRowIndex(int realRowIndex) {
		this.realRowIndex = realRowIndex;
	}
126 | 
	/**
	 * @return the actual column number of the cell in the Word document
	 */
	public int getRealColumnIndex() {
		return realColumnIndex;
	}

	/**
	 * @param realColumnIndex the actual column number of the cell in the Word document
	 */
	public void setRealColumnIndex(int realColumnIndex) {
		this.realColumnIndex = realColumnIndex;
	}
134 | 	
135 | 	public int getLogicRowIndex() {
136 | 		return logicRowIndex;
137 | 	}
138 | 
139 | 	public void setLogicRowIndex(int logicRowIndex) {
140 | 		this.logicRowIndex = logicRowIndex;
141 | 	}
142 | 
143 | 	public int getLogicColumnIndex() {
144 | 		return logicColumnIndex;
145 | 	}
146 | 
147 | 	public void setLogicColumnIndex(int logicColumnIndex) {
148 | 		this.logicColumnIndex = logicColumnIndex;
149 | 	}
150 | 
151 | 	public int getRowSpan() {
152 | 		return rowSpan;
153 | 	}
154 | 
155 | 	public void setRowSpan(int rowSpan) {
156 | 		this.rowSpan = rowSpan;
157 | 	}
158 | 
159 | 	public int getColSpan() {
160 | 		return colSpan;
161 | 	}
162 | 
163 | 	public void setColSpan(int colSpan) {
164 | 		this.colSpan = colSpan;
165 | 	}
166 | 
167 | 	public WordTableCellContent<?> getContent() {
168 | 		if (root != null) {
169 | 			return root.getContent();
170 | 		}
171 | 		return content;
172 | 	}
173 | 
174 | 	public void setContent(WordTableCellContent<?> content) {
175 | 		this.content = content;
176 | 	}
177 | 
178 | 	public TTCPr getRoot() {
179 | 		return root;
180 | 	}
181 | 
182 | 	public void setRoot(TTCPr root) {
183 | 		this.root = root;
184 | 	}
185 | 
186 | //	public TTCPr getParent() {
187 | //		return parent;
188 | //	}
189 | //
190 | //	public void setParent(TTCPr parent) {
191 | //		this.parent = parent;
192 | //	}
193 | 
194 | 	public BigInteger getWidth() {
195 | 		return width;
196 | 	}
197 | 
198 | 	public void setWidth(BigInteger width) {
199 | 		this.width = width;
200 | 	}
201 | 
202 | 	@Override
203 | 	public String toString() {
204 | 		if (root != null) {
205 | 			return "TTCPr [root=" + root + "]";
206 | 		}
207 | 		return "TTCPr [content=" + content.getData() + "]";
208 | 	}
209 | 
210 | 	public void accept(IWordTableMemoryMappingVisitor visitor, int realRowIndex, int realColumnIndex) {
211 | 		visitor.visit(this, realRowIndex, realColumnIndex);
212 | 	}
213 | 
214 | 	public static enum TTCPrEnum {
215 | 		/**
216 | 		 * 无任何格式
217 | 		 */
218 | 		NONE,
219 | 		/**
220 | 		 * 行合并的开始
221 | 		 */
222 | 		VM_S,
223 | 		/**
224 | 		 * 被行合并
225 | 		 */
226 | 		VM,
227 | 		/**
228 | 		 * 列合并的开始
229 | 		 */
230 | 		HM_S,
231 | 		/**
232 | 		 * 被列合并
233 | 		 */
234 | 		HM,
235 | 		/**
236 | 		 * 行合并的开始，又是列合并的开始
237 | 		 */
238 | 		HVM_S
239 | 	}
240 | }
241 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTable.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.model;
 2 | 
 3 | import java.util.List;
 4 | 
 5 | import com.google.common.collect.Lists;
 6 | import com.suncht.wordread.format.DefaultWordTableFormater;
 7 | import com.suncht.wordread.format.IWordTableFormater;
 8 | import com.suncht.wordread.output.IWordTableOutputStrategy;
 9 | 
10 | /**
11 |  * 一个表格对象 每个表格有多个行
12 |  * 
13 |  * @author changtan.sun
14 |  *
15 |  */
16 | public class WordTable {
17 | 	private List<WordTableRow> rows = Lists.newArrayList();
18 | 
19 | 	public List<WordTableRow> getRows() {
20 | 		return rows;
21 | 	}
22 | 
23 | 	@Override
24 | 	public String toString() {
25 | 		return rows.toString();
26 | 	}
27 | 
28 | 	public void output(IWordTableOutputStrategy outputStrategy) {
29 | 		for (WordTableRow row : rows) {
30 | 			for (WordTableCell cell : row.getCells()) {
31 | 				outputStrategy.output(cell);
32 | 			}
33 | 		}
34 | 	}
35 | 
36 | 	public String format(IWordTableFormater tableFormater) {
37 | 		if (tableFormater == null) {
38 | 			tableFormater = new DefaultWordTableFormater();
39 | 		}
40 | 
41 | 		StringBuilder builder = new StringBuilder();
42 | 		for (WordTableRow row : rows) {
43 | 			for (WordTableCell cell : row.getCells()) {
44 | 				tableFormater.format(cell, builder);
45 | 				builder.append('\t');
46 | 			}
47 | 			builder.append(this.newline());
48 | 		}
49 | 
50 | 		return builder.toString();
51 | 	}
52 | 
53 | 	public String format() {
54 | 		return this.format(null);
55 | 	}
56 | 
57 | 	private String newline() {
58 | 		return System.getProperty("line.separator");
59 | 	}
60 | 
61 | }
62 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableCell.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.wordread.model;
  2 | 
  3 | import com.suncht.wordread.format.IWordTableFormater;
  4 | 
  5 | /**
  6 |  * 表格的单元格
  7 |  * @author changtan.sun
  8 |  *
  9 |  */
 10 | public class WordTableCell {
 11 | 	//	/**
 12 | 	//	 * 行号
 13 | 	//	 */
 14 | 	//	private int rowIndex;
 15 | 	//	/**
 16 | 	//	 * 列号
 17 | 	//	 */
 18 | 	//	private int columnIndex;
 19 | 	//	/**
 20 | 	//	 * 在word中实际行号
 21 | 	//	 */
 22 | 	//	private int realRowIndex;
 23 | 	//	/**
 24 | 	//	 * 在word中实际列号
 25 | 	//	 */
 26 | 	//	private int realColumnIndex;
 27 | 	/**
 28 | 	 * 单元格的内容
 29 | 	 */
 30 | 	private WordTableCellContent content;
 31 | 
 32 | 	/**
 33 | 	 * 在word中合并了多少行
 34 | 	 */
 35 | 	private int rowSpan = 1;
 36 | 	/**
 37 | 	 * 在word中合并了多少列
 38 | 	 */
 39 | 	private int columnSpan = 1;
 40 | 
 41 | 	//	public String getCellPosition() {
 42 | 	//		return rowIndex + "-" + columnIndex;
 43 | 	//	}
 44 | 
 45 | 	//	public int getRealRowIndex() {
 46 | 	//		return realRowIndex;
 47 | 	//	}
 48 | 	//
 49 | 	//	public void setRealRowIndex(int realRowIndex) {
 50 | 	//		this.realRowIndex = realRowIndex;
 51 | 	//	}
 52 | 	//
 53 | 	//	public int getRealColumnIndex() {
 54 | 	//		return realColumnIndex;
 55 | 	//	}
 56 | 	//
 57 | 	//	public void setRealColumnIndex(int realColumnIndex) {
 58 | 	//		this.realColumnIndex = realColumnIndex;
 59 | 	//	}
 60 | 
 61 | 	public int getRowSpan() {
 62 | 		return rowSpan;
 63 | 	}
 64 | 
 65 | 	public WordTableCellContent getContent() {
 66 | 		return content;
 67 | 	}
 68 | 
 69 | 	public void setContent(WordTableCellContent content) {
 70 | 		this.content = content;
 71 | 	}
 72 | 
 73 | 	public void setRowSpan(int rowSpan) {
 74 | 		this.rowSpan = rowSpan;
 75 | 	}
 76 | 
 77 | 	public int getColumnSpan() {
 78 | 		return columnSpan;
 79 | 	}
 80 | 
 81 | 	public void setColumnSpan(int columnSpan) {
 82 | 		this.columnSpan = columnSpan;
 83 | 	}
 84 | 
 85 | 	//	public int getRowIndex() {
 86 | 	//		return rowIndex;
 87 | 	//	}
 88 | 	//
 89 | 	//	public void setRowIndex(int rowIndex) {
 90 | 	//		this.rowIndex = rowIndex;
 91 | 	//	}
 92 | 
 93 | 	//	public int getColumnIndex() {
 94 | 	//		return columnIndex;
 95 | 	//	}
 96 | 
 97 | 	//	public void setColumnIndex(int columnIndex) {
 98 | 	//		this.columnIndex = columnIndex;
 99 | 	//	}
100 | 
101 | 	//	@Override
102 | 	//	public String toString() {
103 | 	//		return "PeraWordTableCell [rowIndex=" + rowIndex + ", columnIndex=" + columnIndex + ", text=" + text + "]";
104 | 	//	}
105 | 
106 | 	/**
107 | 	 * 单元格数据格式化成字符串
108 | 	 * @param formater
109 | 	 * @return
110 | 	 */
111 | 	public String format(IWordTableFormater formater) {
112 | 		if (formater == null) {
113 | 			return this.toString();
114 | 		}
115 | 
116 | 		StringBuilder stringBuilder = new StringBuilder();
117 | 		formater.format(this, stringBuilder);
118 | 		return stringBuilder.toString();
119 | 	}
120 | 
121 | }
122 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableCellContent.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.model;
 2 | 
 3 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
 4 | 
 5 | /**
 6 | * <p>标题: 单元格内容对象</p>  
 7 | * <p>描述: </p>  
 8 | * @author changtan.sun  
 9 | * @date 2018年4月22日
10 |  */
11 | public abstract class WordTableCellContent<T> {
12 | 	protected WordDocType docType;
13 | 	
14 | 	protected ContentTypeEnum contentType;
15 | 	protected T data;
16 | 
17 | 	public T getData() {
18 | 		return data;
19 | 	}
20 | 
21 | 	public void setData(T data) {
22 | 		this.data = data;
23 | 	}
24 | 	
25 | 	public ContentTypeEnum getContentType() {
26 | 		return contentType;
27 | 	}
28 | 
29 | 	public void setContentType(ContentTypeEnum contentType) {
30 | 		this.contentType = contentType;
31 | 	}
32 | 
33 | 	/**
34 | 	 * 拷贝对象，具体实现由子类实现
35 | 	 * @return
36 | 	 */
37 | 	public abstract WordTableCellContent<T> copy();
38 | 	
39 | 	public abstract void load(Object cellObj);
40 | }
41 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableCellContentFormula.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.wordread.model;
  2 | 
  3 | import java.io.StringReader;
  4 | import java.util.HashMap;
  5 | import java.util.Map;
  6 | 
  7 | import org.apache.poi.xwpf.usermodel.XWPFTableCell;
  8 | import org.dom4j.Document;
  9 | import org.dom4j.DocumentException;
 10 | import org.dom4j.DocumentFactory;
 11 | import org.dom4j.Element;
 12 | import org.dom4j.io.SAXReader;
 13 | import org.slf4j.Logger;
 14 | import org.slf4j.LoggerFactory;
 15 | import org.xml.sax.InputSource;
 16 | 
 17 | import com.suncht.wordread.model.WordTableCellContentFormula.WcFormula;
 18 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
 19 | import com.suncht.wordread.utils.MathmlUtils;
 20 | 
 21 | 
 22 | public class WordTableCellContentFormula extends WordTableCellContent<WcFormula> {
 23 | 	private final static Logger logger = LoggerFactory.getLogger(WordTableCellContentFormula.class);
 24 | 	
 25 | 	public WordTableCellContentFormula(WordDocType docType) {
 26 | 		this.docType = docType;
 27 | 	}
 28 | 	
 29 | 	@Override
 30 | 	public void load(Object cellObj) {
 31 | 		this.setContentType(ContentTypeEnum.Formula);
 32 | 		
 33 | 		if(docType == WordDocType.DOCX) {
 34 | 			XWPFTableCell cell = (XWPFTableCell) cellObj;
 35 | 			
 36 | 			String xml = cell.getCTTc().xmlText();
 37 | 			String omml = this.extractOml(xml);
 38 | 
 39 | 			String mml = MathmlUtils.convertOMML2MML(omml);
 40 | 			String latex = MathmlUtils.convertMML2Latex(mml);
 41 | 			
 42 | 			WcFormula formulaContent = new WcFormula();
 43 | 			formulaContent.setMml(mml);
 44 | 			formulaContent.setLatex(latex);
 45 | 			this.setData(formulaContent);
 46 | 		} else if(docType == WordDocType.DOC) {
 47 | 			
 48 | 		}
 49 | 		
 50 | 	}
 51 | 
 52 | 
 53 | 	@Override
 54 | 	public WordTableCellContent<WcFormula> copy() {
 55 | 		WordTableCellContentFormula newContent = new WordTableCellContentFormula(this.docType);
 56 | 		newContent.setData(this.data);
 57 | 		newContent.setContentType(ContentTypeEnum.Formula);
 58 | 		return newContent;
 59 | 	}
 60 | 
 61 | 	private String extractOml(String xml) {
 62 | 		//dom4j解析器的初始化
 63 | 		SAXReader reader = new SAXReader(new DocumentFactory());
 64 | 		Map<String, String> map = new HashMap<String, String>();
 65 | 		map.put("xdr", "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing");
 66 | 		map.put("m", "http://schemas.openxmlformats.org/officeDocument/2006/math");
 67 | 		reader.getDocumentFactory().setXPathNamespaceURIs(map); //xml文档的namespace设置
 68 | 
 69 | 		InputSource source = new InputSource(new StringReader(xml));
 70 | 		source.setEncoding("utf-8");
 71 | 		try {
 72 | 			Document doc = reader.read(source);
 73 | 			Element root = doc.getRootElement();
 74 | 			Element e = (Element) root.selectSingleNode("//m:oMathPara"); //用xpath得到OMML节点
 75 | 			String omml = e.asXML(); //转为xml
 76 | 			return omml;
 77 | 		} catch (DocumentException e) {
 78 | 			e.printStackTrace();
 79 | 		}
 80 | 		return null;
 81 | 	}
 82 | 	
 83 | 	public static class WcFormula {
 84 | 		private String mml;
 85 | 		private String latex;
 86 | 		public String getMml() {
 87 | 			return mml;
 88 | 		}
 89 | 		public void setMml(String mml) {
 90 | 			this.mml = mml;
 91 | 		}
 92 | 		public String getLatex() {
 93 | 			return latex;
 94 | 		}
 95 | 		public void setLatex(String latex) {
 96 | 			this.latex = latex;
 97 | 		}
 98 | 	}
 99 | 
100 | 
101 | }
102 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableCellContentImage.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.wordread.model;
  2 | 
  3 | import java.io.StringReader;
  4 | import java.util.Arrays;
  5 | import java.util.HashMap;
  6 | import java.util.Map;
  7 | 
  8 | import org.apache.commons.lang3.StringUtils;
  9 | import org.apache.poi.openxml4j.opc.PackageRelationship;
 10 | import org.apache.poi.xwpf.usermodel.XWPFDocument;
 11 | import org.apache.poi.xwpf.usermodel.XWPFPictureData;
 12 | import org.apache.poi.xwpf.usermodel.XWPFTableCell;
 13 | import org.dom4j.Attribute;
 14 | import org.dom4j.Document;
 15 | import org.dom4j.DocumentException;
 16 | import org.dom4j.DocumentFactory;
 17 | import org.dom4j.Element;
 18 | import org.dom4j.io.SAXReader;
 19 | import org.dom4j.tree.DefaultElement;
 20 | import org.slf4j.Logger;
 21 | import org.slf4j.LoggerFactory;
 22 | import org.xml.sax.InputSource;
 23 | 
 24 | import com.suncht.wordread.model.WordTableCellContentImage.WcImage;
 25 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
 26 | 
 27 | public class WordTableCellContentImage extends WordTableCellContent<WcImage> {
 28 | 	private final static Logger logger = LoggerFactory.getLogger(WordTableCellContentImage.class);
 29 | 	
 30 | 	public WordTableCellContentImage(WordDocType docType) {
 31 | 		this.docType = docType;
 32 | 	}
 33 | 	
 34 | 	@Override
 35 | 	public void load(Object cellObj) {
 36 | 		this.setContentType(ContentTypeEnum.Image);
 37 | 		
 38 | 		if(docType == WordDocType.DOCX) {
 39 | 			XWPFTableCell cell = (XWPFTableCell) cellObj;
 40 | 			String xml = cell.getCTTc().xmlText();
 41 | 			String embedId = extractEmbedId(xml);
 42 | 			this.setData(this.readImage(embedId, cell.getXWPFDocument()));
 43 | 		} else if(docType == WordDocType.DOC) {
 44 | 			
 45 | 		}
 46 | 	}
 47 | 	
 48 | 	
 49 | 	@Override
 50 | 	public WordTableCellContent<WcImage> copy() {
 51 | 		WordTableCellContentImage newContent = new WordTableCellContentImage(this.docType);
 52 | 		newContent.setData(data);
 53 | 		newContent.setContentType(contentType);
 54 | 		return newContent;
 55 | 	}
 56 | 
 57 | 	private String extractEmbedId(String xml) {
 58 | 		// dom4j解析器的初始化
 59 | 		SAXReader reader = new SAXReader(new DocumentFactory());
 60 | 		Map<String, String> map = new HashMap<String, String>();
 61 | 		map.put("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
 62 | 		map.put("a", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
 63 | 		map.put("xdr", "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing");
 64 | 		map.put("wp", "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing");
 65 | 		map.put("pic", "http://schemas.openxmlformats.org/drawingml/2006/picture");
 66 | 		map.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships");
 67 | 		reader.getDocumentFactory().setXPathNamespaceURIs(map); // xml文档的namespace设置
 68 | 
 69 | 		InputSource source = new InputSource(new StringReader(xml));
 70 | 		source.setEncoding("utf-8");
 71 | 		try {
 72 | 			Document doc = reader.read(source);
 73 | 			Element root = doc.getRootElement();
 74 | 			Element e = (Element) root.selectSingleNode("//pic:blipFill");
 75 | 			Element blip = (DefaultElement) e.content().get(0);
 76 | 			String embedId = ((Attribute) (blip.attributes().get(0))).getValue();
 77 | 			return embedId;
 78 | 		} catch (DocumentException e) {
 79 | 			e.printStackTrace();
 80 | 		}
 81 | 		return null;
 82 | 	}
 83 | 
 84 | 	private WcImage readImage(String embedId, final XWPFDocument xdoc) {
 85 | 		if (StringUtils.isBlank(embedId)) {
 86 | 			return null;
 87 | 		}
 88 | 		WcImage imageContent = null;
 89 | 		for (XWPFPictureData pictureData : xdoc.getAllPictures()) {
 90 | 			PackageRelationship relationship = pictureData.getPackageRelationship();
 91 | 			if (embedId.equals(relationship.getId())) {
 92 | 				imageContent = new WcImage();
 93 | 				imageContent.setData(pictureData.getData());
 94 | 				imageContent.setFileName(pictureData.getFileName());
 95 | 				imageContent.setImageType(pictureData.getPictureType());
 96 | 				break;
 97 | 			}
 98 | 		}
 99 | 
100 | 		return imageContent;
101 | 	}
102 | 
103 | 	/**
104 | 	 * 图片内容
105 | 	* <p>标题: ImageContent</p>  
106 | 	* <p>描述: </p>  
107 | 	* @author changtan.sun  
108 | 	* @date 2018年4月22日
109 | 	 */
110 | 	public static class WcImage {
111 | 		private String fileName;
112 | 		private byte[] data;
113 | 		
114 | 		/**
115 | 		 * 图片类型，参考org.apache.poi.xwpf.usermodel
116 | 		 */
117 | 		private int imageType;
118 | 
119 | 		public String getFileName() {
120 | 			return fileName;
121 | 		}
122 | 
123 | 		public void setFileName(String fileName) {
124 | 			this.fileName = fileName;
125 | 		}
126 | 
127 | 		public byte[] getData() {
128 | 			if (data == null) {
129 | 				return new byte[0];
130 | 			}
131 | 			return Arrays.copyOf(data, data.length);
132 | 		}
133 | 
134 | 		public void setData(byte[] data) {
135 | 			if (data == null) {
136 | 				return;
137 | 			}
138 | 			this.data = Arrays.copyOf(data, data.length);
139 | 		}
140 | 
141 | 		public int getImageType() {
142 | 			return imageType;
143 | 		}
144 | 
145 | 		public void setImageType(int imageType) {
146 | 			this.imageType = imageType;
147 | 		}
148 | 
149 | 	}
150 | 
151 | 
152 | }
153 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableCellContentOleObject.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.wordread.model;
  2 | 
  3 | import java.io.InputStream;
  4 | import java.io.StringReader;
  5 | import java.util.Arrays;
  6 | import java.util.HashMap;
  7 | import java.util.Iterator;
  8 | import java.util.List;
  9 | import java.util.Map;
 10 | 
 11 | import org.apache.commons.io.FilenameUtils;
 12 | import org.apache.commons.lang3.StringUtils;
 13 | import org.apache.poi.POIXMLDocumentPart;
 14 | import org.apache.poi.hwpf.usermodel.TableCell;
 15 | import org.apache.poi.openxml4j.opc.PackagePart;
 16 | import org.apache.poi.poifs.dev.POIFSViewEngine;
 17 | import org.apache.poi.poifs.filesystem.DirectoryEntry;
 18 | import org.apache.poi.poifs.filesystem.DirectoryNode;
 19 | import org.apache.poi.poifs.filesystem.DocumentEntry;
 20 | import org.apache.poi.poifs.filesystem.DocumentNode;
 21 | import org.apache.poi.poifs.filesystem.Entry;
 22 | import org.apache.poi.poifs.filesystem.Ole10Native;
 23 | import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 24 | import org.apache.poi.ss.formula.eval.NotImplementedException;
 25 | import org.apache.poi.xwpf.usermodel.XWPFDocument;
 26 | import org.apache.poi.xwpf.usermodel.XWPFTableCell;
 27 | import org.dom4j.Attribute;
 28 | import org.dom4j.Document;
 29 | import org.dom4j.DocumentException;
 30 | import org.dom4j.DocumentFactory;
 31 | import org.dom4j.Element;
 32 | import org.dom4j.io.SAXReader;
 33 | import org.dom4j.tree.DefaultElement;
 34 | import org.slf4j.Logger;
 35 | import org.slf4j.LoggerFactory;
 36 | import org.xml.sax.InputSource;
 37 | 
 38 | import com.suncht.wordread.model.WordTableCellContentOleObject.WcOleObject;
 39 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
 40 | 
 41 | /**
 42 |  * <p>
 43 |  * 标题: 单元格中嵌套OLE对象
 44 |  * </p>
 45 |  * <p>
 46 |  * 描述: OLE对象，比如：附件
 47 |  * </p>
 48 |  * 
 49 |  * @author changtan.sun
 50 |  * @date 2018年4月22日
 51 |  */
 52 | public class WordTableCellContentOleObject extends WordTableCellContent<WcOleObject> {
 53 | 	private final static Logger logger = LoggerFactory.getLogger(WordTableCellContentOleObject.class);
 54 | 
 55 | 	public WordTableCellContentOleObject(WordDocType docType) {
 56 | 		this.docType = docType;
 57 | 	}
 58 | 	
 59 | 	@Override
 60 | 	public void load(Object cellObj) {
 61 | 		this.setContentType(ContentTypeEnum.OleObject);
 62 | 		
 63 | 		if(docType == WordDocType.DOCX) {
 64 | 			XWPFTableCell cell = (XWPFTableCell) cellObj;
 65 | 			String xml = cell.getCTTc().xmlText();
 66 | 			Document doc = this.buildDocument(xml);
 67 | 			String embedId = extractOleObjectEmbedId(doc);
 68 | 
 69 | 			WcOleObject oleObject = this.readOleObject(embedId, cell.getXWPFDocument());
 70 | 			this.setData(oleObject);		
 71 | 		} else if(docType == WordDocType.DOC) {
 72 | 			
 73 | 		}
 74 | 	}
 75 | 
 76 | 	@Override
 77 | 	public WordTableCellContent<WcOleObject> copy() {
 78 | 		WordTableCellContentOleObject newContent = new WordTableCellContentOleObject(this.docType);
 79 | 		newContent.setData(data);
 80 | 		newContent.setContentType(contentType);
 81 | 		return newContent;
 82 | 	}
 83 | 
 84 | 	/**
 85 | 	 * 由单元格内容xml构建Document
 86 | 	 * 
 87 | 	 * @param xml
 88 | 	 * @return
 89 | 	 */
 90 | 	private Document buildDocument(String xml) {
 91 | 		// dom4j解析器的初始化
 92 | 		SAXReader reader = new SAXReader(new DocumentFactory());
 93 | 		Map<String, String> map = new HashMap<String, String>();
 94 | 		map.put("o", "urn:schemas-microsoft-com:office:office");
 95 | 		map.put("v", "urn:schemas-microsoft-com:vml");
 96 | 		map.put("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
 97 | 		map.put("a", "http://schemas.openxmlformats.org/wordprocessingml/2006/main");
 98 | 		map.put("xdr", "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing");
 99 | 		map.put("wp", "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing");
100 | 		map.put("r", "http://schemas.openxmlformats.org/officeDocument/2006/relationships");
101 | 		reader.getDocumentFactory().setXPathNamespaceURIs(map); // xml文档的namespace设置
102 | 
103 | 		InputSource source = new InputSource(new StringReader(xml));
104 | 		source.setEncoding("utf-8");
105 | 
106 | 		try {
107 | 			Document doc = reader.read(source);
108 | 			return doc;
109 | 		} catch (DocumentException e) {
110 | 			logger.error(e.getMessage(), e);
111 | 		}
112 | 		return null;
113 | 	}
114 | 
115 | 	/**
116 | 	 * 从单元格Document中获取OLE对象的embedId
117 | 	 * 
118 | 	 * @param doc
119 | 	 * @return
120 | 	 */
121 | 	private String extractOleObjectEmbedId(Document doc) {
122 | 		Element root = doc.getRootElement();
123 | 		Element e = (Element) root.selectSingleNode("//w:object");
124 | 		Element oOLEObject = (DefaultElement) e.content().get(1);
125 | 		String embedId = ((Attribute) (oOLEObject.attribute("id"))).getValue();
126 | 		return embedId;
127 | 	}
128 | 
129 | 	/**
130 | 	 * 从单元格Document中获取附件的显示图片的embedId
131 | 	 * 
132 | 	 * @param doc
133 | 	 * @return
134 | 	 */
135 | 	private String extractImageEmbedId(Document doc) {
136 | 		Element root = doc.getRootElement();
137 | 		Element e = (Element) root.selectSingleNode("//w:object");
138 | 		Element vShape = (DefaultElement) e.content().get(0);
139 | 		Element vImagedata = (Element) vShape.selectSingleNode("//v:imagedata");
140 | 		String embedId = ((Attribute) (vImagedata.attribute("id"))).getValue();
141 | 		return embedId;
142 | 	}
143 | 
144 | 	/**
145 | 	 * 读取Ole对象
146 | 	 * 
147 | 	 * @param embedId
148 | 	 * @param xdoc
149 | 	 * @return
150 | 	 */
151 | 	private WcOleObject readOleObject(String embedId, final XWPFDocument xdoc) {
152 | 		if (StringUtils.isBlank(embedId)) {
153 | 			return null;
154 | 		}
155 | 		WcOleObject oleObject = null;
156 | 		List<POIXMLDocumentPart> parts = xdoc.getRelations();
157 | 		for (POIXMLDocumentPart poixmlDocumentPart : parts) {
158 | 			String id = poixmlDocumentPart.getPackageRelationship().getId();
159 | 			if (embedId.equals(id)) {
160 | 				PackagePart packagePart = poixmlDocumentPart.getPackagePart();
161 | 
162 | 				oleObject = new WcOleObject();
163 | 				// oleObjectContent.setFileName(packagePart.getPartName().getName());
164 | 
165 | 				// 解析Ole对象中的文件，参考：http://poi.apache.org/poifs/how-to.html
166 | 				try (InputStream is = packagePart.getInputStream();) {
167 | 					POIFSFileSystem poifs = new POIFSFileSystem(is);
168 | 
169 | 					if (isOle10NativeObject(poifs.getRoot())) {
170 | 						oleObject = readOle10Native(poifs);
171 | 					} else {
172 | 						oleObject = readDocumentOle(poifs, is);
173 | 					}
174 | 				} catch (Exception e) {
175 | 					logger.error(e.getMessage(), e);
176 | 				}
177 | 			}
178 | 		}
179 | 
180 | 		return oleObject;
181 | 	}
182 | 
183 | 	private boolean isOle10NativeObject(DirectoryNode directory) {
184 | 		return directory.hasEntry(Ole10Native.OLE10_NATIVE);
185 | 	}
186 | 
187 | 	/**
188 | 	 * 读取非文档类的Ole对象
189 | 	 * 
190 | 	 * @param poifs
191 | 	 * @return
192 | 	 */
193 | 	private WcOleObject readOle10Native(POIFSFileSystem poifs) {
194 | 		WcOleObject oleObject = new WcOleObject();
195 | 		try {
196 | 			Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(poifs);
197 | 			oleObject.setFileName(FilenameUtils.getName(ole10.getFileName()));
198 | 
199 | 			// byte[] data = IOUtils.toByteArray(packagePart.getInputStream());
200 | 			oleObject.setDataSize(ole10.getDataSize());
201 | 			oleObject.setData(ole10.getDataBuffer());
202 | 		} catch (Exception e) {
203 | 			logger.error(e.getMessage(), e);
204 | 		}
205 | 
206 | 		return oleObject;
207 | 	}
208 | 
209 | 	/**
210 | 	 * 读取文档类的OLE对象，包括Docx、Doc、xlsx、xls、ppt、pptx等
211 | 	 * 暂未实现，需要进行大量资料查阅、寻找解决方案
212 | 	 * @param poifs
213 | 	 * @return
214 | 	 */
215 | 	private WcOleObject readDocumentOle(POIFSFileSystem poifs, InputStream is) {
216 | 		DirectoryNode directory = poifs.getRoot();
217 | 		if (!directory.hasEntry("WordDocument")) {
218 | 			return null;
219 | 		}
220 | 		
221 | 		List strings = POIFSViewEngine.inspectViewable(poifs, true, 0, "  ");
222 | 		Iterator iter = strings.iterator();
223 | 
224 | 		while (iter.hasNext()) {
225 | 			//os.write( ((String)iter.next()).getBytes());
226 | 			System.out.println(iter.next());
227 | 		}
228 | 		throw new NotImplementedException("暂未实现");
229 | 		
230 | 		
231 | //		WcOleObject oleObject = new WcOleObject();
232 | //		try {
233 | //			DocumentNode entry = (DocumentNode) directory.getEntry("WpsCustomData");
234 | //			byte[] data = new byte[entry.getSize()];
235 | //			directory.createDocumentInputStream(entry).read(data);
236 | //			
237 | //			XWPFDocument doc = new XWPFDocument(directory.createDocumentInputStream(entry)); // 载入文档  
238 | //			doc.toString();
239 | //			oleObject.setFileName(FilenameUtils.getName(entry.getName()));
240 | //			//
241 | //			// //byte[] data =
242 | //			// IOUtils.toByteArray(packagePart.getInputStream());
243 | //			oleObject.setDataSize(data.length);
244 | //			oleObject.setData(data);
245 | //		} catch (Exception e) {
246 | //			logger.error(e.getMessage(), e);
247 | //		}
248 | 
249 | 		//return oleObject;
250 | 	}
251 | 
252 | 	public static class WcOleObject {
253 | 		private String fileName;
254 | 		private byte[] data;
255 | 		private int dataSize;
256 | 
257 | 		public String getFileName() {
258 | 			return fileName;
259 | 		}
260 | 
261 | 		public void setFileName(String fileName) {
262 | 			this.fileName = fileName;
263 | 		}
264 | 
265 | 		public byte[] getData() {
266 | 			if (data == null) {
267 | 				return new byte[0];
268 | 			}
269 | 			return Arrays.copyOf(data, data.length);
270 | 		}
271 | 
272 | 		public void setData(byte[] data) {
273 | 			if (data == null) {
274 | 				return;
275 | 			}
276 | 			this.data = Arrays.copyOf(data, data.length);
277 | 		}
278 | 
279 | 		public int getDataSize() {
280 | 			return dataSize;
281 | 		}
282 | 
283 | 		public void setDataSize(int dataSize) {
284 | 			this.dataSize = dataSize;
285 | 		}
286 | 	}
287 | 
288 | }
289 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableCellContentText.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.wordread.model;
  2 | 
  3 | import java.util.ArrayList;
  4 | import java.util.Collections;
  5 | import java.util.List;
  6 | 
  7 | import org.apache.commons.lang3.StringUtils;
  8 | import org.apache.poi.hwpf.usermodel.Paragraph;
  9 | import org.apache.poi.hwpf.usermodel.TableCell;
 10 | import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 11 | import org.apache.poi.xwpf.usermodel.XWPFRun;
 12 | import org.apache.poi.xwpf.usermodel.XWPFTableCell;
 13 | import org.slf4j.Logger;
 14 | import org.slf4j.LoggerFactory;
 15 | 
 16 | import com.suncht.wordread.model.WordTableCellContentText.WcText;
 17 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
 18 | 
 19 | public class WordTableCellContentText extends WordTableCellContent<WcText> {
 20 | 	private final static Logger logger = LoggerFactory.getLogger(WordTableCellContentText.class);
 21 | 	
 22 | 	public WordTableCellContentText(WordDocType docType) {
 23 | 		this.docType = docType;
 24 | 	}
 25 | 	
 26 | 	@Override
 27 | 	public void load(Object cellObj) {
 28 | 		this.setContentType(ContentTypeEnum.Text);
 29 | 		
 30 | 		if(docType == WordDocType.DOCX) {
 31 | 			XWPFTableCell cell = (XWPFTableCell) cellObj;
 32 | 			List<String> texts = new ArrayList<String>();
 33 | 			
 34 | 			List<XWPFParagraph> paragraphs = cell.getParagraphs();
 35 | 			if(paragraphs!=null && paragraphs.size()>0) {
 36 | 				for (XWPFParagraph paragraph : paragraphs) {
 37 | 					texts.add(this.runsToLine(paragraph.getRuns()));
 38 | 				}
 39 | 			} 
 40 | 			
 41 | 			WcText text = new WcText();
 42 | 			text.setParagraphs(texts);
 43 | 			
 44 | 			this.setData(text);
 45 | 		} else if(docType == WordDocType.DOC) {
 46 | 			TableCell cell = (TableCell) cellObj;
 47 | 			
 48 | 			List<String> texts = new ArrayList<String>();
 49 | 			
 50 | 			for (int i = 0, num = cell.numParagraphs(); i < num; i++) {
 51 | 				Paragraph paragraph = cell.getParagraph(i);
 52 | 				texts.add(paragraph.text().trim());
 53 | 			}
 54 | 			
 55 | 			WcText text = new WcText();
 56 | 			text.setParagraphs(texts);
 57 | 			
 58 | 			this.setData(text);
 59 | 		}
 60 | 	}
 61 | 	
 62 | 	private String runsToLine(List<XWPFRun> runs) {
 63 | 		StringBuilder builder = new StringBuilder();
 64 | 		for (XWPFRun run : runs) {
 65 | 			builder.append(run.toString());
 66 | 		}
 67 | 		
 68 | 		return builder.toString();
 69 | 	}
 70 | 	
 71 | 	public WordTableCellContent<WcText> copy() {
 72 | 		WordTableCellContentText newContent = new WordTableCellContentText(this.docType);
 73 | 		newContent.setData(data);
 74 | 		newContent.setContentType(contentType);
 75 | 		return newContent;
 76 | 	}
 77 | 	
 78 | 	/**
 79 | 	 * 文本
 80 | 	* <p>标题: WcText</p>  
 81 | 	* <p>描述: </p>  
 82 | 	* @author changtan.sun  
 83 | 	* @date 2018年4月23日
 84 | 	 */
 85 | 	public static class WcText {
 86 | 		private List<String> paragraphs;
 87 | 
 88 | 		public List<String> getParagraphs() {
 89 | 			return paragraphs;
 90 | 		}
 91 | 
 92 | 		public void setParagraphs(List<String> paragraphs) {
 93 | 			this.paragraphs = Collections.unmodifiableList(paragraphs);
 94 | 		}
 95 | 
 96 | 		@Override
 97 | 		public String toString() {
 98 | 			return StringUtils.join(paragraphs, '\n');
 99 | 		}
100 | 		
101 | 		
102 | 	}
103 | }
104 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableCellContents.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.model;
 2 | 
 3 | import org.apache.poi.hwpf.usermodel.TableCell;
 4 | import org.apache.poi.xwpf.usermodel.XWPFTableCell;
 5 | 
 6 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
 7 | 
 8 | public final class WordTableCellContents {
 9 | 	public static WordTableCellContent<?> getCellContent(XWPFTableCell cell) {
10 | 		WordTableCellContent<?> content = null;
11 | 		if (isFormula(cell)) { //是公式
12 | 			content = new WordTableCellContentFormula(WordDocType.DOCX);
13 | 		} else if (isImage(cell)) { //图片
14 | 			content = new WordTableCellContentImage(WordDocType.DOCX);
15 | 		} else if (isOleObject(cell)) { //OLE对象
16 | 			content = new WordTableCellContentOleObject(WordDocType.DOCX);
17 | 		} else { //一般文本
18 | 			content = new WordTableCellContentText(WordDocType.DOCX);
19 | 		}
20 | 		
21 | 		content.load(cell);
22 | 		return content;
23 | 	}
24 | 
25 | 	public static boolean isFormula(XWPFTableCell cell) {
26 | 		String xmlText = cell.getCTTc().xmlText();
27 | 		return xmlText.contains("<m:oMathPara>") && xmlText.contains("</m:oMathPara>");
28 | 	}
29 | 
30 | 	public static boolean isImage(XWPFTableCell cell) {
31 | 		String xmlText = cell.getCTTc().xmlText();
32 | 		return xmlText.contains("<w:drawing>") && xmlText.contains("</w:drawing>");
33 | 	}
34 | 	
35 | 	public static boolean isOleObject(XWPFTableCell cell) {
36 | 		String xmlText = cell.getCTTc().xmlText();
37 | 		return xmlText.contains("<w:object>") && xmlText.contains("</w:object>");
38 | 	}
39 | 	
40 | 	public static WordTableCellContent<?> getCellContent(TableCell cell) {
41 | 		WordTableCellContent<?> content = null;
42 | 		if (isFormula(cell)) { //是公式
43 | 			content = new WordTableCellContentFormula(WordDocType.DOC);
44 | 		} else if (isImage(cell)) { //图片
45 | 			content = new WordTableCellContentImage(WordDocType.DOC);
46 | 		} else if (isOleObject(cell)) { //OLE对象
47 | 			content = new WordTableCellContentOleObject(WordDocType.DOC);
48 | 		} else { //一般文本
49 | 			content = new WordTableCellContentText(WordDocType.DOC);
50 | 		}
51 | 		
52 | 		content.load(cell);
53 | 		return content;
54 | 	}
55 | 	
56 | 	public static boolean isFormula(TableCell cell) {
57 | 		String xmlText = cell.text();
58 | 		return xmlText.contains("<m:oMathPara>") && xmlText.contains("</m:oMathPara>");
59 | 	}
60 | 	
61 | 	public static boolean isImage(TableCell cell) {
62 | 		String xmlText = cell.text();
63 | 		return xmlText.contains("<w:drawing>") && xmlText.contains("</w:drawing>");
64 | 	}
65 | 	
66 | 	public static boolean isOleObject(TableCell cell) {
67 | 		String xmlText = cell.text();
68 | 		return xmlText.contains("<w:object>") && xmlText.contains("</w:object>");
69 | 	}
70 | 
71 | }
72 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableComplexCell.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.model;
 2 | 
 3 | /**
 4 |  * 复杂类的单元格
 5 |  * 单元格中嵌套子单元格，可以看做嵌入了表格
 6 |  * @author suncht
 7 |  *
 8 |  */
 9 | public class WordTableComplexCell extends WordTableCell {
10 | 	/**
11 | 	 * 单元格中嵌套子单元格，可以看做嵌入了表格
12 | 	 */
13 | 	private WordTable innerTable;
14 | 
15 | 	public WordTable getInnerTable() {
16 | 		return innerTable;
17 | 	}
18 | 
19 | 	public void setInnerTable(WordTable innerTable) {
20 | 		this.innerTable = innerTable;
21 | 	}
22 | 
23 | 	@Override
24 | 	public String toString() {
25 | 		return innerTable.toString();
26 | 	}
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableHeader.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.model;
 2 | 
 3 | import java.util.List;
 4 | 
 5 | import com.google.common.collect.Lists;
 6 | 
 7 | /**
 8 |  * 表格的列头
 9 |  * @author changtan.sun
10 |  *
11 |  */
12 | public class WordTableHeader {
13 | 	private List<WordTableCell> columnHeader = Lists.newArrayList();
14 | }
15 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableMap.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.wordread.model;
  2 | 
  3 | /**
  4 |  * 表格数据映射
  5 |  * @author changtan.sun
  6 |  *
  7 |  */
  8 | public class WordTableMap {
  9 | 	private TTCPr[][] tableMemoryMap;
 10 | 
 11 | 	public TTCPr[][] getTableMemoryMap() {
 12 | 		return tableMemoryMap;
 13 | 	}
 14 | 
 15 | 	public void setTableMemoryMap(TTCPr[][] tableMemoryMap) {
 16 | 		this.tableMemoryMap = tableMemoryMap;
 17 | 	}
 18 | 
 19 | 	public void clear() {
 20 | 		tableMemoryMap = null;
 21 | 	}
 22 | 
 23 | 	/**
 24 | 	 * 获取在docx中实际行数（word中表格都处理成二维表格，忽略合并）
 25 | 	 * @return
 26 | 	 */
 27 | 	public int getRealMaxRowCount() {
 28 | 		return tableMemoryMap.length;
 29 | 	}
 30 | 
 31 | 	/**
 32 | 	 * 获取行数（在表格映射对象中的行数）
 33 | 	 * @return
 34 | 	 */
 35 | 	public int getRowCount() {
 36 | 		int rowCount = 0;
 37 | 		for (int i = 0; i < tableMemoryMap.length; i++) {
 38 | 			if (tableMemoryMap[i][0].isValid()) {
 39 | 				rowCount++;
 40 | 			}
 41 | 		}
 42 | 		return rowCount;
 43 | 	}
 44 | 
 45 | 	/**
 46 | 	 * 获取行对象
 47 | 	 * @param currentRowIndex
 48 | 	 * @return
 49 | 	 */
 50 | 	public WordTableRow getRow(int currentRowIndex) {
 51 | 		TTCPr[] _rows = null;
 52 | 		TTCPr _first_column_in_row = null;
 53 | 		int rowCount = 0;
 54 | 		for (int i = 0; i < tableMemoryMap.length; i++) {
 55 | 			if (tableMemoryMap[i][0].isValid()) {
 56 | 				if (currentRowIndex == rowCount++) {
 57 | 					_rows = tableMemoryMap[i];
 58 | 					_first_column_in_row = tableMemoryMap[i][0];
 59 | 					break;
 60 | 				}
 61 | 			}
 62 | 		}
 63 | 
 64 | 		if (_rows == null) {
 65 | 			return null;
 66 | 		}
 67 | 
 68 | 		int real_row_index = _first_column_in_row.getRealRowIndex();
 69 | 		//int _end_row_index = _first_column_in_row.getRowSpan() + _first_column_in_row.getRealRowIndex() - 1;
 70 | 		int _row_span = _first_column_in_row.getRowSpan();
 71 | 		int _real_column_count = _rows.length;
 72 | 
 73 | 		WordTableRow pwtr = new WordTableRow();
 74 | 
 75 | 		WordTableCell cell = null;
 76 | 		//		WordTableCell pwtc = null;
 77 | 		for (int i = 0; i < _real_column_count; i++) {
 78 | 			cell = getCellInRow(real_row_index, _row_span, i, currentRowIndex);
 79 | 			if (cell == null) {
 80 | 				continue;
 81 | 			}
 82 | 			pwtr.getCells().add(cell);
 83 | 			//			if (cells.size() == 1) {
 84 | 			//				pwtr.getCells().add(cells.get(0));
 85 | 			//			} else {
 86 | 			//				pwtc = new WordTableCell();
 87 | 			//				pwtc.getSubCells().addAll(cells);
 88 | 			//				pwtr.getCells().add(pwtc);
 89 | 			//			}
 90 | 		}
 91 | 
 92 | 		return pwtr;
 93 | 	}
 94 | 
 95 | 	/**
 96 | 	 * 获取一行中的单元格集合,将实际单元格转换成逻辑单元格
 97 | 	 * @param realRowIndex word中的实际开始行号
 98 | 	 * @param endRealRowIndex word中的实际结束行号
 99 | 	 * @param realColumnIndex  word中的实际列
100 | 	 * @param currentRowIndex  在表格映射对象中的行号
101 | 	 * @return
102 | 	 */
103 | 	private WordTableCell getCellInRow(int realRowIndex, int realRowSpan, int realColumnIndex, int currentRowIndex) {
104 | 		WordTableCell cell = null;
105 | 		TTCPr currentRealCell = tableMemoryMap[realRowIndex][realColumnIndex];
106 | 
107 | 		boolean needHandleRowSpan = realRowSpan > 1 || currentRealCell.isDoneRowSpan(); //是否需要处理跨行的情况
108 | 		boolean needHandleColSpan = currentRealCell.isDoneColSpan();//是否需要处理跨列的情况
109 | 
110 | 		boolean satisfyConditionOfComplexCell = false; //是否满足复杂单元格的条件
111 | 
112 | 		satisfyConditionOfComplexCell = needHandleRowSpan && needHandleColSpan;
113 | 		if (!satisfyConditionOfComplexCell) {
114 | 			satisfyConditionOfComplexCell = currentRealCell.getRowSpan() < realRowSpan;
115 | 		}
116 | 
117 | 		if (currentRealCell.isValid()) { //有效单元格
118 | 			if (satisfyConditionOfComplexCell) {//跨行又跨列
119 | 				WordTableComplexCell pwtc = new WordTableComplexCell(); //属于复杂单元格
120 | 
121 | 				WordTable innerTable = new WordTable();
122 | 				int _realColSpan = currentRealCell.getColSpan();
123 | 				for (int i = 0; i < realRowSpan;) {
124 | 					WordTableRow innerRow = new WordTableRow();
125 | 					int _rowSpan = 1;
126 | 					for (int j = 0; j < _realColSpan; j++) {
127 | 						TTCPr _ttcpr = tableMemoryMap[realRowIndex + i][realColumnIndex + j];
128 | 						if (_ttcpr.isValid()) {
129 | 							WordTableCell _cell = new WordTableSimpleCell();
130 | 							_cell.setRowSpan(_ttcpr.getRowSpan());
131 | 							_cell.setColumnSpan(_ttcpr.getColSpan());
132 | 							_cell.setContent(_ttcpr.getContent().copy());
133 | 							innerRow.getCells().add(_cell);
134 | 
135 | 							if (_ttcpr.getRowSpan() > _rowSpan) {
136 | 								_rowSpan = _ttcpr.getRowSpan();
137 | 							}
138 | 						}
139 | 					}
140 | 					innerTable.getRows().add(innerRow);
141 | 
142 | 					i = i + _rowSpan;
143 | 				}
144 | 				pwtc.setInnerTable(innerTable);
145 | 				cell = pwtc;
146 | 			} else {
147 | 				//跨列不跨行，不需要处理
148 | 				//跨行不跨列，不需要处理
149 | 				WordTableSimpleCell pwtc = new WordTableSimpleCell(); //属于简单单元格
150 | 				pwtc.setRowSpan(currentRealCell.getRowSpan());
151 | 				pwtc.setColumnSpan(currentRealCell.getColSpan());
152 | 				pwtc.setContent(currentRealCell.getContent().copy());
153 | 
154 | 				cell = pwtc;
155 | 			}
156 | 		}
157 | 
158 | 		return cell;
159 | 
160 | 		//		if (currentCell.isValid()) { //有效单元格
161 | 		//			pwtc = new WordTableCell();
162 | 		//			//				pwtc.setRealColumnIndex(realColumnIndex);
163 | 		//			//				pwtc.setRealRowIndex(i);
164 | 		//			//				pwtc.setColumnSpan(pttcpr.getColSpan());
165 | 		//			//				pwtc.setRowSpan(pttcpr.getRowSpan());
166 | 		//			//			pwtc.setRowIndex(currentRowIndex);
167 | 		//			//			pwtc.setColumnIndex(realColumnIndex);
168 | 		//			pwtc.setText(currentCell.getText());
169 | 		//
170 | 		//			if (currentCell.getType() == TTCPrEnum.VM_S) {
171 | 		//
172 | 		//			} else if (currentCell.getType() == TTCPrEnum.HM_S) {
173 | 		//
174 | 		//			} else if (currentCell.getType() == TTCPrEnum.HVM_S) {
175 | 		//
176 | 		//			}
177 | 		//
178 | 		//			cells.put(currentCell.getCellPosition(), pwtc);
179 | 		//		} else { //无效单元格
180 | 		//			if (i == realRowIndex) { //如果第一个单元格就是无效单元格， 当行合并时
181 | 		//				if (currentCell.getType() == TTCPrEnum.VM && currentCell.getRoot() != null) {
182 | 		//					TTCPr root = currentCell.getRoot();
183 | 		//					pwtc = new WordTableCell();
184 | 		//					//						pwtc.setRealColumnIndex(root.getRealColumnIndex());
185 | 		//					//						pwtc.setColumnSpan(root.getColSpan());
186 | 		//					//						pwtc.setRealRowIndex(i);
187 | 		//					//						pwtc.setRowSpan(root.getRowSpan());
188 | 		//					//					pwtc.setRowIndex(currentRowIndex);
189 | 		//					//					pwtc.setColumnIndex(realColumnIndex);
190 | 		//					pwtc.setText(currentCell.getText());
191 | 		//
192 | 		//					cells.put(pwtc.getCellPosition(), pwtc);
193 | 		//				}
194 | 		//			}
195 | 		//
196 | 		//			if (currentCell.getType() == TTCPrEnum.HM && currentCell.getRoot() != null) { //被行合并
197 | 		//				pwtc = new WordTableCell();
198 | 		//				//					pwtc.setRealColumnIndex(realColumnIndex);
199 | 		//				//					pwtc.setColumnSpan(pttcpr.getColSpan());
200 | 		//				//					pwtc.setRealRowIndex(i);
201 | 		//				//					pwtc.setRowSpan(pttcpr.getRowSpan());
202 | 		//				//				pwtc.setRowIndex(currentRowIndex);
203 | 		//				//				pwtc.setColumnIndex(realColumnIndex);
204 | 		//				pwtc.setText(currentCell.getText());
205 | 		//
206 | 		//				cells.add(pwtc);
207 | 		//			}
208 | 		//		}
209 | 		//
210 | 		//		for (int i = realRowIndex; i <= endRealRowIndex; i++) {
211 | 		//			currentCell = tableMemoryMap[i][realColumnIndex];
212 | 		//
213 | 		//		}
214 | 		//
215 | 		//		return cells;
216 | 	}
217 | }
218 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableRow.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.model;
 2 | 
 3 | import java.util.List;
 4 | 
 5 | import com.google.common.collect.Lists;
 6 | 
 7 | /**
 8 |  * 表格行
 9 |  * 行中包含多个单元格
10 |  * @author changtan.sun
11 |  *
12 |  */
13 | public class WordTableRow {
14 | 	/**
15 | 	 * 行中单元格集合
16 | 	 */
17 | 	private List<WordTableCell> cells = Lists.newArrayList();
18 | 
19 | 	public List<WordTableCell> getCells() {
20 | 		return cells;
21 | 	}
22 | 
23 | 	@Override
24 | 	public String toString() {
25 | 		return cells.toString();
26 | 	}
27 | 
28 | 	public void clear() {
29 | 		this.cells.clear();
30 | 		this.cells = null;
31 | 	}
32 | }
33 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/model/WordTableSimpleCell.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.model;
 2 | 
 3 | /**
 4 |  * 简单单元格
 5 |  * 比如：文字、公式、附件等
 6 |  * @author suncht
 7 |  *
 8 |  */
 9 | public class WordTableSimpleCell extends WordTableCell {
10 | 
11 | 	@Override
12 | 	public String toString() {
13 | 		return getContent().getData().toString();
14 | 	}
15 | 
16 | }
17 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/output/DefaultWordTableOutputStrategy.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.output;
 2 | 
 3 | import com.suncht.wordread.format.DefaultCellFormater;
 4 | import com.suncht.wordread.format.ICellFormater;
 5 | import com.suncht.wordread.model.WordTableCell;
 6 | import com.suncht.wordread.model.WordTableCellContentImage.WcImage;
 7 | import com.suncht.wordread.model.WordTableComplexCell;
 8 | import com.suncht.wordread.model.WordTableSimpleCell;
 9 | 
10 | public class DefaultWordTableOutputStrategy implements IWordTableOutputStrategy {
11 | 	private ICellFormater cellFormater;
12 | 	
13 | 	public DefaultWordTableOutputStrategy() {
14 | 		cellFormater = new DefaultCellFormater();
15 | 	}
16 | 	
17 | 	public DefaultWordTableOutputStrategy(ICellFormater cellFormater) {
18 | 		this.cellFormater = cellFormater;
19 | 	}
20 | 
21 | 	@Override
22 | 	public void output(WordTableCell tableCell) {
23 | 		if (tableCell instanceof WordTableSimpleCell) {
24 | 			outputCell(tableCell.getContent().getData());
25 | 		}  else if (tableCell instanceof WordTableComplexCell) {
26 | //			WordTableComplexCell cell = (WordTableComplexCell) tableCell;
27 | //
28 | //			StringBuilder builder = new StringBuilder();
29 | //
30 | //			List<WordTableRow> rows = cell.getInnerTable().getRows();
31 | //			for (WordTableRow row : rows) {
32 | //				for (WordTableCell wtcell : row.getCells()) {
33 | //					builder.append(printCell(wtcell.getContent().getData()) + '\t');
34 | //				}
35 | //			}
36 | //			return builder.toString() + "" + '\t';
37 | 		}
38 | 	}
39 | 	
40 | 
41 | 	private void outputCell(Object cellContent) {
42 | //		if (cellContent instanceof ImageContent) {
43 | //			this.cellFormater.formatImage((ImageContent)cellContent);
44 | //		} else {
45 | //			this.cellFormater.formatText(cellContent);
46 | //		}
47 | 	}
48 | 
49 | }
50 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/output/IWordTableOutputStrategy.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.output;
 2 | 
 3 | import com.suncht.wordread.model.WordTableCell;
 4 | 
 5 | /**
 6 |  * 表格单元格内容输出策略
 7 |  * @author suncht
 8 |  *
 9 |  */
10 | public interface IWordTableOutputStrategy {
11 | 	public void output(WordTableCell tableCell);
12 | }
13 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/ISingleWordTableParser.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.parser;
2 | 
3 | import com.suncht.wordread.model.WordTable;
4 | 
5 | public interface ISingleWordTableParser {
6 | 	public WordTable parse();
7 | }
8 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/IWordTableParser.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.parser;
 2 | 
 3 | import java.io.InputStream;
 4 | import java.util.List;
 5 | 
 6 | import com.suncht.wordread.model.WordTable;
 7 | 
 8 | public interface IWordTableParser {
 9 | 	public List<WordTable> parse(InputStream inputStream);
10 | }
11 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/WordTableParser.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.parser;
 2 | 
 3 | import java.io.File;
 4 | import java.io.FileInputStream;
 5 | import java.io.FileNotFoundException;
 6 | import java.io.InputStream;
 7 | import java.util.List;
 8 | 
 9 | import org.springframework.util.StringUtils;
10 | 
11 | import com.google.common.base.Preconditions;
12 | import com.suncht.wordread.model.WordTable;
13 | import com.suncht.wordread.parser.mapping.IWordTableMemoryMappingVisitor;
14 | import com.suncht.wordread.parser.strategy.ITableTransferStrategy;
15 | import com.suncht.wordread.parser.wordh.WordHTableParser;
16 | import com.suncht.wordread.parser.wordx.WordXTableParser;
17 | 
18 | /**
19 |  * Word文档解析器
20 |  * 支持2007以上的docx、2007以下的doc文档
21 |  * @author changtan.sun
22 |  *
23 |  */
24 | public class WordTableParser {
25 | 	private static final String DOCX_WORD_DOCUMENT = ".docx";
26 | 	private static final String DOC_WORD_DOCUMENT = ".doc";
27 | 
28 | 	private WordTableTransferContext context;
29 | 	private IWordTableParser wordTableParser;
30 | 
31 | 	private WordTableParser() {
32 | 		this.context = WordTableTransferContext.create();
33 | 	}
34 | 
35 | 	public static WordTableParser create() {
36 | 		return new WordTableParser();
37 | 	}
38 | 
39 | 	public WordTableParser transferStrategy(ITableTransferStrategy tableTransferStrategy) {
40 | 		context.transferStrategy(tableTransferStrategy);
41 | 		return this;
42 | 	}
43 | 
44 | 	public WordTableParser memoryMappingVisitor(IWordTableMemoryMappingVisitor visitor) {
45 | 		context.visitor(visitor);
46 | 		return this;
47 | 	}
48 | 
49 | 	public List<WordTable> parse(File wordFile) {
50 | 		Preconditions.checkArgument(wordFile.exists(), "文件不存在");
51 | 		
52 | 		String fileName = wordFile.getName();
53 | 		WordDocType docType = WordDocType.DOCX;
54 | 		if (StringUtils.endsWithIgnoreCase(fileName, DOCX_WORD_DOCUMENT)) {
55 | 			docType = WordDocType.DOCX;
56 | 		} else if (StringUtils.endsWithIgnoreCase(fileName, DOC_WORD_DOCUMENT)) {
57 | 			docType = WordDocType.DOC;
58 | 		} else {
59 | 			throw new IllegalArgumentException("不支持该文件类型");
60 | 		}
61 | 
62 | 		try(FileInputStream inputStream = new FileInputStream(wordFile);) {
63 | 			return this.parse(inputStream, docType);
64 | 		} catch (Exception e) {
65 | 			e.printStackTrace();
66 | 		}
67 | 		return null;
68 | 	}
69 | 	
70 | 
71 | 	public List<WordTable> parse(InputStream inputStream, WordDocType docType) {
72 | 		if (docType == WordDocType.DOCX) {
73 | 			wordTableParser = new WordXTableParser(this.context);
74 | 		} else if (docType == WordDocType.DOC) {
75 | 			wordTableParser = new WordHTableParser(this.context);
76 | 		} else {
77 | 			throw new IllegalArgumentException("不支持该文件类型");
78 | 		}
79 | 		return wordTableParser.parse(inputStream);
80 | 	}
81 | 
82 | 	/**
83 | 	 * Word文档类型
84 | 	 * @author changtan.sun
85 | 	 *
86 | 	 */
87 | 	public static enum WordDocType {
88 | 		DOCX, DOC, UNKOWN
89 | 	}
90 | }
91 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/WordTableTransferContext.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.parser;
 2 | 
 3 | import com.suncht.wordread.model.WordTable;
 4 | import com.suncht.wordread.parser.mapping.IWordTableMemoryMappingVisitor;
 5 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
 6 | import com.suncht.wordread.parser.strategy.ITableTransferStrategy;
 7 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
 8 | 
 9 | /**
10 |  * Word表格转换上下文
11 |  * @author changtan.sun
12 |  *
13 |  */
14 | public class WordTableTransferContext {
15 | 	private ITableTransferStrategy strategy;
16 | 	private IWordTableMemoryMappingVisitor visitor;
17 | 
18 | 	public static WordTableTransferContext create() {
19 | 		return new WordTableTransferContext();
20 | 	}
21 | 
22 | 	public WordTableTransferContext transferStrategy(ITableTransferStrategy strategy) {
23 | 		this.strategy = strategy;
24 | 		return this;
25 | 	}
26 | 
27 | 	public WordTableTransferContext visitor(IWordTableMemoryMappingVisitor visitor) {
28 | 		this.visitor = visitor;
29 | 		return this;
30 | 	}
31 | 
32 | 	public WordTable transfer(final WordTableMemoryMapping tableMemoryMapping) {
33 | 		if (strategy == null) {
34 | 			strategy = new LogicalTableStrategy();
35 | 		}
36 | 		return strategy.transfer(tableMemoryMapping);
37 | 	}
38 | 
39 | 	public ITableTransferStrategy getStrategy() {
40 | 		return strategy;
41 | 	}
42 | 
43 | 	public IWordTableMemoryMappingVisitor getVisitor() {
44 | 		return visitor;
45 | 	}
46 | 
47 | }
48 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/mapping/IWordTableMemoryMappingVisitor.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.parser.mapping;
 2 | 
 3 | import com.suncht.wordread.model.TTCPr;
 4 | 
 5 | /**
 6 |  * Word表格内存映射表的单元格访问者接口
 7 |  * 用于修改内存映射表的单元格的数据
 8 |  * @author changtan.sun
 9 |  *
10 |  */
11 | public interface IWordTableMemoryMappingVisitor {
12 | 	public void visit(TTCPr cell, int realRowIndex, int realColumnIndex);
13 | }
14 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/mapping/WordTableMemoryMapping.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.parser.mapping;
 2 | 
 3 | import java.util.Arrays;
 4 | 
 5 | import com.google.common.base.Preconditions;
 6 | import com.suncht.wordread.model.TTCPr;
 7 | 
 8 | /**
 9 |  * Word表格内存映射
10 |  * @author changtan.sun
11 |  *
12 |  */
13 | public class WordTableMemoryMapping {
14 | 	private TTCPr[][] _tableMemoryMap;
15 | 	private int rowCount;
16 | 	private int columnCount;
17 | 	private IWordTableMemoryMappingVisitor visitor;
18 | 
19 | 	public WordTableMemoryMapping(int row, int column) {
20 | 		_tableMemoryMap = new TTCPr[row][column];
21 | 		this.rowCount = row;
22 | 		this.columnCount = column;
23 | 	}
24 | 
25 | 	public void setTTCPr(final TTCPr data, int rowIndex, int columnIndex) {
26 | 		Preconditions.checkArgument(rowIndex < rowCount);
27 | 		Preconditions.checkArgument(columnIndex < columnCount);
28 | 
29 | 		_tableMemoryMap[rowIndex][columnIndex] = data;
30 | 
31 | 		if (visitor != null) {
32 | 			data.accept(visitor, rowIndex, columnIndex);
33 | 		}
34 | 	}
35 | 
36 | 	public final TTCPr getTTCPr(int rowIndex, int columnIndex) {
37 | 		Preconditions.checkArgument(rowIndex < rowCount);
38 | 		Preconditions.checkArgument(columnIndex < columnCount);
39 | 
40 | 		return _tableMemoryMap[rowIndex][columnIndex];
41 | 	}
42 | 
43 | 	public TTCPr[] getRow(int rowIndex) {
44 | 		Preconditions.checkArgument(rowIndex < rowCount);
45 | 
46 | 		return Arrays.copyOf(_tableMemoryMap[rowIndex], columnCount);
47 | 	}
48 | 
49 | 	public int getRowCount() {
50 | 		return rowCount;
51 | 	}
52 | 
53 | 	public void setRowCount(int rowCount) {
54 | 		this.rowCount = rowCount;
55 | 	}
56 | 
57 | 	public int getColumnCount() {
58 | 		return columnCount;
59 | 	}
60 | 
61 | 	public void setColumnCount(int columnCount) {
62 | 		this.columnCount = columnCount;
63 | 	}
64 | 
65 | 	public IWordTableMemoryMappingVisitor getVisitor() {
66 | 		return visitor;
67 | 	}
68 | 
69 | 	public void setVisitor(IWordTableMemoryMappingVisitor visitor) {
70 | 		this.visitor = visitor;
71 | 	}
72 | 
73 | }
74 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/strategy/DefaultTableStrategy.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.parser.strategy;
 2 | 
 3 | import com.suncht.wordread.model.WordTable;
 4 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
 5 | 
 6 | public class DefaultTableStrategy implements ITableTransferStrategy {
 7 | 
 8 | 	public WordTable transfer(WordTableMemoryMapping tableMemoryMapping) {
 9 | 		return null;
10 | 	}
11 | 
12 | }
13 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/strategy/ITableTransferStrategy.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.parser.strategy;
 2 | 
 3 | import com.suncht.wordread.model.WordTable;
 4 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
 5 | 
 6 | /**
 7 |  * 表格转换策略
 8 |  * 将表格内存映射转换成实际的表格模式
 9 |  * @author changtan.sun
10 |  *
11 |  */
12 | public interface ITableTransferStrategy {
13 | 	public WordTable transfer(WordTableMemoryMapping tableMemoryMapping);
14 | }
15 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/strategy/LogicalTableStrategy.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.wordread.parser.strategy;
  2 | 
  3 | import com.suncht.wordread.model.TTCPr;
  4 | import com.suncht.wordread.model.WordTable;
  5 | import com.suncht.wordread.model.WordTableCell;
  6 | import com.suncht.wordread.model.WordTableComplexCell;
  7 | import com.suncht.wordread.model.WordTableRow;
  8 | import com.suncht.wordread.model.WordTableSimpleCell;
  9 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
 10 | 
 11 | public class LogicalTableStrategy implements ITableTransferStrategy {
 12 | 
 13 | 	private WordTableMemoryMapping tableMemoryMapping;
 14 | 
 15 | 	//	/**
 16 | 	//	 * 获取在docx中实际行数（word中表格都处理成二维表格，忽略合并）
 17 | 	//	 * @return
 18 | 	//	 */
 19 | 	//	private int getRealMaxRowCount() {
 20 | 	//		return tableMemoryMap.length;
 21 | 	//	}
 22 | 
 23 | 	/**
 24 | 	 * 获取行数（在表格映射对象中的行数）
 25 | 	 * @return
 26 | 	 */
 27 | 	private int getRowCount() {
 28 | 		int rowCount = 0;
 29 | 		for (int i = 0; i < tableMemoryMapping.getRowCount(); i++) {
 30 | 			if (tableMemoryMapping.getTTCPr(i, 0).isValid()) {
 31 | 				rowCount++;
 32 | 			}
 33 | 		}
 34 | 		return rowCount;
 35 | 	}
 36 | 
 37 | 	public WordTable transfer(WordTableMemoryMapping tableMemoryMapping) {
 38 | 		this.tableMemoryMapping = tableMemoryMapping;
 39 | 
 40 | 		WordTable wordTable = new WordTable();
 41 | 		int rowCount = getRowCount();
 42 | 		WordTableRow tableRow = null;
 43 | 		for (int i = 0; i < rowCount; i++) {
 44 | 			tableRow = this.getTableRow(i);
 45 | 			wordTable.getRows().add(tableRow);
 46 | 		}
 47 | 		return wordTable;
 48 | 	}
 49 | 
 50 | 	/**
 51 | 	 * 获取行对象
 52 | 	 * @param currentRowIndex
 53 | 	 * @return
 54 | 	 */
 55 | 	private WordTableRow getTableRow(int currentRowIndex) {
 56 | 		TTCPr[] _rows = null;
 57 | 		TTCPr _first_column_in_row = null;
 58 | 		int rowCount = 0;
 59 | 		for (int i = 0; i < tableMemoryMapping.getRowCount(); i++) {
 60 | 			if (tableMemoryMapping.getTTCPr(i, 0).isValid()) {
 61 | 				if (currentRowIndex == rowCount++) {
 62 | 					_rows = tableMemoryMapping.getRow(i);
 63 | 					_first_column_in_row = tableMemoryMapping.getTTCPr(i, 0);
 64 | 					break;
 65 | 				}
 66 | 			}
 67 | 		}
 68 | 
 69 | 		if (_rows == null) {
 70 | 			return null;
 71 | 		}
 72 | 
 73 | 		int _logic_row_index = _first_column_in_row.getLogicRowIndex();
 74 | 		//int _end_row_index = _first_column_in_row.getRowSpan() + _first_column_in_row.getRealRowIndex() - 1;
 75 | 		int _row_span = _first_column_in_row.getRowSpan();
 76 | 		int _logic_column_count = _rows.length;
 77 | 
 78 | 		WordTableRow pwtr = new WordTableRow();
 79 | 
 80 | 		WordTableCell cell = null;
 81 | 		for (int i = 0; i < _logic_column_count; i++) {
 82 | 			cell = getCellInRow(_logic_row_index, _row_span, i, currentRowIndex);
 83 | 			if (cell == null) {
 84 | 				continue;
 85 | 			}
 86 | 			pwtr.getCells().add(cell);
 87 | 		}
 88 | 
 89 | 		return pwtr;
 90 | 	}
 91 | 
 92 | 	/**
 93 | 	 * 获取一行中的单元格集合,将实际单元格转换成逻辑单元格
 94 | 	 * @param logicRowIndex 逻辑行号
 95 | 	 * @param endRealRowIndex 逻辑行号
 96 | 	 * @param logicColumnIndex  word中的实际列
 97 | 	 * @param currentRowIndex  在表格映射对象中的行号
 98 | 	 * @return
 99 | 	 */
100 | 	private WordTableCell getCellInRow(int logicRowIndex, int logicRowSpan, int logicColumnIndex, int currentRowIndex) {
101 | 		WordTableCell cell = null;
102 | 		TTCPr currentRealCell = tableMemoryMapping.getTTCPr(logicRowIndex, logicColumnIndex);
103 | 
104 | 		boolean needHandleRowSpan = logicRowSpan > 0 || currentRealCell.isDoneRowSpan(); //是否需要处理跨行的情况
105 | 		boolean needHandleColSpan = currentRealCell.isDoneColSpan();//是否需要处理跨列的情况
106 | 
107 | 		boolean satisfyConditionOfComplexCell = false; //是否满足复杂单元格的条件
108 | 
109 | 		satisfyConditionOfComplexCell = needHandleRowSpan && needHandleColSpan;
110 | 		if (!satisfyConditionOfComplexCell) {
111 | 			satisfyConditionOfComplexCell = currentRealCell.getRowSpan() < logicRowSpan;
112 | 		}
113 | 
114 | 		if (currentRealCell.isValid()) { //有效单元格
115 | 			if (satisfyConditionOfComplexCell) {//跨行又跨列
116 | 				WordTableComplexCell pwtc = new WordTableComplexCell(); //属于复杂单元格
117 | 
118 | 				WordTable innerTable = new WordTable();
119 | 				int _realColSpan = currentRealCell.getColSpan();
120 | 				for (int i = 0; i < logicRowSpan;) {
121 | 					WordTableRow innerRow = new WordTableRow();
122 | 					int _rowSpan = 1;
123 | 					for (int j = 0; j < _realColSpan; j++) {
124 | 						TTCPr _ttcpr = tableMemoryMapping.getTTCPr(logicRowIndex + i, logicColumnIndex + j);
125 | 						if (_ttcpr.isValid()) {
126 | 							WordTableCell _cell = new WordTableSimpleCell();
127 | 							_cell.setRowSpan(_ttcpr.getRowSpan());
128 | 							_cell.setColumnSpan(_ttcpr.getColSpan());
129 | 							_cell.setContent(_ttcpr.getContent().copy());
130 | 							innerRow.getCells().add(_cell);
131 | 
132 | 							if (_ttcpr.getRowSpan() > _rowSpan) {
133 | 								_rowSpan = _ttcpr.getRowSpan();
134 | 							}
135 | 						}
136 | 					}
137 | 					innerTable.getRows().add(innerRow);
138 | 
139 | 					i = i + _rowSpan;
140 | 				}
141 | 				pwtc.setInnerTable(innerTable);
142 | 				cell = pwtc;
143 | 			} else {
144 | 				//跨列不跨行，不需要处理
145 | 				//跨行不跨列，不需要处理
146 | 				WordTableSimpleCell pwtc = new WordTableSimpleCell(); //属于简单单元格
147 | 				pwtc.setRowSpan(currentRealCell.getRowSpan());
148 | 				pwtc.setColumnSpan(currentRealCell.getColSpan());
149 | 				pwtc.setContent(currentRealCell.getContent().copy());
150 | 
151 | 				cell = pwtc;
152 | 			}
153 | 		}
154 | 
155 | 		return cell;
156 | 
157 | 	}
158 | }
159 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/wordh/SingleWordHTableParser.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.wordread.parser.wordh;
  2 | 
  3 | import java.math.BigInteger;
  4 | 
  5 | import org.apache.poi.hwpf.usermodel.Table;
  6 | import org.apache.poi.hwpf.usermodel.TableCell;
  7 | import org.apache.poi.hwpf.usermodel.TableRow;
  8 | 
  9 | import com.google.common.base.Preconditions;
 10 | import com.suncht.wordread.model.TTCPr;
 11 | import com.suncht.wordread.model.TTCPr.TTCPrEnum;
 12 | import com.suncht.wordread.model.WordTable;
 13 | import com.suncht.wordread.model.WordTableCellContents;
 14 | import com.suncht.wordread.parser.ISingleWordTableParser;
 15 | import com.suncht.wordread.parser.WordTableTransferContext;
 16 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
 17 | 
 18 | /**
 19 |  * Doc文档解析
 20 |  * 
 21 |  * <p>
 22 |  * 标题: SingleWordHTableParser
 23 |  * </p>
 24 |  * <p>
 25 |  * 描述: 对POI API进行调试发现，解析Doc单元格的方式与Docx方式不同：没有列合并，只有行合并，有列宽
 26 |  * </p>
 27 |  * 
 28 |  * @author changtan.sun
 29 |  * @date 2018年4月27日
 30 |  */
 31 | public class SingleWordHTableParser implements ISingleWordTableParser {
 32 | 	private Table hwpfTable;
 33 | 
 34 | 	private WordTableMemoryMapping _tableMemoryMapping;
 35 | 	private WordTableTransferContext context;
 36 | 
 37 | 	/**
 38 | 	 * 最大列数
 39 | 	 */
 40 | 	private int realMaxColumnCount = 0;
 41 | 	/**
 42 | 	 * 最大列数所占的行Index
 43 | 	 */
 44 | 	private int rowIndexOfMaxColumnCount = 0;
 45 | 
 46 | 	public SingleWordHTableParser(Table hwpfTable, WordTableTransferContext context) {
 47 | 		this.hwpfTable = hwpfTable;
 48 | 		this.context = context;
 49 | 	}
 50 | 
 51 | 	public WordTable parse() {
 52 | 		int realMaxRowCount = this.hwpfTable.numRows();
 53 | 
 54 | 		realMaxColumnCount = 0;
 55 | 		for (int i = 0; i < realMaxRowCount; i++) {
 56 | 			TableRow tr = this.hwpfTable.getRow(i);
 57 | 			int numCell = tr.numCells();
 58 | 			if (numCell > realMaxColumnCount) {
 59 | 				realMaxColumnCount = numCell;
 60 | 				rowIndexOfMaxColumnCount = i;
 61 | 			}
 62 | 		}
 63 | 
 64 | 		_tableMemoryMapping = new WordTableMemoryMapping(realMaxRowCount, realMaxColumnCount);
 65 | 
 66 | 		for (int i = 0; i < realMaxRowCount; i++) {
 67 | 			TableRow preRow = i - 1 >= 0 ? this.hwpfTable.getRow(i - 1) : null; // 上一行
 68 | 			parseRow(this.hwpfTable.getRow(i), i, preRow);
 69 | 		}
 70 | 
 71 | 		return context.transfer(_tableMemoryMapping);
 72 | 	}
 73 | 
 74 | 	private void parseRow(TableRow row, int realRowIndex, TableRow preRow) {
 75 | 		int numCells = row.numCells();
 76 | 		//boolean existColMergedCells = realMaxColumnCount > numCells; // 该行中是否存在被列合并，如果存在，做逻辑列合并处理
 77 | 		int logicColumnIndex = 0;
 78 | 		int logicRowIndex = realRowIndex; //逻辑行号和实际行号一样的
 79 | 		for (int realColumnIndex = 0; realColumnIndex < numCells; realColumnIndex++) {
 80 | 			TableCell cell = row.getCell(realColumnIndex);// 取得单元格
 81 | 			int skipColumn = parseCell(row, cell, realRowIndex, realColumnIndex, logicRowIndex, logicColumnIndex);
 82 | 			logicColumnIndex = logicColumnIndex + skipColumn + 1;
 83 | 		}
 84 | 	}
 85 | 
 86 | 	/**
 87 | 	 * 参考：https://blog.csdn.net/www1056481167/article/details/56835043
 88 | 	 * 解析Doc单元格的方式与Docx方式不同：没有列合并概念，只有行合并
 89 | 	 * 
 90 | 	 * @param cell
 91 | 	 * @param realRowIndex
 92 | 	 * @param realColumnIndex
 93 | 	 * @return
 94 | 	 */
 95 | 	private int parseCell(TableRow row, TableCell cell, int realRowIndex, int realColumnIndex, int logicRowIndex, int logicColumnIndex) {
 96 | 		// -----列合并-----
 97 | 		int numOfCellHMerged = computeNumOfCellHMerged(row, cell, realColumnIndex); //就是该单元格合并了多少列
 98 | 
 99 | 		// -----行合并-----
100 | 		if (cell.isFirstVerticallyMerged() && cell.isVerticallyMerged()) { // 行合并开始
101 | 			TTCPr ttc = new TTCPr();
102 | 			if(numOfCellHMerged>0) {
103 | 				ttc.setType(TTCPrEnum.HVM_S); 
104 | 			} else {
105 | 				ttc.setType(TTCPrEnum.VM_S);
106 | 			}
107 | 			ttc.setRealRowIndex(realRowIndex);
108 | 			ttc.setRealColumnIndex(realColumnIndex);
109 | 			ttc.setLogicRowIndex(logicRowIndex);
110 | 			ttc.setLogicColumnIndex(logicColumnIndex);
111 | 			ttc.setWidth(BigInteger.valueOf(cell.getWidth()));
112 | 			ttc.setColSpan(numOfCellHMerged);
113 | 			ttc.setRoot(null);
114 | 			// ttc.setText(cell.getText());
115 | 			ttc.setContent(WordTableCellContents.getCellContent(cell));
116 | 
117 | 			_tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
118 | 			
119 | 			//处理其他被合并的列
120 | 			if(numOfCellHMerged>0) {
121 | 				for (int i = 0; i < numOfCellHMerged; i++) {
122 | 					TTCPr ttc_merged = new TTCPr();
123 | 					ttc_merged.setType(TTCPrEnum.HM);
124 | 					ttc_merged.setRealRowIndex(realRowIndex);
125 | 					ttc_merged.setRealColumnIndex(realColumnIndex);
126 | 					ttc_merged.setLogicRowIndex(logicRowIndex);
127 | 					ttc_merged.setLogicColumnIndex(logicColumnIndex + i + 1);
128 | 					//ttc_merged.setWidth(BigInteger.valueOf(cell.getWidth()));
129 | 					//ttc_merged.setColSpan(numOfCellHMerged);
130 | 					ttc_merged.setRoot(ttc);
131 | 					
132 | 					_tableMemoryMapping.setTTCPr(ttc_merged, logicRowIndex, ttc_merged.getLogicColumnIndex());
133 | 				}
134 | 			}
135 | 		} else if (!cell.isFirstVerticallyMerged() && cell.isVerticallyMerged()) { // 行被合并
136 | 			int _start = logicRowIndex, _end = 0;
137 | 			TTCPr root = null;
138 | 			for (int i = logicRowIndex - 1; i >= 0; i--) {
139 | 				TTCPr ttcpr = _tableMemoryMapping.getTTCPr(i, logicColumnIndex);
140 | 				if (ttcpr != null && (ttcpr.getType() == TTCPrEnum.VM_S || ttcpr.getType() == TTCPrEnum.HVM_S)) {
141 | 					_end = i;
142 | 					root = ttcpr;
143 | 					break;
144 | 				} else if (ttcpr != null && ttcpr.getRoot() != null) {
145 | 					_end = i;
146 | 					root = ttcpr.getRoot();
147 | 					break;
148 | 				}
149 | 			}
150 | 			
151 | 			Preconditions.checkNotNull(root, "父单元格不能为空");
152 | 
153 | 			TTCPr ttc = new TTCPr();
154 | 			ttc.setType(TTCPrEnum.VM);
155 | 			ttc.setRealRowIndex(realRowIndex);
156 | 			ttc.setRealColumnIndex(realColumnIndex);
157 | 			ttc.setLogicRowIndex(logicRowIndex);
158 | 			ttc.setLogicColumnIndex(logicColumnIndex);
159 | 			ttc.setWidth(BigInteger.valueOf(cell.getWidth()));
160 | 			ttc.setRoot(root);
161 | 			root.setRowSpan(_start - _end + 1);
162 | 			
163 | 			_tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
164 | 		} else { // 没有行合并
165 | 			TTCPr ttc = new TTCPr();
166 | 			if(numOfCellHMerged>0) {
167 | 				ttc.setType(TTCPrEnum.HM_S);
168 | 			} else {
169 | 				ttc.setType(TTCPrEnum.NONE);
170 | 			}
171 | 			ttc.setRealRowIndex(realRowIndex);
172 | 			ttc.setRealColumnIndex(realColumnIndex);
173 | 			ttc.setLogicRowIndex(logicRowIndex);
174 | 			ttc.setLogicColumnIndex(logicColumnIndex);
175 | 			ttc.setWidth(BigInteger.valueOf(cell.getWidth()));
176 | 			ttc.setColSpan(numOfCellHMerged);
177 | 			ttc.setRoot(null);
178 | 			// ttc.setText(cell.getText());
179 | 			ttc.setContent(WordTableCellContents.getCellContent(cell));
180 | 
181 | 			_tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
182 | 			
183 | 			//处理其他被合并的列
184 | 			if(numOfCellHMerged>0) {
185 | 				for (int i = 0; i < numOfCellHMerged; i++) {
186 | 					TTCPr ttc_merged = new TTCPr();
187 | 					ttc_merged.setType(TTCPrEnum.HM);
188 | 					ttc_merged.setRealRowIndex(realRowIndex);
189 | 					ttc_merged.setRealColumnIndex(realColumnIndex);
190 | 					ttc_merged.setLogicRowIndex(logicRowIndex);
191 | 					ttc_merged.setLogicColumnIndex(logicColumnIndex + i + 1);
192 | 					//ttc_merged.setWidth(BigInteger.valueOf(cell.getWidth()));
193 | 					//ttc_merged.setColSpan(numOfCellHMerged);
194 | 					ttc_merged.setRoot(ttc);
195 | 					
196 | 					_tableMemoryMapping.setTTCPr(ttc_merged, logicRowIndex, ttc_merged.getLogicColumnIndex());
197 | 				}
198 | 			}
199 | 		}
200 | 
201 | 		return numOfCellHMerged;
202 | 	}
203 | 
204 | 	/**
205 | 	 * 计算合并了多少个单元格
206 | 	 * 表格中其他行根据标准行进行列合并，属于标准表格 标准表格，比如
207 | 	 * ——————————————— 
208 | 	 * |   |    |    |
209 | 	 * ——————————————— 
210 | 	 * | | |    |    | ---->该行为标准行 
211 | 	 * ——————————————— 
212 | 	 * | |           |
213 | 	 * ——————————————— 
214 | 	 * |        |    | 
215 | 	 * ———————————————
216 | 	 * 
217 | 	 * @param cell
218 | 	 * @param realRowIndex
219 | 	 * @param realColumnIndex
220 | 	 * @return
221 | 	 */
222 | 	private int computeNumOfCellHMerged(TableRow currentRow, TableCell currentCell, int realColumnIndex) {
223 | 		TableRow standardRow = this.hwpfTable.getRow(this.rowIndexOfMaxColumnCount);
224 | 
225 | 		if (currentRow.numCells() >= standardRow.numCells()) {
226 | 			return 0;
227 | 		}
228 | 
229 | 		long totalWidth = 0;
230 | 		for (int i = 0; i <= realColumnIndex; i++) {
231 | 			totalWidth += currentRow.getCell(i).getWidth();
232 | 		}
233 | 
234 | 		int tempRowIndex = -1;
235 | 		long tempWidth = 0;
236 | 		for (int i = 0, size = standardRow.numCells(); i < size; i++) {
237 | 			tempWidth += standardRow.getCell(i).getWidth();
238 | 			if (this.widthEqual(tempWidth, totalWidth)) {
239 | 				tempRowIndex = i;
240 | 				break;
241 | 			}
242 | 		}
243 | 		
244 | 		int currentCellWidth = currentCell.getWidth();
245 | 		tempWidth = 0;
246 | 		int columnMerged = 0;
247 | 		for (int i = tempRowIndex; i >= 0; i--) {
248 | 			tempWidth += standardRow.getCell(i).getWidth();
249 | 			if(this.widthEqual(tempWidth, currentCellWidth)) {
250 | 				break;
251 | 			} else {
252 | 				columnMerged++;
253 | 			}
254 | 		}
255 | 		
256 | 		return columnMerged;
257 | 	}
258 | 
259 | 	private boolean widthEqual(long tempWidth, long totalWidth) {
260 | 		return tempWidth <= (totalWidth + 10) && tempWidth >= (totalWidth - 10);
261 | 	}
262 | 
263 | }
264 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/wordh/WordHTableParser.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.parser.wordh;
 2 | 
 3 | import java.io.IOException;
 4 | import java.io.InputStream;
 5 | import java.util.List;
 6 | 
 7 | import org.apache.poi.hwpf.HWPFDocument;
 8 | import org.apache.poi.hwpf.usermodel.Range;
 9 | import org.apache.poi.hwpf.usermodel.Table;
10 | import org.apache.poi.hwpf.usermodel.TableIterator;
11 | import org.apache.poi.poifs.filesystem.POIFSFileSystem;
12 | 
13 | import com.google.common.collect.Lists;
14 | import com.suncht.wordread.model.WordTable;
15 | import com.suncht.wordread.parser.ISingleWordTableParser;
16 | import com.suncht.wordread.parser.IWordTableParser;
17 | import com.suncht.wordread.parser.WordTableTransferContext;
18 | 
19 | public class WordHTableParser implements IWordTableParser {
20 | 	private WordTableTransferContext context;
21 | 
22 | 	public WordHTableParser(WordTableTransferContext context) {
23 | 		this.context = context;
24 | 	}
25 | 
26 | 	public List<WordTable> parse(InputStream inputStream) {
27 | 
28 | 		List<WordTable> wordTables = Lists.newArrayList();
29 | 
30 | 		try {
31 | 			POIFSFileSystem pfs = new POIFSFileSystem(inputStream); // 载入文档  
32 | 			HWPFDocument hwpf = new HWPFDocument(pfs);
33 | 
34 | 			Range range = hwpf.getRange();//得到文档的读取范围  
35 | 			TableIterator it = new TableIterator(range);
36 | 			//迭代文档中的表格  
37 | 			while (it.hasNext()) {
38 | 				Table table = (Table) it.next();
39 | 				ISingleWordTableParser parser = new SingleWordHTableParser(table, context);
40 | 				WordTable wordTable = parser.parse();
41 | 				wordTables.add(wordTable);
42 | 			}
43 | 		} catch (Exception e) {
44 | 			e.printStackTrace();
45 | 		} finally {
46 | 			if (inputStream != null) {
47 | 				try {
48 | 					inputStream.close();
49 | 				} catch (IOException e) {
50 | 					e.printStackTrace();
51 | 				}
52 | 			}
53 | 		}
54 | 
55 | 		return wordTables;
56 | 	}
57 | }
58 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/wordx/SingleWordXTableParser.java:
--------------------------------------------------------------------------------
  1 | package com.suncht.wordread.parser.wordx;
  2 | 
  3 | import java.util.List;
  4 | 
  5 | import org.apache.poi.xwpf.usermodel.XWPFTable;
  6 | import org.apache.poi.xwpf.usermodel.XWPFTableCell;
  7 | import org.apache.poi.xwpf.usermodel.XWPFTableRow;
  8 | import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcPr;
  9 | 
 10 | import com.google.common.base.Preconditions;
 11 | import com.suncht.wordread.model.TTCPr;
 12 | import com.suncht.wordread.model.TTCPr.TTCPrEnum;
 13 | import com.suncht.wordread.model.WordTable;
 14 | import com.suncht.wordread.model.WordTableCellContents;
 15 | import com.suncht.wordread.parser.ISingleWordTableParser;
 16 | import com.suncht.wordread.parser.WordTableTransferContext;
 17 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
 18 | 
 19 | /**
 20 |  * 
 21 |  * @author changtan.sun
 22 |  *
 23 |  */
 24 | 
 25 | /**
 26 | * 解析Docx中一张复杂表格内容
 27 | * Docx不仅有列合并，而且有行合并，没有列宽
 28 | * <p>标题: SingleWordXTableParser</p>  
 29 | * <p>描述: </p>  
 30 | * @author changtan.sun  
 31 | * @date 2018年4月27日
 32 |  */
 33 | public class SingleWordXTableParser implements ISingleWordTableParser {
 34 | 	private XWPFTable xwpfTable;
 35 | 	//	private WordTable table;
 36 | 
 37 | 	private WordTableMemoryMapping _tableMemoryMapping;
 38 | 	private WordTableTransferContext context;
 39 | 
 40 | 	public SingleWordXTableParser(XWPFTable xwpfTable, WordTableTransferContext context) {
 41 | 		this.xwpfTable = xwpfTable;
 42 | 		this.context = context;
 43 | 	}
 44 | 
 45 | 	//	public WordTable getTable() {
 46 | 	//		return table;
 47 | 	//	}
 48 | 
 49 | 	/**
 50 | 	 * 解析Docx的表格，将表格相关数据映射到表格映射对象中， 用于后面的操作
 51 | 	 * @return
 52 | 	 */
 53 | 	public WordTable parse() {
 54 | 		List<XWPFTableRow> rows;
 55 | 		List<XWPFTableCell> cells;
 56 | 
 57 | 		rows = xwpfTable.getRows();
 58 | 		int realMaxRowCount = rows.size();
 59 | 		//		table.setRealMaxRowCount(rows.size());
 60 | 
 61 | 		//计算最大列数
 62 | 		int realMaxColumnCount = 0;
 63 | 		for (XWPFTableRow row : rows) {
 64 | 			//获取行对应的单元格  
 65 | 			cells = row.getTableCells();
 66 | 			int _columnCountOnRow = 0;
 67 | 			for (XWPFTableCell cell : cells) {
 68 | 				CTTcPr tt = cell.getCTTc().getTcPr();
 69 | 				if(tt.getGridSpan()!=null) {
 70 | 					_columnCountOnRow += tt.getGridSpan().getVal().intValue();
 71 | 				} else {
 72 | 					_columnCountOnRow += 1;
 73 | 				}
 74 | 			}
 75 | 			
 76 | 			if (_columnCountOnRow > realMaxColumnCount) {
 77 | 				realMaxColumnCount = _columnCountOnRow;
 78 | 			}
 79 | 		}
 80 | 
 81 | 		//table.setRealMaxColumnCount(columnCount);
 82 | 
 83 | 		_tableMemoryMapping = new WordTableMemoryMapping(realMaxRowCount, realMaxColumnCount);
 84 | 		_tableMemoryMapping.setVisitor(context.getVisitor());
 85 | 		for (int i = 0; i < realMaxRowCount; i++) {
 86 | 			parseRow(rows.get(i), i);
 87 | 		}
 88 | 
 89 | 		//printTableMemoryMap();
 90 | 
 91 | 		//		wordTableMap = new WordTableMap();
 92 | 		//		wordTableMap.setTableMemoryMap(_tableMemoryMap);
 93 | 		return context.transfer(_tableMemoryMapping);
 94 | 	}
 95 | 
 96 | 	public void dispose() {
 97 | 		_tableMemoryMapping = null;
 98 | 		xwpfTable = null;
 99 | 	}
100 | 
101 | 	//	/**
102 | 	//	 * 打印表格映射
103 | 	//	 */
104 | 	//	private void printTableMemoryMap() {
105 | 	//		int r = 1;
106 | 	//		for (TTCPr[] columns : _tableMemoryMapping) {
107 | 	//			int c = 1;
108 | 	//			for (TTCPr column : columns) {
109 | 	//				System.out.println(r + ":" + c + "===>" + column.getType() + " ==== " + column.getText());
110 | 	//				c++;
111 | 	//			}
112 | 	//
113 | 	//			r++;
114 | 	//		}
115 | 	//	}
116 | 
117 | 	/**
118 | 	 * 解析word中表格行
119 | 	 * @param row
120 | 	 * @param realRowIndex
121 | 	 */
122 | 	private void parseRow(XWPFTableRow row, int realRowIndex) {
123 | 		List<XWPFTableCell> cells = row.getTableCells();
124 | 		int numCells = cells.size();
125 | 
126 | 		int logicColumnIndex = 0;
127 | 		int logicRowIndex = realRowIndex; //逻辑行号与实际行号一样
128 | 		for (int realColumnIndex = 0; realColumnIndex < numCells; realColumnIndex++) {
129 | 			XWPFTableCell cell = row.getCell(realColumnIndex);
130 | 			//skipColumn是否跳过多个单元格, 当列合并时候
131 | 			int skipColumn = parseCell(cell, realRowIndex, realColumnIndex, logicRowIndex, logicColumnIndex);
132 | 			logicColumnIndex = logicColumnIndex + skipColumn + 1;
133 | 		}
134 | 	}
135 | 
136 | 	private int parseCell(XWPFTableCell cell, int realRowIndex, int realColumnIndex, int logicRowIndex,  int logicColumnIndex) {
137 | 		int skipColumn = 0;
138 | //		if (_tableMemoryMapping.getTTCPr(realRowIndex, realColumnIndex) != null) {
139 | //			return skipColumn;
140 | //		}
141 | 
142 | 		CTTcPr tt = cell.getCTTc().getTcPr();
143 | 		//-------行合并--------
144 | 		if (tt.getVMerge() != null) {
145 | 			if (tt.getVMerge().getVal() != null && "restart".equals(tt.getVMerge().getVal().toString())) { //行合并的第一行单元格(行合并的开始单元格)
146 | 				TTCPr ttc = new TTCPr();
147 | 				ttc.setType(TTCPrEnum.VM_S);
148 | 				ttc.setRealRowIndex(realRowIndex);
149 | 				ttc.setRealColumnIndex(realColumnIndex);
150 | 				ttc.setLogicRowIndex(logicRowIndex);
151 | 				ttc.setLogicColumnIndex(logicColumnIndex);
152 | 				ttc.setWidth(tt.getTcW().getW());
153 | 				ttc.setRoot(null);
154 | 				//ttc.setText(cell.getText());
155 | 				ttc.setContent(WordTableCellContents.getCellContent(cell));
156 | 
157 | 				_tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
158 | 			} else { //行合并的其他行单元格（被合并的单元格）
159 | 				int _start = logicRowIndex, _end = 0;
160 | 				TTCPr root = null;
161 | 				for (int i = logicRowIndex - 1; i >= 0; i--) {
162 | 					TTCPr ttcpr = _tableMemoryMapping.getTTCPr(i, logicRowIndex);
163 | 					if (ttcpr != null && (ttcpr.getType() == TTCPrEnum.VM_S || ttcpr.getType() == TTCPrEnum.HVM_S)) {
164 | 						_end = i;
165 | 						root = ttcpr;
166 | 						break;
167 | 					} else if(ttcpr != null && ttcpr.getRoot()!=null) {
168 | 						_end = i;
169 | 						root = ttcpr.getRoot();
170 | 						break;
171 | 					}
172 | 				}
173 | 
174 | 				Preconditions.checkNotNull(root, "父单元格不能为空");
175 | 				
176 | 				TTCPr ttc = new TTCPr();
177 | 				ttc.setType(TTCPrEnum.VM);
178 | 				ttc.setRealRowIndex(realRowIndex);
179 | 				ttc.setRealColumnIndex(realColumnIndex);
180 | 				ttc.setLogicRowIndex(logicRowIndex);
181 | 				ttc.setLogicColumnIndex(logicColumnIndex);
182 | 				ttc.setWidth(tt.getTcW().getW());
183 | 				ttc.setRoot(root);
184 | 				root.setRowSpan(_start - _end + 1);
185 | 				_tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
186 | 			}
187 | 		} else { //没有进行行合并的单元格
188 | 			TTCPr currentCell = _tableMemoryMapping.getTTCPr(logicRowIndex, logicColumnIndex);
189 | 			if (currentCell != null && currentCell.getType() == TTCPrEnum.HM) { //被列合并的单元格
190 | 
191 | 			} else {
192 | 				currentCell = new TTCPr();
193 | 				currentCell.setType(TTCPrEnum.NONE);
194 | 				currentCell.setRealRowIndex(realRowIndex);
195 | 				currentCell.setRealColumnIndex(realColumnIndex);
196 | 				currentCell.setLogicRowIndex(logicRowIndex);
197 | 				currentCell.setLogicColumnIndex(logicColumnIndex);
198 | 				currentCell.setWidth(tt.getTcW().getW());
199 | 				currentCell.setContent(WordTableCellContents.getCellContent(cell));
200 | 				currentCell.setRoot(null);
201 | 				//判断是否有父单元格
202 | 				if (logicRowIndex > 0) {
203 | 					TTCPr parent = _tableMemoryMapping.getTTCPr(logicRowIndex - 1, logicColumnIndex);
204 | 					if (parent.isDoneColSpan()) {
205 | 						//currentCell.setParent(parent);
206 | 						currentCell.setRoot(parent);
207 | 					}
208 | 				}
209 | 
210 | 				_tableMemoryMapping.setTTCPr(currentCell, logicRowIndex, logicColumnIndex);
211 | 			}
212 | 		}
213 | 
214 | 		//-------列合并-------
215 | 		if (tt.getGridSpan() != null) {
216 | 			int colSpan = tt.getGridSpan().getVal().intValue();
217 | 			TTCPr root = _tableMemoryMapping.getTTCPr(logicRowIndex, logicColumnIndex);
218 | 			root.setColSpan(colSpan);
219 | 			if (root.getType() == TTCPrEnum.VM_S) {
220 | 				root.setType(TTCPrEnum.HVM_S);
221 | 			} else {
222 | 				root.setType(TTCPrEnum.HM_S);
223 | 			}
224 | 
225 | 			//给其他被列合并的单元格进行初始化
226 | 			for (int i = 1; i < colSpan; i++) {
227 | 				TTCPr cell_other = _tableMemoryMapping.getTTCPr(logicRowIndex, logicColumnIndex + i);
228 | 				if (cell_other == null){
229 | 					cell_other = new TTCPr();
230 | 					cell_other.setWidth(tt.getTcW().getW());
231 | 				}
232 | 				cell_other.setRealRowIndex(realRowIndex);
233 | 				cell_other.setRealColumnIndex(realColumnIndex);
234 | 				cell_other.setLogicRowIndex(logicRowIndex);
235 | 				cell_other.setLogicColumnIndex(realColumnIndex + i);
236 | 				cell_other.setType(TTCPrEnum.HM);
237 | 				cell_other.setRoot(root);
238 | 
239 | 				_tableMemoryMapping.setTTCPr(cell_other, logicRowIndex, realColumnIndex + i);
240 | 			}
241 | 
242 | 			skipColumn = colSpan - 1;
243 | 		}
244 | 
245 | 		return skipColumn;
246 | 	}
247 | }
248 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/wordx/WordXTableParser.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.parser.wordx;
 2 | 
 3 | import java.io.InputStream;
 4 | import java.util.List;
 5 | 
 6 | import org.apache.commons.io.IOUtils;
 7 | import org.apache.poi.xwpf.usermodel.XWPFDocument;
 8 | import org.apache.poi.xwpf.usermodel.XWPFTable;
 9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 | 
12 | import com.google.common.collect.Lists;
13 | import com.suncht.wordread.model.WordTable;
14 | import com.suncht.wordread.parser.ISingleWordTableParser;
15 | import com.suncht.wordread.parser.IWordTableParser;
16 | import com.suncht.wordread.parser.WordTableTransferContext;
17 | 
18 | /**
19 |  * Docx文档的复杂表格解析器
20 |  * @author changtan.sun
21 |  *
22 |  */
23 | public class WordXTableParser implements IWordTableParser {
24 | 	private final static Logger logger = LoggerFactory.getLogger(WordXTableParser.class);
25 | 	
26 | 	private WordTableTransferContext context;
27 | 
28 | 	public WordXTableParser(WordTableTransferContext context) {
29 | 		this.context = context;
30 | 	}
31 | 
32 | 	public List<WordTable> parse(InputStream inputStream) {
33 | 		List<WordTable> wordTables = Lists.newArrayList();
34 | 
35 | 		try {
36 | 			XWPFDocument doc = new XWPFDocument(inputStream); // 载入文档  
37 | 
38 | 			//获取文档中所有的表格  
39 | 			List<XWPFTable> tables = doc.getTables();
40 | 			for (XWPFTable table : tables) {
41 | 				ISingleWordTableParser parser = new SingleWordXTableParser(table, this.context);
42 | 				WordTable wordTable = parser.parse();
43 | 				wordTables.add(wordTable);
44 | 			}
45 | 		} catch (Exception e) {
46 | 			logger.error(e.getMessage(), e);
47 | 		} finally {
48 | 			IOUtils.closeQuietly(inputStream);
49 | 		}
50 | 
51 | 		return wordTables;
52 | 	}
53 | }
54 | 


--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/utils/MathmlUtils.java:
--------------------------------------------------------------------------------
 1 | package com.suncht.wordread.utils;
 2 | 
 3 | import java.io.InputStream;
 4 | import java.io.StringReader;
 5 | import java.io.StringWriter;
 6 | 
 7 | import javax.xml.transform.Result;
 8 | import javax.xml.transform.Source;
 9 | import javax.xml.transform.Transformer;
10 | import javax.xml.transform.TransformerException;
11 | import javax.xml.transform.TransformerFactory;
12 | import javax.xml.transform.URIResolver;
13 | import javax.xml.transform.stream.StreamResult;
14 | import javax.xml.transform.stream.StreamSource;
15 | 
public class MathmlUtils {
	/**
	 * Applies an XSL stylesheet loaded from the classpath to an XML string.
	 *
	 * @param s the XML text to transform
	 * @param xslpath classpath location of the stylesheet, resolved relative to this class
	 * @param uriResolver resolver for resources the stylesheet includes or imports; may be null
	 * @return the transformation output; whatever was written before a failure (normally an
	 *         empty string) when the stylesheet is missing or the transformation fails
	 */
	public static String xslConvert(String s, String xslpath, URIResolver uriResolver) {
		TransformerFactory tFac = TransformerFactory.newInstance();
		if (uriResolver != null) {
			tFac.setURIResolver(uriResolver);
		}
		StringWriter writer = new StringWriter();

		InputStream xslStream = MathmlUtils.class.getResourceAsStream(xslpath);
		if (xslStream == null) {
			// Report the missing stylesheet explicitly instead of handing a null stream to the factory.
			System.out.println("XSL stylesheet not found on classpath: " + xslpath);
			return writer.getBuffer().toString();
		}

		try {
			Transformer t = tFac.newTransformer(new StreamSource(xslStream));
			Source source = new StreamSource(new StringReader(s));
			Result result = new StreamResult(writer);
			t.transform(source, result);
		} catch (TransformerException e) {
			System.out.println(e.getMessage());
		} finally {
			// The stylesheet stream was opened here, so it is closed here.
			try {
				xslStream.close();
			} catch (java.io.IOException e) {
				System.out.println(e.getMessage());
			}
		}
		return writer.getBuffer().toString();
	}

	/**
	 * Converts MathML to LaTeX with the mmltex stylesheets under /conventer/mml2tex/.
	 *
	 * @param mml MathML text, with or without a leading XML declaration
	 * @return the stylesheet output with its first and last character removed when it is
	 *         longer than one character; otherwise the output unchanged
	 */
	public static String convertMML2Latex(String mml) {
		// Strip everything up to the end of the XML declaration, but only when one is present:
		// indexOf returns -1 otherwise, and "-1 + 2" used to cut off the first character.
		int declarationEnd = mml.indexOf("?>");
		if (declarationEnd >= 0) {
			mml = mml.substring(declarationEnd + 2);
		}
		URIResolver r = new URIResolver() { // resolves the stylesheets that mmltex.xsl includes
			@Override
			public Source resolve(String href, String base) throws TransformerException {
				InputStream inputStream = MathmlUtils.class.getResourceAsStream("/conventer/mml2tex/" + href);
				if (inputStream == null) {
					throw new TransformerException("XSL resource not found on classpath: /conventer/mml2tex/" + href);
				}
				return new StreamSource(inputStream);
			}
		};
		String latex = xslConvert(mml, "/conventer/mml2tex/mmltex.xsl", r);
		if (latex != null && latex.length() > 1) {
			// Drop the first and last character of the stylesheet output.
			latex = latex.substring(1, latex.length() - 1);
		}
		return latex;
	}

	/**
	 * Converts Office MathML (OMML) to MathML with the /conventer/OMML2MML.XSL stylesheet.
	 *
	 * @param xml the OMML text
	 * @return the stylesheet output (see {@link #xslConvert})
	 */
	public static String convertOMML2MML(String xml) {
		return xslConvert(xml, "/conventer/OMML2MML.XSL", null);
	}
}
68 | 


--------------------------------------------------------------------------------
/src/main/resources/1.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/1.doc


--------------------------------------------------------------------------------
/src/main/resources/FMEA信息导入-客户实例.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/FMEA信息导入-客户实例.doc


--------------------------------------------------------------------------------
/src/main/resources/FMEA信息导入-客户实例.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/FMEA信息导入-客户实例.docx


--------------------------------------------------------------------------------
/src/main/resources/conventer/mml2tex/README:
--------------------------------------------------------------------------------
 1 | README for the XSLT MathML Library 2.1.2
 2 | 
 3 | XSLT MathML Library is a set of XSLT stylesheets to transform
 4 | MathML 2.0 to LaTeX.
 5 | 
 6 | For more information, see
 7 | http://www.raleigh.ru/MathML/mmltex/index.php?lang=en
 8 | 
 9 | Manifest
10 | --------
11 | 
12 | README        this file
13 | mmltex.xsl
14 | tokens.xsl
15 | glayout.xsl
16 | scripts.xsl
17 | tables.xsl
18 | entities.xsl
19 | cmarkup.xsl
20 | 
21 | Use
22 | ---
23 | 
24 | There are two ways of using the library:
25 | 
26 |     * Use a local copy of the library.
27 | 
28 |         1. Download the distribution (see below).
29 | 
30 |         2. Unpack the distribution, using unzip.
31 | 
32 |         3. In your stylesheet import or include either the main
33 |            stylesheet, mmltex.xsl, or the stylesheet module you
34 |            wish to use, such as tokens.xsl. This example assumes
35 |            that the distribution has been extracted into the same
36 |            directory as your own stylesheet:
37 | 
38 |            <xsl:import href="mmltex.xsl"/>
39 | 
40 |     * Import or include either the main stylesheet, or the
41 |       stylesheet module you wish to use, directly from the library
42 |       website; http://www.raleigh.ru/MathML/mmltex/. For example:
43 | 
44 |       <xsl:import href="http://www.raleigh.ru/MathML/mmltex/mmltex.xsl"/>
45 | 
46 | Obtaining The Library
47 | ---------------------
48 | 
49 | The XSLT MathML Library is available for download as:
50 | 
51 |     * Zip file: http://www.raleigh.ru/MathML/mmltex/xsltml_2.1.2.zip
52 | 
53 | Copyright
54 | ---------
55 | 
56 | Copyright (C) 2001-2003 Vasil Yaroshevich
57 | 
58 | Permission is hereby granted, free of charge, to any person
59 | obtaining a copy of this software and associated documentation
60 | files (the ``Software''), to deal in the Software without
61 | restriction, including without limitation the rights to use,
62 | copy, modify, merge, publish, distribute, sublicense, and/or
63 | sell copies of the Software, and to permit persons to whom the
64 | Software is furnished to do so, subject to the following
65 | conditions:
66 | 
67 | The above copyright notice and this permission notice shall be
68 | included in all copies or substantial portions of the Software.
69 | 
70 | Except as contained in this notice, the names of individuals
71 | credited with contribution to this software shall not be used in
72 | advertising or otherwise to promote the sale, use or other
73 | dealings in this Software without prior written authorization
74 | from the individuals in question.
75 | 
76 | Any stylesheet derived from this Software that is publically
77 | distributed will be identified with a different name and the
78 | version strings in any derived Software will be changed so that
79 | no possibility of confusion between the derived package and this
80 | Software will exist.
81 | 
82 | Warranty
83 | --------
84 | 
85 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
86 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
87 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
88 | NONINFRINGEMENT.  IN NO EVENT SHALL NORMAN WALSH OR ANY OTHER
89 | CONTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
90 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
91 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
92 | OTHER DEALINGS IN THE SOFTWARE.
93 | 
94 | Contacting the Author
95 | ---------------------
96 | 
97 | These stylesheets are maintained by Vasil Yaroshevich, <yarosh@raleigh.ru>.
98 | 


--------------------------------------------------------------------------------
/src/main/resources/conventer/mml2tex/glayout.xsl:
--------------------------------------------------------------------------------
  1 | <?xml version='1.0' encoding="UTF-8"?>
  2 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  3 | 		xmlns:m="http://www.w3.org/1998/Math/MathML"
  4 |                 version='1.0'>
  5 | 
  6 | <!-- ====================================================================== -->
  7 | <!-- $Id: glayout.xsl,v 1.5 2003/06/10 12:24:04 shade33 Exp $
  8 |      This file is part of the XSLT MathML Library distribution.
  9 |      See ./README or http://www.raleigh.ru/MathML/mmltex for
 10 |      copyright and other information                                        -->
 11 | <!-- ====================================================================== -->
 12 | 
 13 | <!-- 3.3.2 mfrac -->
 14 | <xsl:template match="m:mfrac"> <!-- fraction: \frac by default, \genfrac when @linethickness is present -->
 15 | 	<xsl:choose>
 16 | 		<xsl:when test="not(@linethickness)">
 17 | 			<xsl:text>\frac{</xsl:text>
 18 | 		</xsl:when>
 19 | 		<xsl:otherwise>
 20 | 			<xsl:text>\genfrac{}{}{</xsl:text>
 21 | 			<xsl:choose> <!-- third \genfrac argument: thickness of the fraction rule -->
 22 | 				<xsl:when test="number(@linethickness)"> <!-- non-zero number: written as a tenth of its value, in ex -->
 23 | 					<xsl:value-of select="@linethickness div 10"/>
 24 | 					<xsl:text>ex</xsl:text>
 25 | 				</xsl:when>
 26 | 				<xsl:when test="@linethickness='0'">
 27 | 					<xsl:text>0ex</xsl:text>
 28 | 				</xsl:when>
 29 | 				<xsl:when test="@linethickness='thin'">
 30 | 					<xsl:text>.05ex</xsl:text>
 31 | 				</xsl:when>
 32 | 				<xsl:when test="@linethickness='medium'"/> <!-- argument left empty -->
 33 | 				<xsl:when test="@linethickness='thick'">
 34 | 					<xsl:text>.2ex</xsl:text>
 35 | 				</xsl:when>
 36 | 				<xsl:otherwise> <!-- any other value is copied through unchanged -->
 37 | 					<xsl:value-of select="@linethickness"/>
 38 | 				</xsl:otherwise>
 39 | 			</xsl:choose>
 40 | 			<xsl:text>}{}{</xsl:text>
 41 | 		</xsl:otherwise>
 42 | 	</xsl:choose>
 43 | 	<xsl:if test="@numalign='right'"> <!-- an \hfill before or after the numerator pushes it right or left -->
 44 | 		<xsl:text>\hfill </xsl:text>
 45 | 	</xsl:if>
 46 | 	<xsl:apply-templates select="*[1]"/>
 47 | 	<xsl:if test="@numalign='left'">
 48 | 		<xsl:text>\hfill </xsl:text>
 49 | 	</xsl:if>
 50 | 	<xsl:text>}{</xsl:text>
 51 | 	<xsl:if test="@denomalign='right'"> <!-- same alignment scheme for the denominator -->
 52 | 		<xsl:text>\hfill </xsl:text>
 53 | 	</xsl:if>
 54 | 	<xsl:apply-templates select="*[2]"/>
 55 | 	<xsl:if test="@denomalign='left'">
 56 | 		<xsl:text>\hfill </xsl:text>
 57 | 	</xsl:if>
 58 | 	<xsl:text>}</xsl:text>
 59 | </xsl:template>
 60 | 
 61 | <xsl:template match="m:mfrac[@bevelled='true']"> <!-- bevelled fraction: raised numerator, stretched slash, lowered denominator -->
 62 | 	<xsl:text>\raisebox{1ex}{$</xsl:text>
 63 | 	<xsl:apply-templates select="*[1]"/>
 64 | 	<xsl:text>$}\!\left/ \!\raisebox{-1ex}{$</xsl:text>
 65 | 	<xsl:apply-templates select="*[2]"/>
 66 | 	<xsl:text>$}\right.</xsl:text>
 67 | </xsl:template>
 68 | 
 69 | 
 70 | <xsl:template match="m:mroot"> <!-- m:mroot with children (base, index) becomes \sqrt[index]{base} -->
 71 | 	<xsl:choose>
 72 | 		<xsl:when test="count(*) != 2">
 73 | 		<!-- number of arguments is not 2 - code 25 -->
 74 | 			<xsl:message>exception 25:</xsl:message>
 75 | 			<xsl:text>\text{exception 25:}</xsl:text>
 76 | 		</xsl:when>
 77 | 		<xsl:otherwise>
 78 | 			<xsl:text>\sqrt[</xsl:text>
 79 | 			<xsl:apply-templates select="*[2]"/>
 80 | 			<xsl:text>]{</xsl:text>
 81 | 			<xsl:apply-templates select="*[1]"/>
 82 | 			<xsl:text>}</xsl:text>
 83 | 		</xsl:otherwise>
 84 | 	</xsl:choose>
 85 | </xsl:template>
 86 | 
 87 | <xsl:template match="m:msqrt"> <!-- square root over all child nodes -->
 88 | 	<xsl:text>\sqrt{</xsl:text>
 89 | 	<xsl:apply-templates select="node()"/>
 90 | 	<xsl:text>}</xsl:text>
 91 | </xsl:template>
 92 | 
 93 | <xsl:template match="m:mfenced"> <!-- fenced group: opening fence, children joined by separators, closing fence -->
 94 | 	<xsl:choose>
 95 | 		<xsl:when test="@open">
 96 | 			<xsl:if test="translate(@open,'{}[]()|','{{{{{{{')='{'"> <!-- @open is exactly one of { } [ ] ( ) | -->
 97 | 				<xsl:text>\left</xsl:text>
 98 | 			</xsl:if>
 99 | 			<xsl:if test="@open='{' or @open='}'"> <!-- braces need a leading backslash -->
100 | 				<xsl:text>\</xsl:text>
101 | 			</xsl:if>
102 | 			<xsl:if test="translate(@open,'{}[]()|','{{{{{{{')!='{' and (not(@close) or translate(@close,'{}[]()|','{{{{{{{')='{')"> <!-- pairs with the \right written for the closing fence -->
103 | 				<xsl:text>\left.</xsl:text>
104 | 			</xsl:if>
105 | 			<xsl:value-of select="@open"/>
106 | 		</xsl:when>
107 | 		<xsl:otherwise><xsl:text>\left(</xsl:text></xsl:otherwise>
108 | 	</xsl:choose>
109 | 	<xsl:variable name="sep"> <!-- separator characters with spaces removed; a single comma when @separators is absent -->
110 | 		<xsl:choose>
111 | 			<xsl:when test="@separators">
112 | 				<xsl:value-of select="translate(@separators,' ','')"/>
113 | 			</xsl:when>
114 | 			<xsl:otherwise>,</xsl:otherwise>
115 | 		</xsl:choose>
116 | 	</xsl:variable>
117 | 	<xsl:for-each select="*">
118 | 		<xsl:apply-templates select="."/>
119 | 		<xsl:if test="position() != last()">
120 | 			<xsl:choose>
121 | 				<xsl:when test="position() > string-length($sep)"> <!-- separators exhausted: repeat the last one -->
122 | 					<xsl:value-of select="substring($sep,string-length($sep))"/>
123 | 				</xsl:when>
124 | 				<xsl:otherwise>
125 | 					<xsl:value-of select="substring($sep,position(),1)"/>
126 | 				</xsl:otherwise>
127 | 			</xsl:choose>
128 | 		</xsl:if>
129 | 	</xsl:for-each>
130 | 	<xsl:choose>
131 | 		<xsl:when test="@close">
132 | 			<xsl:if test="translate(@close,'{}[]()|','{{{{{{{')='{'"> <!-- @close is exactly one of { } [ ] ( ) | -->
133 | 				<xsl:text>\right</xsl:text>
134 | 			</xsl:if>
135 | 			<xsl:if test="@close='{' or @close='}'">
136 | 				<xsl:text>\</xsl:text>
137 | 			</xsl:if>
138 | 			<xsl:if test="translate(@close,'{}[]()|','{{{{{{{')!='{' and (not(@open) or translate(@open,'{}[]()|','{{{{{{{')='{')"> <!-- pairs with the \left written for the opening fence -->
139 | 				<xsl:text>\right.</xsl:text>
140 | 			</xsl:if>
141 | 			<xsl:value-of select="@close"/>
142 | 		</xsl:when>
143 | 		<xsl:otherwise><xsl:text>\right)</xsl:text></xsl:otherwise>
144 | 	</xsl:choose>
145 | </xsl:template>
146 | 
147 | <xsl:template match="m:mphantom"> <!-- children rendered inside \phantom{} -->
148 | 	<xsl:text>\phantom{</xsl:text>
149 | 	<xsl:apply-templates select="node()"/>
150 | 	<xsl:text>}</xsl:text>
151 | </xsl:template>
152 | 
153 | <xsl:template match="m:menclose"> <!-- enclosure selected by @notation; any other notation falls back to \overline{)...} -->
154 | 	<xsl:choose>
155 | 		<xsl:when test="@notation = 'radical'">
156 | 			<xsl:text>\sqrt{</xsl:text>
157 | 			<xsl:apply-templates select="node()"/>
158 | 			<xsl:text>}</xsl:text>
159 | 		</xsl:when>
160 | 		<xsl:when test="@notation = 'actuarial'">
161 | 			<xsl:text>\overline{</xsl:text>
162 | 			<xsl:apply-templates select="node()"/>
163 | 			<xsl:text>\hspace{.2em}|}</xsl:text>
164 | 		</xsl:when>
165 | 		<xsl:otherwise>
166 | 			<xsl:text>\overline{)</xsl:text>
167 | 			<xsl:apply-templates select="node()"/>
168 | 			<xsl:text>}</xsl:text>
169 | 		</xsl:otherwise>
170 | 	</xsl:choose>
171 | </xsl:template>
172 | 
173 | <xsl:template match="m:mrow"> <!-- transparent grouping: children are processed in place -->
174 | 	<xsl:apply-templates select="node()"/>
175 | </xsl:template>
176 | 
177 | <xsl:template match="m:mstyle"> <!-- style wrapper: display/script style, background colour, text colour (outermost first) -->
178 | 	<xsl:if test="@displaystyle='true'">
179 | 		<xsl:text>{\displaystyle </xsl:text>
180 | 	</xsl:if>
181 | 	<xsl:if test="@scriptlevel and not(@displaystyle='true')"> <!-- @scriptlevel is ignored when displaystyle is forced -->
182 | 		<xsl:text>{</xsl:text>
183 | 		<xsl:choose>
184 | 			<xsl:when test="@scriptlevel=0"><xsl:text>\textstyle </xsl:text></xsl:when>
185 | 			<xsl:when test="@scriptlevel=1"><xsl:text>\scriptstyle </xsl:text></xsl:when>
186 | 			<xsl:otherwise><xsl:text>\scriptscriptstyle </xsl:text></xsl:otherwise>
187 | 		</xsl:choose>
188 | 	</xsl:if>
189 | 	<xsl:if test="@background">
190 | 		<xsl:text>\colorbox[rgb]{</xsl:text>
191 | 		<xsl:call-template name="color">
192 | 			<xsl:with-param name="color" select="@background"/>
193 | 		</xsl:call-template>
194 | 		<xsl:text>}{$</xsl:text>
195 | 	</xsl:if>
196 | 	<xsl:if test="@color or @mathcolor"> <!-- either spelling of the text colour opens a \textcolor group -->
197 | 		<xsl:text>\textcolor[rgb]{</xsl:text>
198 | 		<xsl:call-template name="color">
199 | 			<xsl:with-param name="color" select="@mathcolor | @color[not(../@mathcolor)]"/> <!-- @mathcolor wins when both are set; a plain union would use whichever attribute comes first in document order, which is implementation-dependent -->
200 | 		</xsl:call-template>
201 | 		<xsl:text>}{</xsl:text>
202 | 	</xsl:if>
203 | 	<xsl:apply-templates/>
204 | 	<xsl:if test="@color or @mathcolor"> <!-- closing braces mirror the opening tests in reverse order -->
205 | 		<xsl:text>}</xsl:text>
206 | 	</xsl:if>
207 | 	<xsl:if test="@background">
208 | 		<xsl:text>$}</xsl:text>
209 | 	</xsl:if>
210 | 	<xsl:if test="@scriptlevel and not(@displaystyle='true')">
211 | 		<xsl:text>}</xsl:text>
212 | 	</xsl:if>
213 | 	<xsl:if test="@displaystyle='true'">
214 | 		<xsl:text>}</xsl:text>
215 | 	</xsl:if>
216 | </xsl:template>
217 | 
218 | <xsl:template match="m:merror"> <!-- error wrapper: only its children are rendered -->
219 | 	<xsl:apply-templates select="node()"/>
220 | </xsl:template>
221 | 
222 | </xsl:stylesheet>


--------------------------------------------------------------------------------
/src/main/resources/conventer/mml2tex/mmltex.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 3 | 		xmlns:m="http://www.w3.org/1998/Math/MathML"
 4 |                 version='1.0'>
 5 |                 
 6 | <xsl:output method="text" indent="no" encoding="UTF-8"/>
 7 | 
 8 | <!-- ====================================================================== -->
 9 | <!-- $Id: mmltex.xsl,v 1.7 2003/06/10 12:24:04 shade33 Exp $
10 |      This file is part of the XSLT MathML Library distribution.
11 |      See ./README or http://www.raleigh.ru/MathML/mmltex for
12 |      copyright and other information                                        -->
13 | <!-- ====================================================================== -->
14 | 
15 | <xsl:include href="tokens.xsl"/>
16 | <xsl:include href="glayout.xsl"/>
17 | <xsl:include href="scripts.xsl"/>
18 | <xsl:include href="tables.xsl"/>
19 | <xsl:include href="entities.xsl"/>
20 | <xsl:include href="cmarkup.xsl"/>
21 | 
22 | <xsl:strip-space elements="m:*"/>
23 | 
24 | <xsl:template match="m:math[@display='inline'] | m:math[not(@display)][not(@mode) or @mode='inline']"> <!-- inline formula: $ ... $ -->
25 | 	<xsl:text>$ </xsl:text>
26 | 	<xsl:apply-templates select="node()"/>
27 | 	<xsl:text>$</xsl:text>
28 | </xsl:template>
29 | 
30 | <xsl:template match="m:math[not(@display)][@mode='display'] | m:math[@display='block']"> <!-- display formula: \[ ... \] on separate lines, body indented by a tab -->
31 | 	<xsl:text>&#10;\[&#10;&#9;</xsl:text>
32 | 	<xsl:apply-templates select="node()"/>
33 | 	<xsl:text>&#10;\]</xsl:text>
34 | </xsl:template>
35 | 
36 | </xsl:stylesheet>


--------------------------------------------------------------------------------
/src/main/resources/conventer/mml2tex/tables.xsl:
--------------------------------------------------------------------------------
  1 | <?xml version='1.0' encoding="UTF-8"?>
  2 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  3 | 		xmlns:m="http://www.w3.org/1998/Math/MathML"
  4 |                 version='1.0'>
  5 |                 
  6 | <!-- ====================================================================== -->
  7 | <!-- $id: tables.xsl, 2002/17/05 Exp $
  8 |      This file is part of the XSLT MathML Library distribution.
  9 |      See ./README or http://www.raleigh.ru/MathML/mmltex for
 10 |      copyright and other information                                        -->
 11 | <!-- ====================================================================== -->
 12 | 
 13 | <xsl:template match="m:mtd[@columnspan]"> <!-- spanning cell: centred \multicolumn over @columnspan columns -->
 14 | 	<xsl:text>\multicolumn{</xsl:text>
 15 | 	<xsl:value-of select="@columnspan"/>
 16 | 	<xsl:text>}{c}{</xsl:text>
 17 | 	<xsl:apply-templates select="node()"/>
 18 | 	<xsl:text>}</xsl:text>
 19 | 	<xsl:if test="count(following-sibling::*) &gt; 0"> <!-- column separator after every cell except the last -->
 20 | 		<xsl:text>&amp; </xsl:text>
 21 | 	</xsl:if>
 22 | </xsl:template>
 23 | 
 24 | 
 25 | <xsl:template match="m:mtd"> <!-- ordinary cell; \hfill on one or both sides follows the cell's own @columnalign -->
 26 | 	<xsl:if test="@columnalign='center' or @columnalign='right'">
 27 | 		<xsl:text>\hfill </xsl:text>
 28 | 	</xsl:if>
 29 | 	<xsl:apply-templates select="node()"/>
 30 | 	<xsl:if test="@columnalign='center' or @columnalign='left'">
 31 | 		<xsl:text>\hfill </xsl:text>
 32 | 	</xsl:if>
 33 | <!--    The count() test below is valid for Sablotron; another form is test="not(position()=last())".
 34 | 	The same test is used for m:mtd[@columnspan] and m:mtr.  -->
 35 | 	<xsl:if test="count(following-sibling::*) &gt; 0">
 36 | 		<xsl:text>&amp; </xsl:text>
 37 | 	</xsl:if>
 38 | </xsl:template>
 39 | 
 40 | <xsl:template match="m:mtr"> <!-- table row: its cells, then a row break unless it is the last row -->
 41 | 	<xsl:apply-templates select="node()"/>
 42 | 	<xsl:if test="count(following-sibling::*) &gt; 0">
 43 | 		<xsl:text>\\ </xsl:text>
 44 | 	</xsl:if>
 45 | </xsl:template>
 46 | 
 47 | <xsl:template match="m:mtable"> <!-- table rendered as a LaTeX array; the column count is taken from the first m:mtr -->
 48 | 	<xsl:variable name="numbercols" select="count(m:mtr[1]/m:mtd[not(@columnspan)]) + sum(m:mtr[1]/m:mtd/@columnspan)"/>
 49 | 	<xsl:text>\begin{array}{</xsl:text>
 50 | 	<xsl:if test="@frame='solid'">
 51 | 		<xsl:text>|</xsl:text>
 52 | 	</xsl:if>
 53 | 	<xsl:choose>
 54 | 		<xsl:when test="@columnalign">
 55 | 			<xsl:variable name="colalign"> <!-- first letter of each listed alignment, e.g. "left center" gives "lc" -->
 56 | 				<xsl:call-template name="colalign">
 57 | 					<xsl:with-param name="colalign" select="@columnalign"/>
 58 | 				</xsl:call-template>
 59 | 			</xsl:variable>
 60 | 			<xsl:choose>
 61 | 				<xsl:when test="string-length($colalign) &lt; $numbercols"> <!-- too few letters: pad by repeating the last one -->
 62 | 					<xsl:value-of select="$colalign"/>
 63 | 					<xsl:call-template name="generate-string">
 64 | 						<xsl:with-param name="text" select="substring($colalign,string-length($colalign))"/>
 65 | 						<xsl:with-param name="count" select="$numbercols - string-length($colalign)"/>
 66 | 					</xsl:call-template>
 67 | 				</xsl:when>
 68 | 				<xsl:when test="string-length($colalign) &gt; $numbercols"> <!-- too many letters: truncate -->
 69 | 					<xsl:value-of select="substring($colalign,1,$numbercols)"/>
 70 | 				</xsl:when>
 71 | 				<xsl:otherwise>
 72 | 					<xsl:value-of select="$colalign"/>
 73 | 				</xsl:otherwise>
 74 | 			</xsl:choose>
 75 | 		</xsl:when>
 76 | 		<xsl:otherwise> <!-- no @columnalign: every column is centred -->
 77 | 			<xsl:call-template name="generate-string">
 78 | 				<xsl:with-param name="text" select="'c'"/>
 79 | 				<xsl:with-param name="count" select="$numbercols"/>
 80 | 			</xsl:call-template>
 81 | 		</xsl:otherwise>
 82 | 	</xsl:choose>
 83 | 	<xsl:if test="@frame='solid'">
 84 | 		<xsl:text>|</xsl:text>
 85 | 	</xsl:if>
 86 | 	<xsl:text>}</xsl:text>
 87 | 	<xsl:if test="@frame='solid'"> <!-- a solid frame adds rules above and below the rows -->
 88 | 		<xsl:text>\hline </xsl:text>
 89 | 	</xsl:if>
 90 | 	<xsl:apply-templates select="node()"/>
 91 | 	<xsl:if test="@frame='solid'">
 92 | 		<xsl:text>\\ \hline</xsl:text>
 93 | 	</xsl:if>
 94 | 	<xsl:text>\end{array}</xsl:text>
 95 | </xsl:template>
 96 | 
 97 | <xsl:template name="colalign"> <!-- turns a space-separated alignment list into one letter per entry ("left center" gives "lc") -->
 98 | 	<xsl:param name="colalign"/>
 99 | 	<!-- Collapse leading, trailing and repeated spaces first: otherwise an empty word contributes a space character to the result. -->
100 | 	<xsl:variable name="words" select="normalize-space($colalign)"/>
101 | 	<!-- First letter of the current word; nothing is written for an empty list. -->
102 | 	<xsl:value-of select="substring($words,1,1)"/>
103 | 	<xsl:if test="contains($words,' ')">
104 | 		<!-- More words remain: recurse on everything after the first space. -->
105 | 		<xsl:call-template name="colalign">
106 | 			<xsl:with-param name="colalign" select="substring-after($words,' ')"/>
107 | 		</xsl:call-template>
108 | 	</xsl:if>
109 | 	<!-- The result length now equals the number of words, which m:mtable compares with its column count. -->
110 | </xsl:template>
111 | 
112 | <xsl:template name="generate-string">
113 | <!-- Writes $text repeated $count times. Template from XSLT Standard Library v1.1. -->
114 |     <xsl:param name="text"/>
115 |     <xsl:param name="count"/>
116 | 
117 |     <xsl:choose>
118 |       <xsl:when test="string-length($text) = 0 or not($count &gt; 0)"/>
119 |       <!-- not(count > 0) also stops on a NaN count, which the former "count <= 0" test let through into endless recursion -->
120 |       <xsl:otherwise>
121 | 	<xsl:value-of select="$text"/>
122 | 	<xsl:call-template name="generate-string">
123 | 	  <xsl:with-param name="text" select="$text"/>
124 | 	  <xsl:with-param name="count" select="$count - 1"/>
125 | 	</xsl:call-template>
126 |       </xsl:otherwise>
127 |     </xsl:choose>
128 | </xsl:template>
129 | 
130 | </xsl:stylesheet>


--------------------------------------------------------------------------------
/src/main/resources/conventer/mml2tex/tokens.xsl:
--------------------------------------------------------------------------------
  1 | <?xml version='1.0' encoding="UTF-8"?>
  2 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  3 | 		xmlns:m="http://www.w3.org/1998/Math/MathML"
  4 |                 version='1.0'>
  5 |                 
  6 | <!-- ====================================================================== -->
  7 | <!-- $Id: tokens.xsl,v 1.7 2003/06/10 12:24:05 shade33 Exp $
  8 |      This file is part of the XSLT MathML Library distribution.
  9 |      See ./README or http://www.raleigh.ru/MathML/mmltex for
 10 |      copyright and other information                                        -->
 11 | <!-- ====================================================================== -->
 12 | 
 13 | <xsl:template match="m:ms|m:mtext|m:mo|m:mn|m:mi"> <!-- every token element goes through the shared attribute wrapper -->
 14 | 	<xsl:call-template name="CommonTokenAtr"/>
 15 | </xsl:template>
 16 | 
 17 | <!-- 3.2.9 mglyph -->
 18 | <xsl:template match="m:mglyph"> <!-- the glyph itself is not rendered: its @alt text is printed in red -->
 19 | 	<xsl:text>\textcolor{red}{</xsl:text>
 20 | 	<xsl:value-of select="string(@alt)"/>
 21 | 	<xsl:text>}</xsl:text>
 22 | </xsl:template>
 23 | 
 24 | <xsl:template name="mi"> <!-- identifier: names longer than one character and without @mathvariant are wrapped in \mathrm -->
 25 | 	<xsl:choose>
 26 | 		<xsl:when test="@mathvariant or string-length(normalize-space(.)) &lt;= 1">
 27 | 			<xsl:apply-templates/>
 28 | 		</xsl:when>
 29 | 		<xsl:otherwise>
 30 | 			<xsl:text>\mathrm{</xsl:text>
 31 | 			<xsl:apply-templates/>
 32 | 			<xsl:text>}</xsl:text>
 33 | 		</xsl:otherwise>
 34 | 	</xsl:choose>
 35 | </xsl:template>
 36 | 
 37 | <xsl:template name="mn"> <!-- number: content that does not parse as a number, and has no @mathvariant, is wrapped in \mathrm -->
 38 | 	<xsl:choose>
 39 | 		<xsl:when test="@mathvariant or string(number(.)) != 'NaN'">
 40 | 			<xsl:apply-templates/>
 41 | 		</xsl:when>
 42 | 		<xsl:otherwise>
 43 | 			<xsl:text>\mathrm{</xsl:text>
 44 | 			<xsl:apply-templates/>
 45 | 			<xsl:text>}</xsl:text>
 46 | 		</xsl:otherwise>
 47 | 	</xsl:choose>
 48 | </xsl:template>
 49 | 
 50 | <!-- 3.2.5 Math Operator -->
 51 | <xsl:template name="mo"> <!-- operator: a lone fence character may be prefixed with \left or \right -->
 52 | 	<xsl:if test="translate(normalize-space(.),'()[]}|','{{{{{{')='{'"> <!-- content is a single fence character -->
 53 | 		<xsl:choose>
 54 | 			<xsl:when test="not(@stretchy='false') and count(preceding-sibling::m:mo[translate(normalize-space(.),'()[]}|','{{{{{{')='{']) mod 2 = 0 and following-sibling::m:mo[1][not(@stretchy='false')][translate(normalize-space(.),'()[]}|','{{{{{{')='{']">
 55 | 				<xsl:text>\left</xsl:text> <!-- even number of fences before this one, and the next m:mo sibling is a stretchy fence -->
 56 | 			</xsl:when>
 57 | 			<xsl:when test="not(@stretchy='false') and count(preceding-sibling::m:mo[translate(normalize-space(.),'()[]}|','{{{{{{')='{']) mod 2 = 1 and preceding-sibling::m:mo[1][not(@stretchy='false')][translate(normalize-space(.),'()[]}|','{{{{{{')='{']">
 58 | 				<xsl:text>\right</xsl:text> <!-- odd number of fences before this one, and the previous m:mo sibling is a stretchy fence -->
 59 | 			</xsl:when>
 60 | 		</xsl:choose>
 61 | 	</xsl:if>
 62 | 	<xsl:apply-templates/>
 63 | </xsl:template>
 64 | 
 65 | <xsl:template name="mtext"> <!-- text token: whitespace-normalised content, passed through replaceMtextEntities, inside \text{} -->
 66 | 	<xsl:variable name="escaped">
 67 | 		<xsl:call-template name="replaceMtextEntities">
 68 | 			<xsl:with-param name="content" select="normalize-space(.)"/>
 69 | 		</xsl:call-template>
 70 | 	</xsl:variable>
 71 | 	<xsl:text>\text{</xsl:text>
 72 | 	<xsl:value-of select="$escaped"/>
 73 | 	<xsl:text>}</xsl:text>
 74 | </xsl:template>
 75 | 
 76 | <xsl:template match="m:mspace"> <!-- blank space: an invisible \rule[-depth]{width}{height}; a missing width or height becomes 0ex -->
 77 | 	<xsl:text>\phantom{\rule</xsl:text>
 78 | 	<xsl:if test="@depth">
 79 | 		<xsl:text>[-</xsl:text>
 80 | 		<xsl:value-of select="@depth"/>
 81 | 		<xsl:text>]</xsl:text>
 82 | 	</xsl:if>
 83 | 	<xsl:text>{</xsl:text>
 84 | 	<xsl:choose>
 85 | 		<xsl:when test="@width"><xsl:value-of select="@width"/></xsl:when>
 86 | 		<xsl:otherwise><xsl:text>0ex</xsl:text></xsl:otherwise>
 87 | 	</xsl:choose>
 88 | 	<xsl:text>}{</xsl:text>
 89 | 	<xsl:choose>
 90 | 		<xsl:when test="@height"><xsl:value-of select="@height"/></xsl:when>
 91 | 		<xsl:otherwise><xsl:text>0ex</xsl:text></xsl:otherwise>
 92 | 	</xsl:choose>
 93 | 	<xsl:text>}}</xsl:text>
 94 | </xsl:template>
 95 | 
 96 | <xsl:template name="ms"> <!-- string literal: content between @lquote and @rquote, each defaulting to '' -->
 97 | 	<!-- opening quote: the default text is written only when @lquote is absent -->
 98 | 	<xsl:if test="not(@lquote)"><xsl:text>''</xsl:text></xsl:if>
 99 | 	<xsl:value-of select="@lquote"/>
100 | 	<xsl:apply-templates/>
101 | 	<xsl:if test="not(@rquote)"><xsl:text>''</xsl:text></xsl:if>
102 | 	<xsl:value-of select="@rquote"/>
103 | 	<!-- closing quote handled the same way as the opening one -->
104 | </xsl:template>
105 | 
106 | <xsl:template name="CommonTokenAtr"> <!-- wraps a token in background colour, text colour and font command (outermost first), then renders it via selectTemplate -->
107 | 	<xsl:if test="@mathbackground">
108 | 		<xsl:text>\colorbox[rgb]{</xsl:text>
109 | 		<xsl:call-template name="color">
110 | 			<xsl:with-param name="color" select="@mathbackground"/>
111 | 		</xsl:call-template>
112 | 		<xsl:text>}{$</xsl:text>
113 | 	</xsl:if>
114 | 	<xsl:if test="@color or @mathcolor"> <!-- Note: @color is deprecated in MathML 2.0 -->
115 | 		<xsl:text>\textcolor[rgb]{</xsl:text>
116 | 		<xsl:call-template name="color">
117 | 			<xsl:with-param name="color" select="@mathcolor | @color[not(../@mathcolor)]"/> <!-- @mathcolor wins when both are set; a plain union would use whichever attribute comes first in document order, which is implementation-dependent -->
118 | 		</xsl:call-template>
119 | 		<xsl:text>}{</xsl:text>
120 | 	</xsl:if>
121 | 	<xsl:if test="@mathvariant"> <!-- every branch opens exactly one brace, closed after the token below -->
122 | 		<xsl:choose>
123 | 			<xsl:when test="@mathvariant='normal'">
124 | 				<xsl:text>\mathrm{</xsl:text>
125 | 			</xsl:when>
126 | 			<xsl:when test="@mathvariant='bold'">
127 | 				<xsl:text>\mathbf{</xsl:text>
128 | 			</xsl:when>
129 | 			<xsl:when test="@mathvariant='italic'">
130 | 				<xsl:text>\mathit{</xsl:text>
131 | 			</xsl:when>
132 | 			<xsl:when test="@mathvariant='bold-italic'"> <!-- not supported -->
133 | 				<xsl:text>\mathit{</xsl:text>
134 | 				<xsl:message>The value bold-italic for mathvariant is not supported</xsl:message>
135 | 			</xsl:when>
136 | 			<xsl:when test="@mathvariant='double-struck'">	<!-- Required amsfonts -->
137 | 				<xsl:text>\mathbb{</xsl:text>
138 | 			</xsl:when>
139 | 			<xsl:when test="@mathvariant='bold-fraktur'"> <!-- not supported -->
140 | 				<xsl:text>\mathfrak{</xsl:text>
141 | 				<xsl:message>The value bold-fraktur for mathvariant is not supported</xsl:message>
142 | 			</xsl:when>
143 | 			<xsl:when test="@mathvariant='script'">
144 | 				<xsl:text>\mathcal{</xsl:text>
145 | 			</xsl:when>
146 | 			<xsl:when test="@mathvariant='bold-script'"> <!-- not supported -->
147 | 				<xsl:text>\mathcal{</xsl:text>
148 | 				<xsl:message>The value bold-script for mathvariant is not supported</xsl:message>
149 | 			</xsl:when>
150 | 			<xsl:when test="@mathvariant='fraktur'">	<!-- Required amsfonts -->
151 | 				<xsl:text>\mathfrak{</xsl:text>
152 | 			</xsl:when>
153 | 			<xsl:when test="@mathvariant='sans-serif'">
154 | 				<xsl:text>\mathsf{</xsl:text>
155 | 			</xsl:when>
156 | 			<xsl:when test="@mathvariant='bold-sans-serif'"> <!-- not supported -->
157 | 				<xsl:text>\mathsf{</xsl:text>
158 | 				<xsl:message>The value bold-sans-serif for mathvariant is not supported</xsl:message>
159 | 			</xsl:when>
160 | 			<xsl:when test="@mathvariant='sans-serif-italic'"> <!-- not supported -->
161 | 				<xsl:text>\mathsf{</xsl:text>
162 | 				<xsl:message>The value sans-serif-italic for mathvariant is not supported</xsl:message>
163 | 			</xsl:when>
164 | 			<xsl:when test="@mathvariant='sans-serif-bold-italic'"> <!-- not supported -->
165 | 				<xsl:text>\mathsf{</xsl:text>
166 | 				<xsl:message>The value sans-serif-bold-italic for mathvariant is not supported</xsl:message>
167 | 			</xsl:when>
168 | 			<xsl:when test="@mathvariant='monospace'">
169 | 				<xsl:text>\mathtt{</xsl:text>
170 | 			</xsl:when>
171 | 			<xsl:otherwise> <!-- unknown value: plain group so the closing brace still matches -->
172 | 				<xsl:text>{</xsl:text>
173 | 				<xsl:message>Error at mathvariant attribute</xsl:message>
174 | 			</xsl:otherwise>
175 | 		</xsl:choose>
176 | 	</xsl:if>
177 | 	<xsl:call-template name="selectTemplate"/>
178 | 	<xsl:if test="@mathvariant">
179 | 		<xsl:text>}</xsl:text>
180 | 	</xsl:if>
181 | 	<xsl:if test="@color or @mathcolor"> <!-- same test as the opening \textcolor above -->
182 | 		<xsl:text>}</xsl:text>
183 | 	</xsl:if>
184 | 	<xsl:if test="@mathbackground">
185 | 		<xsl:text>$}</xsl:text>
186 | 	</xsl:if>
187 | </xsl:template>
188 | 
189 | <xsl:template name="selectTemplate"> <!-- dispatches to the named template that matches the current element's local name -->
190 | 	<xsl:choose>
191 | 		<xsl:when test="local-name()='ms'">
192 | 			<xsl:call-template name="ms"/>
193 | 		</xsl:when>
194 | 		<xsl:when test="local-name()='mtext'">
195 | 			<xsl:call-template name="mtext"/>
196 | 		</xsl:when>
197 | 		<xsl:when test="local-name()='mo'">
198 | 			<xsl:call-template name="mo"/>
199 | 		</xsl:when>
200 | 		<xsl:when test="local-name()='mn'">
201 | 			<xsl:call-template name="mn"/>
202 | 		</xsl:when>
203 | 		<xsl:when test="local-name()='mi'">
204 | 			<xsl:call-template name="mi"/>
205 | 		</xsl:when>
206 | 	</xsl:choose>
207 | </xsl:template>
208 | 
209 | <xsl:template name="color">
210 | <!-- Writes $color as a comma-separated r,g,b triple with components between 0 and 1. Accepts #rgb, #rrggbb and the sixteen names listed below, case-insensitively. NB: Variables colora and valueColor{n} only for Sablotron -->
211 | 	<xsl:param name="color"/>
212 | 	<xsl:variable name="colora" select="translate($color,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz')"/>
213 | 	<xsl:choose>
214 | 	<xsl:when test="starts-with($colora,'#') and string-length($colora)=4">
215 | 		<xsl:variable name="valueColor">
216 | 			<xsl:call-template name="Hex2Decimal">
217 | 				<xsl:with-param name="arg" select="substring($colora,2,1)"/>
218 | 			</xsl:call-template>
219 | 		</xsl:variable>
220 | 		<xsl:value-of select="$valueColor div 15"/><xsl:text>,</xsl:text>
221 | 		<xsl:variable name="valueColor1">
222 | 			<xsl:call-template name="Hex2Decimal">
223 | 				<xsl:with-param name="arg" select="substring($colora,3,1)"/>
224 | 			</xsl:call-template>
225 | 		</xsl:variable>
226 | 		<xsl:value-of select="$valueColor1 div 15"/><xsl:text>,</xsl:text>
227 | 		<xsl:variable name="valueColor2">
228 | 			<xsl:call-template name="Hex2Decimal">
229 | 				<xsl:with-param name="arg" select="substring($colora,4,1)"/>
230 | 			</xsl:call-template>
231 | 		</xsl:variable>
232 | 		<xsl:value-of select="$valueColor2 div 15"/>
233 | 	</xsl:when>
234 | 	<xsl:when test="starts-with($colora,'#') and string-length($colora)=7">
235 | 		<xsl:variable name="valueColor1">
236 | 			<xsl:call-template name="Hex2Decimal">
237 | 				<xsl:with-param name="arg" select="substring($colora,2,1)"/>
238 | 			</xsl:call-template>
239 | 		</xsl:variable>
240 | 		<xsl:variable name="valueColor2">
241 | 			<xsl:call-template name="Hex2Decimal">
242 | 				<xsl:with-param name="arg" select="substring($colora,3,1)"/>
243 | 			</xsl:call-template>
244 | 		</xsl:variable>
245 | 		<xsl:value-of select="($valueColor1*16 + $valueColor2) div 255"/><xsl:text>,</xsl:text>
246 | 		<xsl:variable name="valueColor1a">
247 | 			<xsl:call-template name="Hex2Decimal">
248 | 				<xsl:with-param name="arg" select="substring($colora,4,1)"/>
249 | 			</xsl:call-template>
250 | 		</xsl:variable>
251 | 		<xsl:variable name="valueColor2a">
252 | 			<xsl:call-template name="Hex2Decimal">
253 | 				<xsl:with-param name="arg" select="substring($colora,5,1)"/>
254 | 			</xsl:call-template>
255 | 		</xsl:variable>
256 | 		<xsl:value-of select="($valueColor1a*16 + $valueColor2a) div 255"/><xsl:text>,</xsl:text>
257 | 		<xsl:variable name="valueColor1b">
258 | 			<xsl:call-template name="Hex2Decimal">
259 | 				<xsl:with-param name="arg" select="substring($colora,6,1)"/>
260 | 			</xsl:call-template>
261 | 		</xsl:variable>
262 | 		<xsl:variable name="valueColor2b">
263 | 			<xsl:call-template name="Hex2Decimal">
264 | 				<xsl:with-param name="arg" select="substring($colora,7,1)"/>
265 | 			</xsl:call-template>
266 | 		</xsl:variable>
267 | 		<xsl:value-of select="($valueColor1b*16 + $valueColor2b) div 255"/>
268 | 	</xsl:when>
269 | <!-- ======================= if color specified as an html-color-name ========================================== -->
270 | 	<xsl:when test="$colora='aqua'"><xsl:text>0,1,1</xsl:text></xsl:when>
271 | 	<xsl:when test="$colora='black'"><xsl:text>0,0,0</xsl:text></xsl:when>
272 | 	<xsl:when test="$colora='blue'"><xsl:text>0,0,1</xsl:text></xsl:when>
273 | 	<xsl:when test="$colora='fuchsia'"><xsl:text>1,0,1</xsl:text></xsl:when>
274 | 	<xsl:when test="$colora='gray'"><xsl:text>.5,.5,.5</xsl:text></xsl:when>
275 | 	<xsl:when test="$colora='green'"><xsl:text>0,.5,0</xsl:text></xsl:when>
276 | 	<xsl:when test="$colora='lime'"><xsl:text>0,1,0</xsl:text></xsl:when>
277 | 	<xsl:when test="$colora='maroon'"><xsl:text>.5,0,0</xsl:text></xsl:when>
278 | 	<xsl:when test="$colora='navy'"><xsl:text>0,0,.5</xsl:text></xsl:when>
279 | 	<xsl:when test="$colora='olive'"><xsl:text>.5,.5,0</xsl:text></xsl:when>
280 | 	<xsl:when test="$colora='purple'"><xsl:text>.5,0,.5</xsl:text></xsl:when>
281 | 	<xsl:when test="$colora='red'"><xsl:text>1,0,0</xsl:text></xsl:when>
282 | 	<xsl:when test="$colora='silver'"><xsl:text>.75,.75,.75</xsl:text></xsl:when>
283 | 	<xsl:when test="$colora='teal'"><xsl:text>0,.5,.5</xsl:text></xsl:when>
284 | 	<xsl:when test="$colora='white'"><xsl:text>1,1,1</xsl:text></xsl:when>
285 | 	<xsl:when test="$colora='yellow'"><xsl:text>1,1,0</xsl:text></xsl:when>
286 | 	<xsl:otherwise> <!-- unrecognised value: only a message is raised, nothing is written to the output -->
287 | 		<xsl:message>Exception at color template</xsl:message>
288 | 	</xsl:otherwise>
289 | 	</xsl:choose>
290 | </xsl:template>
291 | 
292 | <xsl:template name="Hex2Decimal"> <!-- writes the value of one lower-case hex digit; any other argument only raises a message -->
293 | 	<xsl:param name="arg"/>
294 | 	<xsl:choose>
295 | 		<xsl:when test="translate($arg, '0123456789', '9999999999')='9'"> <!-- a single decimal digit is its own value -->
296 | 			<xsl:value-of select="$arg"/>
297 | 		</xsl:when>
298 | 		<xsl:when test="$arg='a'">
299 | 			<xsl:text>10</xsl:text>
300 | 		</xsl:when>
301 | 		<xsl:when test="$arg='b'">
302 | 			<xsl:text>11</xsl:text>
303 | 		</xsl:when>
304 | 		<xsl:when test="$arg='c'">
305 | 			<xsl:text>12</xsl:text>
306 | 		</xsl:when>
307 | 		<xsl:when test="$arg='d'">
308 | 			<xsl:text>13</xsl:text>
309 | 		</xsl:when>
310 | 		<xsl:when test="$arg='e'">
311 | 			<xsl:text>14</xsl:text>
312 | 		</xsl:when>
313 | 		<xsl:when test="$arg='f'">
314 | 			<xsl:text>15</xsl:text>
315 | 		</xsl:when>
316 | 		<xsl:otherwise>
317 | 			<xsl:message>Exception at Hex2Decimal template</xsl:message>
318 | 		</xsl:otherwise>
319 | 	</xsl:choose>
320 | </xsl:template>
321 | 
322 | <xsl:template match="m:*/text()"> <!-- text directly inside a MathML element: whitespace-normalised, then handed to replaceEntities -->
323 | 	<xsl:call-template name="replaceEntities">
324 | 		<xsl:with-param name="content" select="normalize-space(.)"/>
325 | 	</xsl:call-template>
326 | </xsl:template>
327 | 
328 | </xsl:stylesheet>


--------------------------------------------------------------------------------
/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
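  1 | <!-- Levels from highest to lowest: OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE, ALL -->
  2 | <!-- Output rule: an event is logged when its level is at or above the level configured on the root logger -->
  3 | <!-- Without a filter, a file meant for one level would also receive events of other levels; the filter on each file appender below makes it record only its own level -->
  4 | <!-- scan: when set to true, the configuration file is reloaded whenever it changes. The default is false. -->
  5 | <!-- scanPeriod: interval at which the configuration file is checked for changes; if no time unit is given, milliseconds are assumed. Only takes effect when scan is true. The default interval is 1 minute. -->
  6 | <!-- debug: when set to true, logback prints its internal status messages so its state can be followed at runtime. The default is false. -->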
  7 | <configuration scan="true" scanPeriod="60 seconds" debug="false">
  8 |     <!-- Allows log levels to be changed at runtime (via JMX) -->
  9 |     <jmxConfigurator />
 10 |     <!-- Directory that the log files are written to -->
 11 |     <!-- <property name="log_dir" value="C:/test" />-->
 12 |     <property name="log_dir" value="logs/" />
 13 |     <!-- Maximum log history to keep: 30 days -->
 14 |     <property name="maxHistory" value="30" />
 15 | 
 16 |     <!-- ConsoleAppender: writes log output to the console -->
 17 |     <appender name="console" class="ch.qos.logback.core.ConsoleAppender">
 18 |         <encoder>
 19 |             <pattern>
 20 |                 <!-- Log line format -->
 21 |                 %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n
 22 |             </pattern>
 23 |         </encoder>
 24 |     </appender>
 25 | 
 26 |     <!-- ERROR级别日志 -->
 27 |     <!-- 滚动记录文件，先将日志记录到指定文件，当符合某个条件时，将日志记录到其他文件 RollingFileAppender -->
 28 |     <appender name="ERROR" class="ch.qos.logback.core.rolling.RollingFileAppender">
 29 |         <!-- 过滤器，只记录ERROR级别的日志 -->
 30 |         <!-- 如果日志级别等于配置级别，过滤器会根据 onMatch 和 onMismatch 接收或拒绝日志。 -->
 31 |         <filter class="ch.qos.logback.classic.filter.LevelFilter">
 32 |             <!-- 设置过滤级别 -->
 33 |             <level>ERROR</level>
 34 |             <!-- 用于配置符合过滤条件的操作 -->
 35 |             <onMatch>ACCEPT</onMatch>
 36 |             <!-- 用于配置不符合过滤条件的操作 -->
 37 |             <onMismatch>DENY</onMismatch>
 38 |         </filter>
 39 |         <!-- 最常用的滚动策略，它根据时间来制定滚动策略。既负责滚动也负责触发滚动 -->
 40 |         <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
 41 |             <!--日志输出位置 可相对、和绝对路径 -->
 42 |             <fileNamePattern>
 43 |                 ${log_dir}/error/%d{yyyy-MM-dd}/error-log.log
 44 |             </fileNamePattern>
 45 |             <!-- 可选节点，控制保留的归档文件的最大数量，超出数量就删除旧文件假设设置每个月滚动，且<maxHistory>是6， 则只保存最近6个月的文件，删除之前的旧文件。注意，删除旧文件是，那些为了归档而创建的目录也会被删除 -->
 46 |             <maxHistory>${maxHistory}</maxHistory>
 47 |         </rollingPolicy>
 48 |         <encoder>
 49 |             <pattern>
 50 |                 <!-- 设置日志输出格式 -->
 51 |                 %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n
 52 |             </pattern>
 53 |         </encoder>
 54 |     </appender>
 55 | 
 56 |     <!-- WARN级别日志 appender -->
 57 |     <appender name="WARN" class="ch.qos.logback.core.rolling.RollingFileAppender">
 58 |         <!-- 过滤器，只记录WARN级别的日志 -->
 59 |         <!-- 如果日志级别等于配置级别，过滤器会根据 onMatch 和 onMismatch 接收或拒绝日志。 -->
 60 |         <filter class="ch.qos.logback.classic.filter.LevelFilter">
 61 |             <!-- 设置过滤级别 -->
 62 |             <level>WARN</level>
 63 |             <!-- 用于配置符合过滤条件的操作 -->
 64 |             <onMatch>ACCEPT</onMatch>
 65 |             <!-- 用于配置不符合过滤条件的操作 -->
 66 |             <onMismatch>DENY</onMismatch>
 67 |         </filter>
 68 |         <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
 69 |             <!--日志输出位置 可相对、和绝对路径 -->
 70 |             <fileNamePattern>${log_dir}/warn/%d{yyyy-MM-dd}/warn-log.log</fileNamePattern>
 71 |             <maxHistory>${maxHistory}</maxHistory>
 72 |         </rollingPolicy>
 73 |         <encoder>
 74 |             <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n</pattern>
 75 |         </encoder>
 76 |     </appender>
 77 | 
 78 |     <!-- INFO级别日志 appender -->
 79 |     <appender name="INFO" class="ch.qos.logback.core.rolling.RollingFileAppender">
 80 |         <filter class="ch.qos.logback.classic.filter.LevelFilter">
 81 |             <level>INFO</level>
 82 |             <onMatch>ACCEPT</onMatch>
 83 |             <onMismatch>DENY</onMismatch>
 84 |         </filter>
 85 |         <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
 86 |             <fileNamePattern>${log_dir}/info/%d{yyyy-MM-dd}/info-log.log</fileNamePattern>
 87 |             <maxHistory>${maxHistory}</maxHistory>
 88 |         </rollingPolicy>
 89 |         <encoder>
 90 |             <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n</pattern>
 91 |         </encoder>
 92 |     </appender>
 93 | 
 94 |     <!-- DEBUG级别日志 appender -->
 95 |     <appender name="DEBUG" class="ch.qos.logback.core.rolling.RollingFileAppender">
 96 |         <filter class="ch.qos.logback.classic.filter.LevelFilter">
 97 |             <level>DEBUG</level>
 98 |             <onMatch>ACCEPT</onMatch>
 99 |             <onMismatch>DENY</onMismatch>
100 |         </filter>
101 |         <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
102 |             <fileNamePattern>${log_dir}/debug/%d{yyyy-MM-dd}/debug-log.log</fileNamePattern>
103 |             <maxHistory>${maxHistory}</maxHistory>
104 |         </rollingPolicy>
105 |         <encoder>
106 |             <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n</pattern>
107 |         </encoder>
108 |     </appender>
109 | 
110 |     <!-- TRACE级别日志 appender -->
111 |     <appender name="TRACE" class="ch.qos.logback.core.rolling.RollingFileAppender">
112 |         <filter class="ch.qos.logback.classic.filter.LevelFilter">
113 |             <level>TRACE</level>
114 |             <onMatch>ACCEPT</onMatch>
115 |             <onMismatch>DENY</onMismatch>
116 |         </filter>
117 |         <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
118 |             <fileNamePattern>${log_dir}/trace/%d{yyyy-MM-dd}/trace-log.log</fileNamePattern>
119 |             <maxHistory>${maxHistory}</maxHistory>
120 |         </rollingPolicy>
121 |         <encoder>
122 |             <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n</pattern>
123 |         </encoder>
124 |     </appender>
125 | 
126 |     <!-- root级别 DEBUG -->
127 |     <root>
128 |         <!-- 打印debug级别日志及以上级别日志 -->
129 |         <level value="debug" />
130 |         <!-- 控制台输出 -->
131 |         <appender-ref ref="console" />
132 |         <!-- 文件输出 -->
133 |         <appender-ref ref="ERROR" />
134 |         <appender-ref ref="INFO" />
135 |         <appender-ref ref="WARN" />
136 |         <appender-ref ref="DEBUG" />
137 |         <appender-ref ref="TRACE" />
138 |     </root>
139 | </configuration>


--------------------------------------------------------------------------------
/src/main/resources/故障模式分析表格样例.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/故障模式分析表格样例.docx


--------------------------------------------------------------------------------
/src/main/resources/故障模式分析表格样例_处理模型.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/故障模式分析表格样例_处理模型.docx


--------------------------------------------------------------------------------
/src/test/java/com/test/Doc2DocxTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import com.suncht.convert.OfficeDocumentConvertServer;
 4 | 
 5 | /**
 6 |  * Manual driver that converts a .doc file to .docx and then to .pdf through an
 7 |  * {@link OfficeDocumentConvertServer}. All paths are machine-specific.
 8 |  */
 9 | public class Doc2DocxTest {
10 | 
11 | 	public static void main(String[] args) throws Exception {
12 | 		// Office installation directory and the port(s) handed to the conversion server.
13 | 		final String officeHome = "D:\\Program Files\\LibreOffice 5\\";
14 | 		final int[] officePorts = { 8101 };
15 | 
16 | 		final String docPath = "D:\\FMEA信息导入-客户实例.doc";
17 | 		final String docxPath = "D:\\FMEA信息导入-客户实例.docx";
18 | 		final String pdfPath = "D:\\FMEA信息导入-客户实例.pdf";
19 | 
20 | 		try (OfficeDocumentConvertServer converter = new OfficeDocumentConvertServer(officeHome, officePorts)) {
21 | 			// First .doc -> .docx, then the freshly written .docx -> .pdf.
22 | 			converter.convert(docPath, docxPath, false);
23 | 			converter.convert(docxPath, pdfPath, true);
24 | 		}
25 | 	}
26 | 
27 | }
28 | 


--------------------------------------------------------------------------------
/src/test/java/com/test/MemoryMappingVisitorTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import com.suncht.wordread.model.TTCPr;
 4 | import com.suncht.wordread.parser.mapping.IWordTableMemoryMappingVisitor;
 5 | 
 6 | /**
 7 |  * Visitor handed to the parser by the tests in this package; it currently leaves every cell untouched.
 8 |  */
 9 | public class MemoryMappingVisitorTest implements IWordTableMemoryMappingVisitor {
10 | 
11 | 	/**
12 | 	 * Receives a cell together with its row and column index. Intentionally a no-op.
13 | 	 */
14 | 	@Override
15 | 	public void visit(final TTCPr cell, int realRowIndex, int realColumnIndex) {
16 | 		final boolean isFirstCell = realRowIndex == 0 && realColumnIndex == 0;
17 | 		if (!isFirstCell) {
18 | 			return;
19 | 		}
20 | 		// Placeholder for the cell at (0, 0): a marker such as "测试成功" could be written here.
21 | 	}
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/src/test/java/com/test/MuliHeaderXTableParserTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import java.io.InputStream;
 4 | import java.util.List;
 5 | 
 6 | import org.junit.Test;
 7 | 
 8 | import com.suncht.wordread.model.WordTable;
 9 | import com.suncht.wordread.parser.WordTableParser;
10 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
11 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
12 | 
13 | public class MuliHeaderXTableParserTest {
14 | 	
15 | 	@Test
16 | 	public void test01() {
17 | 		try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/FMEA信息导入-客户实例.docx");) {
18 | 			//InputStream inputStream = new FileInputStream(new File(doc2));
19 | 			List<WordTable> tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()).memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX);
20 | 			for (WordTable wordTable : tables) {
21 | 				System.out.println(wordTable.format());
22 | 			}
23 | 		} catch(Exception e) {
24 | 			e.printStackTrace();
25 | 		}
26 | 		
27 | 	}
28 | }
29 | 


--------------------------------------------------------------------------------
/src/test/java/com/test/MultiTextCellTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import java.io.IOException;
 4 | import java.io.InputStream;
 5 | import java.util.List;
 6 | 
 7 | import org.junit.Test;
 8 | 
 9 | import com.suncht.wordread.model.WordTable;
10 | import com.suncht.wordread.parser.WordTableParser;
11 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
13 | 
14 | /**
15 |  * Parses the nested multi-text sample document and prints every table found in it.
16 |  */
17 | public class MultiTextCellTest {
18 | 	/**
19 | 	 * Parses {@code /嵌套多文本.docx} with the logical-table strategy and prints each table.
20 | 	 * Parse failures are no longer caught here, so JUnit reports them.
21 | 	 *
22 | 	 * @throws IOException if closing the resource stream fails
23 | 	 */
24 | 	@Test
25 | 	public void testFormulaInCell() throws IOException {
26 | 		try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套多文本.docx")) {
27 | 			// getResourceAsStream returns null for a missing resource; fail with a clear message.
28 | 			org.junit.Assert.assertNotNull("test resource not found on classpath", inputStream);
29 | 			List<WordTable> tables = WordTableParser.create()
30 | 					.transferStrategy(new LogicalTableStrategy())
31 | 					.memoryMappingVisitor(new MemoryMappingVisitorTest())
32 | 					.parse(inputStream, WordDocType.DOCX);
33 | 			for (WordTable wordTable : tables) {
34 | 				System.out.println(wordTable.format());
35 | 			}
36 | 		}
37 | 	}
38 | }
39 | 


--------------------------------------------------------------------------------
/src/test/java/com/test/NestedFormulaTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import java.io.IOException;
 4 | import java.io.InputStream;
 5 | import java.util.List;
 6 | 
 7 | import org.junit.Test;
 8 | 
 9 | import com.suncht.wordread.model.WordTable;
10 | import com.suncht.wordread.parser.WordTableParser;
11 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
13 | 
14 | /**
15 |  * Parses the nested-formula sample documents (.docx and .doc) and prints their tables.
16 |  */
17 | public class NestedFormulaTest {
18 | 	/** Parses {@code /嵌套公式.docx} as DOCX and prints every table. */
19 | 	@Test
20 | 	public void testFormulaInCell_docx() throws IOException {
21 | 		parseAndPrint("/嵌套公式.docx", WordDocType.DOCX);
22 | 	}
23 | 
24 | 	/** Parses {@code /嵌套公式.doc} as DOC and prints every table. */
25 | 	@Test
26 | 	public void testFormulaInCell_doc() throws IOException {
27 | 		parseAndPrint("/嵌套公式.doc", WordDocType.DOC);
28 | 	}
29 | 
30 | 	/**
31 | 	 * Opens a classpath resource, parses it with the logical-table strategy and the
32 | 	 * {@link MemoryMappingVisitorTest} visitor, and prints each table's formatted form.
33 | 	 * Parse failures are propagated rather than swallowed so JUnit can report them.
34 | 	 *
35 | 	 * @param resourcePath classpath location of the document
36 | 	 * @param docType      document format passed to the parser
37 | 	 * @throws IOException if closing the resource stream fails
38 | 	 */
39 | 	private static void parseAndPrint(String resourcePath, WordDocType docType) throws IOException {
40 | 		try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream(resourcePath)) {
41 | 			// getResourceAsStream returns null for a missing resource; fail with a clear message.
42 | 			org.junit.Assert.assertNotNull("test resource not found on classpath: " + resourcePath, inputStream);
43 | 			List<WordTable> tables = WordTableParser.create()
44 | 					.transferStrategy(new LogicalTableStrategy())
45 | 					.memoryMappingVisitor(new MemoryMappingVisitorTest())
46 | 					.parse(inputStream, docType);
47 | 			for (WordTable wordTable : tables) {
48 | 				System.out.println(wordTable.format());
49 | 			}
50 | 		}
51 | 	}
52 | }
53 | 


--------------------------------------------------------------------------------
/src/test/java/com/test/NestedImageCellTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import java.io.InputStream;
 4 | import java.util.List;
 5 | 
 6 | import org.junit.Test;
 7 | 
 8 | import com.suncht.wordread.format.DefaultCellFormater;
 9 | import com.suncht.wordread.format.DefaultWordTableFormater;
10 | import com.suncht.wordread.format.IWordTableFormater;
11 | import com.suncht.wordread.model.WordTable;
12 | import com.suncht.wordread.output.DefaultWordTableOutputStrategy;
13 | import com.suncht.wordread.output.IWordTableOutputStrategy;
14 | import com.suncht.wordread.parser.WordTableParser;
15 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
16 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
17 | 
18 | /**
19 |  * Test for table cells that contain nested images.
20 |  * @author suncht
21 |  *
22 |  */
23 | public class NestedImageCellTest {
24 | 	/**
25 | 	 * Parses {@code /嵌套图片02.docx}, prints each table through the default formatter and
26 | 	 * passes it to the default output strategy.
27 | 	 *
28 | 	 * @throws Exception if parsing, formatting or output fails; propagated so JUnit reports
29 | 	 *                   the failure instead of the test passing after printing a stack trace
30 | 	 */
31 | 	@Test
32 | 	public void test01() throws Exception {
33 | 		IWordTableFormater tableFormater = new DefaultWordTableFormater(new DefaultCellFormater());
34 | 		IWordTableOutputStrategy outputStrategy = new DefaultWordTableOutputStrategy();
35 | 
36 | 		try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套图片02.docx")) {
37 | 			// getResourceAsStream returns null for a missing resource; fail with a clear message.
38 | 			org.junit.Assert.assertNotNull("test resource not found on classpath", inputStream);
39 | 			List<WordTable> tables = WordTableParser.create()
40 | 					.transferStrategy(new LogicalTableStrategy())
41 | 					.parse(inputStream, WordDocType.DOCX);
42 | 
43 | 			for (WordTable wordTable : tables) {
44 | 				System.out.println(wordTable.format(tableFormater));
45 | 				wordTable.output(outputStrategy);
46 | 			}
47 | 		}
48 | 	}
49 | }
50 | 


--------------------------------------------------------------------------------
/src/test/java/com/test/OfficeConverterTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import org.junit.Test;
 4 | 
 5 | import com.suncht.convert.OfficeDocumentConvertServer;
 6 | 
 7 | /**
 8 |  * Conversion tests that need a local office installation and local input files.
 9 |  * When those prerequisites are missing the tests are skipped; real conversion errors fail the test.
10 |  */
11 | public class OfficeConverterTest {
12 | 	private static final String OPEN_OFFICE_HOME = "D:\\Program Files\\LibreOffice 5\\";
13 | 	private static final int[] OPEN_OFFICE_PORT = { 8101 };
14 | 
15 | 	/** Converts {@code D:\dic.txt} to {@code D:\dic.docx}. */
16 | 	@Test
17 | 	public void txt2docx() throws Exception {
18 | 		convert("D:\\dic.txt", "D:\\dic.docx");
19 | 	}
20 | 
21 | 	/** Converts the sample .docx copy on drive D: to a .pdf next to it. */
22 | 	@Test
23 | 	public void docx2pdf() throws Exception {
24 | 		convert("D:\\故障模式分析表格样例 - 副本.docx", "D:\\故障模式分析表格样例 - 副本.pdf");
25 | 	}
26 | 
27 | 	/**
28 | 	 * Runs one conversion through a short-lived {@link OfficeDocumentConvertServer}.
29 | 	 *
30 | 	 * @param inputFile  path of the file to convert
31 | 	 * @param outputFile path the converted file is written to
32 | 	 * @throws Exception if the server cannot be started or the conversion fails
33 | 	 */
34 | 	private static void convert(String inputFile, String outputFile) throws Exception {
35 | 		// Machine-specific prerequisites: skip (not fail, not silently pass) when they are absent.
36 | 		org.junit.Assume.assumeTrue(new java.io.File(OPEN_OFFICE_HOME).isDirectory());
37 | 		org.junit.Assume.assumeTrue(new java.io.File(inputFile).isFile());
38 | 		try (OfficeDocumentConvertServer server = new OfficeDocumentConvertServer(OPEN_OFFICE_HOME, OPEN_OFFICE_PORT)) {
39 | 			server.convert(inputFile, outputFile, false);
40 | 		}
41 | 	}
42 | }
43 | 


--------------------------------------------------------------------------------
/src/test/java/com/test/OleObjectCellTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import java.io.IOException;
 4 | import java.io.InputStream;
 5 | import java.util.List;
 6 | 
 7 | import org.junit.Test;
 8 | 
 9 | import com.suncht.wordread.model.WordTable;
10 | import com.suncht.wordread.parser.WordTableParser;
11 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
13 | 
14 | /**
15 |  * Parses sample documents whose table cells contain embedded attachments and prints their tables.
16 |  */
17 | public class OleObjectCellTest {
18 | 	/** Parses {@code /嵌套附件01.docx}. Not annotated with {@code @Test}, so JUnit does not run it. */
19 | //	@Test
20 | 	public void testOleInCell() throws IOException {
21 | 		parseAndPrint("/嵌套附件01.docx");
22 | 	}
23 | 
24 | 	/** Parses {@code /嵌套附件02.docx} and prints every table. */
25 | 	@Test
26 | 	public void testEmbedDocxInCell() throws IOException {
27 | 		parseAndPrint("/嵌套附件02.docx");
28 | 	}
29 | 
30 | 	/**
31 | 	 * Opens a classpath .docx resource, parses it with the logical-table strategy and the
32 | 	 * {@link MemoryMappingVisitorTest} visitor, and prints each table's formatted form.
33 | 	 * Parse failures are propagated rather than swallowed so JUnit can report them.
34 | 	 *
35 | 	 * @param resourcePath classpath location of the document
36 | 	 * @throws IOException if closing the resource stream fails
37 | 	 */
38 | 	private static void parseAndPrint(String resourcePath) throws IOException {
39 | 		try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream(resourcePath)) {
40 | 			// getResourceAsStream returns null for a missing resource; fail with a clear message.
41 | 			org.junit.Assert.assertNotNull("test resource not found on classpath: " + resourcePath, inputStream);
42 | 			List<WordTable> tables = WordTableParser.create()
43 | 					.transferStrategy(new LogicalTableStrategy())
44 | 					.memoryMappingVisitor(new MemoryMappingVisitorTest())
45 | 					.parse(inputStream, WordDocType.DOCX);
46 | 			for (WordTable wordTable : tables) {
47 | 				System.out.println(wordTable.format());
48 | 			}
49 | 		}
50 | 	}
51 | }
52 | 


--------------------------------------------------------------------------------
/src/test/java/com/test/WordCellDataTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import java.io.IOException;
 4 | import java.io.InputStream;
 5 | import java.util.List;
 6 | 
 7 | import org.junit.Test;
 8 | 
 9 | import com.suncht.wordread.model.WordTable;
10 | import com.suncht.wordread.parser.WordTableParser;
11 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
13 | 
14 | /**
15 |  * Parses sample documents with formulas and images inside table cells and prints their tables.
16 |  */
17 | public class WordCellDataTest {
18 | 	/** Parses {@code /嵌套公式.docx} and prints every table. */
19 | 	@Test
20 | 	public void testFormulaInCell() throws IOException {
21 | 		parseAndPrint("/嵌套公式.docx");
22 | 	}
23 | 
24 | 	/** Parses {@code /嵌套图片.docx} and prints every table. */
25 | 	@Test
26 | 	public void testImageInCell() throws IOException {
27 | 		parseAndPrint("/嵌套图片.docx");
28 | 	}
29 | 
30 | 	/**
31 | 	 * Opens a classpath .docx resource, parses it with the logical-table strategy and the
32 | 	 * {@link MemoryMappingVisitorTest} visitor, and prints each table's formatted form.
33 | 	 * The stream is closed even when parsing throws.
34 | 	 *
35 | 	 * @param resourcePath classpath location of the document
36 | 	 * @throws IOException if closing the resource stream fails
37 | 	 */
38 | 	private static void parseAndPrint(String resourcePath) throws IOException {
39 | 		try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream(resourcePath)) {
40 | 			// getResourceAsStream returns null for a missing resource; fail with a clear message.
41 | 			org.junit.Assert.assertNotNull("test resource not found on classpath: " + resourcePath, inputStream);
42 | 			List<WordTable> tables = WordTableParser.create()
43 | 					.transferStrategy(new LogicalTableStrategy())
44 | 					.memoryMappingVisitor(new MemoryMappingVisitorTest())
45 | 					.parse(inputStream, WordDocType.DOCX);
46 | 			for (WordTable wordTable : tables) {
47 | 				System.out.println(wordTable.format());
48 | 			}
49 | 		}
50 | 	}
51 | }
52 | 


--------------------------------------------------------------------------------
/src/test/java/com/test/WordEmbedsTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import java.io.InputStream;
 4 | import java.util.Iterator;
 5 | import java.util.List;
 6 | 
 7 | import org.apache.poi.hssf.usermodel.HSSFCell;
 8 | import org.apache.poi.hssf.usermodel.HSSFRow;
 9 | import org.apache.poi.hssf.usermodel.HSSFSheet;
10 | import org.apache.poi.hssf.usermodel.HSSFWorkbook;
11 | import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
12 | import org.apache.poi.openxml4j.opc.PackagePart;
13 | import org.apache.poi.poifs.dev.POIFSViewEngine;
14 | import org.apache.poi.poifs.filesystem.POIFSFileSystem;
15 | import org.apache.poi.ss.usermodel.Cell;
16 | import org.apache.poi.xwpf.usermodel.XWPFDocument;
17 | import org.junit.Test;
18 | 
19 | public class WordEmbedsTest {
20 | 	/** Prints name and content type of every part embedded in /嵌套附件01.docx; failures propagate to JUnit. */
21 | 	@Test
22 | 	public void listAllEmbeds() throws Exception {
23 | 		try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套附件01.docx")) {
24 | 			org.junit.Assert.assertNotNull("test resource not found on classpath", inputStream);
25 | 			XWPFDocument document = new XWPFDocument(inputStream);
26 | 			listEmbeds(document);
27 | 			//listEmbeds2(document);
28 | 		}
29 | 	}
30 | 
31 | 	/** Prints one line "partName, contentType, " for each embedded part of {@code doc}. */
32 | 	private static void listEmbeds(XWPFDocument doc) throws OpenXML4JException {
33 | 		List<PackagePart> embeddedDocs = doc.getAllEmbedds();
34 | 		if (embeddedDocs == null) {
35 | 			return;
36 | 		}
37 | 		for (PackagePart part : embeddedDocs) {
38 | 			System.out.println(part.getPartName() + ", " + part.getContentType() + ", ");
39 | 		}
40 | 	}
41 | 
42 | 	/**
43 | 	 * Prints the string and numeric cells of sheet "Sheet1" of every embedded application/vnd.ms-excel
44 | 	 * part and doubles each numeric cell in memory. Only referenced from a commented-out call.
45 | 	 */
46 | 	private static void listEmbeds2(XWPFDocument doc) throws Exception {
47 | 		for (final PackagePart pPart : doc.getAllEmbedds()) {
48 | 			final String contentType = pPart.getContentType();
49 | 			System.out.println(contentType + "\n");
50 | 			if (!contentType.equals("application/vnd.ms-excel")) {
51 | 				continue;
52 | 			}
53 | 			final HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(pPart.getInputStream());
54 | 			for (int sheet = 0; sheet < embeddedWorkbook.getNumberOfSheets(); sheet++) {
55 | 				final HSSFSheet activeSheet = embeddedWorkbook.getSheetAt(sheet);
56 | 				if (!activeSheet.getSheetName().equalsIgnoreCase("Sheet1")) {
57 | 					continue;
58 | 				}
59 | 				for (int rowIndex = activeSheet.getFirstRowNum(); rowIndex <= activeSheet.getLastRowNum(); rowIndex++) {
60 | 					final HSSFRow row = activeSheet.getRow(rowIndex);
61 | 					if (row == null) {
62 | 						continue; // getRow returns null for rows that are not defined on the sheet
63 | 					}
64 | 					// getLastCellNum is the last cell index PLUS ONE, hence the strict upper bound.
65 | 					for (int cellIndex = row.getFirstCellNum(); cellIndex < row.getLastCellNum(); cellIndex++) {
66 | 						final HSSFCell cell = row.getCell(cellIndex);
67 | 						if (cell == null) {
68 | 							continue;
69 | 						}
70 | 						if (cell.getCellType() == Cell.CELL_TYPE_STRING) {
71 | 							System.out.println("Row:" + rowIndex + " Cell:" + cellIndex + " = " + cell.getStringCellValue());
72 | 						}
73 | 						if (cell.getCellType() == Cell.CELL_TYPE_NUMERIC) {
74 | 							System.out.println("Row:" + rowIndex + " Cell:" + cellIndex + " = " + cell.getNumericCellValue());
75 | 							cell.setCellValue(cell.getNumericCellValue() * 2); // in-memory update only
76 | 						}
77 | 					}
78 | 				}
79 | 			}
80 | 		}
81 | 	}
82 | 
83 | 	/**
84 | 	 * Prints the POIFS structure of {@code /1.doc} as rendered by {@link POIFSViewEngine}.
85 | 	 * A real file system is loaded because passing a null one is expected to end in a NullPointerException.
86 | 	 */
87 | 	@Test
88 | 	public void viewFile() throws Exception {
89 | 		try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/1.doc")) {
90 | 			org.junit.Assert.assertNotNull("test resource not found on classpath", inputStream);
91 | 			POIFSFileSystem fs = new POIFSFileSystem(inputStream);
92 | 			List<?> lines = POIFSViewEngine.inspectViewable(fs, true, 0, "  ");
93 | 			for (Object line : lines) {
94 | 				System.out.println(line);
95 | 			}
96 | 		}
97 | 	}
98 | }
99 | 


--------------------------------------------------------------------------------
/src/test/java/com/test/WordHTableParserTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import java.io.InputStream;
 4 | import java.util.List;
 5 | 
 6 | import org.junit.Test;
 7 | 
 8 | import com.suncht.wordread.model.WordTable;
 9 | import com.suncht.wordread.parser.WordTableParser;
10 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
11 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
12 | 
13 | /**
14 |  * Parses a legacy .doc sample document and prints every table found in it.
15 |  */
16 | public class WordHTableParserTest {
17 | 	/**
18 | 	 * Parses {@code /标准表格1.doc} as DOC with the logical-table strategy and prints each table.
19 | 	 * The resource stream is closed when the test finishes, including when parsing throws.
20 | 	 *
21 | 	 * @throws Exception if closing the resource stream fails
22 | 	 */
23 | 	@Test
24 | 	public void test01() throws Exception {
25 | 		try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/标准表格1.doc")) {
26 | 			// getResourceAsStream returns null for a missing resource; fail with a clear message.
27 | 			org.junit.Assert.assertNotNull("test resource not found on classpath", inputStream);
28 | 			List<WordTable> tables = WordTableParser.create()
29 | 					.transferStrategy(new LogicalTableStrategy())
30 | 					.parse(inputStream, WordDocType.DOC);
31 | 			for (WordTable wordTable : tables) {
32 | 				System.out.println(wordTable.format());
33 | 			}
34 | 		}
35 | 	}
36 | }
37 | 


--------------------------------------------------------------------------------
/src/test/java/com/test/WordXTableParserTest.java:
--------------------------------------------------------------------------------
 1 | package com.test;
 2 | 
 3 | import java.io.InputStream;
 4 | import java.util.List;
 5 | 
 6 | import org.junit.Test;
 7 | 
 8 | import com.suncht.wordread.model.WordTable;
 9 | import com.suncht.wordread.parser.WordTableParser;
10 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
11 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
12 | 
13 | /**
14 |  * Parses several .docx sample documents and prints every table found in them.
15 |  */
16 | public class WordXTableParserTest {
17 | 	// Local file paths; only mentioned in commented-out code in the original tests.
18 | 	String doc1 = "D:\\故障模式分析表格样例01.docx";
19 | 	String doc2 = "D:\\故障模式分析表格样例.docx";
20 | 
21 | 	/**
22 | 	 * Parses {@code /1.docx} with the {@link MemoryMappingVisitorTest} visitor attached.
23 | 	 *
24 | 	 * @throws Exception if the document cannot be read or parsed; propagated so JUnit reports
25 | 	 *                   the failure instead of the test passing after printing a stack trace
26 | 	 */
27 | 	@Test
28 | 	public void test01() throws Exception {
29 | 		try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/1.docx")) {
30 | 			org.junit.Assert.assertNotNull("test resource not found on classpath", inputStream);
31 | 			List<WordTable> tables = WordTableParser.create()
32 | 					.transferStrategy(new LogicalTableStrategy())
33 | 					.memoryMappingVisitor(new MemoryMappingVisitorTest())
34 | 					.parse(inputStream, WordDocType.DOCX);
35 | 			for (WordTable wordTable : tables) {
36 | 				System.out.println(wordTable.format());
37 | 			}
38 | 		}
39 | 	}
40 | 
41 | 	/** Parses {@code /故障模式分析表格样例.docx} and prints every table. */
42 | 	@Test
43 | 	public void test02() throws Exception {
44 | 		parseAndPrint("/故障模式分析表格样例.docx");
45 | 	}
46 | 
47 | 	/** Parses {@code /故障模式分析表格样例01.docx} and prints every table. */
48 | 	@Test
49 | 	public void test03() throws Exception {
50 | 		parseAndPrint("/故障模式分析表格样例01.docx");
51 | 	}
52 | 
53 | 	/** Parses {@code /复杂表格.docx} and prints every table. */
54 | 	@Test
55 | 	public void test04() throws Exception {
56 | 		parseAndPrint("/复杂表格.docx");
57 | 	}
58 | 
59 | 	/**
60 | 	 * Opens a classpath .docx resource, parses it with the logical-table strategy (no visitor)
61 | 	 * and prints each table's formatted form. The stream is closed even when parsing throws.
62 | 	 *
63 | 	 * @param resourcePath classpath location of the document
64 | 	 * @throws Exception if closing the resource stream fails
65 | 	 */
66 | 	private static void parseAndPrint(String resourcePath) throws Exception {
67 | 		try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream(resourcePath)) {
68 | 			// getResourceAsStream returns null for a missing resource; fail with a clear message.
69 | 			org.junit.Assert.assertNotNull("test resource not found on classpath: " + resourcePath, inputStream);
70 | 			List<WordTable> tables = WordTableParser.create()
71 | 					.transferStrategy(new LogicalTableStrategy())
72 | 					.parse(inputStream, WordDocType.DOCX);
73 | 			for (WordTable wordTable : tables) {
74 | 				System.out.println(wordTable.format());
75 | 			}
76 | 		}
77 | 	}
78 | }
79 | 


--------------------------------------------------------------------------------
/src/test/resources/1.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/1.doc


--------------------------------------------------------------------------------
/src/test/resources/1.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/1.docx


--------------------------------------------------------------------------------
/src/test/resources/2.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/2.doc


--------------------------------------------------------------------------------
/src/test/resources/conventer/mml2tex/README:
--------------------------------------------------------------------------------
 1 | README for the XSLT MathML Library 2.1.2
 2 | 
 3 | XSLT MathML Library is a set of XSLT stylesheets to transform
 4 | MathML 2.0 to LaTeX.
 5 | 
 6 | For more information, see
 7 | http://www.raleigh.ru/MathML/mmltex/index.php?lang=en
 8 | 
 9 | Manifest
10 | --------
11 | 
12 | README        this file
13 | mmltex.xsl
14 | tokens.xsl
15 | glayout.xsl
16 | scripts.xsl
17 | tables.xsl
18 | entities.xsl
19 | cmarkup.xsl
20 | 
21 | Use
22 | ---
23 | 
24 | There are two ways of using the library:
25 | 
26 |     * Use a local copy of the library.
27 | 
28 |         1. Download the distribution (see below).
29 | 
30 |         2. Unpack the distribution, using unzip.
31 | 
32 |         3. In your stylesheet import or include either the main
33 |            stylesheet, mmltex.xsl, or the stylesheet module you
34 |            wish to use, such as tokens.xsl. This example assumes
35 |            that the distribution has been extracted into the same
36 |            directory as your own stylesheet:
37 | 
38 |            <xsl:import href="mmltex.xsl"/>
39 | 
40 |     * Import or include either the main stylesheet, or the
41 |       stylesheet module you wish to use, directly from the library
42 |       website; http://www.raleigh.ru/MathML/mmltex/. For example:
43 | 
44 |       <xsl:import href="http://www.raleigh.ru/MathML/mmltex/mmltex.xsl"/>
45 | 
46 | Obtaining The Library
47 | ---------------------
48 | 
49 | The XSLT MathML Library is available for download as:
50 | 
51 |     * Zip file: http://www.raleigh.ru/MathML/mmltex/xsltml_2.1.2.zip
52 | 
53 | Copyright
54 | ---------
55 | 
56 | Copyright (C) 2001-2003 Vasil Yaroshevich
57 | 
58 | Permission is hereby granted, free of charge, to any person
59 | obtaining a copy of this software and associated documentation
60 | files (the ``Software''), to deal in the Software without
61 | restriction, including without limitation the rights to use,
62 | copy, modify, merge, publish, distribute, sublicense, and/or
63 | sell copies of the Software, and to permit persons to whom the
64 | Software is furnished to do so, subject to the following
65 | conditions:
66 | 
67 | The above copyright notice and this permission notice shall be
68 | included in all copies or substantial portions of the Software.
69 | 
70 | Except as contained in this notice, the names of individuals
71 | credited with contribution to this software shall not be used in
72 | advertising or otherwise to promote the sale, use or other
73 | dealings in this Software without prior written authorization
74 | from the individuals in question.
75 | 
76 | Any stylesheet derived from this Software that is publically
77 | distributed will be identified with a different name and the
78 | version strings in any derived Software will be changed so that
79 | no possibility of confusion between the derived package and this
80 | Software will exist.
81 | 
82 | Warranty
83 | --------
84 | 
85 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
86 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
87 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
88 | NONINFRINGEMENT.  IN NO EVENT SHALL NORMAN WALSH OR ANY OTHER
89 | CONTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
90 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
91 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
92 | OTHER DEALINGS IN THE SOFTWARE.
93 | 
94 | Contacting the Author
95 | ---------------------
96 | 
97 | These stylesheets are maintained by Vasil Yaroshevich, <yarosh@raleigh.ru>.
98 | 


--------------------------------------------------------------------------------
/src/test/resources/conventer/mml2tex/glayout.xsl:
--------------------------------------------------------------------------------
  1 | <?xml version='1.0' encoding="UTF-8"?>
  2 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  3 | 		xmlns:m="http://www.w3.org/1998/Math/MathML"
  4 |                 version='1.0'>
  5 | 
  6 | <!-- ====================================================================== -->
  7 | <!-- $Id: glayout.xsl,v 1.5 2003/06/10 12:24:04 shade33 Exp $
  8 |      This file is part of the XSLT MathML Library distribution.
  9 |      See ./README or http://www.raleigh.ru/MathML/mmltex for
 10 |      copyright and other information                                        -->
 11 | <!-- ====================================================================== -->
 12 | 
 13 | <!-- 3.3.2 mfrac: a plain fraction becomes \frac{num}{den}; when         -->
 14 | <!--       @linethickness is present the rule thickness is passed on via  -->
 15 | <!--       \genfrac{}{}{thickness}{}{num}{den}.                           -->
 16 | <xsl:template match="m:mfrac">
 17 | 	<xsl:variable name="rule" select="@linethickness"/>
 18 | 	<!-- Opening macro, up to and including the brace that starts the numerator. -->
 19 | 	<xsl:if test="not($rule)">
 20 | 		<xsl:text>\frac{</xsl:text>
 21 | 	</xsl:if>
 22 | 	<xsl:if test="$rule">
 23 | 		<xsl:text>\genfrac{}{}{</xsl:text>
 24 | 		<xsl:choose>
 25 | 			<!-- number() is false for 0 and NaN, so '0' and the keywords fall through. -->
 26 | 			<xsl:when test="number($rule)">
 27 | 				<xsl:value-of select="$rule div 10"/>
 28 | 				<xsl:text>ex</xsl:text>
 29 | 			</xsl:when>
 30 | 			<xsl:when test="$rule='0'">
 31 | 				<xsl:text>0ex</xsl:text>
 32 | 			</xsl:when>
 33 | 			<xsl:when test="$rule='thin'"><xsl:text>.05ex</xsl:text></xsl:when>
 34 | 			<!-- 'medium' emits nothing, leaving the thickness argument empty. -->
 35 | 			<xsl:when test="$rule='medium'"/>
 36 | 			<xsl:when test="$rule='thick'"><xsl:text>.2ex</xsl:text></xsl:when>
 37 | 			<xsl:otherwise><xsl:value-of select="$rule"/></xsl:otherwise>
 38 | 		</xsl:choose>
 39 | 		<xsl:text>}{}{</xsl:text>
 40 | 	</xsl:if>
 41 | 	<!-- Numerator: \hfill in front pushes it right, \hfill behind pushes it left. -->
 42 | 	<xsl:if test="@numalign='right'">
 43 | 		<xsl:text>\hfill </xsl:text>
 44 | 	</xsl:if>
 45 | 	<xsl:apply-templates select="*[1]"/>
 46 | 	<xsl:if test="@numalign='left'">
 47 | 		<xsl:text>\hfill </xsl:text>
 48 | 	</xsl:if>
 49 | 	<xsl:text>}{</xsl:text>
 50 | 	<!-- Denominator, aligned the same way through @denomalign. -->
 51 | 	<xsl:if test="@denomalign='right'">
 52 | 		<xsl:text>\hfill </xsl:text>
 53 | 	</xsl:if>
 54 | 	<xsl:apply-templates select="*[2]"/>
 55 | 	<xsl:if test="@denomalign='left'">
 56 | 		<xsl:text>\hfill </xsl:text>
 57 | 	</xsl:if>
 58 | 	<xsl:text>}</xsl:text>
 59 | </xsl:template>
 60 | 
 61 | <xsl:template match="m:mfrac[@bevelled='true']">
 62 | 	<!-- Bevelled fraction: raised numerator, a \left/ slash, lowered denominator. -->
 63 | 	<xsl:text>\raisebox{1ex}{$</xsl:text><xsl:apply-templates select="*[1]"/>
 64 | 	<xsl:text>$}\!\left/ \!\raisebox{-1ex}{$</xsl:text>
 65 | 	<xsl:apply-templates select="*[2]"/>
 66 | 	<xsl:text>$}\right.</xsl:text>
 67 | </xsl:template>
 68 | 
 69 | 
 70 | <xsl:template match="m:mroot">
 71 | 	<!-- Expects exactly two children: the base first, the index second. -->
 72 | 	<xsl:variable name="argc" select="count(*)"/>
 73 | 	<xsl:choose>
 74 | 		<xsl:when test="$argc != 2">
 75 | 			<!-- number of arguments is not 2 - code 25 -->
 76 | 			<xsl:message>exception 25:</xsl:message>
 77 | 			<xsl:text>\text{exception 25:}</xsl:text>
 78 | 		</xsl:when>
 79 | 		<xsl:otherwise>
 80 | 			<!-- LaTeX takes the index first: \sqrt[index]{base}. -->
 81 | 			<xsl:text>\sqrt[</xsl:text><xsl:apply-templates select="*[2]"/>
 82 | 			<xsl:text>]{</xsl:text><xsl:apply-templates select="*[1]"/><xsl:text>}</xsl:text>
 83 | 		</xsl:otherwise>
 84 | 	</xsl:choose>
 85 | </xsl:template>
 86 | 
 87 | <xsl:template match="m:msqrt">
 88 | 	<!-- All children are placed under one radical sign. -->
 89 | 	<xsl:text>\sqrt{</xsl:text>
 90 | 	<xsl:apply-templates select="node()"/><xsl:text>}</xsl:text>
 91 | </xsl:template>
 92 | 
 93 | <xsl:template match="m:mfenced">
 94 | 	<!-- Fenced list: opening delimiter, children joined by separators, closing delimiter. -->
 95 | 	<!-- A fence counts as "known" when it is exactly one of { } [ ] ( ) | . -->
 96 | 	<xsl:variable name="fences" select="'{}[]()|'"/>
 97 | 	<xsl:variable name="openKnown" select="translate(@open,$fences,'{{{{{{{')='{'"/>
 98 | 	<xsl:variable name="closeKnown" select="translate(@close,$fences,'{{{{{{{')='{'"/>
 99 | 	<!-- Opening fence; a missing @open defaults to a parenthesis. -->
100 | 	<xsl:choose>
101 | 		<xsl:when test="not(@open)"><xsl:text>\left(</xsl:text></xsl:when>
102 | 		<xsl:otherwise>
103 | 			<xsl:if test="$openKnown"><xsl:text>\left</xsl:text></xsl:if>
104 | 			<!-- Curly braces have to be escaped for LaTeX. -->
105 | 			<xsl:if test="@open='{' or @open='}'"><xsl:text>\</xsl:text></xsl:if>
106 | 			<!-- An unknown opener facing a \right still needs an invisible \left. partner. -->
107 | 			<xsl:if test="not($openKnown) and ($closeKnown or not(@close))">
108 | 				<xsl:text>\left.</xsl:text>
109 | 			</xsl:if>
110 | 			<xsl:value-of select="@open"/>
111 | 		</xsl:otherwise>
112 | 	</xsl:choose>
113 | 	<!-- Separator characters: @separators with blanks removed, a comma by default. -->
114 | 	<xsl:variable name="seps">
115 | 		<xsl:choose>
116 | 			<xsl:when test="@separators"><xsl:value-of select="translate(@separators,' ','')"/></xsl:when>
117 | 			<xsl:otherwise>,</xsl:otherwise>
118 | 		</xsl:choose>
119 | 	</xsl:variable>
120 | 	<xsl:variable name="nseps" select="string-length($seps)"/>
121 | 	<xsl:for-each select="*">
122 | 		<xsl:apply-templates select="."/>
123 | 		<xsl:if test="position() != last()">
124 | 			<xsl:choose>
125 | 				<!-- Past the end of the list the last separator is reused. -->
126 | 				<xsl:when test="position() > $nseps">
127 | 					<xsl:value-of select="substring($seps,$nseps)"/>
128 | 				</xsl:when>
129 | 				<xsl:otherwise>
130 | 					<xsl:value-of select="substring($seps,position(),1)"/>
131 | 				</xsl:otherwise>
132 | 			</xsl:choose>
133 | 		</xsl:if>
134 | 	</xsl:for-each>
135 | 	<!-- Closing fence, mirroring the opening logic; the default is a parenthesis. -->
136 | 	<xsl:choose>
137 | 		<xsl:when test="not(@close)"><xsl:text>\right)</xsl:text></xsl:when>
138 | 		<xsl:otherwise>
139 | 			<xsl:if test="$closeKnown"><xsl:text>\right</xsl:text></xsl:if>
140 | 			<xsl:if test="@close='{' or @close='}'"><xsl:text>\</xsl:text></xsl:if>
141 | 			<xsl:if test="not($closeKnown) and ($openKnown or not(@open))"><xsl:text>\right.</xsl:text></xsl:if>
142 | 			<xsl:value-of select="@close"/>
143 | 		</xsl:otherwise>
144 | 	</xsl:choose>
145 | </xsl:template>
146 | 
147 | <xsl:template match="m:mphantom">
148 | 	<!-- Children are rendered inside \phantom{...}. -->
149 | 	<xsl:text>\phantom{</xsl:text>
150 | 	<xsl:apply-templates select="node()"/><xsl:text>}</xsl:text>
151 | </xsl:template>
152 | 
153 | <xsl:template match="m:menclose">
154 | 	<!-- 'radical' and 'actuarial' get dedicated output; every other @notation -->
155 | 	<!-- value, or none at all, is written as \overline{)...}. -->
156 | 	<xsl:variable name="kind" select="string(@notation)"/>
157 | 	<xsl:choose>
158 | 		<xsl:when test="$kind='radical'"><xsl:text>\sqrt{</xsl:text></xsl:when>
159 | 		<xsl:when test="$kind='actuarial'"><xsl:text>\overline{</xsl:text></xsl:when>
160 | 		<xsl:otherwise>
161 | 			<xsl:text>\overline{)</xsl:text>
162 | 		</xsl:otherwise>
163 | 	</xsl:choose>
164 | 	<xsl:apply-templates/>
165 | 	<!-- The actuarial form ends with a small gap and a vertical bar under the overline. -->
166 | 	<xsl:if test="$kind='actuarial'">
167 | 		<xsl:text>\hspace{.2em}|</xsl:text>
168 | 	</xsl:if>
169 | 	<!-- Closing brace shared by all three forms. -->
170 | 	<xsl:text>}</xsl:text>
171 | </xsl:template>
172 | 
173 | <xsl:template match="m:mrow">
174 | 	<xsl:apply-templates select="node()"/> <!-- pure grouping: children are rendered, nothing is added -->
175 | </xsl:template>
176 | 
177 | <xsl:template match="m:mstyle">
178 | 	<!-- Wraps the children in nested LaTeX groups: display/script style, then -->
179 | 	<!-- background colour, then text colour; the groups close in reverse order. -->
180 | 	<!-- The named template 'color' is not defined in this file. -->
181 | 	<!-- Foreground colour: @mathcolor takes precedence, @color is the fallback. -->
182 | 	<!-- Selecting "@color|@mathcolor" would hand over both attributes when both -->
183 | 	<!-- are set, and which one is then used depends on the processor. -->
184 | 	<xsl:variable name="fg" select="@mathcolor | @color[not(../@mathcolor)]"/>
185 | 	<xsl:variable name="sized" select="@scriptlevel and not(@displaystyle='true')"/>
186 | 	<xsl:if test="@displaystyle='true'">
187 | 		<xsl:text>{\displaystyle </xsl:text>
188 | 	</xsl:if>
189 | 	<xsl:if test="$sized">
190 | 		<xsl:text>{</xsl:text>
191 | 		<xsl:choose>
192 | 			<xsl:when test="@scriptlevel=0"><xsl:text>\textstyle </xsl:text></xsl:when>
193 | 			<xsl:when test="@scriptlevel=1"><xsl:text>\scriptstyle </xsl:text></xsl:when>
194 | 			<xsl:otherwise><xsl:text>\scriptscriptstyle </xsl:text></xsl:otherwise>
195 | 		</xsl:choose>
196 | 	</xsl:if>
197 | 	<xsl:if test="@background">
198 | 		<xsl:text>\colorbox[rgb]{</xsl:text>
199 | 		<xsl:call-template name="color">
200 | 			<xsl:with-param name="color" select="@background"/>
201 | 		</xsl:call-template>
202 | 		<xsl:text>}{$</xsl:text>
203 | 	</xsl:if>
204 | 	<xsl:if test="$fg">
205 | 		<xsl:text>\textcolor[rgb]{</xsl:text>
206 | 		<xsl:call-template name="color">
207 | 			<xsl:with-param name="color" select="$fg"/>
208 | 		</xsl:call-template>
209 | 		<xsl:text>}{</xsl:text>
210 | 	</xsl:if>
211 | 	<xsl:apply-templates/>
212 | 	<xsl:if test="$fg"><xsl:text>}</xsl:text></xsl:if>
213 | 	<xsl:if test="@background"><xsl:text>$}</xsl:text></xsl:if>
214 | 	<xsl:if test="$sized"><xsl:text>}</xsl:text></xsl:if>
215 | 	<xsl:if test="@displaystyle='true'"><xsl:text>}</xsl:text></xsl:if>
216 | </xsl:template>
217 | 
218 | <xsl:template match="m:merror">
219 | 	<xsl:apply-templates select="node()"/> <!-- the error content is rendered like ordinary children -->
220 | </xsl:template>
221 | 
222 | </xsl:stylesheet>


--------------------------------------------------------------------------------
/src/test/resources/conventer/mml2tex/mmltex.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version='1.0' encoding="UTF-8"?>
 2 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 3 | 		xmlns:m="http://www.w3.org/1998/Math/MathML"
 4 |                 version='1.0'>
 5 |                 
 6 | <xsl:output method="text" indent="no" encoding="UTF-8"/> <!-- the result is serialized as plain UTF-8 text, not as XML -->
 7 | 
 8 | <!-- ====================================================================== -->
 9 | <!-- $Id: mmltex.xsl,v 1.7 2003/06/10 12:24:04 shade33 Exp $
10 |      This file is part of the XSLT MathML Library distribution.
11 |      See ./README or http://www.raleigh.ru/MathML/mmltex for
12 |      copyright and other information                                        -->
13 | <!-- ====================================================================== -->
14 | 
15 | <xsl:include href="tokens.xsl"/>
16 | <xsl:include href="glayout.xsl"/>
17 | <xsl:include href="scripts.xsl"/>
18 | <xsl:include href="tables.xsl"/>
19 | <xsl:include href="entities.xsl"/>
20 | <xsl:include href="cmarkup.xsl"/>
21 | 
22 | <xsl:strip-space elements="m:*"/> <!-- drop whitespace-only text nodes inside every element of the m: namespace -->
23 | 
24 | <xsl:template match="m:math[@display='inline'] | m:math[not(@mode) or @mode='inline'][not(@display)]">
25 | 	<!-- Inline formula: children go between two dollar signs; a space follows the opening one. -->
26 | 	<xsl:text>&#x24; </xsl:text><xsl:apply-templates/>
27 | 	<xsl:text>&#x24;</xsl:text>
28 | </xsl:template>
29 | 
30 | <xsl:template match="m:math[@mode='display'][not(@display)] | m:math[@display='block']">
31 | 	<!-- Display formula: newline, \[, newline and tab, the children, newline, \]. -->
32 | 	<xsl:text>&#10;\[&#10;&#9;</xsl:text><xsl:apply-templates/>
33 | 	<xsl:text>&#10;\]</xsl:text>
34 | </xsl:template>
35 | 
36 | </xsl:stylesheet>


--------------------------------------------------------------------------------
/src/test/resources/conventer/mml2tex/tables.xsl:
--------------------------------------------------------------------------------
  1 | <?xml version='1.0' encoding="UTF-8"?>
  2 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  3 | 		xmlns:m="http://www.w3.org/1998/Math/MathML"
  4 |                 version='1.0'>
  5 |                 
  6 | <!-- ====================================================================== -->
  7 | <!-- $id: tables.xsl, 2002/17/05 Exp $
  8 |      This file is part of the XSLT MathML Library distribution.
  9 |      See ./README or http://www.raleigh.ru/MathML/mmltex for
 10 |      copyright and other information                                        -->
 11 | <!-- ====================================================================== -->
 12 | 
 13 | <xsl:template match="m:mtd[@columnspan]">
 14 | 	<!-- Spanning cell: \multicolumn{span}{c}{content}, always centred. -->
 15 | 	<xsl:text>\multicolumn{</xsl:text><xsl:value-of select="@columnspan"/>
 16 | 	<xsl:text>}{c}{</xsl:text>
 17 | 	<xsl:apply-templates/><xsl:text>}</xsl:text>
 18 | 	<!-- Cell separator, unless this is the last child of the row. -->
 19 | 	<xsl:if test="following-sibling::*">
 20 | 		<xsl:text>&amp; </xsl:text>
 21 | 	</xsl:if>
 22 | </xsl:template>
 23 | 
 24 | 
 25 | <xsl:template match="m:mtd">
 26 | 	<!-- \hfill in front of the content pushes it right, \hfill behind it -->
 27 | 	<!-- pushes it left; 'center' receives both. -->
 28 | 	<xsl:variable name="align" select="string(@columnalign)"/>
 29 | 	<xsl:if test="$align='center' or $align='right'"><xsl:text>\hfill </xsl:text></xsl:if>
 30 | 	<xsl:apply-templates/>
 31 | 	<xsl:if test="$align='center' or $align='left'"><xsl:text>\hfill </xsl:text></xsl:if>
 32 | 	<!-- Cell separator after every cell that has a following sibling. Testing for -->
 33 | 	<!-- following siblings is valid for Sablotron; another form is -->
 34 | 	<!-- test="not(position()=last())". Also used by m:mtd[@columnspan] and m:mtr. -->
 35 | 	<xsl:if test="following-sibling::*">
 36 | 		<xsl:text>&amp; </xsl:text>
 37 | 	</xsl:if>
 38 | </xsl:template>
 39 | 
 40 | <xsl:template match="m:mtr">
 41 | 	<!-- Render the cells of the row, then emit a row break -->
 42 | 	<!-- whenever another sibling element follows. -->
 43 | 	<xsl:apply-templates select="node()"/>
 44 | 	<xsl:if test="following-sibling::*"><xsl:text>\\ </xsl:text></xsl:if>
 45 | </xsl:template>
 46 | 
 47 | <xsl:template match="m:mtable">
 48 | 	<!-- Renders the table as a LaTeX array. The column count is taken from the -->
 49 | 	<!-- first m:mtr: cells without @columnspan count once, spans add their value. -->
 50 | 	<xsl:variable name="framed" select="@frame='solid'"/>
 51 | 	<xsl:variable name="firstrow" select="m:mtr[1]/m:mtd"/>
 52 | 	<xsl:variable name="numbercols" select="count($firstrow[not(@columnspan)]) + sum($firstrow/@columnspan)"/>
 53 | 	<xsl:text>\begin{array}{</xsl:text>
 54 | 	<xsl:if test="$framed"><xsl:text>|</xsl:text></xsl:if>
 55 | 	<!-- Column specification: exactly one letter per column. -->
 56 | 	<xsl:choose>
 57 | 		<xsl:when test="not(@columnalign)">
 58 | 			<xsl:call-template name="generate-string">
 59 | 				<xsl:with-param name="text" select="'c'"/>
 60 | 				<xsl:with-param name="count" select="$numbercols"/>
 61 | 			</xsl:call-template>
 62 | 		</xsl:when>
 63 | 		<xsl:otherwise>
 64 | 			<!-- One letter per @columnalign entry, cut or padded to the column count. -->
 65 | 			<xsl:variable name="spec">
 66 | 				<xsl:call-template name="colalign">
 67 | 					<xsl:with-param name="colalign" select="@columnalign"/>
 68 | 				</xsl:call-template>
 69 | 			</xsl:variable>
 70 | 			<xsl:variable name="speclen" select="string-length($spec)"/>
 71 | 			<xsl:choose>
 72 | 				<xsl:when test="$speclen > $numbercols">
 73 | 					<xsl:value-of select="substring($spec,1,$numbercols)"/>
 74 | 				</xsl:when>
 75 | 				<xsl:otherwise>
 76 | 					<xsl:value-of select="$spec"/>
 77 | 					<!-- Too few entries: repeat the last letter for the remaining columns. -->
 78 | 					<xsl:if test="$speclen &lt; $numbercols">
 79 | 						<xsl:call-template name="generate-string">
 80 | 							<xsl:with-param name="text" select="substring($spec,$speclen)"/>
 81 | 							<xsl:with-param name="count" select="$numbercols - $speclen"/>
 82 | 						</xsl:call-template>
 83 | 					</xsl:if>
 84 | 				</xsl:otherwise>
 85 | 			</xsl:choose>
 86 | 		</xsl:otherwise>
 87 | 	</xsl:choose>
 88 | 	<xsl:if test="$framed"><xsl:text>|</xsl:text></xsl:if>
 89 | 	<xsl:text>}</xsl:text>
 90 | 	<!-- A solid frame also gets a rule above the first and below the last row. -->
 91 | 	<xsl:if test="$framed"><xsl:text>\hline </xsl:text></xsl:if>
 92 | 	<xsl:apply-templates/>
 93 | 	<xsl:if test="$framed"><xsl:text>\\ \hline</xsl:text></xsl:if>
 94 | 	<xsl:text>\end{array}</xsl:text>
 95 | </xsl:template>
 96 | 
 97 | <xsl:template name="colalign">
 98 | 	<!-- Turns a MathML columnalign list ("left center right") into a LaTeX -->
 99 | 	<!-- column spec ("lcr") by keeping the first letter of each entry. -->
100 | 	<xsl:param name="colalign"/>
101 | 	<!-- Collapse leading, trailing and repeated whitespace first; otherwise a -->
102 | 	<!-- stray space would be emitted and counted as a column by the caller. -->
103 | 	<xsl:variable name="entries" select="normalize-space($colalign)"/>
104 | 	<xsl:value-of select="substring($entries,1,1)"/>
105 | 	<xsl:if test="contains($entries,' ')">
106 | 		<xsl:call-template name="colalign">
107 | 			<xsl:with-param name="colalign" select="substring-after($entries,' ')"/>
108 | 		</xsl:call-template>
109 | 	</xsl:if>
110 | </xsl:template>
111 | 
112 | <xsl:template name="generate-string">
113 | <!-- template from XSLT Standard Library v1.1 -->
114 | 	<!-- Writes $text to the output $count times, one copy per recursive call. -->
115 | 	<!-- param text: the string to repeat; param count: the number of copies. -->
116 | 	<xsl:param name="text"/>
117 | 	<xsl:param name="count"/>
118 | 	<!-- Recurse only while count is a positive number. Testing "$count > 0" also -->
119 | 	<!-- stops on NaN (e.g. from a non-numeric columnspan), for which the former -->
120 | 	<!-- "$count <= 0" exit test is false, so the recursion would never terminate. -->
121 | 	<xsl:if test="string-length($text) != 0 and $count > 0">
122 | 		<xsl:value-of select="$text"/>
123 | 		<xsl:call-template name="generate-string">
124 | 			<xsl:with-param name="text" select="$text"/>
125 | 			<xsl:with-param name="count" select="$count - 1"/>
126 | 		</xsl:call-template>
127 | 	</xsl:if>
128 | </xsl:template>
129 | 
130 | </xsl:stylesheet>


--------------------------------------------------------------------------------
/src/test/resources/复杂表格.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/复杂表格.docx


--------------------------------------------------------------------------------
/src/test/resources/嵌套公式.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套公式.doc


--------------------------------------------------------------------------------
/src/test/resources/嵌套公式.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套公式.docx


--------------------------------------------------------------------------------
/src/test/resources/嵌套图片.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套图片.docx


--------------------------------------------------------------------------------
/src/test/resources/嵌套图片01.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套图片01.docx


--------------------------------------------------------------------------------
/src/test/resources/嵌套图片02.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套图片02.docx


--------------------------------------------------------------------------------
/src/test/resources/嵌套多文本.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套多文本.docx


--------------------------------------------------------------------------------
/src/test/resources/嵌套附件01.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套附件01.docx


--------------------------------------------------------------------------------
/src/test/resources/嵌套附件02.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套附件02.docx


--------------------------------------------------------------------------------
/src/test/resources/故障模式分析表格样例01.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/故障模式分析表格样例01.docx


--------------------------------------------------------------------------------
/src/test/resources/标准表格1.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/标准表格1.doc


--------------------------------------------------------------------------------
/target/.gitignore:
--------------------------------------------------------------------------------
1 | /classes
2 | /test-classes
3 | 


--------------------------------------------------------------------------------