parse(InputStream inputStream, WordDocType docType) {
72 | if (docType == WordDocType.DOCX) {
73 | wordTableParser = new WordXTableParser(this.context);
74 | } else if (docType == WordDocType.DOC) {
75 | wordTableParser = new WordHTableParser(this.context);
76 | } else {
77 | throw new IllegalArgumentException("不支持该文件类型");
78 | }
79 | return wordTableParser.parse(inputStream);
80 | }
81 |
/**
 * Kinds of Word document the table parser distinguishes.
 *
 * @author changtan.sun
 */
public enum WordDocType {
    /** Docx document. */
    DOCX,

    /** Doc document. */
    DOC,

    /** Any other document kind (the constant name is misspelled but is part of the public API). */
    UNKOWN
}
90 | }
91 |
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/WordTableTransferContext.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.parser;
2 |
3 | import com.suncht.wordread.model.WordTable;
4 | import com.suncht.wordread.parser.mapping.IWordTableMemoryMappingVisitor;
5 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
6 | import com.suncht.wordread.parser.strategy.ITableTransferStrategy;
7 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
8 |
9 | /**
10 | * Word表格转换上下文
11 | * @author changtan.sun
12 | *
13 | */
14 | public class WordTableTransferContext {
15 | private ITableTransferStrategy strategy;
16 | private IWordTableMemoryMappingVisitor visitor;
17 |
18 | public static WordTableTransferContext create() {
19 | return new WordTableTransferContext();
20 | }
21 |
22 | public WordTableTransferContext transferStrategy(ITableTransferStrategy strategy) {
23 | this.strategy = strategy;
24 | return this;
25 | }
26 |
27 | public WordTableTransferContext visitor(IWordTableMemoryMappingVisitor visitor) {
28 | this.visitor = visitor;
29 | return this;
30 | }
31 |
32 | public WordTable transfer(final WordTableMemoryMapping tableMemoryMapping) {
33 | if (strategy == null) {
34 | strategy = new LogicalTableStrategy();
35 | }
36 | return strategy.transfer(tableMemoryMapping);
37 | }
38 |
39 | public ITableTransferStrategy getStrategy() {
40 | return strategy;
41 | }
42 |
43 | public IWordTableMemoryMappingVisitor getVisitor() {
44 | return visitor;
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/mapping/IWordTableMemoryMappingVisitor.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.parser.mapping;
2 |
3 | import com.suncht.wordread.model.TTCPr;
4 |
5 | /**
6 | * Word表格内存映射表的单元格访问者接口
7 | * 用于修改内存映射表的单元格的数据
8 | * @author changtan.sun
9 | *
10 | */
11 | public interface IWordTableMemoryMappingVisitor {
12 | public void visit(TTCPr cell, int realRowIndex, int realColumnIndex);
13 | }
14 |
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/mapping/WordTableMemoryMapping.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.parser.mapping;
2 |
3 | import java.util.Arrays;
4 |
5 | import com.google.common.base.Preconditions;
6 | import com.suncht.wordread.model.TTCPr;
7 |
8 | /**
9 | * Word表格内存映射
10 | * @author changtan.sun
11 | *
12 | */
13 | public class WordTableMemoryMapping {
14 | private TTCPr[][] _tableMemoryMap;
15 | private int rowCount;
16 | private int columnCount;
17 | private IWordTableMemoryMappingVisitor visitor;
18 |
19 | public WordTableMemoryMapping(int row, int column) {
20 | _tableMemoryMap = new TTCPr[row][column];
21 | this.rowCount = row;
22 | this.columnCount = column;
23 | }
24 |
25 | public void setTTCPr(final TTCPr data, int rowIndex, int columnIndex) {
26 | Preconditions.checkArgument(rowIndex < rowCount);
27 | Preconditions.checkArgument(columnIndex < columnCount);
28 |
29 | _tableMemoryMap[rowIndex][columnIndex] = data;
30 |
31 | if (visitor != null) {
32 | data.accept(visitor, rowIndex, columnIndex);
33 | }
34 | }
35 |
36 | public final TTCPr getTTCPr(int rowIndex, int columnIndex) {
37 | Preconditions.checkArgument(rowIndex < rowCount);
38 | Preconditions.checkArgument(columnIndex < columnCount);
39 |
40 | return _tableMemoryMap[rowIndex][columnIndex];
41 | }
42 |
43 | public TTCPr[] getRow(int rowIndex) {
44 | Preconditions.checkArgument(rowIndex < rowCount);
45 |
46 | return Arrays.copyOf(_tableMemoryMap[rowIndex], columnCount);
47 | }
48 |
49 | public int getRowCount() {
50 | return rowCount;
51 | }
52 |
53 | public void setRowCount(int rowCount) {
54 | this.rowCount = rowCount;
55 | }
56 |
57 | public int getColumnCount() {
58 | return columnCount;
59 | }
60 |
61 | public void setColumnCount(int columnCount) {
62 | this.columnCount = columnCount;
63 | }
64 |
65 | public IWordTableMemoryMappingVisitor getVisitor() {
66 | return visitor;
67 | }
68 |
69 | public void setVisitor(IWordTableMemoryMappingVisitor visitor) {
70 | this.visitor = visitor;
71 | }
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/strategy/DefaultTableStrategy.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.parser.strategy;
2 |
3 | import com.suncht.wordread.model.WordTable;
4 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
5 |
6 | public class DefaultTableStrategy implements ITableTransferStrategy {
7 |
8 | public WordTable transfer(WordTableMemoryMapping tableMemoryMapping) {
9 | return null;
10 | }
11 |
12 | }
13 |
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/strategy/ITableTransferStrategy.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.parser.strategy;
2 |
3 | import com.suncht.wordread.model.WordTable;
4 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
5 |
6 | /**
7 | * 表格转换策略
8 | * 将表格内存映射转换成实际的表格模式
9 | * @author changtan.sun
10 | *
11 | */
12 | public interface ITableTransferStrategy {
13 | public WordTable transfer(WordTableMemoryMapping tableMemoryMapping);
14 | }
15 |
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/strategy/LogicalTableStrategy.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.parser.strategy;
2 |
3 | import com.suncht.wordread.model.TTCPr;
4 | import com.suncht.wordread.model.WordTable;
5 | import com.suncht.wordread.model.WordTableCell;
6 | import com.suncht.wordread.model.WordTableComplexCell;
7 | import com.suncht.wordread.model.WordTableRow;
8 | import com.suncht.wordread.model.WordTableSimpleCell;
9 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
10 |
11 | public class LogicalTableStrategy implements ITableTransferStrategy {
12 |
13 | private WordTableMemoryMapping tableMemoryMapping;
14 |
15 | // /**
16 | // * 获取在docx中实际行数(word中表格都处理成二维表格,忽略合并)
17 | // * @return
18 | // */
19 | // private int getRealMaxRowCount() {
20 | // return tableMemoryMap.length;
21 | // }
22 |
23 | /**
24 | * 获取行数(在表格映射对象中的行数)
25 | * @return
26 | */
27 | private int getRowCount() {
28 | int rowCount = 0;
29 | for (int i = 0; i < tableMemoryMapping.getRowCount(); i++) {
30 | if (tableMemoryMapping.getTTCPr(i, 0).isValid()) {
31 | rowCount++;
32 | }
33 | }
34 | return rowCount;
35 | }
36 |
37 | public WordTable transfer(WordTableMemoryMapping tableMemoryMapping) {
38 | this.tableMemoryMapping = tableMemoryMapping;
39 |
40 | WordTable wordTable = new WordTable();
41 | int rowCount = getRowCount();
42 | WordTableRow tableRow = null;
43 | for (int i = 0; i < rowCount; i++) {
44 | tableRow = this.getTableRow(i);
45 | wordTable.getRows().add(tableRow);
46 | }
47 | return wordTable;
48 | }
49 |
50 | /**
51 | * 获取行对象
52 | * @param currentRowIndex
53 | * @return
54 | */
55 | private WordTableRow getTableRow(int currentRowIndex) {
56 | TTCPr[] _rows = null;
57 | TTCPr _first_column_in_row = null;
58 | int rowCount = 0;
59 | for (int i = 0; i < tableMemoryMapping.getRowCount(); i++) {
60 | if (tableMemoryMapping.getTTCPr(i, 0).isValid()) {
61 | if (currentRowIndex == rowCount++) {
62 | _rows = tableMemoryMapping.getRow(i);
63 | _first_column_in_row = tableMemoryMapping.getTTCPr(i, 0);
64 | break;
65 | }
66 | }
67 | }
68 |
69 | if (_rows == null) {
70 | return null;
71 | }
72 |
73 | int _logic_row_index = _first_column_in_row.getLogicRowIndex();
74 | //int _end_row_index = _first_column_in_row.getRowSpan() + _first_column_in_row.getRealRowIndex() - 1;
75 | int _row_span = _first_column_in_row.getRowSpan();
76 | int _logic_column_count = _rows.length;
77 |
78 | WordTableRow pwtr = new WordTableRow();
79 |
80 | WordTableCell cell = null;
81 | for (int i = 0; i < _logic_column_count; i++) {
82 | cell = getCellInRow(_logic_row_index, _row_span, i, currentRowIndex);
83 | if (cell == null) {
84 | continue;
85 | }
86 | pwtr.getCells().add(cell);
87 | }
88 |
89 | return pwtr;
90 | }
91 |
92 | /**
93 | * 获取一行中的单元格集合,将实际单元格转换成逻辑单元格
94 | * @param logicRowIndex 逻辑行号
95 | * @param endRealRowIndex 逻辑行号
96 | * @param logicColumnIndex word中的实际列
97 | * @param currentRowIndex 在表格映射对象中的行号
98 | * @return
99 | */
100 | private WordTableCell getCellInRow(int logicRowIndex, int logicRowSpan, int logicColumnIndex, int currentRowIndex) {
101 | WordTableCell cell = null;
102 | TTCPr currentRealCell = tableMemoryMapping.getTTCPr(logicRowIndex, logicColumnIndex);
103 |
104 | boolean needHandleRowSpan = logicRowSpan > 0 || currentRealCell.isDoneRowSpan(); //是否需要处理跨行的情况
105 | boolean needHandleColSpan = currentRealCell.isDoneColSpan();//是否需要处理跨列的情况
106 |
107 | boolean satisfyConditionOfComplexCell = false; //是否满足复杂单元格的条件
108 |
109 | satisfyConditionOfComplexCell = needHandleRowSpan && needHandleColSpan;
110 | if (!satisfyConditionOfComplexCell) {
111 | satisfyConditionOfComplexCell = currentRealCell.getRowSpan() < logicRowSpan;
112 | }
113 |
114 | if (currentRealCell.isValid()) { //有效单元格
115 | if (satisfyConditionOfComplexCell) {//跨行又跨列
116 | WordTableComplexCell pwtc = new WordTableComplexCell(); //属于复杂单元格
117 |
118 | WordTable innerTable = new WordTable();
119 | int _realColSpan = currentRealCell.getColSpan();
120 | for (int i = 0; i < logicRowSpan;) {
121 | WordTableRow innerRow = new WordTableRow();
122 | int _rowSpan = 1;
123 | for (int j = 0; j < _realColSpan; j++) {
124 | TTCPr _ttcpr = tableMemoryMapping.getTTCPr(logicRowIndex + i, logicColumnIndex + j);
125 | if (_ttcpr.isValid()) {
126 | WordTableCell _cell = new WordTableSimpleCell();
127 | _cell.setRowSpan(_ttcpr.getRowSpan());
128 | _cell.setColumnSpan(_ttcpr.getColSpan());
129 | _cell.setContent(_ttcpr.getContent().copy());
130 | innerRow.getCells().add(_cell);
131 |
132 | if (_ttcpr.getRowSpan() > _rowSpan) {
133 | _rowSpan = _ttcpr.getRowSpan();
134 | }
135 | }
136 | }
137 | innerTable.getRows().add(innerRow);
138 |
139 | i = i + _rowSpan;
140 | }
141 | pwtc.setInnerTable(innerTable);
142 | cell = pwtc;
143 | } else {
144 | //跨列不跨行,不需要处理
145 | //跨行不跨列,不需要处理
146 | WordTableSimpleCell pwtc = new WordTableSimpleCell(); //属于简单单元格
147 | pwtc.setRowSpan(currentRealCell.getRowSpan());
148 | pwtc.setColumnSpan(currentRealCell.getColSpan());
149 | pwtc.setContent(currentRealCell.getContent().copy());
150 |
151 | cell = pwtc;
152 | }
153 | }
154 |
155 | return cell;
156 |
157 | }
158 | }
159 |
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/wordh/SingleWordHTableParser.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.parser.wordh;
2 |
3 | import java.math.BigInteger;
4 |
5 | import org.apache.poi.hwpf.usermodel.Table;
6 | import org.apache.poi.hwpf.usermodel.TableCell;
7 | import org.apache.poi.hwpf.usermodel.TableRow;
8 |
9 | import com.google.common.base.Preconditions;
10 | import com.suncht.wordread.model.TTCPr;
11 | import com.suncht.wordread.model.TTCPr.TTCPrEnum;
12 | import com.suncht.wordread.model.WordTable;
13 | import com.suncht.wordread.model.WordTableCellContents;
14 | import com.suncht.wordread.parser.ISingleWordTableParser;
15 | import com.suncht.wordread.parser.WordTableTransferContext;
16 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
17 |
18 | /**
19 | * Doc文档解析
20 | *
21 | *
22 | * 标题: SingleWordHTableParser
23 | *
24 | *
25 | * 描述: 对POI API进行调试发现,解析Doc单元格的方式与Docx方式不同:没有列合并,只有行合并,有列宽
26 | *
27 | *
28 | * @author changtan.sun
29 | * @date 2018年4月27日
30 | */
31 | public class SingleWordHTableParser implements ISingleWordTableParser {
32 | private Table hwpfTable;
33 |
34 | private WordTableMemoryMapping _tableMemoryMapping;
35 | private WordTableTransferContext context;
36 |
37 | /**
38 | * 最大列数
39 | */
40 | private int realMaxColumnCount = 0;
41 | /**
42 | * 最大列数所占的行Index
43 | */
44 | private int rowIndexOfMaxColumnCount = 0;
45 |
46 | public SingleWordHTableParser(Table hwpfTable, WordTableTransferContext context) {
47 | this.hwpfTable = hwpfTable;
48 | this.context = context;
49 | }
50 |
51 | public WordTable parse() {
52 | int realMaxRowCount = this.hwpfTable.numRows();
53 |
54 | realMaxColumnCount = 0;
55 | for (int i = 0; i < realMaxRowCount; i++) {
56 | TableRow tr = this.hwpfTable.getRow(i);
57 | int numCell = tr.numCells();
58 | if (numCell > realMaxColumnCount) {
59 | realMaxColumnCount = numCell;
60 | rowIndexOfMaxColumnCount = i;
61 | }
62 | }
63 |
64 | _tableMemoryMapping = new WordTableMemoryMapping(realMaxRowCount, realMaxColumnCount);
65 |
66 | for (int i = 0; i < realMaxRowCount; i++) {
67 | TableRow preRow = i - 1 >= 0 ? this.hwpfTable.getRow(i - 1) : null; // 上一行
68 | parseRow(this.hwpfTable.getRow(i), i, preRow);
69 | }
70 |
71 | return context.transfer(_tableMemoryMapping);
72 | }
73 |
74 | private void parseRow(TableRow row, int realRowIndex, TableRow preRow) {
75 | int numCells = row.numCells();
76 | //boolean existColMergedCells = realMaxColumnCount > numCells; // 该行中是否存在被列合并,如果存在,做逻辑列合并处理
77 | int logicColumnIndex = 0;
78 | int logicRowIndex = realRowIndex; //逻辑行号和实际行号一样的
79 | for (int realColumnIndex = 0; realColumnIndex < numCells; realColumnIndex++) {
80 | TableCell cell = row.getCell(realColumnIndex);// 取得单元格
81 | int skipColumn = parseCell(row, cell, realRowIndex, realColumnIndex, logicRowIndex, logicColumnIndex);
82 | logicColumnIndex = logicColumnIndex + skipColumn + 1;
83 | }
84 | }
85 |
86 | /**
87 | * 参考:https://blog.csdn.net/www1056481167/article/details/56835043
88 | * 解析Doc单元格的方式与Docx方式不同:没有列合并概念,只有行合并
89 | *
90 | * @param cell
91 | * @param realRowIndex
92 | * @param realColumnIndex
93 | * @return
94 | */
95 | private int parseCell(TableRow row, TableCell cell, int realRowIndex, int realColumnIndex, int logicRowIndex, int logicColumnIndex) {
96 | // -----列合并-----
97 | int numOfCellHMerged = computeNumOfCellHMerged(row, cell, realColumnIndex); //就是该单元格合并了多少列
98 |
99 | // -----行合并-----
100 | if (cell.isFirstVerticallyMerged() && cell.isVerticallyMerged()) { // 行合并开始
101 | TTCPr ttc = new TTCPr();
102 | if(numOfCellHMerged>0) {
103 | ttc.setType(TTCPrEnum.HVM_S);
104 | } else {
105 | ttc.setType(TTCPrEnum.VM_S);
106 | }
107 | ttc.setRealRowIndex(realRowIndex);
108 | ttc.setRealColumnIndex(realColumnIndex);
109 | ttc.setLogicRowIndex(logicRowIndex);
110 | ttc.setLogicColumnIndex(logicColumnIndex);
111 | ttc.setWidth(BigInteger.valueOf(cell.getWidth()));
112 | ttc.setColSpan(numOfCellHMerged);
113 | ttc.setRoot(null);
114 | // ttc.setText(cell.getText());
115 | ttc.setContent(WordTableCellContents.getCellContent(cell));
116 |
117 | _tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
118 |
119 | //处理其他被合并的列
120 | if(numOfCellHMerged>0) {
121 | for (int i = 0; i < numOfCellHMerged; i++) {
122 | TTCPr ttc_merged = new TTCPr();
123 | ttc_merged.setType(TTCPrEnum.HM);
124 | ttc_merged.setRealRowIndex(realRowIndex);
125 | ttc_merged.setRealColumnIndex(realColumnIndex);
126 | ttc_merged.setLogicRowIndex(logicRowIndex);
127 | ttc_merged.setLogicColumnIndex(logicColumnIndex + i + 1);
128 | //ttc_merged.setWidth(BigInteger.valueOf(cell.getWidth()));
129 | //ttc_merged.setColSpan(numOfCellHMerged);
130 | ttc_merged.setRoot(ttc);
131 |
132 | _tableMemoryMapping.setTTCPr(ttc_merged, logicRowIndex, ttc_merged.getLogicColumnIndex());
133 | }
134 | }
135 | } else if (!cell.isFirstVerticallyMerged() && cell.isVerticallyMerged()) { // 行被合并
136 | int _start = logicRowIndex, _end = 0;
137 | TTCPr root = null;
138 | for (int i = logicRowIndex - 1; i >= 0; i--) {
139 | TTCPr ttcpr = _tableMemoryMapping.getTTCPr(i, logicColumnIndex);
140 | if (ttcpr != null && (ttcpr.getType() == TTCPrEnum.VM_S || ttcpr.getType() == TTCPrEnum.HVM_S)) {
141 | _end = i;
142 | root = ttcpr;
143 | break;
144 | } else if (ttcpr != null && ttcpr.getRoot() != null) {
145 | _end = i;
146 | root = ttcpr.getRoot();
147 | break;
148 | }
149 | }
150 |
151 | Preconditions.checkNotNull(root, "父单元格不能为空");
152 |
153 | TTCPr ttc = new TTCPr();
154 | ttc.setType(TTCPrEnum.VM);
155 | ttc.setRealRowIndex(realRowIndex);
156 | ttc.setRealColumnIndex(realColumnIndex);
157 | ttc.setLogicRowIndex(logicRowIndex);
158 | ttc.setLogicColumnIndex(logicColumnIndex);
159 | ttc.setWidth(BigInteger.valueOf(cell.getWidth()));
160 | ttc.setRoot(root);
161 | root.setRowSpan(_start - _end + 1);
162 |
163 | _tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
164 | } else { // 没有行合并
165 | TTCPr ttc = new TTCPr();
166 | if(numOfCellHMerged>0) {
167 | ttc.setType(TTCPrEnum.HM_S);
168 | } else {
169 | ttc.setType(TTCPrEnum.NONE);
170 | }
171 | ttc.setRealRowIndex(realRowIndex);
172 | ttc.setRealColumnIndex(realColumnIndex);
173 | ttc.setLogicRowIndex(logicRowIndex);
174 | ttc.setLogicColumnIndex(logicColumnIndex);
175 | ttc.setWidth(BigInteger.valueOf(cell.getWidth()));
176 | ttc.setColSpan(numOfCellHMerged);
177 | ttc.setRoot(null);
178 | // ttc.setText(cell.getText());
179 | ttc.setContent(WordTableCellContents.getCellContent(cell));
180 |
181 | _tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
182 |
183 | //处理其他被合并的列
184 | if(numOfCellHMerged>0) {
185 | for (int i = 0; i < numOfCellHMerged; i++) {
186 | TTCPr ttc_merged = new TTCPr();
187 | ttc_merged.setType(TTCPrEnum.HM);
188 | ttc_merged.setRealRowIndex(realRowIndex);
189 | ttc_merged.setRealColumnIndex(realColumnIndex);
190 | ttc_merged.setLogicRowIndex(logicRowIndex);
191 | ttc_merged.setLogicColumnIndex(logicColumnIndex + i + 1);
192 | //ttc_merged.setWidth(BigInteger.valueOf(cell.getWidth()));
193 | //ttc_merged.setColSpan(numOfCellHMerged);
194 | ttc_merged.setRoot(ttc);
195 |
196 | _tableMemoryMapping.setTTCPr(ttc_merged, logicRowIndex, ttc_merged.getLogicColumnIndex());
197 | }
198 | }
199 | }
200 |
201 | return numOfCellHMerged;
202 | }
203 |
204 | /**
205 | * 计算合并了多少个单元格
206 | * 表格中其他行根据标准行进行列合并,属于标准表格 标准表格,比如
207 | * ———————————————
208 | * | | | |
209 | * ———————————————
210 | * | | | | | ---->该行为标准行
211 | * ———————————————
212 | * | | |
213 | * ———————————————
214 | * | | |
215 | * ———————————————
216 | *
217 | * @param cell
218 | * @param realRowIndex
219 | * @param realColumnIndex
220 | * @return
221 | */
222 | private int computeNumOfCellHMerged(TableRow currentRow, TableCell currentCell, int realColumnIndex) {
223 | TableRow standardRow = this.hwpfTable.getRow(this.rowIndexOfMaxColumnCount);
224 |
225 | if (currentRow.numCells() >= standardRow.numCells()) {
226 | return 0;
227 | }
228 |
229 | long totalWidth = 0;
230 | for (int i = 0; i <= realColumnIndex; i++) {
231 | totalWidth += currentRow.getCell(i).getWidth();
232 | }
233 |
234 | int tempRowIndex = -1;
235 | long tempWidth = 0;
236 | for (int i = 0, size = standardRow.numCells(); i < size; i++) {
237 | tempWidth += standardRow.getCell(i).getWidth();
238 | if (this.widthEqual(tempWidth, totalWidth)) {
239 | tempRowIndex = i;
240 | break;
241 | }
242 | }
243 |
244 | int currentCellWidth = currentCell.getWidth();
245 | tempWidth = 0;
246 | int columnMerged = 0;
247 | for (int i = tempRowIndex; i >= 0; i--) {
248 | tempWidth += standardRow.getCell(i).getWidth();
249 | if(this.widthEqual(tempWidth, currentCellWidth)) {
250 | break;
251 | } else {
252 | columnMerged++;
253 | }
254 | }
255 |
256 | return columnMerged;
257 | }
258 |
259 | private boolean widthEqual(long tempWidth, long totalWidth) {
260 | return tempWidth <= (totalWidth + 10) && tempWidth >= (totalWidth - 10);
261 | }
262 |
263 | }
264 |
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/wordh/WordHTableParser.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.parser.wordh;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.util.List;
6 |
7 | import org.apache.poi.hwpf.HWPFDocument;
8 | import org.apache.poi.hwpf.usermodel.Range;
9 | import org.apache.poi.hwpf.usermodel.Table;
10 | import org.apache.poi.hwpf.usermodel.TableIterator;
11 | import org.apache.poi.poifs.filesystem.POIFSFileSystem;
12 |
13 | import com.google.common.collect.Lists;
14 | import com.suncht.wordread.model.WordTable;
15 | import com.suncht.wordread.parser.ISingleWordTableParser;
16 | import com.suncht.wordread.parser.IWordTableParser;
17 | import com.suncht.wordread.parser.WordTableTransferContext;
18 |
19 | public class WordHTableParser implements IWordTableParser {
20 | private WordTableTransferContext context;
21 |
22 | public WordHTableParser(WordTableTransferContext context) {
23 | this.context = context;
24 | }
25 |
26 | public List parse(InputStream inputStream) {
27 |
28 | List wordTables = Lists.newArrayList();
29 |
30 | try {
31 | POIFSFileSystem pfs = new POIFSFileSystem(inputStream); // 载入文档
32 | HWPFDocument hwpf = new HWPFDocument(pfs);
33 |
34 | Range range = hwpf.getRange();//得到文档的读取范围
35 | TableIterator it = new TableIterator(range);
36 | //迭代文档中的表格
37 | while (it.hasNext()) {
38 | Table table = (Table) it.next();
39 | ISingleWordTableParser parser = new SingleWordHTableParser(table, context);
40 | WordTable wordTable = parser.parse();
41 | wordTables.add(wordTable);
42 | }
43 | } catch (Exception e) {
44 | e.printStackTrace();
45 | } finally {
46 | if (inputStream != null) {
47 | try {
48 | inputStream.close();
49 | } catch (IOException e) {
50 | e.printStackTrace();
51 | }
52 | }
53 | }
54 |
55 | return wordTables;
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/wordx/SingleWordXTableParser.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.parser.wordx;
2 |
3 | import java.util.List;
4 |
5 | import org.apache.poi.xwpf.usermodel.XWPFTable;
6 | import org.apache.poi.xwpf.usermodel.XWPFTableCell;
7 | import org.apache.poi.xwpf.usermodel.XWPFTableRow;
8 | import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcPr;
9 |
10 | import com.google.common.base.Preconditions;
11 | import com.suncht.wordread.model.TTCPr;
12 | import com.suncht.wordread.model.TTCPr.TTCPrEnum;
13 | import com.suncht.wordread.model.WordTable;
14 | import com.suncht.wordread.model.WordTableCellContents;
15 | import com.suncht.wordread.parser.ISingleWordTableParser;
16 | import com.suncht.wordread.parser.WordTableTransferContext;
17 | import com.suncht.wordread.parser.mapping.WordTableMemoryMapping;
18 |
19 | /**
20 | *
21 | * @author changtan.sun
22 | *
23 | */
24 |
25 | /**
26 | * 解析Docx中一张复杂表格内容
27 | * Docx不仅有列合并,而且有行合并,没有列宽
28 | * 标题: SingleWordXTableParser
29 | * 描述:
30 | * @author changtan.sun
31 | * @date 2018年4月27日
32 | */
33 | public class SingleWordXTableParser implements ISingleWordTableParser {
34 | private XWPFTable xwpfTable;
35 | // private WordTable table;
36 |
37 | private WordTableMemoryMapping _tableMemoryMapping;
38 | private WordTableTransferContext context;
39 |
40 | public SingleWordXTableParser(XWPFTable xwpfTable, WordTableTransferContext context) {
41 | this.xwpfTable = xwpfTable;
42 | this.context = context;
43 | }
44 |
45 | // public WordTable getTable() {
46 | // return table;
47 | // }
48 |
49 | /**
50 | * 解析Docx的表格,将表格相关数据映射到表格映射对象中, 用于后面的操作
51 | * @return
52 | */
53 | public WordTable parse() {
54 | List rows;
55 | List cells;
56 |
57 | rows = xwpfTable.getRows();
58 | int realMaxRowCount = rows.size();
59 | // table.setRealMaxRowCount(rows.size());
60 |
61 | //计算最大列数
62 | int realMaxColumnCount = 0;
63 | for (XWPFTableRow row : rows) {
64 | //获取行对应的单元格
65 | cells = row.getTableCells();
66 | int _columnCountOnRow = 0;
67 | for (XWPFTableCell cell : cells) {
68 | CTTcPr tt = cell.getCTTc().getTcPr();
69 | if(tt.getGridSpan()!=null) {
70 | _columnCountOnRow += tt.getGridSpan().getVal().intValue();
71 | } else {
72 | _columnCountOnRow += 1;
73 | }
74 | }
75 |
76 | if (_columnCountOnRow > realMaxColumnCount) {
77 | realMaxColumnCount = _columnCountOnRow;
78 | }
79 | }
80 |
81 | //table.setRealMaxColumnCount(columnCount);
82 |
83 | _tableMemoryMapping = new WordTableMemoryMapping(realMaxRowCount, realMaxColumnCount);
84 | _tableMemoryMapping.setVisitor(context.getVisitor());
85 | for (int i = 0; i < realMaxRowCount; i++) {
86 | parseRow(rows.get(i), i);
87 | }
88 |
89 | //printTableMemoryMap();
90 |
91 | // wordTableMap = new WordTableMap();
92 | // wordTableMap.setTableMemoryMap(_tableMemoryMap);
93 | return context.transfer(_tableMemoryMapping);
94 | }
95 |
96 | public void dispose() {
97 | _tableMemoryMapping = null;
98 | xwpfTable = null;
99 | }
100 |
101 | // /**
102 | // * 打印表格映射
103 | // */
104 | // private void printTableMemoryMap() {
105 | // int r = 1;
106 | // for (TTCPr[] columns : _tableMemoryMapping) {
107 | // int c = 1;
108 | // for (TTCPr column : columns) {
109 | // System.out.println(r + ":" + c + "===>" + column.getType() + " ==== " + column.getText());
110 | // c++;
111 | // }
112 | //
113 | // r++;
114 | // }
115 | // }
116 |
117 | /**
118 | * 解析word中表格行
119 | * @param row
120 | * @param realRowIndex
121 | */
122 | private void parseRow(XWPFTableRow row, int realRowIndex) {
123 | List cells = row.getTableCells();
124 | int numCells = cells.size();
125 |
126 | int logicColumnIndex = 0;
127 | int logicRowIndex = realRowIndex; //逻辑行号与实际行号一样
128 | for (int realColumnIndex = 0; realColumnIndex < numCells; realColumnIndex++) {
129 | XWPFTableCell cell = row.getCell(realColumnIndex);
130 | //skipColumn是否跳过多个单元格, 当列合并时候
131 | int skipColumn = parseCell(cell, realRowIndex, realColumnIndex, logicRowIndex, logicColumnIndex);
132 | logicColumnIndex = logicColumnIndex + skipColumn + 1;
133 | }
134 | }
135 |
136 | private int parseCell(XWPFTableCell cell, int realRowIndex, int realColumnIndex, int logicRowIndex, int logicColumnIndex) {
137 | int skipColumn = 0;
138 | // if (_tableMemoryMapping.getTTCPr(realRowIndex, realColumnIndex) != null) {
139 | // return skipColumn;
140 | // }
141 |
142 | CTTcPr tt = cell.getCTTc().getTcPr();
143 | //-------行合并--------
144 | if (tt.getVMerge() != null) {
145 | if (tt.getVMerge().getVal() != null && "restart".equals(tt.getVMerge().getVal().toString())) { //行合并的第一行单元格(行合并的开始单元格)
146 | TTCPr ttc = new TTCPr();
147 | ttc.setType(TTCPrEnum.VM_S);
148 | ttc.setRealRowIndex(realRowIndex);
149 | ttc.setRealColumnIndex(realColumnIndex);
150 | ttc.setLogicRowIndex(logicRowIndex);
151 | ttc.setLogicColumnIndex(logicColumnIndex);
152 | ttc.setWidth(tt.getTcW().getW());
153 | ttc.setRoot(null);
154 | //ttc.setText(cell.getText());
155 | ttc.setContent(WordTableCellContents.getCellContent(cell));
156 |
157 | _tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
158 | } else { //行合并的其他行单元格(被合并的单元格)
159 | int _start = logicRowIndex, _end = 0;
160 | TTCPr root = null;
161 | for (int i = logicRowIndex - 1; i >= 0; i--) {
162 | TTCPr ttcpr = _tableMemoryMapping.getTTCPr(i, logicRowIndex);
163 | if (ttcpr != null && (ttcpr.getType() == TTCPrEnum.VM_S || ttcpr.getType() == TTCPrEnum.HVM_S)) {
164 | _end = i;
165 | root = ttcpr;
166 | break;
167 | } else if(ttcpr != null && ttcpr.getRoot()!=null) {
168 | _end = i;
169 | root = ttcpr.getRoot();
170 | break;
171 | }
172 | }
173 |
174 | Preconditions.checkNotNull(root, "父单元格不能为空");
175 |
176 | TTCPr ttc = new TTCPr();
177 | ttc.setType(TTCPrEnum.VM);
178 | ttc.setRealRowIndex(realRowIndex);
179 | ttc.setRealColumnIndex(realColumnIndex);
180 | ttc.setLogicRowIndex(logicRowIndex);
181 | ttc.setLogicColumnIndex(logicColumnIndex);
182 | ttc.setWidth(tt.getTcW().getW());
183 | ttc.setRoot(root);
184 | root.setRowSpan(_start - _end + 1);
185 | _tableMemoryMapping.setTTCPr(ttc, logicRowIndex, logicColumnIndex);
186 | }
187 | } else { //没有进行行合并的单元格
188 | TTCPr currentCell = _tableMemoryMapping.getTTCPr(logicRowIndex, logicColumnIndex);
189 | if (currentCell != null && currentCell.getType() == TTCPrEnum.HM) { //被列合并的单元格
190 |
191 | } else {
192 | currentCell = new TTCPr();
193 | currentCell.setType(TTCPrEnum.NONE);
194 | currentCell.setRealRowIndex(realRowIndex);
195 | currentCell.setRealColumnIndex(realColumnIndex);
196 | currentCell.setLogicRowIndex(logicRowIndex);
197 | currentCell.setLogicColumnIndex(logicColumnIndex);
198 | currentCell.setWidth(tt.getTcW().getW());
199 | currentCell.setContent(WordTableCellContents.getCellContent(cell));
200 | currentCell.setRoot(null);
201 | //判断是否有父单元格
202 | if (logicRowIndex > 0) {
203 | TTCPr parent = _tableMemoryMapping.getTTCPr(logicRowIndex - 1, logicColumnIndex);
204 | if (parent.isDoneColSpan()) {
205 | //currentCell.setParent(parent);
206 | currentCell.setRoot(parent);
207 | }
208 | }
209 |
210 | _tableMemoryMapping.setTTCPr(currentCell, logicRowIndex, logicColumnIndex);
211 | }
212 | }
213 |
214 | //-------列合并-------
215 | if (tt.getGridSpan() != null) {
216 | int colSpan = tt.getGridSpan().getVal().intValue();
217 | TTCPr root = _tableMemoryMapping.getTTCPr(logicRowIndex, logicColumnIndex);
218 | root.setColSpan(colSpan);
219 | if (root.getType() == TTCPrEnum.VM_S) {
220 | root.setType(TTCPrEnum.HVM_S);
221 | } else {
222 | root.setType(TTCPrEnum.HM_S);
223 | }
224 |
225 | //给其他被列合并的单元格进行初始化
226 | for (int i = 1; i < colSpan; i++) {
227 | TTCPr cell_other = _tableMemoryMapping.getTTCPr(logicRowIndex, logicColumnIndex + i);
228 | if (cell_other == null){
229 | cell_other = new TTCPr();
230 | cell_other.setWidth(tt.getTcW().getW());
231 | }
232 | cell_other.setRealRowIndex(realRowIndex);
233 | cell_other.setRealColumnIndex(realColumnIndex);
234 | cell_other.setLogicRowIndex(logicRowIndex);
235 | cell_other.setLogicColumnIndex(realColumnIndex + i);
236 | cell_other.setType(TTCPrEnum.HM);
237 | cell_other.setRoot(root);
238 |
239 | _tableMemoryMapping.setTTCPr(cell_other, logicRowIndex, realColumnIndex + i);
240 | }
241 |
242 | skipColumn = colSpan - 1;
243 | }
244 |
245 | return skipColumn;
246 | }
247 | }
248 |
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/parser/wordx/WordXTableParser.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.parser.wordx;
2 |
3 | import java.io.InputStream;
4 | import java.util.List;
5 |
6 | import org.apache.commons.io.IOUtils;
7 | import org.apache.poi.xwpf.usermodel.XWPFDocument;
8 | import org.apache.poi.xwpf.usermodel.XWPFTable;
9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 |
12 | import com.google.common.collect.Lists;
13 | import com.suncht.wordread.model.WordTable;
14 | import com.suncht.wordread.parser.ISingleWordTableParser;
15 | import com.suncht.wordread.parser.IWordTableParser;
16 | import com.suncht.wordread.parser.WordTableTransferContext;
17 |
18 | /**
19 | * Docx文档的复杂表格解析器
20 | * @author changtan.sun
21 | *
22 | */
23 | public class WordXTableParser implements IWordTableParser {
24 | private final static Logger logger = LoggerFactory.getLogger(WordXTableParser.class);
25 |
26 | private WordTableTransferContext context;
27 |
28 | public WordXTableParser(WordTableTransferContext context) {
29 | this.context = context;
30 | }
31 |
32 | public List parse(InputStream inputStream) {
33 | List wordTables = Lists.newArrayList();
34 |
35 | try {
36 | XWPFDocument doc = new XWPFDocument(inputStream); // 载入文档
37 |
38 | //获取文档中所有的表格
39 | List tables = doc.getTables();
40 | for (XWPFTable table : tables) {
41 | ISingleWordTableParser parser = new SingleWordXTableParser(table, this.context);
42 | WordTable wordTable = parser.parse();
43 | wordTables.add(wordTable);
44 | }
45 | } catch (Exception e) {
46 | logger.error(e.getMessage(), e);
47 | } finally {
48 | IOUtils.closeQuietly(inputStream);
49 | }
50 |
51 | return wordTables;
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/src/main/java/com/suncht/wordread/utils/MathmlUtils.java:
--------------------------------------------------------------------------------
1 | package com.suncht.wordread.utils;
2 |
3 | import java.io.InputStream;
4 | import java.io.StringReader;
5 | import java.io.StringWriter;
6 |
7 | import javax.xml.transform.Result;
8 | import javax.xml.transform.Source;
9 | import javax.xml.transform.Transformer;
10 | import javax.xml.transform.TransformerException;
11 | import javax.xml.transform.TransformerFactory;
12 | import javax.xml.transform.URIResolver;
13 | import javax.xml.transform.stream.StreamResult;
14 | import javax.xml.transform.stream.StreamSource;
15 |
/**
 * XSLT-based helpers for converting Office MathML (OMML) to MathML and
 * MathML to LaTeX, using stylesheets loaded from the classpath.
 */
public class MathmlUtils {
    /** Classpath directory holding mmltex.xsl and the stylesheets it pulls in. */
    private static final String MML2TEX_DIR = "/conventer/mml2tex/";

    /**
     * Applies a classpath XSL stylesheet to an XML string.
     *
     * <p>Failures are reported on {@code System.err} and do not propagate:
     * when the stylesheet is missing or the transformation fails, whatever
     * output was produced so far (usually the empty string) is returned.
     *
     * @param s           XML text to transform
     * @param xslpath     classpath location of the stylesheet
     * @param uriResolver resolver for stylesheet includes/imports, or {@code null} for the default
     * @return the transformation output, or a partial/empty string on failure
     */
    public static String xslConvert(String s, String xslpath, URIResolver uriResolver) {
        StringWriter writer = new StringWriter();
        // try-with-resources so the stylesheet stream is released on every path.
        try (InputStream xslStream = MathmlUtils.class.getResourceAsStream(xslpath)) {
            if (xslStream == null) {
                System.err.println("XSL stylesheet not found on classpath: " + xslpath);
                return writer.toString();
            }
            // NOTE(review): the input may originate from user-supplied Word files; consider
            // enabling XMLConstants.FEATURE_SECURE_PROCESSING once verified against the stylesheets.
            TransformerFactory tFac = TransformerFactory.newInstance();
            if (uriResolver != null) {
                tFac.setURIResolver(uriResolver);
            }
            Transformer t = tFac.newTransformer(new StreamSource(xslStream));
            Source source = new StreamSource(new StringReader(s));
            Result result = new StreamResult(writer);
            t.transform(source, result);
        } catch (TransformerException e) {
            System.err.println("XSL transformation with " + xslpath + " failed: " + e.getMessage());
        } catch (java.io.IOException e) {
            // Only close() of the stylesheet stream can raise this; the output is already complete.
            System.err.println("Could not close XSL stylesheet " + xslpath + ": " + e.getMessage());
        }
        return writer.toString();
    }

    /**
     * Converts MathML to LaTeX.
     *
     * <p>A leading XML declaration is removed before the transformation, and
     * the first and last character of the stylesheet output (the math
     * delimiters it emits) are dropped from the result.
     *
     * @param mml MathML markup, with or without an XML declaration
     * @return the LaTeX text; empty when the conversion produced no output
     */
    public static String convertMML2Latex(String mml) {
        String body = stripXmlDeclaration(mml);
        // Resolve the stylesheets that mmltex.xsl includes relative to its classpath directory.
        URIResolver resolver = new URIResolver() {
            @Override
            public Source resolve(String href, String base) throws TransformerException {
                InputStream inputStream = MathmlUtils.class.getResourceAsStream(MML2TEX_DIR + href);
                if (inputStream == null) {
                    throw new TransformerException(
                            "Included stylesheet not found on classpath: " + MML2TEX_DIR + href);
                }
                return new StreamSource(inputStream);
            }
        };
        String latex = xslConvert(body, MML2TEX_DIR + "mmltex.xsl", resolver);
        if (latex != null && latex.length() > 1) {
            latex = latex.substring(1, latex.length() - 1);
        }
        return latex;
    }

    /**
     * Converts Office MathML (OMML) to MathML.
     *
     * @param xml OMML markup
     * @return the MathML produced by the OMML2MML stylesheet; empty on failure
     */
    public static String convertOMML2MML(String xml) {
        return xslConvert(xml, "/conventer/OMML2MML.XSL", null);
    }

    /**
     * Removes everything up to and including the first {@code ?>}; returns the
     * input unchanged when no such marker exists (previously the first
     * character was lost in that case).
     */
    private static String stripXmlDeclaration(String xml) {
        int end = xml.indexOf("?>");
        return end < 0 ? xml : xml.substring(end + 2);
    }
}
67 | }
68 |
--------------------------------------------------------------------------------
/src/main/resources/1.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/1.doc
--------------------------------------------------------------------------------
/src/main/resources/FMEA信息导入-客户实例.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/FMEA信息导入-客户实例.doc
--------------------------------------------------------------------------------
/src/main/resources/FMEA信息导入-客户实例.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/FMEA信息导入-客户实例.docx
--------------------------------------------------------------------------------
/src/main/resources/conventer/mml2tex/README:
--------------------------------------------------------------------------------
1 | README for the XSLT MathML Library 2.1.2
2 |
3 | XSLT MathML Library is a set of XSLT stylesheets to transform
4 | MathML 2.0 to LaTeX.
5 |
6 | For more information, see
7 | http://www.raleigh.ru/MathML/mmltex/index.php?lang=en
8 |
9 | Manifest
10 | --------
11 |
12 | README this file
13 | mmltex.xsl
14 | tokens.xsl
15 | glayout.xsl
16 | scripts.xsl
17 | tables.xsl
18 | entities.xsl
19 | cmarkup.xsl
20 |
21 | Use
22 | ---
23 |
24 | There are two ways of using the library:
25 |
26 | * Use a local copy of the library.
27 |
28 | 1. Download the distribution (see below).
29 |
30 | 2. Unpack the distribution, using unzip.
31 |
32 | 3. In your stylesheet import or include either the main
33 | stylesheet, mmltex.xsl, or the stylesheet module you
34 | wish to use, such as tokens.xsl. This example assumes
35 | that the distribution has been extracted into the same
36 | directory as your own stylesheet:
37 |
38 | <xsl:import href="mmltex.xsl"/>
39 |
40 | * Import or include either the main stylesheet, or the
41 | stylesheet module you wish to use, directly from the library
42 | website; http://www.raleigh.ru/MathML/mmltex/. For example:
43 |
44 | <xsl:import href="http://www.raleigh.ru/MathML/mmltex/mmltex.xsl"/>
45 |
46 | Obtaining The Library
47 | ---------------------
48 |
49 | The XSLT MathML Library is available for download as:
50 |
51 | * Zip file: http://www.raleigh.ru/MathML/mmltex/xsltml_2.1.2.zip
52 |
53 | Copyright
54 | ---------
55 |
56 | Copyright (C) 2001-2003 Vasil Yaroshevich
57 |
58 | Permission is hereby granted, free of charge, to any person
59 | obtaining a copy of this software and associated documentation
60 | files (the ``Software''), to deal in the Software without
61 | restriction, including without limitation the rights to use,
62 | copy, modify, merge, publish, distribute, sublicense, and/or
63 | sell copies of the Software, and to permit persons to whom the
64 | Software is furnished to do so, subject to the following
65 | conditions:
66 |
67 | The above copyright notice and this permission notice shall be
68 | included in all copies or substantial portions of the Software.
69 |
70 | Except as contained in this notice, the names of individuals
71 | credited with contribution to this software shall not be used in
72 | advertising or otherwise to promote the sale, use or other
73 | dealings in this Software without prior written authorization
74 | from the individuals in question.
75 |
76 | Any stylesheet derived from this Software that is publically
77 | distributed will be identified with a different name and the
78 | version strings in any derived Software will be changed so that
79 | no possibility of confusion between the derived package and this
80 | Software will exist.
81 |
82 | Warranty
83 | --------
84 |
85 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
86 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
87 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
88 | NONINFRINGEMENT. IN NO EVENT SHALL NORMAN WALSH OR ANY OTHER
89 | CONTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
90 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
91 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
92 | OTHER DEALINGS IN THE SOFTWARE.
93 |
94 | Contacting the Author
95 | ---------------------
96 |
97 | These stylesheets are maintained by Vasil Yaroshevich, <yarosh@raleigh.ru>.
98 |
--------------------------------------------------------------------------------
/src/main/resources/conventer/mml2tex/glayout.xsl:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | \genfrac{}{}{
18 |
19 |
20 |
21 | ex
22 |
23 |
24 | 0ex
25 |
26 |
27 | .05ex
28 |
29 |
30 |
31 | .2ex
32 |
33 |
34 |
35 |
36 |
37 | }{}{
38 |
39 |
40 | \frac{
41 |
42 |
43 |
44 | \hfill
45 |
46 |
47 |
48 | \hfill
49 |
50 | }{
51 |
52 | \hfill
53 |
54 |
55 |
56 | \hfill
57 |
58 | }
59 |
60 |
61 |
62 | \raisebox{1ex}{$
63 |
64 | $}\!\left/ \!\raisebox{-1ex}{$
65 |
66 | $}\right.
67 |
68 |
69 |
70 |
71 |
72 |
73 | \sqrt[
74 |
75 | ]{
76 |
77 | }
78 |
79 |
80 |
81 | exception 25:
82 | \text{exception 25:}
83 |
84 |
85 |
86 |
87 |
88 | \sqrt{
89 |
90 | }
91 |
92 |
93 |
94 |
95 |
96 |
97 | \left
98 |
99 |
100 | \
101 |
102 |
103 | \left.
104 |
105 |
106 |
107 | \left(
108 |
109 |
110 |
111 |
112 |
113 |
114 | ,
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 | \right
134 |
135 |
136 | \
137 |
138 |
139 | \right.
140 |
141 |
142 |
143 | \right)
144 |
145 |
146 |
147 |
148 | \phantom{
149 |
150 | }
151 |
152 |
153 |
154 |
155 |
156 | \overline{
157 |
158 | \hspace{.2em}|}
159 |
160 |
161 | \sqrt{
162 |
163 | }
164 |
165 |
166 | \overline{)
167 |
168 | }
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 | {\displaystyle
180 |
181 |
182 | {
183 |
184 | \textstyle
185 | \scriptstyle
186 | \scriptscriptstyle
187 |
188 |
189 |
190 | \colorbox[rgb]{
191 |
192 |
193 |
194 | }{$
195 |
196 |
197 | \textcolor[rgb]{
198 |
199 |
200 |
201 | }{
202 |
203 |
204 |
205 | }
206 |
207 |
208 | $}
209 |
210 |
211 | }
212 |
213 |
214 | }
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
--------------------------------------------------------------------------------
/src/main/resources/conventer/mml2tex/mmltex.xsl:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
9 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | $
26 |
27 | $
28 |
29 |
30 |
31 |
\[
32 |
33 |
\]
34 |
35 |
36 |
--------------------------------------------------------------------------------
/src/main/resources/conventer/mml2tex/tables.xsl:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
11 |
12 |
13 |
14 | \multicolumn{
15 |
16 | }{c}{
17 |
18 | }
19 |
20 | &
21 |
22 |
23 |
24 |
25 |
26 |
27 | \hfill
28 |
29 |
30 |
31 | \hfill
32 |
33 |
34 |
36 | &
37 |
38 |
39 |
40 |
41 |
42 |
43 | \\
44 |
45 |
46 |
47 |
48 | \begin{array}{
49 |
50 | |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 | |
85 |
86 | }
87 |
88 | \hline
89 |
90 |
91 |
92 | \\ \hline
93 |
94 | \end{array}
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
--------------------------------------------------------------------------------
/src/main/resources/conventer/mml2tex/tokens.xsl:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | \textcolor{red}{
20 |
21 | }
22 |
23 |
24 |
25 |
26 |
27 | \mathrm{
28 |
29 | }
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 | \mathrm{
41 |
42 | }
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 | \left
56 |
57 |
58 | \right
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 | \text{
72 |
73 | }
74 |
75 |
76 |
77 | \phantom{\rule
78 |
79 | [-
80 |
81 | ]
82 |
83 | {
84 |
85 | 0ex
86 |
87 |
88 | }{
89 |
90 | 0ex
91 |
92 |
93 | }}
94 |
95 |
96 |
97 |
98 |
99 | ''
100 |
101 |
102 | ''
103 |
104 |
105 |
106 |
107 |
108 | \colorbox[rgb]{
109 |
110 |
111 |
112 | }{$
113 |
114 |
115 | \textcolor[rgb]{
116 |
117 |
118 |
119 | }{
120 |
121 |
122 |
123 |
124 | \mathrm{
125 |
126 |
127 | \mathbf{
128 |
129 |
130 | \mathit{
131 |
132 |
133 | \mathit{
134 | The value bold-italic for mathvariant is not supported
135 |
136 |
137 | \mathbb{
138 |
139 |
140 | \mathfrak{
141 | The value bold-fraktur for mathvariant is not supported
142 |
143 |
144 | \mathcal{
145 |
146 |
147 | \mathcal{
148 | The value bold-script for mathvariant is not supported
149 |
150 |
151 | \mathfrak{
152 |
153 |
154 | \mathsf{
155 |
156 |
157 | \mathsf{
158 | The value bold-sans-serif for mathvariant is not supported
159 |
160 |
161 | \mathsf{
162 | The value sans-serif-italic for mathvariant is not supported
163 |
164 |
165 | \mathsf{
166 | The value sans-serif-bold-italic for mathvariant is not supported
167 |
168 |
169 | \mathtt{
170 |
171 |
172 | {
173 | Error at mathvariant attribute
174 |
175 |
176 |
177 |
178 |
179 | }
180 |
181 |
182 | }
183 |
184 |
185 | $}
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 | ,
221 |
222 |
223 |
224 |
225 |
226 | ,
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 | ,
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 | ,
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 | 0,1,1
271 | 0,0,0
272 | 0,0,1
273 | 1,0,1
274 | .5,.5,.5
275 | 0,.5,0
276 | 0,1,0
277 | .5,0,0
278 | 0,0,.5
279 | .5,.5,0
280 | .5,0,.5
281 | 1,0,0
282 | .75,.75,.75
283 | 0,.5,.5
284 | 1,1,1
285 | 1,1,0
286 |
287 | Exception at color template
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 | Exception at Hex2Decimal template
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
--------------------------------------------------------------------------------
/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | ERROR
34 |
35 | ACCEPT
36 |
37 | DENY
38 |
39 |
40 |
41 |
42 |
43 | ${log_dir}/error/%d{yyyy-MM-dd}/error-log.log
44 |
45 |
46 | ${maxHistory}
47 |
48 |
49 |
50 |
51 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 | WARN
63 |
64 | ACCEPT
65 |
66 | DENY
67 |
68 |
69 |
70 | ${log_dir}/warn/%d{yyyy-MM-dd}/warn-log.log
71 | ${maxHistory}
72 |
73 |
74 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n
75 |
76 |
77 |
78 |
79 |
80 |
81 | INFO
82 | ACCEPT
83 | DENY
84 |
85 |
86 | ${log_dir}/info/%d{yyyy-MM-dd}/info-log.log
87 | ${maxHistory}
88 |
89 |
90 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n
91 |
92 |
93 |
94 |
95 |
96 |
97 | DEBUG
98 | ACCEPT
99 | DENY
100 |
101 |
102 | ${log_dir}/debug/%d{yyyy-MM-dd}/debug-log.log
103 | ${maxHistory}
104 |
105 |
106 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n
107 |
108 |
109 |
110 |
111 |
112 |
113 | TRACE
114 | ACCEPT
115 | DENY
116 |
117 |
118 | ${log_dir}/trace/%d{yyyy-MM-dd}/trace-log.log
119 | ${maxHistory}
120 |
121 |
122 | %d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger - %msg%n
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
--------------------------------------------------------------------------------
/src/main/resources/故障模式分析表格样例.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/故障模式分析表格样例.docx
--------------------------------------------------------------------------------
/src/main/resources/故障模式分析表格样例_处理模型.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/main/resources/故障模式分析表格样例_处理模型.docx
--------------------------------------------------------------------------------
/src/test/java/com/test/Doc2DocxTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import com.suncht.convert.OfficeDocumentConvertServer;
4 |
5 | public class Doc2DocxTest {
6 |
7 | public static void main(String[] args) throws Exception {
8 | String inputFile = "D:\\FMEA信息导入-客户实例.doc";
9 | String outputFile = "D:\\FMEA信息导入-客户实例.docx";
10 | //Doc2DocxUtil.doc2Docx(outputFile, inputFile);
11 |
12 | // Thread.sleep(2000);
13 | String pdfFile = "D:\\FMEA信息导入-客户实例.pdf";
14 | // OfficePDFConverter.getConverter().convert2PDF(outputFile, pdfFile);
15 |
16 | String OPEN_OFFICE_HOME = "D:\\Program Files\\LibreOffice 5\\";
17 | // 服务端口
18 | int OPEN_OFFICE_PORT[] = { 8101 };
19 | try (OfficeDocumentConvertServer server = new OfficeDocumentConvertServer(OPEN_OFFICE_HOME, OPEN_OFFICE_PORT);) {
20 | server.convert(inputFile, outputFile, false);
21 | server.convert(outputFile, pdfFile, true);
22 | }
23 |
24 | }
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/test/java/com/test/MemoryMappingVisitorTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import com.suncht.wordread.model.TTCPr;
4 | import com.suncht.wordread.parser.mapping.IWordTableMemoryMappingVisitor;
5 |
6 | public class MemoryMappingVisitorTest implements IWordTableMemoryMappingVisitor {
7 |
8 | @Override
9 | public void visit(final TTCPr cell, int realRowIndex, int realColumnIndex) {
10 | if (realRowIndex == 0 && realColumnIndex == 0) {
11 | //cell.getContent()("测试成功");
12 | }
13 | }
14 |
15 | }
16 |
--------------------------------------------------------------------------------
/src/test/java/com/test/MuliHeaderXTableParserTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import java.io.InputStream;
4 | import java.util.List;
5 |
6 | import org.junit.Test;
7 |
8 | import com.suncht.wordread.model.WordTable;
9 | import com.suncht.wordread.parser.WordTableParser;
10 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
11 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
12 |
13 | public class MuliHeaderXTableParserTest {
14 |
15 | @Test
16 | public void test01() {
17 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/FMEA信息导入-客户实例.docx");) {
18 | //InputStream inputStream = new FileInputStream(new File(doc2));
19 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()).memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX);
20 | for (WordTable wordTable : tables) {
21 | System.out.println(wordTable.format());
22 | }
23 | } catch(Exception e) {
24 | e.printStackTrace();
25 | }
26 |
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/src/test/java/com/test/MultiTextCellTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.util.List;
6 |
7 | import org.junit.Test;
8 |
9 | import com.suncht.wordread.model.WordTable;
10 | import com.suncht.wordread.parser.WordTableParser;
11 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
13 |
14 | public class MultiTextCellTest {
15 | @Test
16 | public void testFormulaInCell() throws IOException {
17 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套多文本.docx");) {
18 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy())
19 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX);
20 | for (WordTable wordTable : tables) {
21 | System.out.println(wordTable.format());
22 | }
23 | } catch(Exception e) {
24 | e.printStackTrace();
25 | }
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/src/test/java/com/test/NestedFormulaTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.util.List;
6 |
7 | import org.junit.Test;
8 |
9 | import com.suncht.wordread.model.WordTable;
10 | import com.suncht.wordread.parser.WordTableParser;
11 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
13 |
14 | public class NestedFormulaTest {
15 | @Test
16 | public void testFormulaInCell_docx() throws IOException {
17 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套公式.docx");) {
18 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy())
19 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX);
20 | for (WordTable wordTable : tables) {
21 | System.out.println(wordTable.format());
22 | }
23 | } catch(Exception e) {
24 | e.printStackTrace();
25 | }
26 | }
27 |
28 | @Test
29 | public void testFormulaInCell_doc() throws IOException {
30 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套公式.doc");) {
31 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy())
32 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOC);
33 | for (WordTable wordTable : tables) {
34 | System.out.println(wordTable.format());
35 | }
36 | } catch(Exception e) {
37 | e.printStackTrace();
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/src/test/java/com/test/NestedImageCellTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import java.io.InputStream;
4 | import java.util.List;
5 |
6 | import org.junit.Test;
7 |
8 | import com.suncht.wordread.format.DefaultCellFormater;
9 | import com.suncht.wordread.format.DefaultWordTableFormater;
10 | import com.suncht.wordread.format.IWordTableFormater;
11 | import com.suncht.wordread.model.WordTable;
12 | import com.suncht.wordread.output.DefaultWordTableOutputStrategy;
13 | import com.suncht.wordread.output.IWordTableOutputStrategy;
14 | import com.suncht.wordread.parser.WordTableParser;
15 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
16 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
17 |
18 | /**
19 | * 嵌套图片单元格测试
20 | * @author suncht
21 | *
22 | */
23 | public class NestedImageCellTest {
24 | @Test
25 | public void test01() {
26 | IWordTableFormater tableFormater = new DefaultWordTableFormater(new DefaultCellFormater());
27 | IWordTableOutputStrategy outputStrategy = new DefaultWordTableOutputStrategy();
28 |
29 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套图片02.docx");) {
30 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()).parse(inputStream, WordDocType.DOCX);
31 |
32 | for (WordTable wordTable : tables) {
33 | System.out.println(wordTable.format(tableFormater));
34 | wordTable.output(outputStrategy);
35 | }
36 | } catch (Exception e) {
37 | e.printStackTrace();
38 | }
39 |
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/test/java/com/test/OfficeConverterTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import org.junit.Test;
4 |
5 | import com.suncht.convert.OfficeDocumentConvertServer;
6 |
7 | public class OfficeConverterTest {
8 | private static String OPEN_OFFICE_HOME = "D:\\Program Files\\LibreOffice 5\\";
9 | private static int OPEN_OFFICE_PORT[] = { 8101 };
10 |
11 | @Test
12 | public void txt2docx() {
13 | String inputFile = "D:\\dic.txt";
14 | String outputFile = "D:\\dic.docx";
15 |
16 | // 服务端口
17 | try (OfficeDocumentConvertServer server = new OfficeDocumentConvertServer(OPEN_OFFICE_HOME, OPEN_OFFICE_PORT);) {
18 | server.convert(inputFile, outputFile, false);
19 | } catch(Exception e) {
20 | e.printStackTrace();
21 | }
22 | }
23 |
24 | @Test
25 | public void docx2pdf() {
26 | String inputFile = "D:\\故障模式分析表格样例 - 副本.docx";
27 | String outputFile = "D:\\故障模式分析表格样例 - 副本.pdf";
28 |
29 | // 服务端口
30 | try (OfficeDocumentConvertServer server = new OfficeDocumentConvertServer(OPEN_OFFICE_HOME, OPEN_OFFICE_PORT);) {
31 | server.convert(inputFile, outputFile, false);
32 | } catch(Exception e) {
33 | e.printStackTrace();
34 | }
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/src/test/java/com/test/OleObjectCellTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.util.List;
6 |
7 | import org.junit.Test;
8 |
9 | import com.suncht.wordread.model.WordTable;
10 | import com.suncht.wordread.parser.WordTableParser;
11 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
13 |
14 | public class OleObjectCellTest {
15 | // @Test
16 | public void testOleInCell() throws IOException {
17 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套附件01.docx");) {
18 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy())
19 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX);
20 | for (WordTable wordTable : tables) {
21 | System.out.println(wordTable.format());
22 | }
23 | } catch(Exception e) {
24 | e.printStackTrace();
25 | }
26 | }
27 |
28 | @Test
29 | public void testEmbedDocxInCell() throws IOException {
30 | try(InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套附件02.docx");) {
31 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy())
32 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX);
33 | for (WordTable wordTable : tables) {
34 | System.out.println(wordTable.format());
35 | }
36 | } catch(Exception e) {
37 | e.printStackTrace();
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/src/test/java/com/test/WordCellDataTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.util.List;
6 |
7 | import org.junit.Test;
8 |
9 | import com.suncht.wordread.model.WordTable;
10 | import com.suncht.wordread.parser.WordTableParser;
11 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
12 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
13 |
14 | public class WordCellDataTest {
15 | @Test
16 | public void testFormulaInCell() throws IOException {
17 | InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套公式.docx");
18 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()).memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX);
19 | for (WordTable wordTable : tables) {
20 | System.out.println(wordTable.format());
21 | }
22 |
23 | inputStream.close();
24 | }
25 |
26 | @Test
27 | public void testImageInCell() throws IOException {
28 | InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套图片.docx");
29 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()).memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX);
30 | for (WordTable wordTable : tables) {
31 | System.out.println(wordTable.format());
32 | }
33 |
34 | inputStream.close();
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/src/test/java/com/test/WordEmbedsTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import java.io.InputStream;
4 | import java.util.Iterator;
5 | import java.util.List;
6 |
7 | import org.apache.poi.hssf.usermodel.HSSFCell;
8 | import org.apache.poi.hssf.usermodel.HSSFRow;
9 | import org.apache.poi.hssf.usermodel.HSSFSheet;
10 | import org.apache.poi.hssf.usermodel.HSSFWorkbook;
11 | import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
12 | import org.apache.poi.openxml4j.opc.PackagePart;
13 | import org.apache.poi.poifs.dev.POIFSViewEngine;
14 | import org.apache.poi.poifs.filesystem.POIFSFileSystem;
15 | import org.apache.poi.ss.usermodel.Cell;
16 | import org.apache.poi.xwpf.usermodel.XWPFDocument;
17 | import org.junit.Test;
18 |
19 | public class WordEmbedsTest {
20 | @Test
21 | public void listAllEmbeds() {
22 | try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/嵌套附件01.docx");) {
23 | XWPFDocument document = new XWPFDocument(inputStream);
24 | listEmbeds(document);
25 | //listEmbeds2(document);
26 | } catch (Exception e) {
27 | e.printStackTrace();
28 | }
29 | }
30 |
31 | private static void listEmbeds(XWPFDocument doc) throws OpenXML4JException {
32 | List embeddedDocs = doc.getAllEmbedds();
33 | if (embeddedDocs != null && !embeddedDocs.isEmpty()) {
34 | Iterator pIter = embeddedDocs.iterator();
35 | while (pIter.hasNext()) {
36 | PackagePart pPart = pIter.next();
37 | System.out.print(pPart.getPartName() + ", ");
38 |
39 | System.out.print(pPart.getContentType() + ", ");
40 | System.out.println();
41 | }
42 | }
43 | }
44 |
45 | private static void listEmbeds2(XWPFDocument doc) throws Exception {
46 | for (final PackagePart pPart : doc.getAllEmbedds()) {
47 | final String contentType = pPart.getContentType();
48 | System.out.println(contentType + "\n");
49 | if (contentType.equals("application/vnd.ms-excel")) {
50 | final HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(pPart.getInputStream());
51 |
52 | for (int sheet = 0; sheet < embeddedWorkbook.getNumberOfSheets(); sheet++) {
53 | final HSSFSheet activeSheet = embeddedWorkbook.getSheetAt(sheet);
54 | if (activeSheet.getSheetName().equalsIgnoreCase("Sheet1")) {
55 | for (int rowIndex = activeSheet.getFirstRowNum(); rowIndex <= activeSheet
56 | .getLastRowNum(); rowIndex++) {
57 | final HSSFRow row = activeSheet.getRow(rowIndex);
58 | for (int cellIndex = row.getFirstCellNum(); cellIndex <= row
59 | .getLastCellNum(); cellIndex++) {
60 | final HSSFCell cell = row.getCell(cellIndex);
61 | if (cell != null) {
62 | if (cell.getCellType() == Cell.CELL_TYPE_STRING)
63 | System.out.println("Row:" + rowIndex + " Cell:" + cellIndex + " = "
64 | + cell.getStringCellValue());
65 | if (cell.getCellType() == Cell.CELL_TYPE_NUMERIC) {
66 | System.out.println("Row:" + rowIndex + " Cell:" + cellIndex + " = "
67 | + cell.getNumericCellValue());
68 |
69 | cell.setCellValue(cell.getNumericCellValue() * 2); // update
70 | // the
71 | // value
72 | }
73 | }
74 | }
75 | }
76 | }
77 | }
78 | }
79 | }
80 | }
81 |
82 |
83 | @Test
84 | public void viewFile() {
85 | POIFSFileSystem fs = null;
86 | List strings = POIFSViewEngine.inspectViewable(fs, true, 0, " ");
87 | Iterator iter = strings.iterator();
88 |
89 | while (iter.hasNext()) {
90 | //os.write( ((String)iter.next()).getBytes());
91 | System.out.println(iter.next());
92 | }
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/src/test/java/com/test/WordHTableParserTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import java.io.InputStream;
4 | import java.util.List;
5 |
6 | import org.junit.Test;
7 |
8 | import com.suncht.wordread.model.WordTable;
9 | import com.suncht.wordread.parser.WordTableParser;
10 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
11 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
12 |
13 | public class WordHTableParserTest {
14 | @Test
15 | public void test01() {
16 | InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/标准表格1.doc");
17 | //InputStream inputStream = new FileInputStream(new File(doc2));
18 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy()).parse(inputStream, WordDocType.DOC);
19 | for (WordTable wordTable : tables) {
20 | System.out.println(wordTable.format());
21 | }
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/src/test/java/com/test/WordXTableParserTest.java:
--------------------------------------------------------------------------------
1 | package com.test;
2 |
3 | import java.io.InputStream;
4 | import java.util.List;
5 |
6 | import org.junit.Test;
7 |
8 | import com.suncht.wordread.model.WordTable;
9 | import com.suncht.wordread.parser.WordTableParser;
10 | import com.suncht.wordread.parser.WordTableParser.WordDocType;
11 | import com.suncht.wordread.parser.strategy.LogicalTableStrategy;
12 |
13 | public class WordXTableParserTest {
14 | String doc1 = "D:\\故障模式分析表格样例01.docx";
15 | String doc2 = "D:\\故障模式分析表格样例.docx";
16 |
17 | @Test
18 | public void test01() {
19 | try (InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/1.docx");) {
20 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy())
21 | .memoryMappingVisitor(new MemoryMappingVisitorTest()).parse(inputStream, WordDocType.DOCX);
22 | for (WordTable wordTable : tables) {
23 | System.out.println(wordTable.format());
24 | }
25 | } catch (Exception e) {
26 | e.printStackTrace();
27 | }
28 | }
29 |
30 | @Test
31 | public void test02() {
32 | InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/故障模式分析表格样例.docx");
33 | // InputStream inputStream = new FileInputStream(new File(doc2));
34 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy())
35 | .parse(inputStream, WordDocType.DOCX);
36 | for (WordTable wordTable : tables) {
37 | System.out.println(wordTable.format());
38 | }
39 | }
40 |
41 | @Test
42 | public void test03() {
43 | InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/故障模式分析表格样例01.docx");
44 | // InputStream inputStream = new FileInputStream(new File(doc2));
45 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy())
46 | .parse(inputStream, WordDocType.DOCX);
47 | for (WordTable wordTable : tables) {
48 | System.out.println(wordTable.format());
49 | }
50 | }
51 |
52 | @Test
53 | public void test04() {
54 | InputStream inputStream = WordXTableParserTest.class.getResourceAsStream("/复杂表格.docx");
55 | // InputStream inputStream = new FileInputStream(new File(doc2));
56 | List tables = WordTableParser.create().transferStrategy(new LogicalTableStrategy())
57 | .parse(inputStream, WordDocType.DOCX);
58 | for (WordTable wordTable : tables) {
59 | System.out.println(wordTable.format());
60 | }
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/src/test/resources/1.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/1.doc
--------------------------------------------------------------------------------
/src/test/resources/1.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/1.docx
--------------------------------------------------------------------------------
/src/test/resources/2.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/2.doc
--------------------------------------------------------------------------------
/src/test/resources/conventer/mml2tex/README:
--------------------------------------------------------------------------------
1 | README for the XSLT MathML Library 2.1.2
2 |
3 | XSLT MathML Library is a set of XSLT stylesheets to transform
4 | MathML 2.0 to LaTeX.
5 |
6 | For more information, see
7 | http://www.raleigh.ru/MathML/mmltex/index.php?lang=en
8 |
9 | Manifest
10 | --------
11 |
12 | README this file
13 | mmltex.xsl
14 | tokens.xsl
15 | glayout.xsl
16 | scripts.xsl
17 | tables.xsl
18 | entities.xsl
19 | cmarkup.xsl
20 |
21 | Use
22 | ---
23 |
24 | There are two ways of using the library:
25 |
26 | * Use a local copy of the library.
27 |
28 | 1. Download the distribution (see below).
29 |
30 | 2. Unpack the distribution, using unzip.
31 |
32 | 3. In your stylesheet import or include either the main
33 | stylesheet, mmltex.xsl, or the stylesheet module you
34 | wish to use, such as tokens.xsl. This example assumes
35 | that the distribution has been extracted into the same
36 | directory as your own stylesheet:
37 |
38 | <xsl:import href="mmltex.xsl"/>
39 |
40 | * Import or include either the main stylesheet, or the
41 | stylesheet module you wish to use, directly from the library
42 | website; http://www.raleigh.ru/MathML/mmltex/. For example:
43 |
44 | <xsl:import href="http://www.raleigh.ru/MathML/mmltex/mmltex.xsl"/>
45 |
46 | Obtaining The Library
47 | ---------------------
48 |
49 | The XSLT MathML Library is available for download as:
50 |
51 | * Zip file: http://www.raleigh.ru/MathML/mmltex/xsltml_2.1.2.zip
52 |
53 | Copyright
54 | ---------
55 |
56 | Copyright (C) 2001-2003 Vasil Yaroshevich
57 |
58 | Permission is hereby granted, free of charge, to any person
59 | obtaining a copy of this software and associated documentation
60 | files (the ``Software''), to deal in the Software without
61 | restriction, including without limitation the rights to use,
62 | copy, modify, merge, publish, distribute, sublicense, and/or
63 | sell copies of the Software, and to permit persons to whom the
64 | Software is furnished to do so, subject to the following
65 | conditions:
66 |
67 | The above copyright notice and this permission notice shall be
68 | included in all copies or substantial portions of the Software.
69 |
70 | Except as contained in this notice, the names of individuals
71 | credited with contribution to this software shall not be used in
72 | advertising or otherwise to promote the sale, use or other
73 | dealings in this Software without prior written authorization
74 | from the individuals in question.
75 |
76 | Any stylesheet derived from this Software that is publically
77 | distributed will be identified with a different name and the
78 | version strings in any derived Software will be changed so that
79 | no possibility of confusion between the derived package and this
80 | Software will exist.
81 |
82 | Warranty
83 | --------
84 |
85 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
86 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
87 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
88 | NONINFRINGEMENT. IN NO EVENT SHALL NORMAN WALSH OR ANY OTHER
89 | CONTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
90 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
91 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
92 | OTHER DEALINGS IN THE SOFTWARE.
93 |
94 | Contacting the Author
95 | ---------------------
96 |
97 | These stylesheets are maintained by Vasil Yaroshevich.
98 |
--------------------------------------------------------------------------------
/src/test/resources/conventer/mml2tex/glayout.xsl:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | \genfrac{}{}{
18 |
19 |
20 |
21 | ex
22 |
23 |
24 | 0ex
25 |
26 |
27 | .05ex
28 |
29 |
30 |
31 | .2ex
32 |
33 |
34 |
35 |
36 |
37 | }{}{
38 |
39 |
40 | \frac{
41 |
42 |
43 |
44 | \hfill
45 |
46 |
47 |
48 | \hfill
49 |
50 | }{
51 |
52 | \hfill
53 |
54 |
55 |
56 | \hfill
57 |
58 | }
59 |
60 |
61 |
62 | \raisebox{1ex}{$
63 |
64 | $}\!\left/ \!\raisebox{-1ex}{$
65 |
66 | $}\right.
67 |
68 |
69 |
70 |
71 |
72 |
73 | \sqrt[
74 |
75 | ]{
76 |
77 | }
78 |
79 |
80 |
81 | exception 25:
82 | \text{exception 25:}
83 |
84 |
85 |
86 |
87 |
88 | \sqrt{
89 |
90 | }
91 |
92 |
93 |
94 |
95 |
96 |
97 | \left
98 |
99 |
100 | \
101 |
102 |
103 | \left.
104 |
105 |
106 |
107 | \left(
108 |
109 |
110 |
111 |
112 |
113 |
114 | ,
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 | \right
134 |
135 |
136 | \
137 |
138 |
139 | \right.
140 |
141 |
142 |
143 | \right)
144 |
145 |
146 |
147 |
148 | \phantom{
149 |
150 | }
151 |
152 |
153 |
154 |
155 |
156 | \overline{
157 |
158 | \hspace{.2em}|}
159 |
160 |
161 | \sqrt{
162 |
163 | }
164 |
165 |
166 | \overline{)
167 |
168 | }
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 | {\displaystyle
180 |
181 |
182 | {
183 |
184 | \textstyle
185 | \scriptstyle
186 | \scriptscriptstyle
187 |
188 |
189 |
190 | \colorbox[rgb]{
191 |
192 |
193 |
194 | }{$
195 |
196 |
197 | \textcolor[rgb]{
198 |
199 |
200 |
201 | }{
202 |
203 |
204 |
205 | }
206 |
207 |
208 | $}
209 |
210 |
211 | }
212 |
213 |
214 | }
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
--------------------------------------------------------------------------------
/src/test/resources/conventer/mml2tex/mmltex.xsl:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
9 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | $
26 |
27 | $
28 |
29 |
30 |
31 |
\[
32 |
33 |
\]
34 |
35 |
36 |
--------------------------------------------------------------------------------
/src/test/resources/conventer/mml2tex/tables.xsl:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
11 |
12 |
13 |
14 | \multicolumn{
15 |
16 | }{c}{
17 |
18 | }
19 |
20 | &
21 |
22 |
23 |
24 |
25 |
26 |
27 | \hfill
28 |
29 |
30 |
31 | \hfill
32 |
33 |
34 |
36 | &
37 |
38 |
39 |
40 |
41 |
42 |
43 | \\
44 |
45 |
46 |
47 |
48 | \begin{array}{
49 |
50 | |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 | |
85 |
86 | }
87 |
88 | \hline
89 |
90 |
91 |
92 | \\ \hline
93 |
94 | \end{array}
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
--------------------------------------------------------------------------------
/src/test/resources/复杂表格.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/复杂表格.docx
--------------------------------------------------------------------------------
/src/test/resources/嵌套公式.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套公式.doc
--------------------------------------------------------------------------------
/src/test/resources/嵌套公式.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套公式.docx
--------------------------------------------------------------------------------
/src/test/resources/嵌套图片.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套图片.docx
--------------------------------------------------------------------------------
/src/test/resources/嵌套图片01.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套图片01.docx
--------------------------------------------------------------------------------
/src/test/resources/嵌套图片02.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套图片02.docx
--------------------------------------------------------------------------------
/src/test/resources/嵌套多文本.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套多文本.docx
--------------------------------------------------------------------------------
/src/test/resources/嵌套附件01.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套附件01.docx
--------------------------------------------------------------------------------
/src/test/resources/嵌套附件02.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/嵌套附件02.docx
--------------------------------------------------------------------------------
/src/test/resources/故障模式分析表格样例01.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/故障模式分析表格样例01.docx
--------------------------------------------------------------------------------
/src/test/resources/标准表格1.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suncht/sun-wordtable-read/ef21891009c9af217f2cc365192f6156dd68f083/src/test/resources/标准表格1.doc
--------------------------------------------------------------------------------
/target/.gitignore:
--------------------------------------------------------------------------------
1 | /classes
2 | /test-classes
3 |
--------------------------------------------------------------------------------