├── img
│   ├── 1.png
│   ├── 2.png
│   ├── 3.png
│   ├── 4.png
│   └── 5.png
├── pom.xml
├── src
│   └── main
│       └── java
│           └── com
│               └── doccon
│                   └── Tool
│                       ├── SQLTool.java
│                       └── pdf2md.java
└── README.md
/img/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HOUYULIN/DocCon/HEAD/img/1.png
--------------------------------------------------------------------------------
/img/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HOUYULIN/DocCon/HEAD/img/2.png
--------------------------------------------------------------------------------
/img/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HOUYULIN/DocCon/HEAD/img/3.png
--------------------------------------------------------------------------------
/img/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HOUYULIN/DocCon/HEAD/img/4.png
--------------------------------------------------------------------------------
/img/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HOUYULIN/DocCon/HEAD/img/5.png
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
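1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>org.example</groupId>
8 |     <artifactId>DocCon</artifactId>
9 |     <version>1.0-SNAPSHOT</version>
10 |
11 |     <properties>
12 |         <maven.compiler.source>8</maven.compiler.source>
13 |         <maven.compiler.target>8</maven.compiler.target>
14 |     </properties>
15 |
16 |     <dependencies>
17 |         <dependency>
18 |             <groupId>org.apache.pdfbox</groupId>
19 |             <artifactId>pdfbox</artifactId>
20 |             <version>2.0.4</version>
21 |         </dependency>
22 |         <dependency>
23 |             <groupId>net.coobird</groupId>
24 |             <artifactId>thumbnailator</artifactId>
25 |             <version>0.4.8</version>
26 |         </dependency>
27 |     </dependencies>
28 | </project>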
--------------------------------------------------------------------------------
/src/main/java/com/doccon/Tool/SQLTool.java:
--------------------------------------------------------------------------------
1 | package com.doccon.Tool;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import java.util.Scanner;
6 |
7 | public class SQLTool {
8 | public static void main(String[] args) {
9 | Scanner sc = new Scanner(System.in);
10 | String line = "";
11 | List result = new ArrayList<>();
12 | System.out.println();
13 | result.add("CREATE TABLE `author` (");
14 | while (!"end".equals(line = sc.nextLine())) {
15 | String s = "";
16 | String[] split = line.split("\t");
17 | for (int i =0;i names = resources.getXObjectNames();
63 |
64 | //迭代器遍历
65 | Iterator iterator = names.iterator();
66 | while (iterator.hasNext()) {
67 | COSName cosName = iterator.next();
68 |
69 |
70 |
71 |
72 | if (resources.isImageXObject(cosName)) {
73 | PDImageXObject imageXObject = (PDImageXObject) resources.getXObject(cosName);
74 |
75 | //图片路径
76 | String timeimg = "\\"+System.currentTimeMillis() + "";
77 | String img = filename + timeimg + ".jpg";
78 | //创建img文件
79 | File outImgFile = new File(img);
80 | //图片写入img文件
81 | Thumbnails.of(imageXObject.getImage()).scale(0.9).rotate(0).toFile(outImgFile);
82 |
83 | //缩放图片
84 | /*BufferedImage bufferedImage = ImageIO.read(outImgFile);
85 | int width = bufferedImage.getWidth();
86 | int height = bufferedImage.getHeight();
87 | if (width > 600) {
88 | double ratio = Math.round((double) width / 550.0);
89 | System.out.println("缩放比ratio:" + ratio);
90 | width = (int) (width / ratio);
91 | height = (int) (height / ratio);
92 |
93 | }else{
94 |
95 | }
96 | */
97 |
98 |
99 | //图片写入输入流
100 | FileInputStream in = new FileInputStream(outImgFile);
101 | byte[] ba = new byte[in.available()];
102 | in.read(ba);
103 | ByteArrayInputStream byteInputStream = new ByteArrayInputStream(ba);
104 |
105 | //图片路径
106 | String url = "";
107 | byte[] urlby = url.getBytes();
108 | fos.write("\r\n".getBytes());
109 | fos.write(urlby);
110 | }
111 | }
112 |
113 |
114 | //pdf文本处理
115 |
116 | //创建pdf文档处理对象
117 | PDFTextStripper stripper = new PDFTextStripper();
118 | stripper.setSortByPosition(true);
119 | stripper.setStartPage(i);
120 | stripper.setEndPage(i);
121 | //当前页中的文字
122 | String text = stripper.getText(pdf);
123 | System.out.println(text);
124 |
125 | //将每一行分成string字段
126 | String[] split = text.split("\\r\\n");
127 | String dl = "";
128 | int status = 1;
129 | //正则判断每一行是否首页字段
130 | for (int x = 0; x < split.length; x++) {
131 | if (Pattern.matches("^[0-9].[0-9].[0-9].[0-9][\\s\\S]*", split[x])) {
132 | String[] spl = split[x].split("\\.");
133 | dl = "#### " + spl[spl.length-2]+"\\."+ spl[spl.length-1];
134 | } else if (Pattern.matches("^[0-9].[0-9].[0-9][\\s\\S]*", split[x])) {
135 | String[] spl = split[x].split("\\.");
136 | dl = "### " + spl[spl.length-1];
137 | } else if (Pattern.matches("^[0-9].[0-9].[\\s\\S]*", split[x])) {
138 | dl = "## " + split[x];
139 | } else if (Pattern.matches("^[0-9] [\\s\\S]*", split[x])) {
140 | dl = "# " + split[x];
141 | } else {
142 |
143 | //以:(开头,或者以)结尾,且汉字长度不超过10的行:
144 | if(Pattern.matches("[\\s\\S]*[:]$|[\\s\\S]*[:] $|^([\\s\\S]*|[\\s\\S]*[)]$|[\\s\\S]*[)] $",split[x])&&split[x].length()<=30){
145 | split[x]= "\n"+"**"+split[x]+"**"+"\n";
146 | }else if(Pattern.matches("[\\s\\S]*[:;]$|[\\s\\S]*[:;] $",split[x])){
147 | split[x]= split[x]+"\n\n";
148 | }
149 | dl = split[x];
150 | status = 2;
151 | }
152 |
153 | //写入输出流
154 | byte[] bytes = dl.getBytes();
155 |
156 |
157 | //判断是否换行
158 | //status:2非标题, 正则表达式最后非以:。结尾
159 | if (status == 2 && !Pattern.matches("[\\s\\S]*[。:]$", dl)) {
160 | fos.write(bytes);
161 | } else {
162 | //以[#]开头[:,: ]结尾必须换行^([\s\S]*|[\s\S]*[)]$
163 | if (Pattern.matches("^#[\\s\\S]*", dl) ) {
164 | fos.write("\r\n".getBytes());
165 | fos.write(bytes);
166 | fos.write("\r\n".getBytes());
167 | } else {
168 | fos.write(bytes);
169 | //换行
170 | fos.write("\r\n".getBytes());
171 | fos.write("\n".getBytes());
172 | }
173 |
174 |
175 | }
176 | //重置参数
177 | status = 1;
178 |
179 | }
180 | }
181 | fos.close();
182 | pdf.close();
183 | System.out.println("pdf转md转换解析结束!!----");
184 | }
185 |
186 |
187 | }
188 |
--------------------------------------------------------------------------------