├── img ├── 1.png ├── 2.png ├── 3.png ├── 4.png └── 5.png ├── pom.xml ├── src └── main │ └── java │ └── com │ └── doccon │ └── Tool │ ├── SQLTool.java │ └── pdf2md.java └── README.md /img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HOUYULIN/DocCon/HEAD/img/1.png -------------------------------------------------------------------------------- /img/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HOUYULIN/DocCon/HEAD/img/2.png -------------------------------------------------------------------------------- /img/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HOUYULIN/DocCon/HEAD/img/3.png -------------------------------------------------------------------------------- /img/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HOUYULIN/DocCon/HEAD/img/4.png -------------------------------------------------------------------------------- /img/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HOUYULIN/DocCon/HEAD/img/5.png -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.example 8 | DocCon 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 8 13 | 8 14 | 15 | 16 | 17 | 18 | org.apache.pdfbox 19 | pdfbox 20 | 2.0.4 21 | 22 | 23 | net.coobird 24 | thumbnailator 25 | 0.4.8 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/main/java/com/doccon/Tool/SQLTool.java: -------------------------------------------------------------------------------- 1 | package com.doccon.Tool; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Scanner; 6 | 7 | public class SQLTool { 8 | public static void main(String[] args) { 9 | Scanner sc = new Scanner(System.in); 10 | String line = ""; 11 | List result = new ArrayList<>(); 12 | System.out.println(); 13 | result.add("CREATE TABLE `author` ("); 14 | while (!"end".equals(line = sc.nextLine())) { 15 | String s = ""; 16 | String[] split = line.split("\t"); 17 | for (int i =0;i names = resources.getXObjectNames(); 63 | 64 | //迭代器遍历 65 | Iterator iterator = names.iterator(); 66 | while (iterator.hasNext()) { 67 | COSName cosName = iterator.next(); 68 | 69 | 70 | 71 | 72 | if (resources.isImageXObject(cosName)) { 73 | PDImageXObject imageXObject = (PDImageXObject) resources.getXObject(cosName); 74 | 75 | //图片路径 76 | String timeimg = "\\"+System.currentTimeMillis() + ""; 77 | String img = filename + timeimg + ".jpg"; 78 | //创建img文件 79 | File outImgFile = new File(img); 80 | //图片写入img文件 81 | Thumbnails.of(imageXObject.getImage()).scale(0.9).rotate(0).toFile(outImgFile); 82 | 83 | //缩放图片 84 | /*BufferedImage bufferedImage = ImageIO.read(outImgFile); 85 | int width = bufferedImage.getWidth(); 86 | int height = bufferedImage.getHeight(); 87 | if (width > 600) { 88 | double ratio = Math.round((double) width / 550.0); 89 | System.out.println("缩放比ratio:" + ratio); 90 | width = (int) (width / ratio); 91 | height = (int) (height / ratio); 92 | 93 | }else{ 94 | 95 | } 96 | */ 97 | 98 | 99 | //图片写入输入流 100 | FileInputStream in = new FileInputStream(outImgFile); 101 | byte[] ba = new byte[in.available()]; 102 | in.read(ba); 103 | ByteArrayInputStream byteInputStream = new ByteArrayInputStream(ba); 104 | 105 | //图片路径 106 | String url = "![img](img/" + timeimg + ".jpg)"; 107 | byte[] urlby = url.getBytes(); 108 | fos.write("\r\n".getBytes()); 109 | fos.write(urlby); 110 | } 111 | } 112 | 113 | 114 | //pdf文本处理 115 | 116 | //创建pdf文档处理对象 117 | PDFTextStripper stripper = new PDFTextStripper(); 118 | stripper.setSortByPosition(true); 119 | stripper.setStartPage(i); 120 | stripper.setEndPage(i); 121 | //当前页中的文字 122 | String text = stripper.getText(pdf); 123 | System.out.println(text); 124 | 125 | //将每一行分成string字段 126 | String[] split = text.split("\\r\\n"); 127 | String dl = ""; 128 | int status = 1; 129 | //正则判断每一行是否首页字段 130 | for (int x = 0; x < split.length; x++) { 131 | if (Pattern.matches("^[0-9].[0-9].[0-9].[0-9][\\s\\S]*", split[x])) { 132 | String[] spl = split[x].split("\\."); 133 | dl = "#### " + spl[spl.length-2]+"\\."+ spl[spl.length-1]; 134 | } else if (Pattern.matches("^[0-9].[0-9].[0-9][\\s\\S]*", split[x])) { 135 | String[] spl = split[x].split("\\."); 136 | dl = "### " + spl[spl.length-1]; 137 | } else if (Pattern.matches("^[0-9].[0-9].[\\s\\S]*", split[x])) { 138 | dl = "## " + split[x]; 139 | } else if (Pattern.matches("^[0-9] [\\s\\S]*", split[x])) { 140 | dl = "# " + split[x]; 141 | } else { 142 | 143 | //以:(开头,或者以)结尾,且汉字长度不超过10的行: 144 | if(Pattern.matches("[\\s\\S]*[:]$|[\\s\\S]*[:] $|^([\\s\\S]*|[\\s\\S]*[)]$|[\\s\\S]*[)] $",split[x])&&split[x].length()<=30){ 145 | split[x]= "\n"+"**"+split[x]+"**"+"\n"; 146 | }else if(Pattern.matches("[\\s\\S]*[:;]$|[\\s\\S]*[:;] $",split[x])){ 147 | split[x]= split[x]+"\n\n"; 148 | } 149 | dl = split[x]; 150 | status = 2; 151 | } 152 | 153 | //写入输出流 154 | byte[] bytes = dl.getBytes(); 155 | 156 | 157 | //判断是否换行 158 | //status:2非标题, 正则表达式最后非以:。结尾 159 | if (status == 2 && !Pattern.matches("[\\s\\S]*[。:]$", dl)) { 160 | fos.write(bytes); 161 | } else { 162 | //以[#]开头[:,: ]结尾必须换行^([\s\S]*|[\s\S]*[)]$ 163 | if (Pattern.matches("^#[\\s\\S]*", dl) ) { 164 | fos.write("\r\n".getBytes()); 165 | fos.write(bytes); 166 | fos.write("\r\n".getBytes()); 167 | } else { 168 | fos.write(bytes); 169 | //换行 170 | fos.write("\r\n".getBytes()); 171 | fos.write("\n".getBytes()); 172 | } 173 | 174 | 175 | } 176 | //重置参数 177 | status = 1; 178 | 179 | } 180 | } 181 | fos.close(); 182 | pdf.close(); 183 | System.out.println("pdf转md转换解析结束!!----"); 184 | } 185 | 186 | 187 | } 188 | --------------------------------------------------------------------------------