├── .DS_Store ├── .idea ├── compiler.xml ├── csdn2hexo.iml ├── inspectionProfiles │ └── Project_Default.xml ├── jarRepositories.xml ├── misc.xml ├── vcs.xml └── workspace.xml ├── README.md ├── out └── .DS_Store ├── pom.xml ├── src └── main │ ├── java │ └── com │ │ └── github │ │ └── csccoder │ │ └── csdn2md │ │ ├── Main.java │ │ ├── model │ │ └── Article.java │ │ ├── paser │ │ ├── ArticlePaser.java │ │ └── CorePaser.java │ │ └── util │ │ ├── FileUtil.java │ │ ├── HexoMdUtil.java │ │ ├── HttpClientUtil.java │ │ ├── PropertiesUtil.java │ │ ├── ProxyIP.java │ │ ├── RegexUtil.java │ │ └── html2markdown │ │ ├── DateUtil.java │ │ ├── FilesUtil.java │ │ ├── HTML2Md.java │ │ ├── MDLine.java │ │ └── Testbed.java │ └── resources │ ├── config.properties │ ├── img │ ├── blog_csdn.png │ ├── blog_hexo.png │ ├── dir_0.png │ ├── dir_html.png │ ├── dir_md.png │ ├── html_detail.png │ ├── markdown_detail.png │ └── run_out.png │ └── start.sh └── target ├── classes ├── com │ └── github │ │ └── csccoder │ │ └── csdn2md │ │ ├── Main.class │ │ ├── model │ │ └── Article.class │ │ ├── paser │ │ ├── ArticlePaser.class │ │ └── CorePaser.class │ │ └── util │ │ ├── FileUtil.class │ │ ├── HexoMdUtil.class │ │ ├── HttpClientUtil$1.class │ │ ├── HttpClientUtil.class │ │ ├── PropertiesUtil.class │ │ ├── ProxyIP.class │ │ ├── RegexUtil.class │ │ └── html2markdown │ │ ├── DateUtil.class │ │ ├── FilesUtil.class │ │ ├── HTML2Md.class │ │ ├── MDLine$MDLineType.class │ │ ├── MDLine.class │ │ └── Testbed.class ├── config.properties ├── img │ ├── blog_csdn.png │ ├── blog_hexo.png │ ├── dir_0.png │ ├── dir_html.png │ ├── dir_md.png │ ├── html_detail.png │ ├── markdown_detail.png │ └── run_out.png └── start.sh ├── config.properties ├── csdn2hexo-1.0-SNAPSHOT.jar ├── dependency-reduced-pom.xml ├── maven-archiver └── pom.properties ├── maven-status └── maven-compiler-plugin │ └── compile │ └── default-compile │ ├── createdFiles.lst │ └── inputFiles.lst ├── original-csdn2hexo-1.0-SNAPSHOT.jar └── start.sh 
/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/.DS_Store -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 17 | -------------------------------------------------------------------------------- /.idea/csdn2hexo.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 36 | -------------------------------------------------------------------------------- /.idea/jarRepositories.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 9 | 10 | 14 | 15 | 19 | 20 | 24 | 25 | 29 | 30 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 49 | 50 | 52 | 53 | 58 | 59 | 60 | 63 | 64 | 65 | 68 
| 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 98 | 99 | 104 | 105 | 110 | 117 | 118 | 124 | 125 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 1605885898842 144 | 154 | 155 | 156 | 157 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/util/FileUtil.java 234 | 81 235 | 237 | 238 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/util/HexoMdUtil.java 239 | 49 240 | 242 | 243 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/paser/CorePaser.java 244 | 82 245 | 247 | 248 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/paser/ArticlePaser.java 249 | 33 250 | 252 | 253 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/util/HexoMdUtil.java 254 | 26 255 | 257 | 258 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/util/FileUtil.java 259 | 95 260 | 262 | 263 | 264 | 265 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # 功能 4 | 概述:利用爬虫爬取指定用户的CSDN博客文章转为md格式,目的是完成博客迁移 5 | 6 | ## 爬取的方式: 7 | 1 默认轮询从第一页开始往后 8 | 2 专栏方式 9 | 3 指定某篇文章 10 | 4 指定页数 11 | 5 分类 12 | ## 设置生成的md文件命名规则: 13 | 可选值:date title ,date根据日期命名,title根据文章名命名 14 | 15 | ## 设置md文件的头部信息 16 | title= 17 | author= 18 | tags= 19 | categories= 20 | ## 是否显示csdn中的锚点"文章目录"字样,以及下面具体的锚点 默认false(因为csdn中是集成了[toc]功能的,hexo并没有集成) 21 | anchor=false 22 | ## 是否开启版权声明 默认false(csdn文章头部有我们自定的版权声明,false即为去掉。) 23 | 
copyright=false 24 | 25 | # 工具 26 | [html2markdown](https://github.com/pnikosis/jHTML2Md) 27 | Jsoup 28 | 29 | # 提示 30 | - 运行过程中可能出现异常(小概率): 因为csdn有反爬虫机制,如果迁移过程中出现“应该是被反爬虫了,换个wifi或者网络试试~~~~~~~~~~~~~~~~~~~~~~~”,那你就照着我打的这个日志去做吧,哈哈 31 | - 运行./start.sh 之前需要给这个脚本赋予权限,执行 `chmod 777 ./start.sh` 32 | - 需要jdk1.8 33 | 34 | # 用法: 35 | ## 方式一: 36 | git clone 到本地,进入target目录,修改config.properties中的某些配置(视自己情况而修改) 37 | 运行 38 | ```bash 39 | java -jar csdn2hexo-1.0-SNAPSHOT.jar 40 | ``` 41 | ## 方式二: 42 | git clone 到本地,进入target目录,修改config.properties中的某些配置(视自己情况而修改) 43 | 运行 ./start.sh a,b,c 1,2,3 articleName tips:这里的a,b,c 是hexo标签,逗号隔开 1,2,3是hexo分类,逗号隔开 articleName是文章标题,不写的话默认标题是csdn的文章标题。这三个参数如果不写的话默认是配置文件中的内容 44 | ## 方式三: 45 | git clone 到本地,通过idea import本project, 46 | 修改src/main/resources目录中的config.properties文件(视自己情况而修改) 47 | 修改读取配置文件的路径:找到com.github.csccoder.csdn2md.util.PropertiesUtil 48 | ```java 49 | package com.github.csccoder.csdn2md.util; 50 | 51 | public class PropertiesUtil { 52 | public static String getProperties(String key){ 53 | String value = null; 54 | try { 55 | Properties pp = new Properties(); 56 | 57 | //通过idea运行程序 58 | InputStream inputStream = Main.class.getClassLoader().getResourceAsStream("config.properties"); 59 | //通过java -jar 或者 ./start.sh运行程序 60 | //String filePath = System.getProperty("user.dir")+ "/config.properties"; 61 | //InputStream inputStream = new BufferedInputStream(new FileInputStream(filePath)); 62 | 63 | pp.load(inputStream); 64 | value= (String) pp.get(key); 65 | return value; 66 | } catch (IOException e) { 67 | e.printStackTrace(); 68 | return value; 69 | } 70 | } 71 | } 72 | ``` 73 | 找到 Main 这个类运行即可 74 | # 展示 75 | 76 | 详细效果展示:[利用爬虫爬取指定用户的CSDN博客文章转为md格式,目的是完成博客迁移博文到Hexo等金静态博客](https://blog.csdn.net/dataiyangu/article/details/88637312) 77 | 78 | 79 | 查看头部信息 80 | ![image.png](https://upload-images.jianshu.io/upload_images/11496534-c09953e83e0a9172.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240) 81 | 82 | 83 | 查看头部信息显示效果 84 | 
![image.png](https://upload-images.jianshu.io/upload_images/11496534-e8d82c5291a5d012.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240) 85 | ![image.png](https://upload-images.jianshu.io/upload_images/11496534-2806b5fa05c16500.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240) 86 | # 配置文件信息展示 87 | ```js 88 | # md文章头部配置 89 | #分类和标签逗号隔开 90 | title= 91 | author=Leesin.Dong 92 | tags=a,b,c 93 | categories=a,b,c 94 | 95 | # 文件命名规则 可选值:date title ,date根据日期命名,title根据文章名命名 96 | MdFileName_type=date 97 | 98 | 99 | # csdn host 这里如果没有自定义域名的话不用改 100 | csdn_host=https://blog.csdn.net 101 | # csdn用户名 102 | casn_name=dataiyangu 103 | 104 | 105 | # 文件保存的绝对路径,即img html post这三个文件夹的父文件夹 106 | file_Path=/Users/leesin/Desktop/hexo_blog_md 107 | # 设置下载的文件夹名字 108 | html_path=html 109 | image_path=images 110 | md_path=_posts 111 | 112 | 113 | # 设置下载的方式,默认是从第一页往后不断的下载的 114 | #可选的方式: 115 | # 1 默认轮询从第一页开始往后 116 | # 2 专栏方式 117 | # 3 指定某篇文章 118 | # 4 指定页数 119 | # 5 分类 120 | url_way=2 121 | # 具体的五种抓取方法的地址填写(视自己情况而定) 122 | # 比如 我的: https://blog.csdn.net/dataiyangu/article/category/8118370 123 | # 下面的几种情况只写 https://blog.csdn.net/dataiyangu 后面的内容即可,后面写什么自己手动点到相应的页面粘贴过来就行了 124 | url_way_1=/article/list/ 125 | url_way_2=/column/info/32118 126 | url_way_3=/article/details/88525801 127 | url_way_4=/article/list/2 128 | url_way_5=/article/category/8118370 129 | 130 | #是否显示csdn中的锚点"文章目录"字样,以及下面具体的锚点 默认false 131 | anchor=false 132 | #是否开启版权声明 默认false 133 | copyright=false 134 | ``` 135 | -------------------------------------------------------------------------------- /out/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/out/.DS_Store -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.github.csccoder 8 | 
csdn2hexo 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | 14 | org.apache.httpcomponents 15 | httpclient 16 | 17 | 4.5.7 18 | 19 | 20 | 21 | org.jsoup 22 | jsoup 23 | 1.11.2 24 | 25 | 26 | 27 | junit 28 | junit 29 | RELEASE 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | org.apache.maven.plugins 60 | maven-resources-plugin 61 | 2.7 62 | 63 | 64 | copy-resources 65 | 66 | package 67 | 68 | copy-resources 69 | 70 | 71 | 72 | ${project.build.directory} 73 | 74 | 75 | src/main/resources 76 | 77 | **/*.xml 78 | **/*.conf 79 | **/*.properties 80 | **/*.sh 81 | 82 | true 83 | 84 | 85 | UTF-8 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | src/main/java 97 | 98 | **/*.properties 99 | **/*.xml 100 | **/*.sh 101 | 102 | true 103 | 104 | 105 | src/main/resources 106 | 107 | **/* 108 | **/* 109 | 110 | true 111 | 112 | 113 | 114 | 115 | 116 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/Main.java:
--------------------------------------------------------------------------------
package com.github.csccoder.csdn2md;

import com.github.csccoder.csdn2md.paser.CorePaser;
import com.github.csccoder.csdn2md.util.PropertiesUtil;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Program entry point: runs one crawl of the configured CSDN blog on a
 * single worker thread.
 */
public class Main implements Runnable{

    static ExecutorService service = Executors.newFixedThreadPool(1);

    static String csdn_host = PropertiesUtil.getProperties("csdn_host");
    private static final String HOST = csdn_host;
    /** CSDN user name; the property key really is spelled "casn_name" in config.properties. */
    private static final String AUTHOR = PropertiesUtil.getProperties("casn_name");
    /**
     * Absolute path of the directory the output folders are created in.
     */
    private static final String FILE_PATH = PropertiesUtil.getProperties("file_Path");

    /**
     * Submits the crawl and then asks the pool to wind down.
     *
     * @param args unused
     * @throws IOException declared for backward compatibility; not thrown here
     */
    public static void main(String args[]) throws IOException {
        service.execute(new Main());
        // shutdown() lets the submitted task finish but releases the worker
        // thread afterwards; without it the non-daemon pool thread keeps the
        // JVM alive forever once the crawl is done.
        service.shutdown();
    }

    /** Runs the crawl with image downloading switched on. */
    public void run() {
        new CorePaser().parse(HOST,
                AUTHOR,
                FILE_PATH,
                // whether to download images
                true);
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/model/Article.java:
--------------------------------------------------------------------------------
package com.github.csccoder.csdn2md.model;

import java.util.Date;

/**
 * Plain data holder for one scraped blog article.
 */
public class Article {
    private int id;
    private String title;
    private String content;
    private String author;
    private String tags[];
    private String catagory[];
    private Date date;

    public String getAuthor() {
        return author;
    }

    public void setAuthor(final String author) {
        this.author = author;
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public String[] getTags() {
        return tags;
    }

    public void setTags(String[] tags) {
        this.tags = tags;
    }

    public Date getDate() {
        return date;
    }

    public void setDate(Date date) {
        this.date = date;
    }

    public String[] getCatagory() {
        return catagory;
    }

    public void setCatagory(String catagory[]) {
        this.catagory = catagory;
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/paser/ArticlePaser.java:
--------------------------------------------------------------------------------
package
com.github.csccoder.csdn2md.paser;

import com.github.csccoder.csdn2md.model.Article;
import com.github.csccoder.csdn2md.util.PropertiesUtil;
import com.github.csccoder.csdn2md.util.RegexUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.text.SimpleDateFormat;
import java.util.ArrayList;

/**
 * Turns one CSDN article page into an {@link Article}.
 */
public class ArticlePaser {
    /**
     * Layout of the timestamp read from the page. "HH" is the 24-hour field;
     * the previous "hh" (12-hour clock) turned e.g. 12:30 into 00:30.
     */
    private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /**
     * Downloads and parses a single article.
     *
     * @param url absolute URL of the article page
     * @return the article; its date stays {@code null} when the timestamp
     *         element is missing or unparsable (a hint is printed)
     * @throws NumberFormatException if no numeric id can be taken from the URL
     */
    public static Article parseArticle(String url){
        Document document = CorePaser.getDocument(url);
        Article article = new Article();

        article.setAuthor(PropertiesUtil.getProperties("author"));
        article.setId(Integer.parseInt(parseArticleId(url)));
        article.setTitle(document.select(".article-title-box>h1").text().trim());
        article.setContent(document.select("#article_content").html());
        article.setCatagory(splitList(PropertiesUtil.getProperties("categories")));
        article.setTags(splitList(PropertiesUtil.getProperties("tags")));
        try {
            Elements element = document.select(".bar-content>.time");
            // SimpleDateFormat is not thread-safe, so use a fresh instance per call.
            article.setDate(new SimpleDateFormat(DATE_PATTERN).parse(element.text()));
        } catch (Exception e) {
            System.out.println("应该是被反爬虫了,换个wifi或者网络试试~~~~~~~~~~~~~~~~~~~~~~~");
            e.printStackTrace();
        }
        return article;
    }

    /** Splits a comma separated property value; a missing property gives an empty array. */
    private static String[] splitList(String value) {
        return value == null ? new String[0] : value.split(",");
    }

    /** Returns the text of the first tag-box link whose href mentions "category", or null. */
    private static String parseCata(Document document) {
        Elements elements = document.select(".tags-box>a");
        for(Element element:elements){
            String url=element.attr("href");
            if(url != null && url.trim().length() != 0){
                if(url.lastIndexOf("category") != -1){
                    return element.select(".tag-link").text().trim();
                }
            }
        }
        return null;
    }

    /** Extracts the digits after the last "/" of the URI via RegexUtil. */
    public static String parseArticleId(String uri){
        return RegexUtil.match(".*/(\\d*)",uri,1);
    }

    /** Collects the tag texts listed in the page's tag box. */
    public static String[] parseTags(Document document){
        ArrayList<String> list = new ArrayList<String>();
        Elements elements = document.select(".artic-tag-box>a");
        for(Element element:elements){
            String text=element.select(".tag-link").text().trim();
            list.add(text);
        }
        return list.toArray(new String[0]);
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/paser/CorePaser.java:
--------------------------------------------------------------------------------
package com.github.csccoder.csdn2md.paser;

import com.github.csccoder.csdn2md.model.Article;
import com.github.csccoder.csdn2md.util.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;


import java.util.ArrayList;
import java.util.List;


/**
 * Drives the crawl: finds article URLs according to the configured
 * "url_way" and converts every article found.
 */
public class CorePaser {

    /**
     * Entry point of the crawl.
     *
     * Only url_way 1 walks through successive list pages; ways 2-5 address a
     * single page or article and are processed exactly once. Previously the
     * loop re-fetched the same page forever for ways 2-5, and any exception
     * restarted the same page without end.
     *
     * @param host   CSDN host, e.g. https://blog.csdn.net
     * @param author CSDN account name
     * @param path   output root directory
     * @param img    whether images are downloaded
     */
    public void parse(String host, String author, String path, boolean img) {
        FileUtil fileUtil = new FileUtil(path, img);
        String base = host + "/" + author;

        int urlWay;
        try {
            urlWay = Integer.parseInt(PropertiesUtil.getProperties("url_way"));
        } catch (NumberFormatException e) {
            // A missing or non-numeric url_way cannot be recovered by retrying.
            e.printStackTrace();
            return;
        }

        int recordCount = 1;
        int pageCount = 1;
        while (true) {
            System.out.println("正在爬取第" + pageCount + "页");

            List<String> uris;
            try {
                uris = collectArticleUris(urlWay, base, pageCount);
            } catch (Exception e) {
                // The listing page could not be fetched or parsed: stop
                // instead of hammering the same page in a tight loop.
                e.printStackTrace();
                break;
            }
            if (uris.size() == 0) {
                break;
            }

            int ownArticles = 0;
            for (String uri : uris) {
                if (uri.indexOf(author) <= 0) {
                    System.out.println("网址中没有包含用户名");
                    continue;
                }
                ownArticles++;
                try {
                    Article article = ArticlePaser.parseArticle(uri);
                    System.out.println("第" + recordCount + "篇 =>" + article.getId() + " " + article.getTitle());
                    fileUtil.html2HexoMd(article);
                    recordCount++;
                } catch (Exception e) {
                    // One broken article must not abort the rest of the page.
                    e.printStackTrace();
                }
            }

            // Non-paged modes are done after one pass. In paged mode a page
            // without any article of this author means there is nothing left.
            if (urlWay != 1 || ownArticles == 0) {
                break;
            }
            pageCount++;
        }
    }

    /** Resolves the article URLs for one pass of the given crawl mode. */
    private List<String> collectArticleUris(int urlWay, String base, int pageCount) {
        List<String> uris = new ArrayList<String>();
        switch (urlWay) {
            case 1:
                return parseArticleURIs(getDocument(base + PropertiesUtil.getProperties("url_way_1") + pageCount));
            case 2:
                return parseArticleURIsOfZhuanLan(getDocument(base + PropertiesUtil.getProperties("url_way_2")));
            case 3:
                uris.add(base + PropertiesUtil.getProperties("url_way_3"));
                return uris;
            case 4:
                return parseArticleURIs(getDocument(base + PropertiesUtil.getProperties("url_way_4")));
            case 5:
                return parseArticleURIsOfCategory(getDocument(base + PropertiesUtil.getProperties("url_way_5")));
            default:
                // Unknown mode: nothing to crawl.
                return uris;
        }
    }


    /** Fetches the URL with HttpClientUtil and parses the body with Jsoup. */
    public static Document getDocument(String url) {
        String content = HttpClientUtil.get(url);
        return Jsoup.parse(content);
    }

    /** Collects the href of every element matched by the CSS query. */
    private static ArrayList<String> selectHrefs(Document document, String cssQuery) {
        ArrayList<String> hrefs = new ArrayList<String>();
        for (Element element : document.select(cssQuery)) {
            hrefs.add(element.attr("href"));
        }
        return hrefs;
    }


    /** Article links of a normal list page. */
    public ArrayList<String> parseArticleURIs(Document document) {
        return selectHrefs(document, ".article-item-box>h4>a");
    }

    /** Article links of a column page. */
    public ArrayList<String> parseArticleURIsOfZhuanLan(Document document) {
        return selectHrefs(document, ".column_article_list>li>a");
    }

    /** Article links of a category page; same selector as the list page, kept as a separate name. */
    public ArrayList<String> parseArticleURIsOfCategory(Document document) {
        return selectHrefs(document, ".article-item-box>h4>a");
    }

    /** Reads the record count out of the pager text; 0 when no number is captured. */
    public int getRecordCount(String papeList) {
        String value = RegexUtil.match("\\s*?(\\d*)条.*", papeList, 1);
        // (\d*) may capture an empty string, which parseInt rejects.
        return value == null || value.isEmpty() ? 0 : Integer.parseInt(value);
    }

    /** Reads the page count out of the pager text; 0 when no number is captured. */
    public int getPageCount(String pageList) {
        String value = RegexUtil.match(".*共(\\d*)页", pageList, 1);
        return value == null || value.isEmpty() ? 0 : Integer.parseInt(value);
    }

}

--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/FileUtil.java:
--------------------------------------------------------------------------------
package com.github.csccoder.csdn2md.util;

import com.github.csccoder.csdn2md.model.Article;
import com.github.csccoder.csdn2md.util.html2markdown.HTML2Md;


import java.io.*;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.UUID;
import java.util.regex.Pattern;

/**
 * Writes the scraped HTML, the converted Markdown and downloaded images
 * below the configured output directory.
 */
public class FileUtil {
    private static String dir;
    private static String htmlDir;
    private static String mdDir;
    private static String imgDir;
    private static boolean img;
    /** Characters stripped from titles before they are used as file names. */
    private static Pattern FilePattern = Pattern.compile("[\\s\\\\/:\\*\\?\\\"<>\\|]");

    /**
     * Configures the (static) output locations and creates the directories.
     *
     * @param path      output root; when null nothing is configured
     * @param imgSwitch whether {@link #getPicture(String)} downloads images
     */
    public FileUtil(String path, boolean imgSwitch) {
        if (path == null) return;
        dir = path;
        htmlDir = path + "/" + PropertiesUtil.getProperties("html_path") + "/";
        mdDir = path + "/" + PropertiesUtil.getProperties("md_path") + "/";
        imgDir = path + "/" + PropertiesUtil.getProperties("image_path") + "/";
        // Take the caller's switch as is. The old "if (img) img = false"
        // silently turned downloads off when a second instance was created.
        img = imgSwitch;
        judeDirExists(dir, htmlDir, mdDir, imgDir);
    }

    /**
     * Writes the text as UTF-8. The HTML is read back as "utf-8" in
     * saveHexomd, so the platform default charset must not be used here.
     */
    private void save(String content, String filePath) {
        try (Writer writer = new OutputStreamWriter(new FileOutputStream(new File(filePath)), "UTF-8")) {
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Removes characters that are not allowed in file names; null stays null. */
    private static String sanitize(String name) {
        return name == null ? null : FilePattern.matcher(name).replaceAll("");
    }

    /** Stores the article HTML under its sanitised title; skipped for an empty title. */
    private void saveHtml(Article article) {
        String fileName = article.getTitle();
        if ("".equals(fileName)) return;
        save(article.getContent(), htmlDir + sanitize(fileName));
    }

    /**
     * Converts the stored HTML to Markdown and writes it to the md directory,
     * named after the article date or the sanitised title.
     */
    private void saveHexomd(Article article) {
        String title = article.getTitle();
        // saveHtml writes nothing for an empty title, so there is no input file.
        if ("".equals(title)) return;
        String fileName = sanitize(title);

        String mdhead = "";
        if (Boolean.parseBoolean(PropertiesUtil.getProperties("head"))) {
            mdhead = HexoMdUtil.getHeader(article);
        }
        try {
            String mdContent = HTML2Md.convertFile(new File(htmlDir + fileName), "utf-8");

            // Default to the sanitised title: the raw title may contain "/" or
            // ":" and would then produce an invalid path.
            String mdFileName = fileName;
            boolean byDate = "date".equals(PropertiesUtil.getProperties("MdFileName_type"));
            if (byDate && article.getDate() != null) {
                SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                mdFileName = dateFormat.format(article.getDate()).replace(" ", "-").replaceAll(":", "-");
            }
            save(mdhead + mdContent, mdDir + mdFileName + ".md");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }


    /** Saves the article as HTML and then as Markdown. */
    public void html2HexoMd(Article article) {
        saveHtml(article);
        saveHexomd(article);
    }

    /**
     * Downloads an image into the image directory under a random name.
     *
     * @param url image URL
     * @return the stored file name, or null when downloading is switched off,
     *         the URL is empty, or the download/write failed (previously a
     *         name was returned even though no file had been written)
     */
    public static String getPicture(String url) {
        if (!img || url == null || url.equals("")) {
            return null;
        }
        // CSDN image names are very long, so a UUID is used instead.
        String fix = UUID.randomUUID() + ".png";
        try {
            byte[] bytes;
            // Buffer the whole download first so that a failed transfer
            // leaves no partial file behind.
            try (InputStream in = new BufferedInputStream(new URL(url).openStream());
                 ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
                byte[] buf = new byte[1024];
                int length;
                while ((length = in.read(buf, 0, buf.length)) != -1) {
                    buffer.write(buf, 0, length);
                }
                bytes = buffer.toByteArray();
            }
            try (FileOutputStream out = new FileOutputStream(new File(imgDir + fix))) {
                out.write(bytes);
            }
            return fix;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Guesses gif/png/jpg/bmp from the leading bytes; null when unknown or shorter than 10 bytes. */
    private static String GetFileSuffix(byte[] fileData) {
        if (fileData == null || fileData.length < 10) {
            return null;
        }

        if (fileData[0] == 'G' && fileData[1] == 'I' && fileData[2] == 'F') {
            return "gif";
        } else if (fileData[1] == 'P' && fileData[2] == 'N' && fileData[3] == 'G') {
            return "png";
        } else if (fileData[6] == 'J' && fileData[7] == 'F' && fileData[8] == 'I' && fileData[9] == 'F') {
            return "jpg";
        } else if (fileData[0] == 'B' && fileData[1] == 'M') {
            return "bmp";
        } else {
            return null;
        }
    }

    /**
     * Makes sure the root and the html/md/img directories exist.
     *
     * Each directory is handled on its own. The old
     * {@code a.mkdirs() && b.mkdirs() && c.mkdirs()} stopped at the first
     * directory that already existed and never created the remaining ones.
     */
    public static void judeDirExists(String dirRoot, String htmlDirPath, String mdDirPath, String imgDirPath) {
        try {
            File root = new File(dirRoot);
            if (!root.exists() && root.mkdirs()) {
                System.out.println("已创建根目录!");
            }

            boolean createdAny = false;
            boolean allPresent = true;
            for (String path : new String[]{htmlDirPath, mdDirPath, imgDirPath}) {
                File target = new File(path);
                if (target.isDirectory()) {
                    continue;
                }
                if (target.mkdirs()) {
                    createdAny = true;
                } else {
                    allPresent = false;
                }
            }

            if (createdAny && allPresent) {
                System.out.println("目录创建成功!");
            } else {
                System.out.println("目录已经存在,如果还是不行请尝试删掉根目录下所有文件");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/HexoMdUtil.java:
--------------------------------------------------------------------------------
package com.github.csccoder.csdn2md.util;

import com.github.csccoder.csdn2md.model.Article;
import com.github.csccoder.csdn2md.util.html2markdown.DateUtil;

import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Map;
import java.util.TreeMap;

import static java.io.File.separator;

/**
 * Builds the Hexo front-matter block that is put in front of a converted post.
 */
public class HexoMdUtil {
    private static final SimpleDateFormat dateFormat=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    /**
     * Renders the front-matter for an article, for example:
     *
     * <pre>
     * ---
     * title: some title
     * author: somebody
     * tags:
     *  - hexo
     * categories:
     *  - blog
     * date: 2017-12-12 19:17:34
     * ---
     * </pre>
     *
     * The "title" property overrides the article's own title only when it is
     * set to a non-blank value. Tags are written under "tags" and categories
     * under "categories"; the two lists used to be swapped.
     *
     * @param article the article to describe
     * @return the front-matter text, ending with the closing "---" line
     */
    public static String getHeader(Article article){
        String configuredTitle = PropertiesUtil.getProperties("title");
        // A missing property comes back as null and must not replace the real title.
        String articleTitle = (configuredTitle == null || configuredTitle.trim().isEmpty())
                ? article.getTitle()
                : configuredTitle;

        StringBuilder sb = new StringBuilder();
        sb.append("---\n");
        sb.append("title: ").append(articleTitle).append('\n');
        sb.append("author: ").append(article.getAuthor()).append('\n');
        appendList(sb, "tags", article.getTags());
        appendList(sb, "categories", article.getCatagory());
        // The date is null when the page timestamp could not be parsed.
        if (article.getDate() != null) {
            sb.append("date: ").append(dateFormat.format(article.getDate())).append('\n');
        }
        sb.append("---\n");
        return sb.toString();
    }

    /** Appends "key:" followed by one " - item" line per entry; a null array gives just the key. */
    private static void appendList(StringBuilder sb, String key, String[] items) {
        sb.append(key).append(":\n");
        if (items == null) {
            return;
        }
        for (String item : items) {
            sb.append(" - ").append(item).append('\n');
        }
    }

    /** Concatenates all elements without a separator. */
    public static String array2String(String[] array){
        return String.join("", array);
    }

}

--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/HttpClientUtil.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util; 2 | 3 | import org.apache.http.*; 4 | import org.apache.http.client.HttpClient; 5 | import org.apache.http.client.HttpRequestRetryHandler; 6 | import org.apache.http.client.config.CookieSpecs; 7 | import org.apache.http.client.config.RequestConfig; 8 | import org.apache.http.client.entity.UrlEncodedFormEntity; 9 | import org.apache.http.client.methods.CloseableHttpResponse; 10 | import org.apache.http.client.methods.HttpGet; 11 | import org.apache.http.client.methods.HttpPost; 12 | import org.apache.http.client.methods.HttpRequestBase; 13 | import org.apache.http.client.params.CookiePolicy; 14 | import org.apache.http.client.params.HttpClientParams; 15 | import org.apache.http.client.protocol.HttpClientContext; 16 | import org.apache.http.config.Registry; 17 | import org.apache.http.config.RegistryBuilder; 18 | import org.apache.http.conn.ConnectTimeoutException; 19 | import org.apache.http.conn.routing.HttpRoute; 20 | import org.apache.http.conn.socket.ConnectionSocketFactory; 21 | import org.apache.http.conn.socket.LayeredConnectionSocketFactory; 22 | import org.apache.http.conn.socket.PlainConnectionSocketFactory; 23 | import org.apache.http.conn.ssl.SSLConnectionSocketFactory; 24 | import org.apache.http.impl.client.CloseableHttpClient; 25 | import org.apache.http.impl.client.DefaultHttpClient; 26 | import org.apache.http.impl.client.HttpClients; 27 | import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; 28 | import
org.apache.http.message.BasicNameValuePair; 29 | import org.apache.http.protocol.HttpContext; 30 | import org.apache.http.util.EntityUtils; 31 | 32 | import javax.net.ssl.SSLException; 33 | import javax.net.ssl.SSLHandshakeException; 34 | import java.io.IOException; 35 | import java.io.InterruptedIOException; 36 | import java.io.UnsupportedEncodingException; 37 | import java.net.UnknownHostException; 38 | import java.util.ArrayList; 39 | import java.util.List; 40 | import java.util.Map; 41 | import java.util.Set; 42 | 43 | public class HttpClientUtil { 44 | 45 | static final int timeOut = 15 * 1000; 46 | 47 | private static CloseableHttpClient httpClient = null; 48 | 49 | private final static Object syncLock = new Object(); 50 | 51 | private static void config(HttpRequestBase httpRequestBase) { 52 | // 设置Header等 53 | // httpRequestBase.setHeader("User-Agent", "Mozilla/5.0"); 54 | // httpRequestBase 55 | // .setHeader("Accept", 56 | // "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); 57 | // httpRequestBase.setHeader("Accept-Language", 58 | // "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");// "en-US,en;q=0.5"); 59 | // httpRequestBase.setHeader("Accept-Charset", 60 | // "ISO-8859-1,utf-8,gbk,gb2312;q=0.7,*;q=0.7"); 61 | 62 | // 配置请求的超时设置 63 | RequestConfig requestConfig = RequestConfig.custom() 64 | .setConnectionRequestTimeout(timeOut) 65 | .setConnectTimeout(timeOut).setSocketTimeout(timeOut).build(); 66 | httpRequestBase.setConfig(requestConfig); 67 | } 68 | 69 | /** 70 | * 获取HttpClient对象 71 | * 72 | * @return 73 | * @author SHANHY 74 | * @create 2015年12月18日 75 | */ 76 | public static CloseableHttpClient getHttpClient(String url) { 77 | String hostname = url.split("/")[2]; 78 | int port = 80; 79 | if (hostname.contains(":")) { 80 | String[] arr = hostname.split(":"); 81 | hostname = arr[0]; 82 | port = Integer.parseInt(arr[1]); 83 | } 84 | if (httpClient == null) { 85 | synchronized (syncLock) { 86 | if (httpClient == null) { 87 | httpClient = 
createHttpClient(200, 40, 100, hostname, port); 88 | } 89 | } 90 | } 91 | return httpClient; 92 | } 93 | 94 | /** 95 | * 创建HttpClient对象 96 | * 97 | * @return 98 | * @author SHANHY 99 | * @create 2015年12月18日 100 | */ 101 | public static CloseableHttpClient createHttpClient(int maxTotal, 102 | int maxPerRoute, int maxRoute, String hostname, int port) { 103 | ConnectionSocketFactory plainsf = PlainConnectionSocketFactory 104 | .getSocketFactory(); 105 | LayeredConnectionSocketFactory sslsf = SSLConnectionSocketFactory 106 | .getSocketFactory(); 107 | Registry registry = RegistryBuilder 108 | .create().register("http", plainsf) 109 | .register("https", sslsf).build(); 110 | PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager( 111 | registry); 112 | // 将最大连接数增加 113 | cm.setMaxTotal(maxTotal); 114 | // 将每个路由基础的连接增加 115 | cm.setDefaultMaxPerRoute(maxPerRoute); 116 | HttpHost httpHost = new HttpHost(hostname, port); 117 | // 将目标主机的最大连接数增加 118 | cm.setMaxPerRoute(new HttpRoute(httpHost), maxRoute); 119 | 120 | // 请求重试处理 121 | HttpRequestRetryHandler httpRequestRetryHandler = new HttpRequestRetryHandler() { 122 | public boolean retryRequest(IOException exception, 123 | int executionCount, HttpContext context) { 124 | if (executionCount >= 5) {// 如果已经重试了5次,就放弃 125 | return false; 126 | } 127 | if (exception instanceof NoHttpResponseException) {// 如果服务器丢掉了连接,那么就重试 128 | return true; 129 | } 130 | if (exception instanceof SSLHandshakeException) {// 不要重试SSL握手异常 131 | return false; 132 | } 133 | if (exception instanceof InterruptedIOException) {// 超时 134 | return false; 135 | } 136 | if (exception instanceof UnknownHostException) {// 目标服务器不可达 137 | return false; 138 | } 139 | if (exception instanceof ConnectTimeoutException) {// 连接被拒绝 140 | return false; 141 | } 142 | if (exception instanceof SSLException) {// SSL握手异常 143 | return false; 144 | } 145 | 146 | HttpClientContext clientContext = HttpClientContext 147 | .adapt(context); 148 | HttpRequest 
request = clientContext.getRequest(); 149 | // 如果请求是幂等的,就再次尝试 150 | if (!(request instanceof HttpEntityEnclosingRequest)) { 151 | return true; 152 | } 153 | return false; 154 | } 155 | }; 156 | 157 | // 这里有警告 158 | CloseableHttpClient httpClient = HttpClients.custom() 159 | .setConnectionManager(cm) 160 | .setRetryHandler(httpRequestRetryHandler).build(); 161 | 162 | return httpClient; 163 | } 164 | 165 | private static void setPostParams(HttpPost httpost, 166 | Map params) { 167 | List nvps = new ArrayList(); 168 | Set keySet = params.keySet(); 169 | for (String key : keySet) { 170 | nvps.add(new BasicNameValuePair(key, params.get(key).toString())); 171 | } 172 | try { 173 | httpost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8")); 174 | } catch (UnsupportedEncodingException e) { 175 | e.printStackTrace(); 176 | } 177 | } 178 | 179 | /** 180 | * post请求URL获取内容 181 | * 182 | * @param url 183 | * @return 184 | * @throws IOException 185 | * @author SHANHY 186 | * @create 2015年12月18日 187 | */ 188 | public static String post(String url, Map params) throws Exception { 189 | HttpPost httppost = new HttpPost(url); 190 | config(httppost); 191 | setPostParams(httppost, params); 192 | CloseableHttpResponse response = null; 193 | try { 194 | response = getHttpClient(url).execute(httppost, 195 | HttpClientContext.create()); 196 | HttpEntity entity = response.getEntity(); 197 | String result = EntityUtils.toString(entity, "utf-8"); 198 | EntityUtils.consume(entity); 199 | return result; 200 | } catch (Exception e) { 201 | // e.printStackTrace(); 202 | throw e; 203 | } finally { 204 | try { 205 | if (response != null) 206 | response.close(); 207 | } catch (IOException e) { 208 | e.printStackTrace(); 209 | } 210 | } 211 | } 212 | 213 | /** 214 | * GET请求URL获取内容 215 | * 216 | * @param url 217 | * @return 218 | * @author SHANHY 219 | * @create 2015年12月18日 220 | */ 221 | public static String get(String url) { 222 | HttpGet httpget = new HttpGet(url); 223 | config(httpget); 224 | 
CloseableHttpResponse response = null; 225 | try { 226 | response = getHttpClient(url).execute(httpget, 227 | HttpClientContext.create()); 228 | HttpEntity entity = response.getEntity(); 229 | String result = EntityUtils.toString(entity, "utf-8"); 230 | EntityUtils.consume(entity); 231 | return result; 232 | } catch (IOException e) { 233 | e.printStackTrace(); 234 | } finally { 235 | try { 236 | if (response != null) 237 | response.close(); 238 | } catch (IOException e) { 239 | e.printStackTrace(); 240 | } 241 | } 242 | return null; 243 | } 244 | 245 | } 246 | -------------------------------------------------------------------------------- /src/main/java/com/github/csccoder/csdn2md/util/PropertiesUtil.java: -------------------------------------------------------------------------------- 1 | package com.github.csccoder.csdn2md.util; 2 | 3 | import com.github.csccoder.csdn2md.Main; 4 | 5 | import java.io.BufferedInputStream; 6 | import java.io.FileInputStream; 7 | import java.io.IOException; 8 | import java.io.InputStream; 9 | import java.util.Properties; 10 | 11 | public class PropertiesUtil { 12 | public static String getProperties(String key){ 13 | String value = null; 14 | try { 15 | Properties pp = new Properties(); 16 | 17 | //通过idea运行程序 18 | InputStream inputStream = Main.class.getClassLoader().getResourceAsStream("config.properties"); 19 | 20 | //通过java -jar运行程序 21 | // String filePath = System.getProperty("user.dir")+ "/config.properties"; 22 | // InputStream inputStream = new BufferedInputStream(new FileInputStream(filePath)); 23 | 24 | pp.load(inputStream); 25 | value= (String) pp.get(key); 26 | return value; 27 | } catch (IOException e) { 28 | e.printStackTrace(); 29 | return value; 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/github/csccoder/csdn2md/util/ProxyIP.java: -------------------------------------------------------------------------------- 1 | package 
/**
 * Regular-expression helper.
 */
public class RegexUtil {
    /**
     * Returns one capture group when {@code regex} matches the whole of
     * {@code content} (this uses {@link Matcher#matches()}, not a substring search).
     *
     * @param regex   the pattern; {@code null} yields {@code null}
     * @param content the text to test; {@code null} yields {@code null}
     *                (fetch helpers may hand back null on failure)
     * @param group   index of the capture group to return, 0 for the whole match
     * @return the group's text, or {@code null} when there is no full match
     */
    public static String match(String regex,String content,int group){
        if (regex == null || content == null) {
            return null;
        }
        Matcher matcher = Pattern.compile(regex).matcher(content);
        return matcher.matches() ? matcher.group(group) : null;
    }
}
/**
 * Static helpers for formatting, parsing and shifting dates, built on
 * {@link SimpleDateFormat} and {@link Calendar} with the JVM's default
 * locale and time zone.
 *
 * @author robin
 * @author refactor by Sevan Joe
 */
public class DateUtil {

    public DateUtil() {

    }

    /**
     * @return the current date formatted as yyyy-MM-dd
     */
    public static String getNowDate() {
        return dateToStringWithPattern(new Date(), "yyyy-MM-dd");
    }

    /**
     * @return the current time formatted as yyyy-MM-dd HH:mm:ss:SSS
     *         (the last field is milliseconds)
     */
    public static String getNowTime() {
        // 'S' is the millisecond letter; lowercase 's' would print the seconds a second time.
        return dateToStringWithPattern(new Date(), "yyyy-MM-dd HH:mm:ss:SSS");
    }

    /**
     * @param date the date to format
     * @return the date formatted as yyyy-MM-dd HH:mm
     */
    public static String dateToString(Date date) {
        return dateToStringWithPattern(date, "yyyy-MM-dd HH:mm");
    }

    /**
     * @param date the date to format
     * @return the date formatted as yyyy-MM-dd
     */
    public static String dateToShortString(Date date) {
        return dateToStringWithPattern(date, "yyyy-MM-dd");
    }

    /**
     * @param date the date to format
     * @return the date formatted as yyyy-MM-dd HH:mm:ss
     */
    public static String dateToLongString(Date date) {
        return dateToStringWithPattern(date, "yyyy-MM-dd HH:mm:ss");
    }

    /**
     * @param date the date to format
     * @return the time-of-day part formatted as HH:mm:ss
     */
    public static String dateToTimeString(Date date) {
        return dateToStringWithPattern(date, "HH:mm:ss");
    }

    /**
     * Formats a date with an arbitrary {@link SimpleDateFormat} pattern.
     *
     * @param date    the date to format
     * @param pattern the format pattern
     * @return the formatted text, or "" when formatting fails
     *         (for example a null date or an invalid pattern)
     */
    public static String dateToStringWithPattern(Date date, String pattern) {
        try {
            return new SimpleDateFormat(pattern).format(date);
        } catch (Exception e) {
            return "";
        }
    }

    /**
     * Splits the yyyy-MM-dd form of a date on '-'.
     *
     * @param date the date to split
     * @return a 3-element array {year, month, day}; slots stay null when the
     *         date could not be formatted
     */
    public static String[] SplitDate(Date date) {
        String[] parts = new String[3];
        StringTokenizer tokens = new StringTokenizer(dateToShortString(date), "-");
        for (int i = 0; tokens.hasMoreTokens(); i++) {
            parts[i] = tokens.nextToken();
        }
        return parts;
    }

    /**
     * @param date the date to format
     * @return the date formatted as yyyy-MM-ddTHH:mm:ss, or "" on failure
     */
    public static String dateToBOMCStringDate(Date date) {
        // dateToLongString already yields "" on failure, which passes through unchanged.
        return StringToBOMCStringDate(dateToLongString(date));
    }

    /**
     * @param date text in the form "yyyy-MM-dd HH:mm:ss"
     * @return the same text with every space replaced by 'T'
     */
    public static String StringToBOMCStringDate(String date) {
        return date.replace(" ", "T");
    }

    /**
     * @param string text in the form yyyy-MM-dd HH:mm:ss
     * @return the parsed date, or null when it cannot be parsed
     */
    public static Date stringToDate(String string) {
        return parseOrNull(string, "yyyy-MM-dd HH:mm:ss");
    }

    /**
     * @param string text in the form HH:mm:ss
     * @return the parsed date, or null when it cannot be parsed
     */
    public static Date timeStringToDate(String string) {
        return parseOrNull(string, "HH:mm:ss");
    }

    /**
     * @param string text in the form yyyy-MM-dd
     * @return the parsed date, or null when it cannot be parsed
     */
    public static Date stringToShortDate(String string) {
        return parseOrNull(string, "yyyy-MM-dd");
    }

    /**
     * @param string text in the form yyyyMMdd
     * @return the parsed date, or null when it cannot be parsed
     */
    public static Date stringToShortNoDate(String string) {
        return parseOrNull(string, "yyyyMMdd");
    }

    /**
     * @return the current instant
     */
    public static Date getNow() {
        return new Date();
    }

    /**
     * @return the current time in milliseconds since the epoch
     */
    public static long getCurrentTimestamp() {
        return (new Date()).getTime();
    }

    /**
     * @param string text in the form yyyy-MM-dd
     * @return milliseconds since the epoch, or -1 when the text cannot be parsed
     */
    public static long getTimestamp(String string) {
        Date date = parseOrNull(string, "yyyy-MM-dd");
        return date == null ? -1 : date.getTime();
    }

    /**
     * @param string text in the form yyyy-MM-dd HH:mm:ss
     * @return milliseconds since the epoch, or -1 when the text cannot be parsed
     */
    public static long getStringToTimestamp(String string) {
        Date date = parseOrNull(string, "yyyy-MM-dd HH:mm:ss");
        return date == null ? -1 : date.getTime();
    }

    /**
     * @param timestamp a time in milliseconds since the epoch
     * @return whole minutes from now to {@code timestamp}
     *         (negative when the timestamp lies in the past)
     */
    public static long getOffMinutes(long timestamp) {
        return getOffMinutes(timestamp, System.currentTimeMillis());
    }

    /**
     * @param left  first time in milliseconds since the epoch
     * @param right second time in milliseconds since the epoch
     * @return {@code (left - right)} expressed in whole minutes
     */
    public static long getOffMinutes(long left, long right) {
        return (left - right) / 60000L;
    }

    /**
     * @param timestamp a time in milliseconds since the epoch
     * @return the timestamp rendered with the default date-time format
     */
    public static String LongToDateString(long timestamp) {
        return DateFormat.getDateTimeInstance().format(new Date(timestamp));
    }

    /**
     * Moves a yyyy-MM-dd date to the given day of its week and then one day
     * forward, e.g. {@code getWeekDay(today, Calendar.SUNDAY)} and
     * {@code getWeekDay(today, Calendar.SATURDAY)}.
     * Unparseable input is logged and the current time is used instead.
     *
     * @param dateString date text in the form yyyy-MM-dd
     * @param weekDay    a {@link Calendar#DAY_OF_WEEK} value, SUNDAY..SATURDAY
     * @return the day after that weekday
     */
    public static Date getWeekDay(String dateString, int weekDay) {
        Calendar calendar = calendarFrom(dateString, "yyyy-MM-dd");
        calendar.set(Calendar.DAY_OF_WEEK, weekDay);
        calendar.add(Calendar.DATE, 1);
        return calendar.getTime();
    }

    /**
     * Parsing "yyyy-MM" already lands on the first day of that month.
     * Unparseable input is logged and the current time is returned instead.
     *
     * @param dateString date text in the form yyyy-MM
     * @return the first day of that month
     */
    public static Date getMonthFirstDay(String dateString) {
        return calendarFrom(dateString, "yyyy-MM").getTime();
    }

    /**
     * Unparseable input is logged and the calculation starts from the current time.
     *
     * @param dateString date text in the form yyyy-MM
     * @return the last day of that month
     */
    public static Date getMonthLastDay(String dateString) {
        Calendar calendar = calendarFrom(dateString, "yyyy-MM");
        calendar.add(Calendar.MONTH, 1);
        calendar.add(Calendar.DATE, -1);
        return calendar.getTime();
    }

    /**
     * Parsing "yyyy" already lands on January 1st of that year.
     * Unparseable input is logged and the current time is returned instead.
     *
     * @param dateString date text in the form yyyy
     * @return the first day of that year
     */
    public static Date getYearFirstDay(String dateString) {
        return calendarFrom(dateString, "yyyy").getTime();
    }

    /**
     * Unparseable input is logged and the calculation starts from the current time.
     *
     * @param dateString date text in the form yyyy
     * @return the last day of that year
     */
    public static Date getYearLastDay(String dateString) {
        Calendar calendar = calendarFrom(dateString, "yyyy");
        calendar.add(Calendar.YEAR, 1);
        calendar.add(Calendar.DATE, -1);
        return calendar.getTime();
    }

    /**
     * Shifts a date by an amount of one calendar field.
     *
     * @param date   the starting date
     * @param field  a {@link Calendar} field such as Calendar.DAY_OF_MONTH
     * @param amount how much to ADD to that field (may be negative)
     * @return the shifted date
     */
    public static Date getDate(Date date, int field, int amount) {
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(date);
        calendar.add(field, amount);
        return calendar.getTime();
    }

    /** Parses {@code text} with {@code pattern}; returns null instead of throwing. */
    private static Date parseOrNull(String text, String pattern) {
        try {
            // The ParsePosition overload reports failure by returning null.
            return new SimpleDateFormat(pattern).parse(text, new ParsePosition(0));
        } catch (Exception e) {
            return null;
        }
    }

    /**
     * Returns a calendar set to the parsed date; when parsing fails the error
     * is logged and the calendar keeps the current time.
     */
    private static Calendar calendarFrom(String dateString, String pattern) {
        Calendar calendar = Calendar.getInstance();
        try {
            calendar.setTime(new SimpleDateFormat(pattern).parse(dateString));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return calendar;
    }
}
import java.io.IOException; 9 | import java.io.InputStream; 10 | import java.io.InputStreamReader; 11 | import java.io.PrintWriter; 12 | import java.net.URLDecoder; 13 | import java.util.ArrayList; 14 | import java.util.HashMap; 15 | import java.util.List; 16 | import java.util.Map; 17 | import java.util.StringTokenizer; 18 | 19 | /** 20 | * FilesUtil 21 | * 22 | * @author robin 23 | * @author refactor by Sevan Joe 24 | */ 25 | public class FilesUtil { 26 | 27 | /** 28 | * read text file content, return string split by "\n" 29 | * 30 | * @param filePathAndName String file name with absolute path 31 | * @return String text content 32 | */ 33 | public static String readAll(String filePathAndName) { 34 | return readAll(filePathAndName, "UTF-8"); 35 | } 36 | 37 | /** 38 | * read text file content, return string split by "\n" 39 | * 40 | * @param filePathAndName String file name with absolute path 41 | * @param encoding String file encoding 42 | * @return String text content 43 | */ 44 | public static String readAll(String filePathAndName, String encoding) { 45 | String string = ""; 46 | StringBuilder stringBuilder = new StringBuilder(""); 47 | FileInputStream fileInputStream = null; 48 | try { 49 | fileInputStream = new FileInputStream(URLDecoder.decode(filePathAndName, encoding)); 50 | InputStreamReader inputStreamReader; 51 | if ("".equals(encoding)) { 52 | inputStreamReader = new InputStreamReader(fileInputStream); 53 | } else { 54 | inputStreamReader = new InputStreamReader(fileInputStream, encoding); 55 | } 56 | try { 57 | String data; 58 | BufferedReader bufferedReader = new BufferedReader(inputStreamReader); 59 | while ((data = bufferedReader.readLine()) != null) { 60 | stringBuilder.append(data).append("\n"); 61 | } 62 | } catch (Exception e) { 63 | return ""; 64 | } 65 | string = stringBuilder.toString(); 66 | } catch (IOException es) { 67 | string = ""; 68 | } finally { 69 | try { 70 | if (fileInputStream != null) fileInputStream.close(); 71 | } catch 
(IOException e) { 72 | e.printStackTrace(); 73 | } 74 | } 75 | return string; 76 | } 77 | 78 | /** 79 | * read the specified line content of text file 80 | * 81 | * @param filePathAndName String file name with absolute path 82 | * @param encoding String file encoding 83 | * @return String text content of the line 84 | */ 85 | public static String readLine(String filePathAndName, long lineIndex, String encoding) { 86 | String string = ""; 87 | StringBuilder stringBuilder = new StringBuilder(""); 88 | long i = 0; 89 | FileInputStream fileInputStream = null; 90 | try { 91 | fileInputStream = new FileInputStream(URLDecoder.decode(filePathAndName, encoding)); 92 | InputStreamReader inputStreamReader; 93 | if ("".equals(encoding)) { 94 | inputStreamReader = new InputStreamReader(fileInputStream); 95 | } else { 96 | inputStreamReader = new InputStreamReader(fileInputStream, encoding); 97 | } 98 | BufferedReader bufferedReader = new BufferedReader(inputStreamReader); 99 | try { 100 | String data; 101 | while ((data = bufferedReader.readLine()) != null) { 102 | if (lineIndex == i) { 103 | stringBuilder.append(data); 104 | break; 105 | } else { 106 | i++; 107 | } 108 | } 109 | } catch (Exception e) { 110 | return ""; 111 | } 112 | string = stringBuilder.toString(); 113 | } catch (IOException es) { 114 | return ""; 115 | } finally { 116 | try { 117 | if (fileInputStream != null) fileInputStream.close(); 118 | } catch (IOException e) { 119 | e.printStackTrace(); 120 | } 121 | } 122 | return string; 123 | } 124 | 125 | /** 126 | * read the first line content of text file 127 | * 128 | * @param filePathAndName String file name with absolute path 129 | * @return String text content of the first line 130 | */ 131 | public static String readLine(String filePathAndName) { 132 | return readLine(filePathAndName, 0, "UTF-8"); 133 | } 134 | 135 | /** 136 | * read the specified line content of text file 137 | * 138 | * @param filePathAndName String file name with absolute path 139 | * 
@return String text content of the line 140 | */ 141 | public static String readLine(String filePathAndName, long rowIndex) { 142 | return readLine(filePathAndName, rowIndex, "UTF-8"); 143 | } 144 | 145 | /** 146 | * create file 147 | * 148 | * @param filePathAndName String file path and name 149 | * @param fileContent String file content 150 | * @return boolean flag to indicate create success or not 151 | */ 152 | public static boolean newFile(String filePathAndName, String fileContent) { 153 | return newFile(filePathAndName, fileContent, false); 154 | } 155 | 156 | /** 157 | * create file 158 | * 159 | * @param filePathAndName String file path and name 160 | * @param fileContent String file content 161 | * @param flag boolean flag to indicate is append, true to append, false to create 162 | * @return boolean flag to indicate create success or not 163 | */ 164 | public static boolean newFile(String filePathAndName, String fileContent, boolean flag) { 165 | try { 166 | File file = new File(filePathAndName); 167 | if (!file.exists()) { 168 | file.createNewFile(); 169 | } 170 | FileWriter fileWriter = new FileWriter(file, flag); 171 | PrintWriter printWriter = new PrintWriter(fileWriter); 172 | printWriter.println(fileContent); 173 | fileWriter.close(); 174 | return true; 175 | } catch (Exception e) { 176 | System.out.println("create file failed"); 177 | e.printStackTrace(); 178 | } 179 | return false; 180 | } 181 | 182 | /** 183 | * create file with specified encoding 184 | * 185 | * @param filePathAndName String file path and name 186 | * @param fileContent String file content 187 | * @param encoding the specified encoding, such as GBK or UTF-8 188 | * @return boolean flag to indicate create success or not 189 | */ 190 | public static boolean newFile(String filePathAndName, String fileContent, String encoding) { 191 | try { 192 | File file = new File(filePathAndName); 193 | if (!file.exists()) { 194 | file.createNewFile(); 195 | } 196 | PrintWriter printWriter = 
new PrintWriter(file, encoding); 197 | printWriter.println(fileContent); 198 | printWriter.close(); 199 | return true; 200 | } catch (Exception e) { 201 | System.out.println("create file failed"); 202 | e.printStackTrace(); 203 | } 204 | return false; 205 | } 206 | 207 | /** 208 | * delete file 209 | * 210 | * @param filePathAndName String file path and name 211 | */ 212 | public static boolean delFile(String filePathAndName) { 213 | try { 214 | File file = new File(filePathAndName); 215 | return file.delete(); 216 | } catch (Exception e) { 217 | System.out.println("delete file failed"); 218 | e.printStackTrace(); 219 | return false; 220 | } 221 | } 222 | 223 | /** 224 | * create folder 225 | * 226 | * @param folderPath String folder path 227 | * @return String created folder path 228 | */ 229 | public static String newFolder(String folderPath) { 230 | String filePath = folderPath; 231 | try { 232 | File myFilePath = new File(filePath); 233 | if (!myFilePath.exists()) { 234 | myFilePath.mkdirs(); 235 | } 236 | } catch (Exception e) { 237 | System.out.println("create folder failed"); 238 | filePath = ""; 239 | e.printStackTrace(); 240 | } 241 | return filePath; 242 | } 243 | 244 | /** 245 | * delete folder 246 | * 247 | * @param folderPath String folder path 248 | */ 249 | public static void delFolder(String folderPath) { 250 | try { 251 | delAllFile(folderPath); // delete all files inside 252 | File file = new File(folderPath); 253 | file.delete(); // delete the empty folder 254 | } catch (Exception e) { 255 | System.out.println("delete folder failed"); 256 | e.printStackTrace(); 257 | } 258 | } 259 | 260 | /** 261 | * delete all files inside folder 262 | * 263 | * @param path String folder path 264 | */ 265 | public static void delAllFile(String path) { 266 | File file = new File(path); 267 | if (!file.exists()) { 268 | return; 269 | } 270 | if (!file.isDirectory()) { 271 | return; 272 | } 273 | if (file.getAbsolutePath().equalsIgnoreCase("/")) { 274 | 
System.out.println("this is a root directory, you cannot delete all files in it!"); 275 | System.out.println("please change the path!"); 276 | return; 277 | } 278 | if (file.getAbsolutePath().equalsIgnoreCase("/root")) { 279 | System.out.println("this is a root directory, you cannot delete all files in it!"); 280 | System.out.println("please change the path!"); 281 | return; 282 | } 283 | if (file.getAbsolutePath().equalsIgnoreCase("/usr") || file.getAbsolutePath().equalsIgnoreCase("/opt") 284 | || file.getAbsolutePath().equalsIgnoreCase("/bin") || file.getAbsolutePath().equalsIgnoreCase("/sbin") 285 | || file.getAbsolutePath().equalsIgnoreCase("/etc") || file.getAbsolutePath().equalsIgnoreCase("/selinux") 286 | || file.getAbsolutePath().equalsIgnoreCase("/sys") || file.getAbsolutePath().equalsIgnoreCase("/var") 287 | || file.getAbsolutePath().equalsIgnoreCase("/home") || file.getAbsolutePath().equalsIgnoreCase("/net")) { 288 | System.out.println("this is a root directory, you cannot delete all files in it!"); 289 | System.out.println("please change the path!"); 290 | return; 291 | } 292 | if (file.getAbsolutePath().equalsIgnoreCase("C://") || file.getAbsolutePath().equalsIgnoreCase("C:\\\\")) { 293 | System.out.println("this is a root directory, you cannot delete all files in it!"); 294 | System.out.println("please change the path!"); 295 | return; 296 | } 297 | String[] tempList = file.list(); 298 | File temp; 299 | if (tempList == null) { 300 | return; 301 | } 302 | for (String aTempList : tempList) { 303 | if (path.endsWith(File.separator)) { 304 | temp = new File(path + aTempList); 305 | } else { 306 | temp = new File(path + File.separator + aTempList); 307 | } 308 | if (temp.isFile()) { 309 | temp.delete(); 310 | } 311 | if (temp.isDirectory()) { 312 | delAllFile(path + "/" + aTempList);// delete all files inside 313 | delFolder(path + "/" + aTempList);// delete the empty folder 314 | } 315 | } 316 | } 317 | 318 | /** 319 | * copy a file 320 | * 321 | * 
@param srcPath String the source path 322 | * @param dstPath String the destination path 323 | */ 324 | public static void copyFile(String srcPath, String dstPath) { 325 | InputStream inputStream = null; 326 | FileOutputStream fileOutputStream = null; 327 | try { 328 | int byteRead; 329 | File srcFile = new File(srcPath); 330 | 331 | if (srcFile.exists()) { // file exists 332 | inputStream = new FileInputStream(srcPath); // read the source file 333 | fileOutputStream = new FileOutputStream(dstPath); 334 | byte[] buffer = new byte[1444]; 335 | while ((byteRead = inputStream.read(buffer)) != -1) { 336 | fileOutputStream.write(buffer, 0, byteRead); 337 | } 338 | } 339 | } catch (Exception e) { 340 | System.out.println("copy file failed"); 341 | e.printStackTrace(); 342 | } finally { 343 | try { 344 | if (fileOutputStream != null) 345 | fileOutputStream.close(); 346 | if (inputStream != null) 347 | inputStream.close(); 348 | } catch (IOException e) { 349 | e.printStackTrace(); 350 | } 351 | } 352 | } 353 | 354 | /** 355 | * copy a folder 356 | * 357 | * @param srcPath String the source path 358 | * @param dstPath String the destination path 359 | */ 360 | public static void copyFolder(String srcPath, String dstPath) { 361 | 362 | try { 363 | (new File(dstPath)).mkdirs(); // if the folder does not exits, create it 364 | File file = new File(srcPath); 365 | String[] fileList = file.list(); 366 | File tempFile; 367 | for (String fileName : fileList) { 368 | if (srcPath.endsWith(File.separator)) { 369 | tempFile = new File(srcPath + fileName); 370 | } else { 371 | tempFile = new File(srcPath + File.separator + fileName); 372 | } 373 | 374 | if (tempFile.isFile()) { 375 | FileInputStream fileInputStream = new FileInputStream(tempFile); 376 | FileOutputStream fileOutputStream = new FileOutputStream(dstPath + "/" + (tempFile.getName())); 377 | byte[] bytes = new byte[1024 * 5]; 378 | int length; 379 | while ((length = fileInputStream.read(bytes)) != -1) { 380 | 
fileOutputStream.write(bytes, 0, length); 381 | } 382 | fileOutputStream.flush(); 383 | fileOutputStream.close(); 384 | fileInputStream.close(); 385 | } 386 | if (tempFile.isDirectory()) { // it is a subdirectory 387 | copyFolder(srcPath + "/" + fileName, dstPath + "/" + fileName); 388 | } 389 | } 390 | } catch (Exception e) { 391 | System.out.println("copy folder failed"); 392 | e.printStackTrace(); 393 | } 394 | } 395 | 396 | /** 397 | * move a file 398 | * 399 | * @param srcPath String the source path 400 | * @param dstPath String the destination path 401 | */ 402 | public static void moveFile(String srcPath, String dstPath) { 403 | copyFile(srcPath, dstPath); 404 | delFile(srcPath); 405 | } 406 | 407 | /** 408 | * move a folder 409 | * 410 | * @param srcPath String the source path 411 | * @param dstPath String the destination path 412 | */ 413 | public static void moveFolder(String srcPath, String dstPath) { 414 | copyFolder(srcPath, dstPath); 415 | delFolder(srcPath); 416 | } 417 | 418 | /** 419 | * create multi-level directory 420 | * 421 | * @param folderPath the path to create multi-level directory 422 | * @param paths directories, split by '|' 423 | * @return String the created directory path 424 | */ 425 | public static String createFolders(String folderPath, String paths) { 426 | String pathString; 427 | try { 428 | String path; 429 | pathString = folderPath; 430 | StringTokenizer stringTokenizer = new StringTokenizer(paths, "|"); 431 | for (; stringTokenizer.hasMoreTokens(); ) { 432 | path = stringTokenizer.nextToken(); 433 | if (pathString.lastIndexOf("/") != -1) { 434 | pathString = newFolder(pathString + path); 435 | } else { 436 | pathString = newFolder(pathString + path + "/"); 437 | } 438 | } 439 | } catch (Exception e) { 440 | System.out.println("create multi-level directory failed"); 441 | pathString = ""; 442 | e.printStackTrace(); 443 | } 444 | return pathString; 445 | } 446 | 447 | /** 448 | * create multi-level directory 449 | * 450 | * 
@param folderPath the multi-level directory to create 451 | * @return String the created directory path 452 | */ 453 | public static String createFolders(String folderPath) { 454 | String pathString = folderPath; 455 | try { 456 | File file = new File(pathString); 457 | if (!file.exists()) { 458 | file.mkdirs(); 459 | } 460 | } catch (Exception e) { 461 | System.out.println("create multi-level directory failed"); 462 | pathString = ""; 463 | e.printStackTrace(); 464 | } 465 | return pathString; 466 | } 467 | 468 | /** 469 | * check if the specified file exists 470 | * 471 | * @param fileName the name of the file to be checked 472 | * @return boolean true if exits, false if not 473 | */ 474 | public static boolean isFileExist(String fileName) { 475 | return new File(fileName).isFile(); 476 | } 477 | 478 | /** 479 | * get all files in a folder 480 | * 481 | * @param path String folder path 482 | * @return List 483 | */ 484 | public static List getAllFiles(String path) { 485 | List fileList = new ArrayList(); 486 | File file = new File(path); 487 | if (!file.exists()) { 488 | return fileList; 489 | } 490 | if (!file.isDirectory()) { 491 | return fileList; 492 | } 493 | String[] tempList = file.list(); 494 | File tempFile; 495 | for (String fileName : tempList) { 496 | if (path.endsWith(File.separator)) { 497 | tempFile = new File(path + fileName); 498 | } else { 499 | tempFile = new File(path + File.separator + fileName); 500 | } 501 | if (tempFile.isFile()) { 502 | fileList.add(tempFile); 503 | } 504 | if (tempFile.isDirectory()) { 505 | List allFiles = getAllFiles(tempFile.getAbsolutePath()); 506 | fileList.addAll(allFiles); 507 | } 508 | } 509 | return fileList; 510 | } 511 | 512 | /** 513 | * get all files with specified suffix in a folder 514 | * 515 | * @param path String folder path 516 | * @param suffix String the specified suffix 517 | * @return List 518 | */ 519 | public static List getAllFiles(String path, String suffix) { 520 | List fileList = new 
ArrayList(); 521 | File file = new File(path); 522 | if (!file.exists()) { 523 | return fileList; 524 | } 525 | if (!file.isDirectory()) { 526 | return fileList; 527 | } 528 | String[] tempList = file.list(); 529 | File tempFile; 530 | for (String fileName : tempList) { 531 | if (path.endsWith(File.separator)) { 532 | tempFile = new File(path + fileName); 533 | } else { 534 | tempFile = new File(path + File.separator + fileName); 535 | } 536 | if (tempFile.isFile()) { 537 | if (suffix == null || "".equals(suffix)) 538 | fileList.add(tempFile); 539 | else { 540 | String filePath = tempFile.getAbsolutePath(); 541 | if (!suffix.equals("")) { 542 | int beginIndex = filePath.lastIndexOf("."); // the last '.' index before suffix 543 | String tempSuffix; 544 | 545 | if (beginIndex != -1) { 546 | tempSuffix = filePath.substring(beginIndex + 1, filePath.length()); 547 | if (tempSuffix.equals(suffix)) { 548 | fileList.add(tempFile); 549 | } 550 | } 551 | } 552 | } 553 | } 554 | if (tempFile.isDirectory()) { 555 | List allFiles = getAllFiles(tempFile.getAbsolutePath(), suffix); 556 | fileList.addAll(allFiles); 557 | } 558 | } 559 | return fileList; 560 | } 561 | 562 | /** 563 | * get all names of file with specified suffix in a folder 564 | * 565 | * @param path String folder path 566 | * @param suffix String the specified suffix 567 | * @param isDepth boolean is need to scan all subdirectories 568 | * @return List 569 | */ 570 | public static List getAllFileNames(String path, String suffix, boolean isDepth) { 571 | List fileNamesList = new ArrayList(); 572 | File file = new File(path); 573 | return listFileName(fileNamesList, file, suffix, isDepth); 574 | } 575 | 576 | private static List listFileName(List fileNamesList, File file, String suffix, boolean isDepth) { 577 | // if is directory, scan all subdirectories by recursion 578 | if (file.isDirectory()) { 579 | File[] fileList = file.listFiles(); 580 | 581 | if (fileList != null) { 582 | for (File tempFile : fileList) { 
583 | if (isDepth || tempFile.isFile()) { 584 | listFileName(fileNamesList, tempFile, suffix, isDepth); 585 | } 586 | } 587 | } 588 | } else { 589 | String filePath = file.getAbsolutePath(); 590 | if (!suffix.equals("")) { 591 | int begIndex = filePath.lastIndexOf("."); // the last '.' index before suffix 592 | String tempSuffix; 593 | 594 | if (begIndex != -1) { 595 | tempSuffix = filePath.substring(begIndex + 1, filePath.length()); 596 | if (tempSuffix.equals(suffix)) { 597 | fileNamesList.add(filePath); 598 | } 599 | } 600 | } else { 601 | fileNamesList.add(filePath); 602 | } 603 | } 604 | return fileNamesList; 605 | } 606 | 607 | /** 608 | * get all file names in a folder 609 | * 610 | * @param path String folder path 611 | * @return List 612 | */ 613 | public static List getAllFileNames(String path) { 614 | List fileNamesList = new ArrayList(); 615 | File file = new File(path); 616 | if (!file.exists()) { 617 | return fileNamesList; 618 | } 619 | if (!file.isDirectory()) { 620 | return fileNamesList; 621 | } 622 | String[] tempList = file.list(); 623 | File tempFile; 624 | for (String fileName : tempList) { 625 | if (path.endsWith(File.separator)) { 626 | tempFile = new File(path + fileName); 627 | } else { 628 | tempFile = new File(path + File.separator + fileName); 629 | } 630 | if (tempFile.isFile()) { 631 | fileNamesList.add(tempFile.getName()); 632 | } 633 | } 634 | return fileNamesList; 635 | } 636 | 637 | /** 638 | * get all file names in a folder 639 | * 640 | * @param path String folder path 641 | * @return Map 642 | */ 643 | public static Map getAllFileNamesByMap(String path) { 644 | Map fileNamesMap = new HashMap(); 645 | File file = new File(path); 646 | if (!file.exists()) { 647 | return fileNamesMap; 648 | } 649 | if (!file.isDirectory()) { 650 | return fileNamesMap; 651 | } 652 | String[] tempList = file.list(); 653 | File tempFile; 654 | for (String fileName : tempList) { 655 | if (path.endsWith(File.separator)) { 656 | tempFile = new File(path 
+ fileName); 657 | } else { 658 | tempFile = new File(path + File.separator + fileName); 659 | } 660 | if (tempFile.isFile()) { 661 | fileNamesMap.put(tempFile.getName(), tempFile.getName()); 662 | } 663 | } 664 | return fileNamesMap; 665 | } 666 | 667 | /** 668 | * get all file names in a folder 669 | * 670 | * @param path String folder path 671 | * @return String[] 672 | */ 673 | public static String[] getAllFileNamesByPath(String path) { 674 | File file = new File(path); 675 | if (!file.exists()) { 676 | return null; 677 | } 678 | if (!file.isDirectory()) { 679 | return null; 680 | } 681 | String[] tempList = file.list(); 682 | List fileList = new ArrayList(); 683 | File tempFile; 684 | for (String fileName : tempList) { 685 | if (path.endsWith(File.separator)) { 686 | tempFile = new File(path + fileName); 687 | } else { 688 | tempFile = new File(path + File.separator + fileName); 689 | } 690 | if (tempFile.isFile()) { 691 | fileList.add(tempFile.getName()); 692 | } 693 | } 694 | return fileList.toArray(new String[fileList.size()]); 695 | } 696 | 697 | /** 698 | * remove suffix of a file 699 | * 700 | * @param fileName file name 701 | * @return String file name without suffix 702 | */ 703 | public static String getNameNoSuffix(String fileName) { 704 | if (fileName.lastIndexOf(".") >= 0) 705 | return fileName.substring(0, fileName.lastIndexOf(".")); 706 | else 707 | return fileName; 708 | } 709 | 710 | /** 711 | * return file name with suffix 712 | * 713 | * @param fileName file path and name 714 | * @return String file name with suffix 715 | */ 716 | public static String getFileName(String fileName) { 717 | String shortFileName = fileName; 718 | shortFileName = shortFileName.replace("\\", "/"); 719 | if (shortFileName.contains("/")) 720 | shortFileName = shortFileName.substring(shortFileName.lastIndexOf("/") + 1, shortFileName.length()); 721 | return shortFileName; 722 | } 723 | 724 | /** 725 | * check if directory exists, if not exist, create it, return false 
if create failed 726 | * 727 | * @param path folder path 728 | * @return boolean 729 | */ 730 | public static boolean isExist(String path) { 731 | File file = new File(path); 732 | if (!file.exists()) { 733 | boolean isSuccess = file.mkdir(); 734 | if (!isSuccess) 735 | createFolders(path); 736 | return isSuccess; 737 | } else { 738 | return true; 739 | } 740 | } 741 | 742 | /** 743 | * check if directory exists 744 | * 745 | * @param path folder path 746 | * @return boolean 747 | */ 748 | public static boolean isExistNotCreate(String path) { 749 | File file = new File(path); 750 | return file.exists(); 751 | } 752 | 753 | /** 754 | * copy a file 755 | * 756 | * @param fileName file name 757 | * @param srcPath source path 758 | * @param dstPath destination path 759 | * @return boolean 760 | */ 761 | public boolean copyTheFile(String fileName, String srcPath, String dstPath) { 762 | boolean isSucceed = false; 763 | 764 | InputStream inputStream = null; 765 | FileOutputStream fileOutputStream = null; 766 | try { 767 | int byteRead; 768 | File srcFile = new File(srcPath + "/" + fileName); 769 | File dstFile = new File(dstPath); 770 | if (!dstFile.exists()) { 771 | dstFile.mkdirs(); 772 | } 773 | if (!srcFile.exists()) { 774 | throw new Exception("the file to copy do not exist"); 775 | } 776 | if (srcFile.exists()) { 777 | inputStream = new FileInputStream(srcPath + "/" + fileName); 778 | fileOutputStream = new FileOutputStream(dstPath + "/" + fileName); 779 | byte[] buffer = new byte[1444]; 780 | while ((byteRead = inputStream.read(buffer)) != -1) { 781 | fileOutputStream.write(buffer, 0, byteRead); 782 | } 783 | } 784 | isSucceed = true; 785 | } catch (Exception e) { 786 | System.out.println("copy file failed"); 787 | e.printStackTrace(); 788 | } finally { 789 | try { 790 | if (fileOutputStream != null) 791 | fileOutputStream.close(); 792 | if (inputStream != null) 793 | inputStream.close(); 794 | } catch (IOException e) { 795 | e.printStackTrace(); 796 | } 797 | } 
798 | return isSucceed; 799 | } 800 | 801 | /** 802 | * move a file 803 | * 804 | * @param fileName file name 805 | * @param srcPath source path 806 | * @param dstPath destination path 807 | * @return boolean 808 | */ 809 | public boolean moveTheFile(String fileName, String srcPath, String dstPath) { 810 | boolean isSucceed = false; 811 | 812 | InputStream inputStream = null; 813 | FileOutputStream fileOutputStream = null; 814 | try { 815 | int byteRead; 816 | File srcFile = new File(srcPath + "/" + fileName); 817 | File dstFile = new File(dstPath); 818 | if (!dstFile.exists()) { 819 | dstFile.mkdirs(); 820 | } 821 | if (!srcFile.exists()) { 822 | throw new Exception("the file to move do not exist"); 823 | } 824 | if (srcFile.exists()) { 825 | inputStream = new FileInputStream(srcPath + "/" + fileName); 826 | fileOutputStream = new FileOutputStream(dstPath + "/" + fileName); 827 | byte[] buffer = new byte[1444]; 828 | while ((byteRead = inputStream.read(buffer)) != -1) { 829 | fileOutputStream.write(buffer, 0, byteRead); 830 | } 831 | } 832 | isSucceed = true; 833 | } catch (Exception e) { 834 | System.out.println("move file failed"); 835 | e.printStackTrace(); 836 | } finally { 837 | try { 838 | if (fileOutputStream != null) 839 | fileOutputStream.close(); 840 | if (inputStream != null) 841 | inputStream.close(); 842 | } catch (IOException e) { 843 | e.printStackTrace(); 844 | } 845 | File deleteFile = new File(srcPath + "/" + fileName); 846 | if (isSucceed) 847 | isSucceed = deleteFile.delete(); 848 | } 849 | return isSucceed; 850 | } 851 | } -------------------------------------------------------------------------------- /src/main/java/com/github/csccoder/csdn2md/util/html2markdown/HTML2Md.java: -------------------------------------------------------------------------------- 1 | package com.github.csccoder.csdn2md.util.html2markdown; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.net.MalformedURLException; 6 | import java.net.URL; 7 
| import java.util.ArrayList; 8 | import java.util.Calendar; 9 | import java.util.List; 10 | import java.util.Map; 11 | import java.util.TreeMap; 12 | 13 | import com.github.csccoder.csdn2md.util.PropertiesUtil; 14 | import org.jsoup.Jsoup; 15 | import org.jsoup.nodes.Document; 16 | import org.jsoup.nodes.Element; 17 | import org.jsoup.nodes.Entities.EscapeMode; 18 | import org.jsoup.nodes.Node; 19 | import org.jsoup.nodes.TextNode; 20 | import org.jsoup.parser.Tag; 21 | import org.jsoup.safety.Cleaner; 22 | import org.jsoup.safety.Whitelist; 23 | import com.github.csccoder.csdn2md.util.FileUtil; 24 | 25 | /** 26 | * Convert Html to MarkDown 27 | */ 28 | public class HTML2Md { 29 | private static int indentation = -1; 30 | private static boolean orderedList = false; 31 | 32 | public static String convert(String theHTML, String baseURL) { 33 | Document doc = Jsoup.parse(theHTML, baseURL); 34 | 35 | return parseDocument(doc); 36 | } 37 | 38 | public static String convert(URL url, int timeoutMillis) throws IOException { 39 | Document doc = Jsoup.parse(url, timeoutMillis); 40 | 41 | return parseDocument(doc); 42 | } 43 | 44 | public static String convertHtml(String html, String charset) throws IOException { 45 | Document doc = Jsoup.parse(html, charset); 46 | 47 | return parseDocument(doc); 48 | } 49 | 50 | public static String convertFile(File file, String charset) throws IOException { 51 | Document doc = Jsoup.parse(file, charset); 52 | 53 | return parseDocument(doc); 54 | } 55 | 56 | public static void htmlToJekyllMd(String htmlPath, String mdPath, String charset) { 57 | try { 58 | List fileList = FilesUtil.getAllFiles(htmlPath, "html"); 59 | for (File file : fileList) { 60 | String mdName = file.getAbsolutePath().replace(htmlPath, mdPath).replace("html", "md"); 61 | String hmPath = mdName.substring(0, mdName.lastIndexOf("/")) + "/"; 62 | String separator = System.getProperty("line.separator"); 63 | String head = "---" + separator + 64 | "layout: post" + separator + 
65 | "title: \"" + file.getName() + "\"" + separator + 66 | "description: \"" + file.getName() + "\"" + separator + 67 | "category: pages\"" + separator + 68 | "tags: [blog]\"" + separator + 69 | "--- " + separator + 70 | "{% include JB/setup %}" + separator 71 | + separator; 72 | FilesUtil.isExist(hmPath); 73 | String parsedText = convertFile(file, charset); 74 | Calendar calendar = Calendar.getInstance(); 75 | String dateName = DateUtil.dateToShortString(calendar.getTime()); 76 | String newName = dateName + "-" + hmPath.replace(mdPath, "").replace("/", "-") + "-" + file.getName(); 77 | String mmName = (hmPath + newName.replace("html", "md")).replaceAll("\\s*", ""); 78 | FilesUtil.newFile(mmName, head + parsedText, charset); 79 | } 80 | } catch (MalformedURLException e) { 81 | e.printStackTrace(); 82 | } catch (IOException e) { 83 | e.printStackTrace(); 84 | } 85 | } 86 | 87 | 88 | public static void htmlToHexoMd(String htmlPath, String mdPath, String charset) { 89 | try { 90 | List fileList = FilesUtil.getAllFiles(htmlPath, "html"); 91 | for (File file : fileList) { 92 | String mdName = file.getAbsolutePath().replace(htmlPath, mdPath).replace("html", "md"); 93 | String hmPath = mdName.substring(0, mdName.lastIndexOf("/")) + "/"; 94 | String separator = System.getProperty("line.separator"); 95 | String[] strings = hmPath.replace(mdPath, "").split("/"); 96 | Calendar calendar = Calendar.getInstance(); 97 | String dateName = DateUtil.dateToShortString(calendar.getTime()); 98 | String dateString = DateUtil.dateToLongString(calendar.getTime()); 99 | StringBuilder blog = new StringBuilder(); 100 | StringBuilder categories = new StringBuilder(); 101 | Map stringMap = new TreeMap(); 102 | for (String value : strings) { 103 | stringMap.put(value, value); 104 | } 105 | for (String tag : stringMap.keySet()) { 106 | blog.append(" - ").append(tag).append(separator); 107 | } 108 | categories.append(strings[0]); 109 | String head = "---" + separator + 110 | "layout: post" + 
separator + 111 | "title: \"" + file.getName().replace(".html", "").split("-")[0] + "\"" + separator + 112 | "date: " + dateString + separator + 113 | "categories: " + categories + separator + 114 | "tags: " + separator + 115 | blog.toString() + 116 | "--- " + separator + 117 | separator; 118 | FilesUtil.isExist(hmPath); 119 | String parsedText = HTML2Md.convertFile(file, "utf-8"); 120 | String newName = dateName + "-" + hmPath.replace(mdPath, "").replace("/", "-") + "-" + file.getName(); 121 | String mmName = (hmPath + newName.replace("html", "md")).replaceAll("\\s*", ""); 122 | FilesUtil.newFile(mmName, head + parsedText, charset); 123 | } 124 | } catch (MalformedURLException e) { 125 | e.printStackTrace(); 126 | } catch (IOException e) { 127 | e.printStackTrace(); 128 | } 129 | } 130 | 131 | private static String parseDocument(Document dirtyDoc) { 132 | indentation = -1; 133 | 134 | String title = dirtyDoc.title(); 135 | 136 | Whitelist whitelist = Whitelist.relaxed(); 137 | Cleaner cleaner = new Cleaner(whitelist); 138 | 139 | Document doc = cleaner.clean(dirtyDoc); 140 | doc.outputSettings().escapeMode(EscapeMode.xhtml); 141 | 142 | if (!title.trim().equals("")) { 143 | return "# " + title + "\n\n" + getTextContent(doc); 144 | } else { 145 | return getTextContent(doc); 146 | } 147 | } 148 | 149 | private static String getTextContent(Element element) { 150 | ArrayList lines = new ArrayList(); 151 | 152 | List children = element.childNodes(); 153 | for (Node child : children) { 154 | if (child instanceof TextNode) { 155 | TextNode textNode = (TextNode) child; 156 | MDLine line = getLastLine(lines); 157 | if (line.getContent().equals("")) { 158 | if (!textNode.isBlank()) { 159 | line.append(textNode.text().replaceAll("#", "/#").replaceAll("\\*", "/\\*")); 160 | } 161 | } else { 162 | line.append(textNode.text().replaceAll("#", "/#").replaceAll("\\*", "/\\*")); 163 | } 164 | 165 | } else if (child instanceof Element) { 166 | Element childElement = (Element) child; 
167 | processElement(childElement, lines); 168 | } else { 169 | System.out.println(); 170 | } 171 | } 172 | 173 | int blankLines = 0; 174 | StringBuilder result = new StringBuilder(); 175 | for (int i = 0; i < lines.size(); i++) { 176 | String line = lines.get(i).toString().trim(); 177 | if (line.equals("")) { 178 | blankLines++; 179 | } else { 180 | blankLines = 0; 181 | } 182 | if (blankLines < 2) { 183 | result.append(line); 184 | if (i < lines.size() - 1) { 185 | result.append("\n"); 186 | } 187 | } 188 | } 189 | 190 | return result.toString(); 191 | } 192 | 193 | private static void processElement(Element element, ArrayList lines) { 194 | Tag tag = element.tag(); 195 | String tagName = tag.getName(); 196 | if (tagName.equals("div")) { 197 | div(element, lines); 198 | } else if (tagName.equals("p")) { 199 | p(element, lines); 200 | } else if (tagName.equals("br")) { 201 | br(lines); 202 | } else if (tagName.matches("^h[0-9]+$")) { 203 | h(element, lines); 204 | } else if (tagName.equals("strong") || tagName.equals("b")) { 205 | strong(element, lines); 206 | } else if (tagName.equals("em")) { 207 | em(element, lines); 208 | } else if (tagName.equals("hr")) { 209 | hr(lines); 210 | } else if (tagName.equals("a")) { 211 | a(element, lines); 212 | } else if (tagName.equals("img")) { 213 | img(element, lines); 214 | } else if (tagName.equals("code")) { 215 | 216 | Element e = (Element)element.parentNode(); 217 | // ` ` 这种code 218 | if (e.tag().getName().equals("p")) { 219 | codeP(element, lines); 220 | } else { 221 | code(element, lines); 222 | } 223 | } else if (tagName.equals("ul")) { 224 | ul(element, lines); 225 | } else if (tagName.equals("ol")) { 226 | ol(element, lines); 227 | } else if (tagName.equals("li")) { 228 | li(element, lines); 229 | } else { 230 | MDLine line = getLastLine(lines); 231 | line.append(getTextContent(element)); 232 | } 233 | } 234 | 235 | private static MDLine getLastLine(ArrayList lines) { 236 | MDLine line; 237 | if (lines.size() > 0) 
{ 238 | line = lines.get(lines.size() - 1); 239 | } else { 240 | line = new MDLine(MDLine.MDLineType.None, 0, ""); 241 | lines.add(line); 242 | } 243 | 244 | return line; 245 | } 246 | 247 | private static void div(Element element, ArrayList lines) { 248 | if (PropertiesUtil.getProperties("copyright").equals("false")) { 249 | if (!(element.text().indexOf("版权声明")>=0)){ 250 | MDLine line = getLastLine(lines); 251 | String content = getTextContent(element); 252 | if (!content.equals("")) { 253 | if (!line.getContent().trim().equals("")) { 254 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 255 | lines.add(new MDLine(MDLine.MDLineType.None, 0, content)); 256 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 257 | } else { 258 | if (!content.trim().equals("")) 259 | line.append(content); 260 | } 261 | } 262 | } 263 | }else { 264 | MDLine line = getLastLine(lines); 265 | String content = getTextContent(element); 266 | if (!content.equals("")) { 267 | if (!line.getContent().trim().equals("")) { 268 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 269 | lines.add(new MDLine(MDLine.MDLineType.None, 0, content)); 270 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 271 | } else { 272 | if (!content.trim().equals("")) 273 | line.append(content); 274 | } 275 | } 276 | } 277 | 278 | 279 | } 280 | 281 | private static void p(Element element, ArrayList lines) { 282 | MDLine line = getLastLine(lines); 283 | if (!line.getContent().trim().equals("")) 284 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 285 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 286 | lines.add(new MDLine(MDLine.MDLineType.None, 0, getTextContent(element))); 287 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 288 | if (!line.getContent().trim().equals("")) 289 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 290 | } 291 | 292 | private static void br(ArrayList lines) { 293 | MDLine line = getLastLine(lines); 294 | if 
(!line.getContent().trim().equals("")) 295 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 296 | } 297 | 298 | private static void h(Element element, ArrayList lines) { 299 | MDLine line = getLastLine(lines); 300 | if (!line.getContent().trim().equals("")) 301 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 302 | 303 | int level = Integer.valueOf(element.tagName().substring(1)); 304 | switch (level) { 305 | case 1: 306 | lines.add(new MDLine(MDLine.MDLineType.Head1, 0, getTextContent(element))); 307 | break; 308 | case 2: 309 | lines.add(new MDLine(MDLine.MDLineType.Head2, 0, getTextContent(element))); 310 | break; 311 | case 4: 312 | lines.add(new MDLine(MDLine.MDLineType.Head4, 0, getTextContent(element))); 313 | break; 314 | case 5: 315 | lines.add(new MDLine(MDLine.MDLineType.Head5, 0, getTextContent(element))); 316 | break; 317 | default: 318 | if (PropertiesUtil.getProperties("anchor").equals("false")){ 319 | if (element.text().indexOf("文章目录")>0){ 320 | lines.add(new MDLine(MDLine.MDLineType.Head3, 0, getTextContent(element))); 321 | } 322 | }else{ 323 | lines.add(new MDLine(MDLine.MDLineType.Head3, 0, getTextContent(element))); 324 | } 325 | 326 | break; 327 | } 328 | 329 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 330 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 331 | } 332 | 333 | private static void strong(Element element, ArrayList lines) { 334 | MDLine line = getLastLine(lines); 335 | line.append("**"); 336 | line.append(getTextContent(element)); 337 | line.append("**"); 338 | } 339 | 340 | private static void em(Element element, ArrayList lines) { 341 | MDLine line = getLastLine(lines); 342 | line.append("*"); 343 | line.append(getTextContent(element)); 344 | line.append("*"); 345 | } 346 | 347 | private static void hr(ArrayList lines) { 348 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 349 | lines.add(new MDLine(MDLine.MDLineType.HR, 0, "")); 350 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 
351 | } 352 | 353 | private static void a(Element element, ArrayList lines) { 354 | if (PropertiesUtil.getProperties("anchor").equals("false")) { 355 | if (!element.attr("href").equals("")){ 356 | MDLine line = getLastLine(lines); 357 | line.append("["); 358 | line.append(getTextContent(element)); 359 | line.append("]"); 360 | line.append("("); 361 | String url = element.attr("href"); 362 | line.append(url); 363 | String title = element.attr("title"); 364 | if (!title.equals("")) { 365 | line.append(" \""); 366 | line.append(title); 367 | line.append("\""); 368 | } 369 | line.append(")"); 370 | } 371 | }else{ 372 | MDLine line = getLastLine(lines); 373 | line.append("["); 374 | line.append(getTextContent(element)); 375 | line.append("]"); 376 | line.append("("); 377 | String url = element.attr("href"); 378 | line.append(url); 379 | String title = element.attr("title"); 380 | if (!title.equals("")) { 381 | line.append(" \""); 382 | line.append(title); 383 | line.append("\""); 384 | } 385 | line.append(")"); 386 | } 387 | } 388 | 389 | private static void img(Element element, ArrayList lines) { 390 | MDLine line = getLastLine(lines); 391 | 392 | line.append("!["); 393 | String alt = element.attr("alt"); 394 | line.append(alt); 395 | line.append("]"); 396 | line.append("("); 397 | String url = element.attr("src"); 398 | String fix = FileUtil.getPicture(url); 399 | 400 | String url_new = "../images/" + fix; 401 | 402 | line.append(url_new); 403 | String title = element.attr("title"); 404 | if (!title.equals("")) { 405 | line.append(" \""); 406 | line.append(title); 407 | line.append("\""); 408 | } 409 | line.append(")"); 410 | } 411 | 412 | 413 | private static void code(Element element, ArrayList lines) { 414 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 415 | MDLine line = new MDLine(MDLine.MDLineType.None, 0, "```js \n"); 416 | // line.append(getTextContent(element).replace("\n", " ")); 417 | line.append(element.text()+"\n"); 418 | line.append("```"); 419 
| lines.add(line); 420 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 421 | } 422 | 423 | private static void codeP(Element element, ArrayList lines) { 424 | MDLine line = getLastLine(lines); 425 | line.append("`"+getTextContent(element)+"`"); 426 | } 427 | 428 | private static void ul(Element element, ArrayList lines) { 429 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 430 | indentation++; 431 | orderedList = false; 432 | MDLine line = new MDLine(MDLine.MDLineType.None, 0, ""); 433 | line.append(getTextContent(element)); 434 | lines.add(line); 435 | indentation--; 436 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 437 | } 438 | 439 | private static void ol(Element element, ArrayList lines) { 440 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 441 | indentation++; 442 | orderedList = true; 443 | MDLine line = new MDLine(MDLine.MDLineType.None, 0, ""); 444 | line.append(getTextContent(element)); 445 | lines.add(line); 446 | indentation--; 447 | lines.add(new MDLine(MDLine.MDLineType.None, 0, "")); 448 | } 449 | 450 | private static void li(Element element, ArrayList lines) { 451 | 452 | if (PropertiesUtil.getProperties("anchor").equals("false")) { 453 | if (!element.children().attr("href").equals("")) { 454 | 455 | MDLine line; 456 | if (orderedList) { 457 | line = new MDLine(MDLine.MDLineType.Ordered, indentation, 458 | getTextContent(element)); 459 | } else { 460 | line = new MDLine(MDLine.MDLineType.Unordered, indentation, 461 | getTextContent(element)); 462 | } 463 | lines.add(line); 464 | } 465 | }else{ 466 | MDLine line; 467 | if (orderedList) { 468 | line = new MDLine(MDLine.MDLineType.Ordered, indentation, 469 | getTextContent(element)); 470 | } else { 471 | line = new MDLine(MDLine.MDLineType.Unordered, indentation, 472 | getTextContent(element)); 473 | } 474 | lines.add(line); 475 | } 476 | 477 | 478 | } 479 | // 480 | // private static void pre(Element element, ArrayList lines) { 481 | // 482 | // lines.add(new 
MDLine(MDLine.MDLineType.None, 0, "```")); 483 | // MDLine line = new MDLine(MDLine.MDLineType.None, 0, ""); 484 | // String in = getTextContent(element); 485 | // line.append(in); 486 | // lines.add(line); 487 | // lines.add(new MDLine(MDLine.MDLineType.None, 0, "```")); 488 | // 489 | // } 490 | } 491 | -------------------------------------------------------------------------------- /src/main/java/com/github/csccoder/csdn2md/util/html2markdown/MDLine.java: -------------------------------------------------------------------------------- 1 | package com.github.csccoder.csdn2md.util.html2markdown; 2 | 3 | public class MDLine { 4 | private int level = 0; 5 | private MDLineType type; 6 | private StringBuilder content; 7 | 8 | public MDLine(MDLineType type, int level, String content) { 9 | this.type = type; 10 | this.level = level; 11 | this.content = new StringBuilder(content); 12 | } 13 | 14 | public MDLine create(String line) { 15 | int spaces = 0; 16 | while ((spaces < line.length()) && (line.charAt(spaces) == ' ')) { 17 | spaces++; 18 | } 19 | String content = line.substring(spaces); 20 | 21 | int newLevel = spaces / 4; 22 | 23 | if (content.length() > 0) { 24 | if (content.matches("^[0-9]+\\.\\s.*")) { 25 | int c = 0; 26 | while ((c < content.length()) && (Character.isDigit(content.charAt(c)))) { 27 | c++; 28 | } 29 | return new MDLine(MDLineType.Ordered, newLevel, content.substring(c + 2)); 30 | } else if (content.matches("^(\\*|\\+|\\-)\\s.*")) { 31 | return new MDLine(MDLineType.Unordered, newLevel, content.substring(2)); 32 | } else if (content.matches("^[#]+.*")) { 33 | int c = 0; 34 | while ((c < content.length()) && (content.charAt(c) == '#')) { 35 | c++; 36 | } 37 | MDLineType headerType; 38 | switch (c) { 39 | case 1: 40 | headerType = MDLineType.Head1; 41 | break; 42 | case 2: 43 | headerType = MDLineType.Head2; 44 | break; 45 | case 4: 46 | headerType = MDLineType.Head4; 47 | break; 48 | case 5: 49 | headerType = MDLineType.Head5; 50 | break; 51 | 
default: 52 | headerType = MDLineType.Head3; 53 | break; 54 | } 55 | 56 | while ((c < content.length()) && (content.charAt(c) == ' ')) { 57 | c++; 58 | } 59 | 60 | return new MDLine(headerType, newLevel, content.substring(c)); 61 | } 62 | } 63 | 64 | content = line.substring(4 * newLevel); 65 | 66 | return new MDLine(MDLineType.None, newLevel, content); 67 | } 68 | 69 | public MDLineType getListTypeName() { 70 | return type; 71 | } 72 | 73 | public int getLevel() { 74 | return level; 75 | } 76 | 77 | public void setLevel(int i) { 78 | level = Math.max(i, 0); 79 | } 80 | 81 | public String toString() { 82 | StringBuilder newLine = new StringBuilder(); 83 | for (int j = 0; j < getLevel(); j++) { 84 | newLine.append(" "); 85 | } 86 | 87 | if (type.equals(MDLineType.Ordered)) { 88 | newLine.append(String.valueOf(1)).append(". "); 89 | } else if (type.equals(MDLineType.Unordered)) { 90 | newLine.append("* "); 91 | } else if (type.equals(MDLineType.Head1)) { 92 | newLine.append("# "); 93 | } else if (type.equals(MDLineType.Head2)) { 94 | newLine.append("## "); 95 | } else if (type.equals(MDLineType.Head3)) { 96 | newLine.append("### "); 97 | } else if (type.equals(MDLineType.Head4)) { 98 | newLine.append("#### "); 99 | } else if (type.equals(MDLineType.Head5)) { 100 | newLine.append("##### "); 101 | } else if (type.equals(MDLineType.HR)) { 102 | newLine.append("----"); 103 | } 104 | 105 | String contentStr = getContent(); 106 | if (type.equals(MDLineType.Unordered)) { 107 | contentStr = contentStr.replaceAll("^\n", ""); 108 | } 109 | newLine.append(contentStr); 110 | 111 | return newLine.toString(); 112 | } 113 | 114 | public String getContent() { 115 | return content.toString(); 116 | } 117 | 118 | public void append(String appendContent) { 119 | if (content.length() == 0) { 120 | int i = 0; 121 | while (i < appendContent.length() && Character.isWhitespace(appendContent.charAt(i))) { 122 | i++; 123 | } 124 | content.append(appendContent.substring(i)); 125 | } else { 126 
| content.append(appendContent); 127 | } 128 | } 129 | 130 | @Override 131 | public boolean equals(Object o) { 132 | return o instanceof MDLine && ((MDLine) o).type.equals(this.type); 133 | } 134 | 135 | public boolean isList() { 136 | return (type.equals(MDLineType.Ordered) || type.equals(MDLineType.Unordered)); 137 | } 138 | 139 | public void setListType(MDLineType type2) { 140 | type = type2; 141 | } 142 | 143 | public enum MDLineType { 144 | Ordered, Unordered, None, Head1, Head2, Head3, Head4, Head5, HR 145 | } 146 | } -------------------------------------------------------------------------------- /src/main/java/com/github/csccoder/csdn2md/util/html2markdown/Testbed.java: -------------------------------------------------------------------------------- 1 | package com.github.csccoder.csdn2md.util.html2markdown; 2 | 3 | import java.io.IOException; 4 | import java.net.MalformedURLException; 5 | import java.net.URL; 6 | 7 | public class Testbed { 8 | public static void main(String[] args) { 9 | URL url; 10 | try { 11 | url = new URL("http://jsoup.org/"); 12 | String parsedText = HTML2Md.convert(url, 30000); 13 | System.out.println(parsedText); 14 | 15 | // test parse local html file 16 | // String pathFile = "test.html"; 17 | // File f = new File(pathFile); 18 | // String parsedFileText = HTML2Md.convertFile(f, "gbk"); 19 | // System.out.println(parsedFileText); 20 | 21 | System.out.println("done"); 22 | } catch (MalformedURLException e) { 23 | e.printStackTrace(); 24 | } catch (IOException e) { 25 | e.printStackTrace(); 26 | } 27 | } 28 | } -------------------------------------------------------------------------------- /src/main/resources/config.properties: -------------------------------------------------------------------------------- 1 | # 是否渲染hexo头部 2 | head=false 3 | 4 | # md文章头部配置 5 | #分类和标签逗号隔开 6 | title= 7 | author=Leesin.Dong 8 | tags=a,b,c 9 | categories=a,b,c 10 | 11 | # 文件命名规则 可选值:date title ,date根据日期命名,title根据文章名命名 12 | MdFileName_type=title 13 | 
14 | 15 | # csdn host 这里如果没有自定义域名的话不用改 16 | csdn_host=https://blog.csdn.net 17 | # csdn用户名 18 | casn_name=dataiyangu 19 | 20 | 21 | # 文件保存的绝对路径,即img html post这三个文件夹的父文件夹 22 | file_Path=/Users/dzsb-002294/Desktop/hexo_blog_md 23 | # 设置下载的文件夹名字 24 | html_path=html 25 | image_path=images 26 | md_path=_posts 27 | 28 | 29 | # 设置下载的方式,默认是从第一页往后不断的下载的 30 | #可选的方式: 31 | # 1 默认轮询从第一页开始往后 32 | # 2 专栏方式 33 | # 3 指定某篇文章 34 | # 4 指定页数 35 | # 5 分类 36 | url_way=1 37 | # 具体的五种抓取方法的地址填写(视自己情况而定) 38 | # 比如 我的: https://blog.csdn.net/dataiyangu/article/category/8118370 39 | # 下面的几种情况只写 https://blog.csdn.net/dataiyangu 后面的内容即可,后面写什么自己手动点到相应的页面粘贴过来就行了 40 | url_way_1=/article/list/ 41 | url_way_2=/column/info/32118 42 | url_way_3=/article/details/97544551 43 | url_way_4=/article/list/2 44 | url_way_5=/article/category/8118370 45 | 46 | #是否显示csdn中的锚点"文章目录"字样,以及下面具体的锚点 默认false 47 | anchor=false 48 | #是否开启版权声明 默认false 49 | copyright=false 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /src/main/resources/img/blog_csdn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/src/main/resources/img/blog_csdn.png -------------------------------------------------------------------------------- /src/main/resources/img/blog_hexo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/src/main/resources/img/blog_hexo.png -------------------------------------------------------------------------------- /src/main/resources/img/dir_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/src/main/resources/img/dir_0.png 
#!/bin/zsh
# Usage: ./start.sh <tags> <categories> <title>
#
# Rewrites the tags=, categories= and title= entries of config.properties in
# the current directory, then runs the converter jar.
# Note: `sed -i ""` is the BSD/macOS in-place form.

# Escape the characters that are special inside a sed replacement that uses
# '#' as its delimiter (backslash, '&' and '#'), so values are written literally.
escape_sed_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&#]/\\&/g'
}

tags=$(escape_sed_replacement "$1")
categories=$(escape_sed_replacement "$2")
title=$(escape_sed_replacement "$3")

sed -i "" "s#^tags=.*#tags=${tags}#g" config.properties
sed -i "" "s#^categories=.*#categories=${categories}#g" config.properties
# Fixed: this used to write "tags=$3", replacing the title line with a second tags line.
sed -i "" "s#^title=.*#title=${title}#g" config.properties

java -jar csdn2hexo-1.0-SNAPSHOT.jar
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/FileUtil.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/HexoMdUtil.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/HexoMdUtil.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/HttpClientUtil$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/HttpClientUtil$1.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/HttpClientUtil.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/HttpClientUtil.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/PropertiesUtil.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/PropertiesUtil.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/ProxyIP.class: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/ProxyIP.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/RegexUtil.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/RegexUtil.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/html2markdown/DateUtil.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/DateUtil.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/html2markdown/FilesUtil.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/FilesUtil.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/html2markdown/HTML2Md.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/HTML2Md.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/html2markdown/MDLine$MDLineType.class: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/MDLine$MDLineType.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/html2markdown/MDLine.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/MDLine.class -------------------------------------------------------------------------------- /target/classes/com/github/csccoder/csdn2md/util/html2markdown/Testbed.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/Testbed.class -------------------------------------------------------------------------------- /target/classes/config.properties: -------------------------------------------------------------------------------- 1 | # 是否渲染hexo头部 2 | head=false 3 | 4 | # md文章头部配置 5 | #分类和标签逗号隔开 6 | title= 7 | author=Leesin.Dong 8 | tags=a,b,c 9 | categories=a,b,c 10 | 11 | # 文件命名规则 可选值:date title ,date根据日期命名,title根据文章名命名 12 | MdFileName_type=title 13 | 14 | 15 | # csdn host 这里如果没有自定义域名的话不用改 16 | csdn_host=https://blog.csdn.net 17 | # csdn用户名 18 | casn_name=dataiyangu 19 | 20 | 21 | # 文件保存的绝对路径,即img html post这三个文件夹的父文件夹 22 | file_Path=/Users/dzsb-002294/Desktop/hexo_blog_md 23 | # 设置下载的文件夹名字 24 | html_path=html 25 | image_path=images 26 | md_path=_posts 27 | 28 | 29 | # 设置下载的方式,默认是从第一页往后不断的下载的 30 | #可选的方式: 31 | # 1 默认轮询从第一页开始往后 32 | # 2 专栏方式 33 | # 3 指定某篇文章 34 | # 4 指定页数 35 | # 5 分类 36 | url_way=1 37 | # 具体的五种抓取方法的地址填写(视自己情况而定) 38 | # 
比如 我的: https://blog.csdn.net/dataiyangu/article/category/8118370 39 | # 下面的几种情况只写 https://blog.csdn.net/dataiyangu 后面的内容即可,后面写什么自己手动点到相应的页面粘贴过来就行了 40 | url_way_1=/article/list/ 41 | url_way_2=/column/info/32118 42 | url_way_3=/article/details/97544551 43 | url_way_4=/article/list/2 44 | url_way_5=/article/category/8118370 45 | 46 | #是否显示csdn中的锚点"文章目录"字样,以及下面具体的锚点 默认false 47 | anchor=false 48 | #是否开启版权声明 默认false 49 | copyright=false 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /target/classes/img/blog_csdn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/blog_csdn.png -------------------------------------------------------------------------------- /target/classes/img/blog_hexo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/blog_hexo.png -------------------------------------------------------------------------------- /target/classes/img/dir_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/dir_0.png -------------------------------------------------------------------------------- /target/classes/img/dir_html.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/dir_html.png -------------------------------------------------------------------------------- /target/classes/img/dir_md.png: -------------------------------------------------------------------------------- 
#!/bin/zsh
# Usage: ./start.sh <tags> <categories> <title>
#
# Rewrites the tags=, categories= and title= entries of config.properties in
# the current directory, then runs the converter jar.
# Note: `sed -i ""` is the BSD/macOS in-place form.

# Escape the characters that are special inside a sed replacement that uses
# '#' as its delimiter (backslash, '&' and '#'), so values are written literally.
escape_sed_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&#]/\\&/g'
}

tags=$(escape_sed_replacement "$1")
categories=$(escape_sed_replacement "$2")
title=$(escape_sed_replacement "$3")

sed -i "" "s#^tags=.*#tags=${tags}#g" config.properties
sed -i "" "s#^categories=.*#categories=${categories}#g" config.properties
# Fixed: this used to write "tags=$3", replacing the title line with a second tags line.
sed -i "" "s#^title=.*#title=${title}#g" config.properties

java -jar csdn2hexo-1.0-SNAPSHOT.jar
csdn_host=https://blog.csdn.net 14 | # csdn用户名 15 | casn_name=dataiyangu 16 | 17 | 18 | # 文件保存的绝对路径,即img html post这三个文件夹的父文件夹 19 | file_Path=/Users/leesin/Desktop/hexo_blog_md 20 | # 设置下载的文件夹名字 21 | html_path=html 22 | image_path=images 23 | md_path=_posts 24 | 25 | 26 | # 设置下载的方式,默认是从第一页往后不断的下载的 27 | #可选的方式: 28 | # 1 默认轮询从第一页开始往后 29 | # 2 专栏方式 30 | # 3 指定某篇文章 31 | # 4 指定页数 32 | # 5 分类 33 | url_way=2 34 | # 具体的五种抓取方法的地址填写(视自己情况而定) 35 | # 比如 我的: https://blog.csdn.net/dataiyangu/article/category/8118370 36 | # 下面的几种情况只写 https://blog.csdn.net/dataiyangu 后面的内容即可,后面写什么自己手动点到相应的页面粘贴过来就行了 37 | url_way_1=/article/list/ 38 | url_way_2=/column/info/32118 39 | url_way_3=/article/details/88525801 40 | url_way_4=/article/list/2 41 | url_way_5=/article/category/8118370 42 | 43 | #是否显示csdn中的锚点"文章目录"字样,以及下面具体的锚点 默认false 44 | anchor=false 45 | #是否开启版权声明 默认false 46 | copyright=false 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /target/csdn2hexo-1.0-SNAPSHOT.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/csdn2hexo-1.0-SNAPSHOT.jar -------------------------------------------------------------------------------- /target/dependency-reduced-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | com.github.csccoder 5 | csdn2hexo 6 | 1.0-SNAPSHOT 7 | 8 | 9 | 10 | true 11 | src/main/java 12 | 13 | **/*.properties 14 | **/*.xml 15 | **/*.sh 16 | 17 | 18 | 19 | true 20 | src/main/resources 21 | 22 | **/* 23 | **/* 24 | 25 | 26 | 27 | 28 | 29 | maven-shade-plugin 30 | 1.2.1 31 | 32 | 33 | package 34 | 35 | shade 36 | 37 | 38 | 39 | 40 | com.github.csccoder.csdn2md.Main 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | maven-resources-plugin 49 | 2.7 50 | 51 | 52 | copy-resources 53 | package 54 | 55 | copy-resources 56 | 57 | 58 | 
${project.build.directory} 59 | 60 | 61 | src/main/resources 62 | 63 | **/*.xml 64 | **/*.conf 65 | **/*.properties 66 | **/*.sh 67 | 68 | true 69 | 70 | 71 | UTF-8 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /target/maven-archiver/pom.properties: -------------------------------------------------------------------------------- 1 | #Generated by Maven 2 | #Mon Mar 18 12:19:57 CST 2019 3 | version=1.0-SNAPSHOT 4 | groupId=com.github.csccoder 5 | artifactId=csdn2hexo 6 | -------------------------------------------------------------------------------- /target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst: -------------------------------------------------------------------------------- 1 | com/github/csccoder/csdn2md/util/html2markdown/HTML2Md.class 2 | com/github/csccoder/csdn2md/util/html2markdown/FilesUtil.class 3 | com/github/csccoder/csdn2md/util/HttpClientUtil$1.class 4 | com/github/csccoder/csdn2md/util/html2markdown/MDLine$MDLineType.class 5 | com/github/csccoder/csdn2md/util/html2markdown/MDLine.class 6 | com/github/csccoder/csdn2md/model/Article.class 7 | com/github/csccoder/csdn2md/util/ProxyIP.class 8 | com/github/csccoder/csdn2md/util/FileUtil.class 9 | com/github/csccoder/csdn2md/util/html2markdown/Testbed.class 10 | com/github/csccoder/csdn2md/util/HttpClientUtil.class 11 | com/github/csccoder/csdn2md/util/HexoMdUtil.class 12 | com/github/csccoder/csdn2md/util/RegexUtil.class 13 | com/github/csccoder/csdn2md/util/html2markdown/DateUtil.class 14 | com/github/csccoder/csdn2md/util/PropertiesUtil.class 15 | com/github/csccoder/csdn2md/paser/CorePaser.class 16 | com/github/csccoder/csdn2md/paser/ArticlePaser.class 17 | com/github/csccoder/csdn2md/Main.class 18 | -------------------------------------------------------------------------------- /target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst: 
-------------------------------------------------------------------------------- 1 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/paser/ArticlePaser.java 2 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/HTML2Md.java 3 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/HexoMdUtil.java 4 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/MDLine.java 5 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/HttpClientUtil.java 6 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/RegexUtil.java 7 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/FileUtil.java 8 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/FilesUtil.java 9 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/Main.java 10 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/Testbed.java 11 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/model/Article.java 12 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/DateUtil.java 13 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/paser/CorePaser.java 14 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/ProxyIP.java 15 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/PropertiesUtil.java 16 | -------------------------------------------------------------------------------- 
#!/bin/zsh
# Usage: ./start.sh <tags> <categories> <title>
#
# Rewrites the tags=, categories= and title= entries of config.properties in
# the current directory, then runs the converter jar.
# Note: `sed -i ""` is the BSD/macOS in-place form.

# Escape the characters that are special inside a sed replacement that uses
# '#' as its delimiter (backslash, '&' and '#'), so values are written literally.
escape_sed_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&#]/\\&/g'
}

tags=$(escape_sed_replacement "$1")
categories=$(escape_sed_replacement "$2")
title=$(escape_sed_replacement "$3")

sed -i "" "s#^tags=.*#tags=${tags}#g" config.properties
sed -i "" "s#^categories=.*#categories=${categories}#g" config.properties
# Fixed: this used to write "tags=$3", replacing the title line with a second tags line.
sed -i "" "s#^title=.*#title=${title}#g" config.properties

java -jar csdn2hexo-1.0-SNAPSHOT.jar