├── .DS_Store
├── .idea
├── compiler.xml
├── csdn2hexo.iml
├── inspectionProfiles
│ └── Project_Default.xml
├── jarRepositories.xml
├── misc.xml
├── vcs.xml
└── workspace.xml
├── README.md
├── out
└── .DS_Store
├── pom.xml
├── src
└── main
│ ├── java
│ └── com
│ │ └── github
│ │ └── csccoder
│ │ └── csdn2md
│ │ ├── Main.java
│ │ ├── model
│ │ └── Article.java
│ │ ├── paser
│ │ ├── ArticlePaser.java
│ │ └── CorePaser.java
│ │ └── util
│ │ ├── FileUtil.java
│ │ ├── HexoMdUtil.java
│ │ ├── HttpClientUtil.java
│ │ ├── PropertiesUtil.java
│ │ ├── ProxyIP.java
│ │ ├── RegexUtil.java
│ │ └── html2markdown
│ │ ├── DateUtil.java
│ │ ├── FilesUtil.java
│ │ ├── HTML2Md.java
│ │ ├── MDLine.java
│ │ └── Testbed.java
│ └── resources
│ ├── config.properties
│ ├── img
│ ├── blog_csdn.png
│ ├── blog_hexo.png
│ ├── dir_0.png
│ ├── dir_html.png
│ ├── dir_md.png
│ ├── html_detail.png
│ ├── markdown_detail.png
│ └── run_out.png
│ └── start.sh
└── target
├── classes
├── com
│ └── github
│ │ └── csccoder
│ │ └── csdn2md
│ │ ├── Main.class
│ │ ├── model
│ │ └── Article.class
│ │ ├── paser
│ │ ├── ArticlePaser.class
│ │ └── CorePaser.class
│ │ └── util
│ │ ├── FileUtil.class
│ │ ├── HexoMdUtil.class
│ │ ├── HttpClientUtil$1.class
│ │ ├── HttpClientUtil.class
│ │ ├── PropertiesUtil.class
│ │ ├── ProxyIP.class
│ │ ├── RegexUtil.class
│ │ └── html2markdown
│ │ ├── DateUtil.class
│ │ ├── FilesUtil.class
│ │ ├── HTML2Md.class
│ │ ├── MDLine$MDLineType.class
│ │ ├── MDLine.class
│ │ └── Testbed.class
├── config.properties
├── img
│ ├── blog_csdn.png
│ ├── blog_hexo.png
│ ├── dir_0.png
│ ├── dir_html.png
│ ├── dir_md.png
│ ├── html_detail.png
│ ├── markdown_detail.png
│ └── run_out.png
└── start.sh
├── config.properties
├── csdn2hexo-1.0-SNAPSHOT.jar
├── dependency-reduced-pom.xml
├── maven-archiver
└── pom.properties
├── maven-status
└── maven-compiler-plugin
│ └── compile
│ └── default-compile
│ ├── createdFiles.lst
│ └── inputFiles.lst
├── original-csdn2hexo-1.0-SNAPSHOT.jar
└── start.sh
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/.DS_Store
--------------------------------------------------------------------------------
/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/.idea/csdn2hexo.iml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/.idea/jarRepositories.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 | 1605885898842
144 |
145 |
146 | 1605885898842
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/util/FileUtil.java
234 | 81
235 |
236 |
237 |
238 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/util/HexoMdUtil.java
239 | 49
240 |
241 |
242 |
243 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/paser/CorePaser.java
244 | 82
245 |
246 |
247 |
248 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/paser/ArticlePaser.java
249 | 33
250 |
251 |
252 |
253 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/util/HexoMdUtil.java
254 | 26
255 |
256 |
257 |
258 | file://$PROJECT_DIR$/src/main/java/com/github/csccoder/csdn2md/util/FileUtil.java
259 | 95
260 |
261 |
262 |
263 |
264 |
265 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # 功能
4 | 概述:利用爬虫爬取指定用户的CSDN博客文章转为md格式,目的是完成博客迁移
5 |
6 | ## 爬取的方式:
7 | 1 默认轮询从第一页开始往后
8 | 2 专栏方式
9 | 3 指定某篇文章
10 | 4 指定页数
11 | 5 分类
12 | ## 设置生成的md文件命名规则:
13 | 可选值:date title ,date根据日期命名,title根据文章名命名
14 |
15 | ## 设置md文件的头部信息
16 | title=
17 | author=
18 | tags=
19 | categories=
20 | ## 是否显示csdn中的锚点"文章目录"字样,以及下面具体的锚点 默认false(因为csdn中是集成了[toc]功能的,hexo并没有集成)
21 | anchor=false
22 | ## 是否开启版权声明 默认false(csdn文章头部有我们自定的版权声明,false即为去掉。)
23 | copyright=false
24 |
25 | # 工具
26 | [html2markdown](https://github.com/pnikosis/jHTML2Md)
27 | Jsoup
28 |
29 | # 提示
30 | - 运行过程中可能出现异常(小概率): 因为csdn有反爬虫机制,如果迁移过程中出现”应该是被反爬虫了,换个wifi或者网络试试~~~~~~~~~~~~~~~~~~~~~~~ “,那你就照着我打的这个日志去做吧,哈哈
31 | - 运行./start.sh 之前需要给这个脚本赋予权限,执行 `chmod 777 ./start.sh`
32 | - 需要jdk1.8
33 |
34 | # 用法:
35 | ## 方式一:
36 | git clone 到本地,进入target目录,修改config.properties中的某些配置(视自己情况而修改)
37 | 运行
38 | ```js
39 | java -jar csdn2hexo-1.0-SNAPSHOT.jar
40 | ```
41 | ## 方式二:
42 | git clone 到本地,进入target目录,修改config.properties中的某些配置(视自己情况而修改)
43 | 运行 ./start.sh a,b,c 1,2,3 articleName tips:这里的a,b,c 是hexo标签,逗号隔开 1,2,3是hexo分类,逗号隔开 articleName是文章标题,不写的话默认标题是csdn的文章标题。这三个参数如果不写的话默认是配置文件中的内容
44 | ## 方式三:
45 | git clone 到本地,通过idea import本project,
46 | 修改src/main/resources目录中的config.properties文件(视自己情况而修改)
47 | 修改读取配置文件的路径:找到com.github.csccoder.csdn2md.util.PropertiesUtil
48 | ```js
49 | package com.github.csccoder.csdn2md.util;
50 |
51 | public class PropertiesUtil {
52 | public static String getProperties(String key){
53 | String value = null;
54 | try {
55 | Properties pp = new Properties();
56 |
57 | //通过idea运行程序
58 | InputStream inputStream = Main.class.getClassLoader().getResourceAsStream("config.properties");
59 | //通过java -jar 或者 ./start.sh运行程序
60 | //String filePath = System.getProperty("user.dir")+ "/config.properties";
61 | //InputStream inputStream = new BufferedInputStream(new FileInputStream(filePath));
62 |
63 | pp.load(inputStream);
64 | value= (String) pp.get(key);
65 | return value;
66 | } catch (IOException e) {
67 | e.printStackTrace();
68 | return value;
69 | }
70 | }
71 | }
72 | ```
73 | 找到 Main 这个类运行即可
74 | # 展示
75 |
76 | 详细效果展示:[利用爬虫爬取指定用户的CSDN博客文章转为md格式,目的是完成博客迁移博文到Hexo等静态博客](https://blog.csdn.net/dataiyangu/article/details/88637312)
77 |
78 |
79 | 查看头部信息
80 | 
81 |
82 |
83 | 查看头部信息显示效果
84 | 
85 | 
86 | # 配置文件信息展示
87 | ```js
88 | # md文章头部配置
89 | #分类和标签逗号隔开
90 | title=
91 | author=Leesin.Dong
92 | tags=a,b,c
93 | categories=a,b,c
94 |
95 | # 文件命名规则 可选值:date title ,date根据日期命名,title根据文章名命名
96 | MdFileName_type=date
97 |
98 |
99 | # csdn host 这里如果没有自定义域名的话不用改
100 | csdn_host=https://blog.csdn.net
101 | # csdn用户名
102 | casn_name=dataiyangu
103 |
104 |
105 | # 文件保存的绝对路径,即img html post这三个文件夹的父文件夹
106 | file_Path=/Users/leesin/Desktop/hexo_blog_md
107 | # 设置下载的文件夹名字
108 | html_path=html
109 | image_path=images
110 | md_path=_posts
111 |
112 |
113 | # 设置下载的方式,默认是从第一页往后不断的下载的
114 | #可选的方式:
115 | # 1 默认轮询从第一页开始往后
116 | # 2 专栏方式
117 | # 3 指定某篇文章
118 | # 4 指定页数
119 | # 5 分类
120 | url_way=2
121 | # 具体的五种抓取方法的地址填写(视自己情况而定)
122 | # 比如 我的: https://blog.csdn.net/dataiyangu/article/category/8118370
123 | # 下面的几种情况只写 https://blog.csdn.net/dataiyangu 后面的内容即可,后面写什么自己手动点到相应的页面粘贴过来就行了
124 | url_way_1=/article/list/
125 | url_way_2=/column/info/32118
126 | url_way_3=/article/details/88525801
127 | url_way_4=/article/list/2
128 | url_way_5=/article/category/8118370
129 |
130 | #是否显示csdn中的锚点"文章目录"字样,以及下面具体的锚点 默认false
131 | anchor=false
132 | #是否开启版权声明 默认false
133 | copyright=false
134 | ```
135 |
--------------------------------------------------------------------------------
/out/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/out/.DS_Store
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | com.github.csccoder
8 | csdn2hexo
9 | 1.0-SNAPSHOT
10 |
11 |
12 |
13 |
14 | org.apache.httpcomponents
15 | httpclient
16 |
17 | 4.5.7
18 |
19 |
20 |
21 | org.jsoup
22 | jsoup
23 | 1.11.2
24 |
25 |
26 |
27 | junit
28 | junit
29 | RELEASE
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 | org.apache.maven.plugins
60 | maven-resources-plugin
61 | 2.7
62 |
63 |
64 | copy-resources
65 |
66 | package
67 |
68 | copy-resources
69 |
70 |
71 |
72 | ${project.build.directory}
73 |
74 |
75 | src/main/resources
76 |
77 | **/*.xml
78 | **/*.conf
79 | **/*.properties
80 | **/*.sh
81 |
82 | true
83 |
84 |
85 | UTF-8
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 | src/main/java
97 |
98 | **/*.properties
99 | **/*.xml
100 | **/*.sh
101 |
102 | true
103 |
104 |
105 | src/main/resources
106 |
107 | **/*
108 | **/*
109 |
110 | true
111 |
112 |
113 |
114 |
115 |
116 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/Main.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md;
2 |
3 | import com.github.csccoder.csdn2md.paser.CorePaser;
4 | import com.github.csccoder.csdn2md.util.PropertiesUtil;
5 |
6 | import java.io.IOException;
7 | import java.util.concurrent.ExecutorService;
8 | import java.util.concurrent.Executors;
9 |
10 |
11 | public class Main implements Runnable{
12 |
13 | static ExecutorService service = Executors.newFixedThreadPool(1);
14 |
15 | static String csdn_host = PropertiesUtil.getProperties("csdn_host");
16 | private static final String HOST = csdn_host;
17 | private static final String AUTHOR = PropertiesUtil.getProperties("casn_name");
18 | /**
19 | * 文件保存路径(绝对路径)
20 | */
21 | private static final String FILE_PATH = PropertiesUtil.getProperties("file_Path");
22 | //csdn 用户名
23 |
24 | public static void main(String args[]) throws IOException {
25 | service.execute(new Main());
26 | }
27 |
28 | public void run() {
29 | new CorePaser().parse(HOST,
30 | AUTHOR,
31 | FILE_PATH,
32 | //是否爬取图片 默认false
33 | true);
34 | }
35 | }
36 |
37 |
38 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/model/Article.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.model;
2 |
3 | import java.util.Date;
4 |
5 | public class Article {
6 | private int id;
7 | private String title;
8 | private String content;
9 | private String author;
10 | private String tags[];
11 | private String catagory[];
12 | private Date date;
13 |
14 | public String getAuthor() {
15 | return author;
16 | }
17 |
18 | public void setAuthor(final String author) {
19 | this.author = author;
20 | }
21 |
22 | public int getId() {
23 | return id;
24 | }
25 |
26 | public void setId(int id) {
27 | this.id = id;
28 | }
29 |
30 | public String getTitle() {
31 | return title;
32 | }
33 |
34 | public void setTitle(String title) {
35 | this.title = title;
36 | }
37 |
38 | public String getContent() {
39 | return content;
40 | }
41 |
42 | public void setContent(String content) {
43 | this.content = content;
44 | }
45 |
46 | public String[] getTags() {
47 | return tags;
48 | }
49 |
50 | public void setTags(String[] tags) {
51 | this.tags = tags;
52 | }
53 |
54 | public Date getDate() {
55 | return date;
56 | }
57 |
58 | public void setDate(Date date) {
59 | this.date = date;
60 | }
61 |
62 | public String[] getCatagory() {
63 | return catagory;
64 | }
65 |
66 | public void setCatagory(String catagory[]) {
67 | this.catagory = catagory;
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/paser/ArticlePaser.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.paser;
2 |
3 | import com.github.csccoder.csdn2md.model.Article;
4 | import com.github.csccoder.csdn2md.util.PropertiesUtil;
5 | import com.github.csccoder.csdn2md.util.RegexUtil;
6 | import org.jsoup.nodes.Document;
7 | import org.jsoup.nodes.Element;
8 | import org.jsoup.select.Elements;
9 |
10 | import java.text.SimpleDateFormat;
11 | import java.util.ArrayList;
12 |
13 | public class ArticlePaser {
14 | //2017-10-07 23:13
15 | private static final SimpleDateFormat dateFormat=new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
16 |
17 | public static Article parseArticle(String url){
18 | Document document=CorePaser.getDocument(url);
19 | Article article = new Article();
20 |
21 | String articleId=parseArticleId(url);
22 | String articleTitle=document.select(".article-title-box>h1").text().trim();
23 | String articleContent=document.select("#article_content").html();
24 | String[] tags =PropertiesUtil.getProperties("tags").split(",");
25 | String[] category = PropertiesUtil.getProperties("categories").split(",");
26 | String author = PropertiesUtil.getProperties("author");
27 | article.setAuthor(author);
28 | article.setId(Integer.parseInt(articleId));
29 | article.setTitle(articleTitle);
30 | article.setContent(articleContent);
31 | article.setCatagory(category);
32 | try {
33 | // Elements element = document.select(".article-bar-top>.time");
34 | Elements element = document.select(".bar-content>.time");
35 | article.setDate(dateFormat.parse(element.text()));
36 | } catch (Exception e) {
37 | System.out.println("应该是被反爬虫了,换个wifi或者网络试试~~~~~~~~~~~~~~~~~~~~~~~");
38 | e.printStackTrace();
39 | }
40 | article.setTags(tags);
41 | return article;
42 | }
43 |
44 | private static String parseCata(Document document) {
45 | Elements elements = document.select(".tags-box>a");
46 | for(Element element:elements){
47 | String url=element.attr("href");
48 | if(url != null && url.trim().length() != 0){
49 | if(url.lastIndexOf("category") != -1){
50 | return element.select(".tag-link").text().trim();
51 | }
52 | }
53 | }
54 | return null;
55 | }
56 |
57 | public static String parseArticleId(String uri){
58 | return RegexUtil.match(".*/(\\d*)",uri,1);
59 | }
60 |
61 | public static String[] parseTags(Document document){
62 | ArrayList list = new ArrayList();
63 | Elements elements = document.select(".artic-tag-box>a");
64 | for(Element element:elements){
65 | String text=element.select(".tag-link").text().trim();
66 | list.add(text);
67 | }
68 | return list.toArray(new String[0]);
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/paser/CorePaser.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.paser;
2 |
3 | import com.github.csccoder.csdn2md.model.Article;
4 | import com.github.csccoder.csdn2md.util.*;
5 | import org.jsoup.Jsoup;
6 | import org.jsoup.nodes.Document;
7 | import org.jsoup.nodes.Element;
8 | import org.jsoup.select.Elements;
9 |
10 |
11 | import java.util.ArrayList;
12 | import java.util.List;
13 |
14 |
15 | public class CorePaser {
16 |
17 |
18 | /**
19 | * 解析博客的入口函数
20 | *
21 | * @param host csdn域名
22 | * @param author csdn账号
23 | */
24 | public void parse(String host, String author, String path, boolean img) {
25 | Document document;
26 | FileUtil fileUtil = new FileUtil(path, img);
27 | int recordCount = 1;
28 | int pageCount = 1;
29 | List uris = new ArrayList();
30 | while (true) {
31 |
32 |
33 |
34 |
35 | System.out.println("正在爬取第" + pageCount + "页");
36 | try {
37 | int url_way = Integer.parseInt(PropertiesUtil.getProperties("url_way"));
38 |
39 | String url_url_way_1 = PropertiesUtil.getProperties("url_way_1");
40 | String url_url_way_2 = PropertiesUtil.getProperties("url_way_2");
41 | String url_url_way_3 = PropertiesUtil.getProperties("url_way_3");
42 | String url_url_way_4 = PropertiesUtil.getProperties("url_way_4");
43 | String url_url_way_5 = PropertiesUtil.getProperties("url_way_5");
44 |
45 | switch (url_way) {
46 | case 1:
47 | document = getDocument(host + "/" + author + url_url_way_1 + pageCount);
48 | uris = parseArticleURIs(document);
49 | break;
50 | case 2:
51 | document = getDocument(host + "/" + author + url_url_way_2);
52 | uris = parseArticleURIsOfZhuanLan(document);
53 | break;
54 | case 3:
55 | uris.add(host + "/" + author + url_url_way_3);
56 | recordCount=1;
57 | break;
58 | case 4:
59 | document = getDocument(host + "/" + author + url_url_way_4);
60 | uris = parseArticleURIs(document);
61 | break;
62 | case 5:
63 | document = getDocument(host + "/" + author + url_url_way_5);
64 | uris = parseArticleURIsOfCategory(document);
65 | break;
66 | default:
67 | break;
68 | }
69 |
70 |
71 | // 获得当前页所有文章的URI
72 | // System.out.println(document);
73 |
74 | if (uris.size() == 0) {
75 | break;
76 | }
77 | for (String uri : uris) {
78 | if (uri.indexOf(author) <= 0) {
79 | System.out.println("网址中没有包含用户名");
80 | continue;
81 | }
82 | // 核心
83 | Article article = ArticlePaser.parseArticle(uri);
84 | System.out.println("第" + recordCount + "篇 =>" + article.getId() + " " + article.getTitle());
85 | // 核心
86 | fileUtil.html2HexoMd(article);
87 | recordCount++;
88 | }
89 | pageCount++;
90 | } catch (Exception e) {
91 | e.printStackTrace();
92 | }
93 | }
94 |
95 | }
96 |
97 |
98 | public static Document getDocument(String url) {
99 | String content = HttpClientUtil.get(url);
100 | Document document = Jsoup.parse(content);
101 | return document;
102 | }
103 |
104 |
105 | public ArrayList parseArticleURIs(Document document) {
106 | ArrayList ids = new ArrayList();
107 | Elements elements = document.select(".article-item-box>h4>a");
108 | for (Element element : elements) {
109 | ids.add(element.attr("href"));
110 | }
111 | return ids;
112 | }
113 |
114 | public ArrayList parseArticleURIsOfZhuanLan(Document document) {
115 | ArrayList ids = new ArrayList();
116 | Elements elements = document.select(".column_article_list>li>a");
117 | for (Element element : elements) {
118 | ids.add(element.attr("href"));
119 | }
120 | return ids;
121 | }
122 |
123 | // 这里其实和上面的parseArticleURIs 实现是一样的,为了区分,重新写一个方法把,只是名字不一样而已。
124 | public ArrayList parseArticleURIsOfCategory(Document document) {
125 | ArrayList ids = new ArrayList();
126 | Elements elements = document.select(".article-item-box>h4>a");
127 | for (Element element : elements) {
128 | ids.add(element.attr("href"));
129 | }
130 | return ids;
131 | }
132 |
133 | public int getRecordCount(String papeList) {
134 | String value = RegexUtil.match("\\s*?(\\d*)条.*", papeList, 1);
135 | return value == null ? 0 : Integer.parseInt(value);
136 | }
137 |
138 | public int getPageCount(String pageList) {
139 | String value = RegexUtil.match(".*共(\\d*)页", pageList, 1);
140 | return value == null ? 0 : Integer.parseInt(value);
141 | }
142 |
143 |
144 |
145 |
146 | }
147 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/FileUtil.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util;
2 |
3 | import com.github.csccoder.csdn2md.model.Article;
4 | import com.github.csccoder.csdn2md.util.html2markdown.HTML2Md;
5 |
6 |
7 | import java.io.*;
8 | import java.net.URL;
9 | import java.text.SimpleDateFormat;
10 | import java.util.Date;
11 | import java.util.UUID;
12 | import java.util.regex.Pattern;
13 |
14 | public class FileUtil {
15 | private static String dir;
16 | private static String htmlDir;
17 | private static String mdDir;
18 | private static String imgDir;
19 | private static boolean img;
20 | private static Pattern FilePattern = Pattern.compile("[\\s\\\\/:\\*\\?\\\"<>\\|]");
21 |
22 | public FileUtil(String path, boolean imgSwitch) {
23 | if (path == null) return;
24 | dir = path;
25 | String html_path = PropertiesUtil.getProperties("html_path");
26 | String image_path = PropertiesUtil.getProperties("image_path");
27 | String md_path = PropertiesUtil.getProperties("md_path");
28 | htmlDir = path + "/" + html_path + "/";
29 | mdDir = path + "/" + md_path + "/";
30 | imgDir = path + "/" + image_path + "/";
31 | if (img) img = false;
32 | else img = imgSwitch;
33 | judeDirExists(dir, htmlDir, mdDir, imgDir);
34 | }
35 |
36 | private void save(String content, String filePath) {
37 | File file = new File(filePath);
38 | FileWriter writer = null;
39 | try {
40 | writer = new FileWriter(file);
41 | writer.write(content);
42 | } catch (IOException e) {
43 | e.printStackTrace();
44 | } finally {
45 | if (writer != null) {
46 | try {
47 | writer.close();
48 | } catch (IOException e) {
49 | e.printStackTrace();
50 | }
51 | }
52 | }
53 |
54 | }
55 |
56 | private void saveHtml(Article article) {
57 | String fileName = article.getTitle();
58 | if ("".equals(fileName)) return;
59 | fileName = (fileName == null ? null : FilePattern.matcher(fileName).replaceAll("")); //过滤文件名特殊字符
60 | String filePath = htmlDir + fileName;
61 | save(article.getContent(), filePath);
62 |
63 | }
64 |
65 | private void saveHexomd(Article article) {
66 | String mdhead = "";
67 | Boolean head = Boolean.valueOf(PropertiesUtil.getProperties("head"));
68 | if (head) {
69 | mdhead = HexoMdUtil.getHeader(article);
70 | }
71 | try {
72 | SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
73 | String fileName = "";
74 | String MdFileName = "";
75 | fileName = article.getTitle();
76 | fileName = (fileName == null ? null : FilePattern.matcher(fileName).replaceAll("")); //过滤文件名特殊字符
77 | String mdContent = HTML2Md.convertFile(new File(htmlDir + fileName), "utf-8");
78 | String realContent = mdhead + mdContent;
79 |
80 | String MdFileName_type = PropertiesUtil.getProperties("MdFileName_type");
81 | if (MdFileName_type.equals("date")) {
82 | MdFileName = dateFormat.format(article.getDate()).replace(" ", "-").replaceAll(":", "-");
83 | } else {
84 | MdFileName = article.getTitle();
85 | }
86 | String filePath = mdDir + MdFileName + ".md";
87 | save(realContent, filePath);
88 | } catch (IOException e) {
89 | e.printStackTrace();
90 | }
91 | }
92 |
93 |
94 | public void html2HexoMd(Article article) {
95 | saveHtml(article);
96 | saveHexomd(article);
97 | }
98 |
99 | public static String getPicture(String url) {
100 | String fix = null;
101 | if (img && !url.equals("")) {
102 | URL ur;
103 | // https://img-blog.csdnimg.cn/20190316212631882.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2RhdGFpeWFuZ3U=,size_16,color_FFFFFF,t_70
104 | BufferedInputStream in;
105 | ByteArrayOutputStream outStream;
106 | try {
107 | // fix = url.substring(url.lastIndexOf("img-blog.csdnimg.cn") + 20, url.length());
108 | // fix = fix.replace("/", "-");
109 | // fix = fix.replace(".png", "-");
110 | // fix = fix.replace("?", "-");
111 | // fix = fix + ".png";
112 |
113 | // 直接uuid,csdn的太长
114 | fix = UUID.randomUUID() + ".png";
115 |
116 | String fileName = UUID.randomUUID().toString();
117 | ur = new URL(url);
118 | in = new BufferedInputStream(ur.openStream());
119 | outStream = new ByteArrayOutputStream();
120 | byte[] buf = new byte[1024];
121 | int length = 0;
122 | while ((length = in.read(buf, 0, buf.length)) != -1) {
123 | outStream.write(buf, 0, length);
124 | }
125 | byte[] bytes = outStream.toByteArray();
126 | if (!"".equals(fix)) {
127 | if (!fix.contains(".")) {
128 | fix = fix + "." + GetFileSuffix(bytes);
129 | }
130 | }
131 |
132 |
133 | File fileOut = new File(imgDir + fix);
134 | FileOutputStream op = new FileOutputStream(fileOut);
135 |
136 | op.write(bytes);
137 | op.close();
138 | in.close();
139 | outStream.close();
140 | return fix;
141 | } catch (Exception e) {
142 | e.printStackTrace();
143 | return fix;
144 | }
145 | }
146 | return fix;
147 | }
148 |
149 | private static String GetFileSuffix(byte[] fileData) {
150 | if (fileData == null || fileData.length < 10) {
151 | return null;
152 | }
153 |
154 | if (fileData[0] == 'G' && fileData[1] == 'I' && fileData[2] == 'F') {
155 | return "gif";
156 | } else if (fileData[1] == 'P' && fileData[2] == 'N' && fileData[3] == 'G') {
157 | return "png";
158 | } else if (fileData[6] == 'J' && fileData[7] == 'F' && fileData[8] == 'I' && fileData[9] == 'F') {
159 | return "jpg";
160 | } else if (fileData[0] == 'B' && fileData[1] == 'M') {
161 | return "bmp";
162 | } else {
163 | return null;
164 | }
165 | }
166 |
167 | // 判断文件夹是否存在
168 | public static void judeDirExists(String dirRoot, String htmlDirPath, String mdDirPath, String imgDirPath) {
169 | try {
170 | File file = new File(dirRoot);
171 | if (!file.exists()) {
172 | if (!file.isDirectory()) {
173 | File dirR = new File(dirRoot);
174 | if (dirR.mkdirs()) {
175 | System.out.println("已创建根目录!");
176 | }
177 | }
178 | }
179 |
180 | File htmlDirP = new File(htmlDirPath);
181 | File mdDirP = new File(mdDirPath);
182 | File imgDirP = new File(imgDirPath);
183 | if (htmlDirP.mkdirs() && mdDirP.mkdirs() && imgDirP.mkdirs()) {
184 | System.out.println("目录创建成功!");
185 | } else {
186 | System.out.println(htmlDirP.mkdirs());
187 | System.out.println("目录已经存在,如果还是不行请尝试删掉根目录下所有文件");
188 | }
189 |
190 | } catch (Exception e) {
191 | e.printStackTrace();
192 |
193 | }
194 |
195 |
196 | }
197 |
198 |
199 | }
200 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/HexoMdUtil.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util;
2 |
3 | import com.github.csccoder.csdn2md.model.Article;
4 | import com.github.csccoder.csdn2md.util.html2markdown.DateUtil;
5 |
6 | import java.text.SimpleDateFormat;
7 | import java.util.Arrays;
8 | import java.util.Map;
9 | import java.util.TreeMap;
10 |
11 | import static java.io.File.separator;
12 |
13 | public class HexoMdUtil {
14 | private static final SimpleDateFormat dateFormat=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
15 | /**
16 | *
17 | * 适配hexo头部
18 | *
19 | ---
20 | title: hexo deploy时重复输入用户名密码的问题
21 | date: 2017-12-12 19:17:34
22 | tags: hexo
23 | ---
24 | */
25 |
26 | public static String getHeader(Article article){
27 | String separator = System.getProperty("line.separator");
28 | String article_title = article.getTitle();
29 | String[] article_tags = article.getTags();
30 | StringBuilder hexo_tags = new StringBuilder(separator);
31 | String[] article_catagory = article.getCatagory();
32 | StringBuilder hexo_categories = new StringBuilder(separator);
33 | for (String tag : article_tags) {
34 | hexo_tags.append(" - ").append(tag).append(separator);
35 | }
36 | for (String category : article_catagory) {
37 | hexo_categories.append(" - ").append(category).append(separator);
38 | }
39 | String title = PropertiesUtil.getProperties("title");
40 | if (!("").equals(title)){
41 | article_title = title;
42 | }
43 |
44 | StringBuilder sb=new StringBuilder();
45 | sb.append("---\n").
46 | append(String.format("title: %s\n",article_title)).
47 | append(String.format("author: %s\n",article.getAuthor())).
48 | append("tags: "+hexo_categories+"\n").
49 | append("category: "+hexo_tags+"\n").
50 | append(String.format("date: %s\n",dateFormat.format(article.getDate()))).
51 | append("---\n");
52 | return sb.toString();
53 | }
54 |
55 | public static String array2String(String[] array){
56 | String str="";
57 | for(String temp:array){
58 | str+=temp;
59 | }
60 | return str;
61 | }
62 |
63 |
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/HttpClientUtil.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util;
2 |
3 | import org.apache.http.*;
4 | import org.apache.http.client.HttpClient;
5 | import org.apache.http.client.HttpRequestRetryHandler;
6 | import org.apache.http.client.config.CookieSpecs;
7 | import org.apache.http.client.config.RequestConfig;
8 | import org.apache.http.client.entity.UrlEncodedFormEntity;
9 | import org.apache.http.client.methods.CloseableHttpResponse;
10 | import org.apache.http.client.methods.HttpGet;
11 | import org.apache.http.client.methods.HttpPost;
12 | import org.apache.http.client.methods.HttpRequestBase;
13 | import org.apache.http.client.params.CookiePolicy;
14 | import org.apache.http.client.params.HttpClientParams;
15 | import org.apache.http.client.protocol.HttpClientContext;
16 | import org.apache.http.config.Registry;
17 | import org.apache.http.config.RegistryBuilder;
18 | import org.apache.http.conn.ConnectTimeoutException;
19 | import org.apache.http.conn.routing.HttpRoute;
20 | import org.apache.http.conn.socket.ConnectionSocketFactory;
21 | import org.apache.http.conn.socket.LayeredConnectionSocketFactory;
22 | import org.apache.http.conn.socket.PlainConnectionSocketFactory;
23 | import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
24 | import org.apache.http.impl.client.CloseableHttpClient;
25 | import org.apache.http.impl.client.DefaultHttpClient;
26 | import org.apache.http.impl.client.HttpClients;
27 | import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
28 | import org.apache.http.message.BasicNameValuePair;
29 | import org.apache.http.protocol.HttpContext;
30 | import org.apache.http.util.EntityUtils;
31 |
32 | import javax.net.ssl.SSLException;
33 | import javax.net.ssl.SSLHandshakeException;
34 | import java.io.IOException;
35 | import java.io.InterruptedIOException;
36 | import java.io.UnsupportedEncodingException;
37 | import java.net.UnknownHostException;
38 | import java.util.ArrayList;
39 | import java.util.List;
40 | import java.util.Map;
41 | import java.util.Set;
42 |
43 | public class HttpClientUtil {
44 |
45 | static final int timeOut = 15 * 1000;
46 |
47 | private static CloseableHttpClient httpClient = null;
48 |
49 | private final static Object syncLock = new Object();
50 |
51 | private static void config(HttpRequestBase httpRequestBase) {
52 | // 设置Header等
53 | // httpRequestBase.setHeader("User-Agent", "Mozilla/5.0");
54 | // httpRequestBase
55 | // .setHeader("Accept",
56 | // "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
57 | // httpRequestBase.setHeader("Accept-Language",
58 | // "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");// "en-US,en;q=0.5");
59 | // httpRequestBase.setHeader("Accept-Charset",
60 | // "ISO-8859-1,utf-8,gbk,gb2312;q=0.7,*;q=0.7");
61 |
62 | // 配置请求的超时设置
63 | RequestConfig requestConfig = RequestConfig.custom()
64 | .setConnectionRequestTimeout(timeOut)
65 | .setConnectTimeout(timeOut).setSocketTimeout(timeOut).build();
66 | httpRequestBase.setConfig(requestConfig);
67 | }
68 |
69 | /**
70 | * 获取HttpClient对象
71 | *
72 | * @return
73 | * @author SHANHY
74 | * @create 2015年12月18日
75 | */
76 | public static CloseableHttpClient getHttpClient(String url) {
77 | String hostname = url.split("/")[2];
78 | int port = 80;
79 | if (hostname.contains(":")) {
80 | String[] arr = hostname.split(":");
81 | hostname = arr[0];
82 | port = Integer.parseInt(arr[1]);
83 | }
84 | if (httpClient == null) {
85 | synchronized (syncLock) {
86 | if (httpClient == null) {
87 | httpClient = createHttpClient(200, 40, 100, hostname, port);
88 | }
89 | }
90 | }
91 | return httpClient;
92 | }
93 |
94 | /**
95 | * 创建HttpClient对象
96 | *
97 | * @return
98 | * @author SHANHY
99 | * @create 2015年12月18日
100 | */
101 | public static CloseableHttpClient createHttpClient(int maxTotal,
102 | int maxPerRoute, int maxRoute, String hostname, int port) {
103 | ConnectionSocketFactory plainsf = PlainConnectionSocketFactory
104 | .getSocketFactory();
105 | LayeredConnectionSocketFactory sslsf = SSLConnectionSocketFactory
106 | .getSocketFactory();
107 | Registry registry = RegistryBuilder
108 | .create().register("http", plainsf)
109 | .register("https", sslsf).build();
110 | PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(
111 | registry);
112 | // 将最大连接数增加
113 | cm.setMaxTotal(maxTotal);
114 | // 将每个路由基础的连接增加
115 | cm.setDefaultMaxPerRoute(maxPerRoute);
116 | HttpHost httpHost = new HttpHost(hostname, port);
117 | // 将目标主机的最大连接数增加
118 | cm.setMaxPerRoute(new HttpRoute(httpHost), maxRoute);
119 |
120 | // 请求重试处理
121 | HttpRequestRetryHandler httpRequestRetryHandler = new HttpRequestRetryHandler() {
122 | public boolean retryRequest(IOException exception,
123 | int executionCount, HttpContext context) {
124 | if (executionCount >= 5) {// 如果已经重试了5次,就放弃
125 | return false;
126 | }
127 | if (exception instanceof NoHttpResponseException) {// 如果服务器丢掉了连接,那么就重试
128 | return true;
129 | }
130 | if (exception instanceof SSLHandshakeException) {// 不要重试SSL握手异常
131 | return false;
132 | }
133 | if (exception instanceof InterruptedIOException) {// 超时
134 | return false;
135 | }
136 | if (exception instanceof UnknownHostException) {// 目标服务器不可达
137 | return false;
138 | }
139 | if (exception instanceof ConnectTimeoutException) {// 连接被拒绝
140 | return false;
141 | }
142 | if (exception instanceof SSLException) {// SSL握手异常
143 | return false;
144 | }
145 |
146 | HttpClientContext clientContext = HttpClientContext
147 | .adapt(context);
148 | HttpRequest request = clientContext.getRequest();
149 | // 如果请求是幂等的,就再次尝试
150 | if (!(request instanceof HttpEntityEnclosingRequest)) {
151 | return true;
152 | }
153 | return false;
154 | }
155 | };
156 |
157 | // 这里有警告
158 | CloseableHttpClient httpClient = HttpClients.custom()
159 | .setConnectionManager(cm)
160 | .setRetryHandler(httpRequestRetryHandler).build();
161 |
162 | return httpClient;
163 | }
164 |
165 | private static void setPostParams(HttpPost httpost,
166 | Map params) {
167 | List nvps = new ArrayList();
168 | Set keySet = params.keySet();
169 | for (String key : keySet) {
170 | nvps.add(new BasicNameValuePair(key, params.get(key).toString()));
171 | }
172 | try {
173 | httpost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));
174 | } catch (UnsupportedEncodingException e) {
175 | e.printStackTrace();
176 | }
177 | }
178 |
179 | /**
180 | * post请求URL获取内容
181 | *
182 | * @param url
183 | * @return
184 | * @throws IOException
185 | * @author SHANHY
186 | * @create 2015年12月18日
187 | */
188 | public static String post(String url, Map params) throws Exception {
189 | HttpPost httppost = new HttpPost(url);
190 | config(httppost);
191 | setPostParams(httppost, params);
192 | CloseableHttpResponse response = null;
193 | try {
194 | response = getHttpClient(url).execute(httppost,
195 | HttpClientContext.create());
196 | HttpEntity entity = response.getEntity();
197 | String result = EntityUtils.toString(entity, "utf-8");
198 | EntityUtils.consume(entity);
199 | return result;
200 | } catch (Exception e) {
201 | // e.printStackTrace();
202 | throw e;
203 | } finally {
204 | try {
205 | if (response != null)
206 | response.close();
207 | } catch (IOException e) {
208 | e.printStackTrace();
209 | }
210 | }
211 | }
212 |
213 | /**
214 | * GET请求URL获取内容
215 | *
216 | * @param url
217 | * @return
218 | * @author SHANHY
219 | * @create 2015年12月18日
220 | */
221 | public static String get(String url) {
222 | HttpGet httpget = new HttpGet(url);
223 | config(httpget);
224 | CloseableHttpResponse response = null;
225 | try {
226 | response = getHttpClient(url).execute(httpget,
227 | HttpClientContext.create());
228 | HttpEntity entity = response.getEntity();
229 | String result = EntityUtils.toString(entity, "utf-8");
230 | EntityUtils.consume(entity);
231 | return result;
232 | } catch (IOException e) {
233 | e.printStackTrace();
234 | } finally {
235 | try {
236 | if (response != null)
237 | response.close();
238 | } catch (IOException e) {
239 | e.printStackTrace();
240 | }
241 | }
242 | return null;
243 | }
244 |
245 | }
246 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/PropertiesUtil.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util;
2 |
3 | import com.github.csccoder.csdn2md.Main;
4 |
5 | import java.io.BufferedInputStream;
6 | import java.io.FileInputStream;
7 | import java.io.IOException;
8 | import java.io.InputStream;
9 | import java.util.Properties;
10 |
/**
 * Reads settings from the {@code config.properties} resource on the classpath.
 */
public class PropertiesUtil {

    /**
     * Looks up one key in {@code config.properties}. The resource is re-read on every call.
     *
     * <p>The resource is resolved through this class's class loader, which works when the
     * program is started from the IDE or from a jar that bundles the file. To read a file next
     * to the jar instead, open {@code System.getProperty("user.dir") + "/config.properties"}.
     *
     * @param key property name
     * @return the property value, or {@code null} when the resource is missing, cannot be read,
     *         or does not contain the key
     */
    public static String getProperties(String key) {
        InputStream inputStream =
                PropertiesUtil.class.getClassLoader().getResourceAsStream("config.properties");
        if (inputStream == null) {
            // Resource not on the classpath: report "no value" instead of failing in load(null).
            return null;
        }
        try {
            Properties pp = new Properties();
            pp.load(inputStream);
            return pp.getProperty(key);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            try {
                inputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
33 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/ProxyIP.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util;
2 |
3 | import java.util.Properties;
4 |
5 | public class ProxyIP {
6 | public static void daili(String ip, String dk) {
7 | Properties prop = System.getProperties();
8 | // 设置http访问要使用的代理服务器的地址
9 | prop.setProperty("http.proxyHost", ip);
10 | // 设置http访问要使用的代理服务器的端口
11 | prop.setProperty("http.proxyPort", dk);
12 | // 设置不需要通过代理服务器访问的主机,可以使用*通配符,多个地址用|分隔
13 | prop.setProperty("http.nonProxyHosts", "localhost|192.168.168.*");
14 | // 设置安全访问使用的代理服务器地址与端口
15 | // 它没有https.nonProxyHosts属性,它按照http.nonProxyHosts 中设置的规则访问
16 | prop.setProperty("https.proxyHost", ip);
17 | prop.setProperty("https.proxyPort", dk);
18 | // 使用ftp代理服务器的主机、端口以及不需要使用ftp代理服务器的主机
19 | prop.setProperty("ftp.proxyHost", ip);
20 | prop.setProperty("ftp.proxyPort", dk);
21 | prop.setProperty("ftp.nonProxyHosts", "localhost|192.168.168.*");
22 | // socks代理服务器的地址与端口
23 | prop.setProperty("socksProxyHost", ip);
24 | prop.setProperty("socksProxyPort", dk);
25 | System.out.println("即将开始代理进行访问 ip:" + ip + " port:" + dk);
26 | }
27 | public static String[] dl = PropertiesUtil.getProperties("IP_And_Port").split(",");
28 |
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/RegexUtil.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util;
2 |
3 | import java.util.regex.Matcher;
4 | import java.util.regex.Pattern;
5 |
/**
 * Small regular-expression helper.
 */
public class RegexUtil {

    /**
     * Returns one capture group when the expression matches the <em>entire</em> content.
     *
     * @param regex   regular expression; it has to cover the whole of {@code content}
     * @param content text to match
     * @param group   index of the capture group to return (0 is the whole match)
     * @return the group's text, or {@code null} when the content does not match as a whole
     */
    public static String match(String regex, String content, int group) {
        Matcher wholeInput = Pattern.compile(regex).matcher(content);
        return wholeInput.matches() ? wholeInput.group(group) : null;
    }
}
17 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/DateUtil.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util.html2markdown;
2 |
3 | import java.text.DateFormat;
4 | import java.text.ParsePosition;
5 | import java.text.SimpleDateFormat;
6 | import java.util.Calendar;
7 | import java.util.Date;
8 | import java.util.StringTokenizer;
9 |
/**
 * Static helpers for formatting, parsing and shifting {@link Date} values.
 *
 * <p>Formatting and parsing use {@link SimpleDateFormat} with the JVM's default locale and time
 * zone. A new formatter is created for every call, so the helpers share no mutable state.
 *
 * @author robin
 * @author refactor by Sevan Joe
 */
public class DateUtil {

    /** Pattern of a plain calendar date. */
    private static final String DATE_PATTERN = "yyyy-MM-dd";

    /** Pattern of a date with a time of day down to the second. */
    private static final String DATE_TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    public DateUtil() {

    }

    /**
     * Formats the current date as {@code yyyy-MM-dd}.
     *
     * @return String value
     */
    public static String getNowDate() {
        return dateToStringWithPattern(new Date(), DATE_PATTERN);
    }

    /**
     * Formats the current time as {@code yyyy-MM-dd HH:mm:ss:SSS}, where the last field is the
     * millisecond of the second.
     *
     * @return String value
     */
    public static String getNowTime() {
        // "SSS" is milliseconds; lower-case "sss" would only repeat the seconds, zero-padded.
        return dateToStringWithPattern(new Date(), "yyyy-MM-dd HH:mm:ss:SSS");
    }

    /**
     * Formats a date as {@code yyyy-MM-dd HH:mm}.
     *
     * @param date the date to format
     * @return String value, or {@code ""} when the date cannot be formatted
     */
    public static String dateToString(Date date) {
        return dateToStringWithPattern(date, "yyyy-MM-dd HH:mm");
    }

    /**
     * Formats the date part only, as {@code yyyy-MM-dd}.
     *
     * @param date the date to format
     * @return String value, or {@code ""} when the date cannot be formatted
     */
    public static String dateToShortString(Date date) {
        return dateToStringWithPattern(date, DATE_PATTERN);
    }

    /**
     * Formats a date as {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param date the date to format
     * @return String value, or {@code ""} when the date cannot be formatted
     */
    public static String dateToLongString(Date date) {
        return dateToStringWithPattern(date, DATE_TIME_PATTERN);
    }

    /**
     * Formats the time part only, as {@code HH:mm:ss}.
     *
     * @param date the date to format
     * @return String value, or {@code ""} when the date cannot be formatted
     */
    public static String dateToTimeString(Date date) {
        return dateToStringWithPattern(date, "HH:mm:ss");
    }

    /**
     * Formats a date with an arbitrary {@link SimpleDateFormat} pattern.
     *
     * @param date    the date to format
     * @param pattern {@link SimpleDateFormat} pattern
     * @return String value, or {@code ""} when formatting fails for any reason
     *         (for example a {@code null} date or an invalid pattern)
     */
    public static String dateToStringWithPattern(Date date, String pattern) {
        try {
            return new SimpleDateFormat(pattern).format(date);
        } catch (Exception e) {
            return "";
        }
    }

    /**
     * Splits the {@code yyyy-MM-dd} form of a date at the {@code '-'} separators.
     *
     * @param date the date to split
     * @return a three-element array holding year, month and day; elements stay {@code null}
     *         when the date could not be formatted
     */
    public static String[] SplitDate(Date date) {
        String[] parts = new String[3];
        StringTokenizer tokens = new StringTokenizer(dateToShortString(date), "-");
        for (int i = 0; i < parts.length && tokens.hasMoreTokens(); i++) {
            parts[i] = tokens.nextToken();
        }
        return parts;
    }

    /**
     * Formats a date as {@code yyyy-MM-ddTHH:mm:ss}.
     *
     * @param date the date to format
     * @return String value, or {@code ""} when the date cannot be formatted
     */
    public static String dateToBOMCStringDate(Date date) {
        return StringToBOMCStringDate(dateToStringWithPattern(date, DATE_TIME_PATTERN));
    }

    /**
     * Replaces every space in the given text with {@code 'T'}.
     *
     * @param date date text such as {@code yyyy-MM-dd HH:mm:ss}
     * @return the text with spaces replaced
     */
    public static String StringToBOMCStringDate(String date) {
        return date.replace(" ", "T");
    }

    /**
     * Parses text in the format {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param string text to parse
     * @return Date value, or {@code null} when the text cannot be parsed
     */
    public static Date stringToDate(String string) {
        return parseOrNull(string, DATE_TIME_PATTERN);
    }

    /**
     * Parses text in the format {@code HH:mm:ss}.
     *
     * @param string text to parse
     * @return Date value, or {@code null} when the text cannot be parsed
     */
    public static Date timeStringToDate(String string) {
        return parseOrNull(string, "HH:mm:ss");
    }

    /**
     * Parses text in the format {@code yyyy-MM-dd}.
     *
     * @param string text to parse
     * @return Date value, or {@code null} when the text cannot be parsed
     */
    public static Date stringToShortDate(String string) {
        return parseOrNull(string, DATE_PATTERN);
    }

    /**
     * Parses text in the format {@code yyyyMMdd}.
     *
     * @param string text to parse
     * @return Date value, or {@code null} when the text cannot be parsed
     */
    public static Date stringToShortNoDate(String string) {
        return parseOrNull(string, "yyyyMMdd");
    }

    /**
     * Returns the current instant.
     *
     * @return Date value
     */
    public static Date getNow() {
        return new Date();
    }

    /**
     * Returns the current time in milliseconds since the epoch.
     *
     * @return long value
     */
    public static long getCurrentTimestamp() {
        return System.currentTimeMillis();
    }

    /**
     * Parses text in the format {@code yyyy-MM-dd} into milliseconds since the epoch.
     *
     * @param string text to parse
     * @return long value, or {@code -1} when the text cannot be parsed
     */
    public static long getTimestamp(String string) {
        Date parsed = parseOrNull(string, DATE_PATTERN);
        return parsed == null ? -1 : parsed.getTime();
    }

    /**
     * Parses text in the format {@code yyyy-MM-dd HH:mm:ss} into milliseconds since the epoch.
     *
     * @param string text to parse
     * @return long value, or {@code -1} when the text cannot be parsed
     */
    public static long getStringToTimestamp(String string) {
        Date parsed = parseOrNull(string, DATE_TIME_PATTERN);
        return parsed == null ? -1 : parsed.getTime();
    }

    /**
     * Returns {@code timestamp - now} in whole minutes; the result is negative for timestamps
     * in the past.
     *
     * @param timestamp milliseconds since the epoch
     * @return long value
     */
    public static long getOffMinutes(long timestamp) {
        return getOffMinutes(timestamp, System.currentTimeMillis());
    }

    /**
     * Returns {@code left - right} in whole minutes.
     *
     * @param left  first time, in milliseconds since the epoch
     * @param right second time, in milliseconds since the epoch
     * @return long value
     */
    public static long getOffMinutes(long left, long right) {
        return (left - right) / 60000L;
    }

    /**
     * Formats milliseconds since the epoch with the default date-time style of the default
     * locale.
     *
     * @param timestamp milliseconds since the epoch
     * @return String value
     */
    public static String LongToDateString(long timestamp) {
        return DateFormat.getDateTimeInstance().format(new Date(timestamp));
    }

    /**
     * Parses a {@code yyyy-MM-dd} date, moves it to the given day of its week, and then adds
     * one more day.
     *
     * <p>When the text cannot be parsed, the stack trace is printed and the current time is
     * used as the starting point instead.
     *
     * @param dateString date text
     * @param weekDay    {@link Calendar#DAY_OF_WEEK} value, {@link Calendar#SUNDAY} to
     *                   {@link Calendar#SATURDAY}
     * @return Date value
     */
    public static Date getWeekDay(String dateString, int weekDay) {
        Calendar calendar = calendarFor(dateString, DATE_PATTERN);
        calendar.set(Calendar.DAY_OF_WEEK, weekDay);
        calendar.add(Calendar.DATE, 1);
        return calendar.getTime();
    }

    /**
     * Returns the first day of the month given as {@code yyyy-MM}.
     *
     * <p>When the text cannot be parsed, the stack trace is printed and the current time is
     * returned.
     *
     * @param dateString month text
     * @return Date value
     */
    public static Date getMonthFirstDay(String dateString) {
        // Parsing "yyyy-MM" already yields the first day of that month.
        return calendarFor(dateString, "yyyy-MM").getTime();
    }

    /**
     * Returns the last day of the month given as {@code yyyy-MM}.
     *
     * <p>When the text cannot be parsed, the stack trace is printed and the calculation starts
     * from the current time instead.
     *
     * @param dateString month text
     * @return Date value
     */
    public static Date getMonthLastDay(String dateString) {
        Calendar calendar = calendarFor(dateString, "yyyy-MM");
        calendar.add(Calendar.MONTH, 1);
        calendar.add(Calendar.DATE, -1);
        return calendar.getTime();
    }

    /**
     * Returns the first day of the year given as {@code yyyy}.
     *
     * <p>When the text cannot be parsed, the stack trace is printed and the current time is
     * returned.
     *
     * @param dateString year text
     * @return Date value
     */
    public static Date getYearFirstDay(String dateString) {
        // Parsing "yyyy" already yields the first day of that year.
        return calendarFor(dateString, "yyyy").getTime();
    }

    /**
     * Returns the last day of the year given as {@code yyyy}.
     *
     * <p>When the text cannot be parsed, the stack trace is printed and the calculation starts
     * from the current time instead.
     *
     * @param dateString year text
     * @return Date value
     */
    public static Date getYearLastDay(String dateString) {
        Calendar calendar = calendarFor(dateString, "yyyy");
        calendar.add(Calendar.YEAR, 1);
        calendar.add(Calendar.DATE, -1);
        return calendar.getTime();
    }

    /**
     * Returns the date shifted by {@code amount} units of the given calendar field.
     *
     * @param date   starting date
     * @param field  calendar field to shift, such as {@link Calendar#DAY_OF_MONTH}
     * @param amount number of units to add; negative values go back in time
     * @return Date value
     */
    public static Date getDate(Date date, int field, int amount) {
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(date);
        calendar.add(field, amount);
        return calendar.getTime();
    }

    /** Parses {@code text} from its first character; returns {@code null} on any failure. */
    private static Date parseOrNull(String text, String pattern) {
        try {
            return new SimpleDateFormat(pattern).parse(text, new ParsePosition(0));
        } catch (Exception e) {
            return null;
        }
    }

    /**
     * Returns a calendar set to the parsed date, or left at the current time (after printing the
     * stack trace) when the text cannot be parsed.
     */
    private static Calendar calendarFor(String dateString, String pattern) {
        Calendar calendar = Calendar.getInstance();
        try {
            calendar.setTime(new SimpleDateFormat(pattern).parse(dateString));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return calendar;
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/FilesUtil.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util.html2markdown;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.File;
5 | import java.io.FileInputStream;
6 | import java.io.FileOutputStream;
7 | import java.io.FileWriter;
8 | import java.io.IOException;
9 | import java.io.InputStream;
10 | import java.io.InputStreamReader;
11 | import java.io.PrintWriter;
12 | import java.net.URLDecoder;
13 | import java.util.ArrayList;
14 | import java.util.HashMap;
15 | import java.util.List;
16 | import java.util.Map;
17 | import java.util.StringTokenizer;
18 |
19 | /**
20 | * FilesUtil
21 | *
22 | * @author robin
23 | * @author refactor by Sevan Joe
24 | */
25 | public class FilesUtil {
26 |
27 | /**
28 | * read text file content, return string split by "\n"
29 | *
30 | * @param filePathAndName String file name with absolute path
31 | * @return String text content
32 | */
33 | public static String readAll(String filePathAndName) {
34 | return readAll(filePathAndName, "UTF-8");
35 | }
36 |
37 | /**
38 | * read text file content, return string split by "\n"
39 | *
40 | * @param filePathAndName String file name with absolute path
41 | * @param encoding String file encoding
42 | * @return String text content
43 | */
44 | public static String readAll(String filePathAndName, String encoding) {
45 | String string = "";
46 | StringBuilder stringBuilder = new StringBuilder("");
47 | FileInputStream fileInputStream = null;
48 | try {
49 | fileInputStream = new FileInputStream(URLDecoder.decode(filePathAndName, encoding));
50 | InputStreamReader inputStreamReader;
51 | if ("".equals(encoding)) {
52 | inputStreamReader = new InputStreamReader(fileInputStream);
53 | } else {
54 | inputStreamReader = new InputStreamReader(fileInputStream, encoding);
55 | }
56 | try {
57 | String data;
58 | BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
59 | while ((data = bufferedReader.readLine()) != null) {
60 | stringBuilder.append(data).append("\n");
61 | }
62 | } catch (Exception e) {
63 | return "";
64 | }
65 | string = stringBuilder.toString();
66 | } catch (IOException es) {
67 | string = "";
68 | } finally {
69 | try {
70 | if (fileInputStream != null) fileInputStream.close();
71 | } catch (IOException e) {
72 | e.printStackTrace();
73 | }
74 | }
75 | return string;
76 | }
77 |
78 | /**
79 | * read the specified line content of text file
80 | *
81 | * @param filePathAndName String file name with absolute path
82 | * @param encoding String file encoding
83 | * @return String text content of the line
84 | */
85 | public static String readLine(String filePathAndName, long lineIndex, String encoding) {
86 | String string = "";
87 | StringBuilder stringBuilder = new StringBuilder("");
88 | long i = 0;
89 | FileInputStream fileInputStream = null;
90 | try {
91 | fileInputStream = new FileInputStream(URLDecoder.decode(filePathAndName, encoding));
92 | InputStreamReader inputStreamReader;
93 | if ("".equals(encoding)) {
94 | inputStreamReader = new InputStreamReader(fileInputStream);
95 | } else {
96 | inputStreamReader = new InputStreamReader(fileInputStream, encoding);
97 | }
98 | BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
99 | try {
100 | String data;
101 | while ((data = bufferedReader.readLine()) != null) {
102 | if (lineIndex == i) {
103 | stringBuilder.append(data);
104 | break;
105 | } else {
106 | i++;
107 | }
108 | }
109 | } catch (Exception e) {
110 | return "";
111 | }
112 | string = stringBuilder.toString();
113 | } catch (IOException es) {
114 | return "";
115 | } finally {
116 | try {
117 | if (fileInputStream != null) fileInputStream.close();
118 | } catch (IOException e) {
119 | e.printStackTrace();
120 | }
121 | }
122 | return string;
123 | }
124 |
125 | /**
126 | * read the first line content of text file
127 | *
128 | * @param filePathAndName String file name with absolute path
129 | * @return String text content of the first line
130 | */
131 | public static String readLine(String filePathAndName) {
132 | return readLine(filePathAndName, 0, "UTF-8");
133 | }
134 |
135 | /**
136 | * read the specified line content of text file
137 | *
138 | * @param filePathAndName String file name with absolute path
139 | * @return String text content of the line
140 | */
141 | public static String readLine(String filePathAndName, long rowIndex) {
142 | return readLine(filePathAndName, rowIndex, "UTF-8");
143 | }
144 |
145 | /**
146 | * create file
147 | *
148 | * @param filePathAndName String file path and name
149 | * @param fileContent String file content
150 | * @return boolean flag to indicate create success or not
151 | */
152 | public static boolean newFile(String filePathAndName, String fileContent) {
153 | return newFile(filePathAndName, fileContent, false);
154 | }
155 |
156 | /**
157 | * create file
158 | *
159 | * @param filePathAndName String file path and name
160 | * @param fileContent String file content
161 | * @param flag boolean flag to indicate is append, true to append, false to create
162 | * @return boolean flag to indicate create success or not
163 | */
164 | public static boolean newFile(String filePathAndName, String fileContent, boolean flag) {
165 | try {
166 | File file = new File(filePathAndName);
167 | if (!file.exists()) {
168 | file.createNewFile();
169 | }
170 | FileWriter fileWriter = new FileWriter(file, flag);
171 | PrintWriter printWriter = new PrintWriter(fileWriter);
172 | printWriter.println(fileContent);
173 | fileWriter.close();
174 | return true;
175 | } catch (Exception e) {
176 | System.out.println("create file failed");
177 | e.printStackTrace();
178 | }
179 | return false;
180 | }
181 |
182 | /**
183 | * create file with specified encoding
184 | *
185 | * @param filePathAndName String file path and name
186 | * @param fileContent String file content
187 | * @param encoding the specified encoding, such as GBK or UTF-8
188 | * @return boolean flag to indicate create success or not
189 | */
190 | public static boolean newFile(String filePathAndName, String fileContent, String encoding) {
191 | try {
192 | File file = new File(filePathAndName);
193 | if (!file.exists()) {
194 | file.createNewFile();
195 | }
196 | PrintWriter printWriter = new PrintWriter(file, encoding);
197 | printWriter.println(fileContent);
198 | printWriter.close();
199 | return true;
200 | } catch (Exception e) {
201 | System.out.println("create file failed");
202 | e.printStackTrace();
203 | }
204 | return false;
205 | }
206 |
207 | /**
208 | * delete file
209 | *
210 | * @param filePathAndName String file path and name
211 | */
212 | public static boolean delFile(String filePathAndName) {
213 | try {
214 | File file = new File(filePathAndName);
215 | return file.delete();
216 | } catch (Exception e) {
217 | System.out.println("delete file failed");
218 | e.printStackTrace();
219 | return false;
220 | }
221 | }
222 |
223 | /**
224 | * create folder
225 | *
226 | * @param folderPath String folder path
227 | * @return String created folder path
228 | */
229 | public static String newFolder(String folderPath) {
230 | String filePath = folderPath;
231 | try {
232 | File myFilePath = new File(filePath);
233 | if (!myFilePath.exists()) {
234 | myFilePath.mkdirs();
235 | }
236 | } catch (Exception e) {
237 | System.out.println("create folder failed");
238 | filePath = "";
239 | e.printStackTrace();
240 | }
241 | return filePath;
242 | }
243 |
244 | /**
245 | * delete folder
246 | *
247 | * @param folderPath String folder path
248 | */
249 | public static void delFolder(String folderPath) {
250 | try {
251 | delAllFile(folderPath); // delete all files inside
252 | File file = new File(folderPath);
253 | file.delete(); // delete the empty folder
254 | } catch (Exception e) {
255 | System.out.println("delete folder failed");
256 | e.printStackTrace();
257 | }
258 | }
259 |
260 | /**
261 | * delete all files inside folder
262 | *
263 | * @param path String folder path
264 | */
265 | public static void delAllFile(String path) {
266 | File file = new File(path);
267 | if (!file.exists()) {
268 | return;
269 | }
270 | if (!file.isDirectory()) {
271 | return;
272 | }
273 | if (file.getAbsolutePath().equalsIgnoreCase("/")) {
274 | System.out.println("this is a root directory, you cannot delete all files in it!");
275 | System.out.println("please change the path!");
276 | return;
277 | }
278 | if (file.getAbsolutePath().equalsIgnoreCase("/root")) {
279 | System.out.println("this is a root directory, you cannot delete all files in it!");
280 | System.out.println("please change the path!");
281 | return;
282 | }
283 | if (file.getAbsolutePath().equalsIgnoreCase("/usr") || file.getAbsolutePath().equalsIgnoreCase("/opt")
284 | || file.getAbsolutePath().equalsIgnoreCase("/bin") || file.getAbsolutePath().equalsIgnoreCase("/sbin")
285 | || file.getAbsolutePath().equalsIgnoreCase("/etc") || file.getAbsolutePath().equalsIgnoreCase("/selinux")
286 | || file.getAbsolutePath().equalsIgnoreCase("/sys") || file.getAbsolutePath().equalsIgnoreCase("/var")
287 | || file.getAbsolutePath().equalsIgnoreCase("/home") || file.getAbsolutePath().equalsIgnoreCase("/net")) {
288 | System.out.println("this is a root directory, you cannot delete all files in it!");
289 | System.out.println("please change the path!");
290 | return;
291 | }
292 | if (file.getAbsolutePath().equalsIgnoreCase("C://") || file.getAbsolutePath().equalsIgnoreCase("C:\\\\")) {
293 | System.out.println("this is a root directory, you cannot delete all files in it!");
294 | System.out.println("please change the path!");
295 | return;
296 | }
297 | String[] tempList = file.list();
298 | File temp;
299 | if (tempList == null) {
300 | return;
301 | }
302 | for (String aTempList : tempList) {
303 | if (path.endsWith(File.separator)) {
304 | temp = new File(path + aTempList);
305 | } else {
306 | temp = new File(path + File.separator + aTempList);
307 | }
308 | if (temp.isFile()) {
309 | temp.delete();
310 | }
311 | if (temp.isDirectory()) {
312 | delAllFile(path + "/" + aTempList);// delete all files inside
313 | delFolder(path + "/" + aTempList);// delete the empty folder
314 | }
315 | }
316 | }
317 |
318 | /**
319 | * copy a file
320 | *
321 | * @param srcPath String the source path
322 | * @param dstPath String the destination path
323 | */
324 | public static void copyFile(String srcPath, String dstPath) {
325 | InputStream inputStream = null;
326 | FileOutputStream fileOutputStream = null;
327 | try {
328 | int byteRead;
329 | File srcFile = new File(srcPath);
330 |
331 | if (srcFile.exists()) { // file exists
332 | inputStream = new FileInputStream(srcPath); // read the source file
333 | fileOutputStream = new FileOutputStream(dstPath);
334 | byte[] buffer = new byte[1444];
335 | while ((byteRead = inputStream.read(buffer)) != -1) {
336 | fileOutputStream.write(buffer, 0, byteRead);
337 | }
338 | }
339 | } catch (Exception e) {
340 | System.out.println("copy file failed");
341 | e.printStackTrace();
342 | } finally {
343 | try {
344 | if (fileOutputStream != null)
345 | fileOutputStream.close();
346 | if (inputStream != null)
347 | inputStream.close();
348 | } catch (IOException e) {
349 | e.printStackTrace();
350 | }
351 | }
352 | }
353 |
354 | /**
355 | * copy a folder
356 | *
357 | * @param srcPath String the source path
358 | * @param dstPath String the destination path
359 | */
360 | public static void copyFolder(String srcPath, String dstPath) {
361 |
362 | try {
363 | (new File(dstPath)).mkdirs(); // if the folder does not exits, create it
364 | File file = new File(srcPath);
365 | String[] fileList = file.list();
366 | File tempFile;
367 | for (String fileName : fileList) {
368 | if (srcPath.endsWith(File.separator)) {
369 | tempFile = new File(srcPath + fileName);
370 | } else {
371 | tempFile = new File(srcPath + File.separator + fileName);
372 | }
373 |
374 | if (tempFile.isFile()) {
375 | FileInputStream fileInputStream = new FileInputStream(tempFile);
376 | FileOutputStream fileOutputStream = new FileOutputStream(dstPath + "/" + (tempFile.getName()));
377 | byte[] bytes = new byte[1024 * 5];
378 | int length;
379 | while ((length = fileInputStream.read(bytes)) != -1) {
380 | fileOutputStream.write(bytes, 0, length);
381 | }
382 | fileOutputStream.flush();
383 | fileOutputStream.close();
384 | fileInputStream.close();
385 | }
386 | if (tempFile.isDirectory()) { // it is a subdirectory
387 | copyFolder(srcPath + "/" + fileName, dstPath + "/" + fileName);
388 | }
389 | }
390 | } catch (Exception e) {
391 | System.out.println("copy folder failed");
392 | e.printStackTrace();
393 | }
394 | }
395 |
396 | /**
397 | * move a file
398 | *
399 | * @param srcPath String the source path
400 | * @param dstPath String the destination path
401 | */
402 | public static void moveFile(String srcPath, String dstPath) {
403 | copyFile(srcPath, dstPath);
404 | delFile(srcPath);
405 | }
406 |
407 | /**
408 | * move a folder
409 | *
410 | * @param srcPath String the source path
411 | * @param dstPath String the destination path
412 | */
413 | public static void moveFolder(String srcPath, String dstPath) {
414 | copyFolder(srcPath, dstPath);
415 | delFolder(srcPath);
416 | }
417 |
418 | /**
419 | * create multi-level directory
420 | *
421 | * @param folderPath the path to create multi-level directory
422 | * @param paths directories, split by '|'
423 | * @return String the created directory path
424 | */
425 | public static String createFolders(String folderPath, String paths) {
426 | String pathString;
427 | try {
428 | String path;
429 | pathString = folderPath;
430 | StringTokenizer stringTokenizer = new StringTokenizer(paths, "|");
431 | for (; stringTokenizer.hasMoreTokens(); ) {
432 | path = stringTokenizer.nextToken();
433 | if (pathString.lastIndexOf("/") != -1) {
434 | pathString = newFolder(pathString + path);
435 | } else {
436 | pathString = newFolder(pathString + path + "/");
437 | }
438 | }
439 | } catch (Exception e) {
440 | System.out.println("create multi-level directory failed");
441 | pathString = "";
442 | e.printStackTrace();
443 | }
444 | return pathString;
445 | }
446 |
447 | /**
448 | * create multi-level directory
449 | *
450 | * @param folderPath the multi-level directory to create
451 | * @return String the created directory path
452 | */
453 | public static String createFolders(String folderPath) {
454 | String pathString = folderPath;
455 | try {
456 | File file = new File(pathString);
457 | if (!file.exists()) {
458 | file.mkdirs();
459 | }
460 | } catch (Exception e) {
461 | System.out.println("create multi-level directory failed");
462 | pathString = "";
463 | e.printStackTrace();
464 | }
465 | return pathString;
466 | }
467 |
468 | /**
469 | * check if the specified file exists
470 | *
471 | * @param fileName the name of the file to be checked
472 | * @return boolean true if exits, false if not
473 | */
474 | public static boolean isFileExist(String fileName) {
475 | return new File(fileName).isFile();
476 | }
477 |
478 | /**
479 | * get all files in a folder
480 | *
481 | * @param path String folder path
482 | * @return List
483 | */
484 | public static List getAllFiles(String path) {
485 | List fileList = new ArrayList();
486 | File file = new File(path);
487 | if (!file.exists()) {
488 | return fileList;
489 | }
490 | if (!file.isDirectory()) {
491 | return fileList;
492 | }
493 | String[] tempList = file.list();
494 | File tempFile;
495 | for (String fileName : tempList) {
496 | if (path.endsWith(File.separator)) {
497 | tempFile = new File(path + fileName);
498 | } else {
499 | tempFile = new File(path + File.separator + fileName);
500 | }
501 | if (tempFile.isFile()) {
502 | fileList.add(tempFile);
503 | }
504 | if (tempFile.isDirectory()) {
505 | List allFiles = getAllFiles(tempFile.getAbsolutePath());
506 | fileList.addAll(allFiles);
507 | }
508 | }
509 | return fileList;
510 | }
511 |
512 | /**
513 | * get all files with specified suffix in a folder
514 | *
515 | * @param path String folder path
516 | * @param suffix String the specified suffix
517 | * @return List
518 | */
519 | public static List getAllFiles(String path, String suffix) {
520 | List fileList = new ArrayList();
521 | File file = new File(path);
522 | if (!file.exists()) {
523 | return fileList;
524 | }
525 | if (!file.isDirectory()) {
526 | return fileList;
527 | }
528 | String[] tempList = file.list();
529 | File tempFile;
530 | for (String fileName : tempList) {
531 | if (path.endsWith(File.separator)) {
532 | tempFile = new File(path + fileName);
533 | } else {
534 | tempFile = new File(path + File.separator + fileName);
535 | }
536 | if (tempFile.isFile()) {
537 | if (suffix == null || "".equals(suffix))
538 | fileList.add(tempFile);
539 | else {
540 | String filePath = tempFile.getAbsolutePath();
541 | if (!suffix.equals("")) {
542 | int beginIndex = filePath.lastIndexOf("."); // the last '.' index before suffix
543 | String tempSuffix;
544 |
545 | if (beginIndex != -1) {
546 | tempSuffix = filePath.substring(beginIndex + 1, filePath.length());
547 | if (tempSuffix.equals(suffix)) {
548 | fileList.add(tempFile);
549 | }
550 | }
551 | }
552 | }
553 | }
554 | if (tempFile.isDirectory()) {
555 | List allFiles = getAllFiles(tempFile.getAbsolutePath(), suffix);
556 | fileList.addAll(allFiles);
557 | }
558 | }
559 | return fileList;
560 | }
561 |
562 | /**
563 | * get all names of file with specified suffix in a folder
564 | *
565 | * @param path String folder path
566 | * @param suffix String the specified suffix
567 | * @param isDepth boolean is need to scan all subdirectories
568 | * @return List
569 | */
570 | public static List getAllFileNames(String path, String suffix, boolean isDepth) {
571 | List fileNamesList = new ArrayList();
572 | File file = new File(path);
573 | return listFileName(fileNamesList, file, suffix, isDepth);
574 | }
575 |
576 | private static List listFileName(List fileNamesList, File file, String suffix, boolean isDepth) {
577 | // if is directory, scan all subdirectories by recursion
578 | if (file.isDirectory()) {
579 | File[] fileList = file.listFiles();
580 |
581 | if (fileList != null) {
582 | for (File tempFile : fileList) {
583 | if (isDepth || tempFile.isFile()) {
584 | listFileName(fileNamesList, tempFile, suffix, isDepth);
585 | }
586 | }
587 | }
588 | } else {
589 | String filePath = file.getAbsolutePath();
590 | if (!suffix.equals("")) {
591 | int begIndex = filePath.lastIndexOf("."); // the last '.' index before suffix
592 | String tempSuffix;
593 |
594 | if (begIndex != -1) {
595 | tempSuffix = filePath.substring(begIndex + 1, filePath.length());
596 | if (tempSuffix.equals(suffix)) {
597 | fileNamesList.add(filePath);
598 | }
599 | }
600 | } else {
601 | fileNamesList.add(filePath);
602 | }
603 | }
604 | return fileNamesList;
605 | }
606 |
607 | /**
608 | * get all file names in a folder
609 | *
610 | * @param path String folder path
611 | * @return List
612 | */
613 | public static List getAllFileNames(String path) {
614 | List fileNamesList = new ArrayList();
615 | File file = new File(path);
616 | if (!file.exists()) {
617 | return fileNamesList;
618 | }
619 | if (!file.isDirectory()) {
620 | return fileNamesList;
621 | }
622 | String[] tempList = file.list();
623 | File tempFile;
624 | for (String fileName : tempList) {
625 | if (path.endsWith(File.separator)) {
626 | tempFile = new File(path + fileName);
627 | } else {
628 | tempFile = new File(path + File.separator + fileName);
629 | }
630 | if (tempFile.isFile()) {
631 | fileNamesList.add(tempFile.getName());
632 | }
633 | }
634 | return fileNamesList;
635 | }
636 |
637 | /**
638 | * get all file names in a folder
639 | *
640 | * @param path String folder path
641 | * @return Map
642 | */
643 | public static Map getAllFileNamesByMap(String path) {
644 | Map fileNamesMap = new HashMap();
645 | File file = new File(path);
646 | if (!file.exists()) {
647 | return fileNamesMap;
648 | }
649 | if (!file.isDirectory()) {
650 | return fileNamesMap;
651 | }
652 | String[] tempList = file.list();
653 | File tempFile;
654 | for (String fileName : tempList) {
655 | if (path.endsWith(File.separator)) {
656 | tempFile = new File(path + fileName);
657 | } else {
658 | tempFile = new File(path + File.separator + fileName);
659 | }
660 | if (tempFile.isFile()) {
661 | fileNamesMap.put(tempFile.getName(), tempFile.getName());
662 | }
663 | }
664 | return fileNamesMap;
665 | }
666 |
667 | /**
668 | * get all file names in a folder
669 | *
670 | * @param path String folder path
671 | * @return String[]
672 | */
673 | public static String[] getAllFileNamesByPath(String path) {
674 | File file = new File(path);
675 | if (!file.exists()) {
676 | return null;
677 | }
678 | if (!file.isDirectory()) {
679 | return null;
680 | }
681 | String[] tempList = file.list();
682 | List fileList = new ArrayList();
683 | File tempFile;
684 | for (String fileName : tempList) {
685 | if (path.endsWith(File.separator)) {
686 | tempFile = new File(path + fileName);
687 | } else {
688 | tempFile = new File(path + File.separator + fileName);
689 | }
690 | if (tempFile.isFile()) {
691 | fileList.add(tempFile.getName());
692 | }
693 | }
694 | return fileList.toArray(new String[fileList.size()]);
695 | }
696 |
697 | /**
698 | * remove suffix of a file
699 | *
700 | * @param fileName file name
701 | * @return String file name without suffix
702 | */
703 | public static String getNameNoSuffix(String fileName) {
704 | if (fileName.lastIndexOf(".") >= 0)
705 | return fileName.substring(0, fileName.lastIndexOf("."));
706 | else
707 | return fileName;
708 | }
709 |
710 | /**
711 | * return file name with suffix
712 | *
713 | * @param fileName file path and name
714 | * @return String file name with suffix
715 | */
716 | public static String getFileName(String fileName) {
717 | String shortFileName = fileName;
718 | shortFileName = shortFileName.replace("\\", "/");
719 | if (shortFileName.contains("/"))
720 | shortFileName = shortFileName.substring(shortFileName.lastIndexOf("/") + 1, shortFileName.length());
721 | return shortFileName;
722 | }
723 |
724 | /**
725 | * check if directory exists, if not exist, create it, return false if create failed
726 | *
727 | * @param path folder path
728 | * @return boolean
729 | */
730 | public static boolean isExist(String path) {
731 | File file = new File(path);
732 | if (!file.exists()) {
733 | boolean isSuccess = file.mkdir();
734 | if (!isSuccess)
735 | createFolders(path);
736 | return isSuccess;
737 | } else {
738 | return true;
739 | }
740 | }
741 |
742 | /**
743 | * check if directory exists
744 | *
745 | * @param path folder path
746 | * @return boolean
747 | */
748 | public static boolean isExistNotCreate(String path) {
749 | File file = new File(path);
750 | return file.exists();
751 | }
752 |
753 | /**
754 | * copy a file
755 | *
756 | * @param fileName file name
757 | * @param srcPath source path
758 | * @param dstPath destination path
759 | * @return boolean
760 | */
761 | public boolean copyTheFile(String fileName, String srcPath, String dstPath) {
762 | boolean isSucceed = false;
763 |
764 | InputStream inputStream = null;
765 | FileOutputStream fileOutputStream = null;
766 | try {
767 | int byteRead;
768 | File srcFile = new File(srcPath + "/" + fileName);
769 | File dstFile = new File(dstPath);
770 | if (!dstFile.exists()) {
771 | dstFile.mkdirs();
772 | }
773 | if (!srcFile.exists()) {
774 | throw new Exception("the file to copy do not exist");
775 | }
776 | if (srcFile.exists()) {
777 | inputStream = new FileInputStream(srcPath + "/" + fileName);
778 | fileOutputStream = new FileOutputStream(dstPath + "/" + fileName);
779 | byte[] buffer = new byte[1444];
780 | while ((byteRead = inputStream.read(buffer)) != -1) {
781 | fileOutputStream.write(buffer, 0, byteRead);
782 | }
783 | }
784 | isSucceed = true;
785 | } catch (Exception e) {
786 | System.out.println("copy file failed");
787 | e.printStackTrace();
788 | } finally {
789 | try {
790 | if (fileOutputStream != null)
791 | fileOutputStream.close();
792 | if (inputStream != null)
793 | inputStream.close();
794 | } catch (IOException e) {
795 | e.printStackTrace();
796 | }
797 | }
798 | return isSucceed;
799 | }
800 |
801 | /**
802 | * move a file
803 | *
804 | * @param fileName file name
805 | * @param srcPath source path
806 | * @param dstPath destination path
807 | * @return boolean
808 | */
809 | public boolean moveTheFile(String fileName, String srcPath, String dstPath) {
810 | boolean isSucceed = false;
811 |
812 | InputStream inputStream = null;
813 | FileOutputStream fileOutputStream = null;
814 | try {
815 | int byteRead;
816 | File srcFile = new File(srcPath + "/" + fileName);
817 | File dstFile = new File(dstPath);
818 | if (!dstFile.exists()) {
819 | dstFile.mkdirs();
820 | }
821 | if (!srcFile.exists()) {
822 | throw new Exception("the file to move do not exist");
823 | }
824 | if (srcFile.exists()) {
825 | inputStream = new FileInputStream(srcPath + "/" + fileName);
826 | fileOutputStream = new FileOutputStream(dstPath + "/" + fileName);
827 | byte[] buffer = new byte[1444];
828 | while ((byteRead = inputStream.read(buffer)) != -1) {
829 | fileOutputStream.write(buffer, 0, byteRead);
830 | }
831 | }
832 | isSucceed = true;
833 | } catch (Exception e) {
834 | System.out.println("move file failed");
835 | e.printStackTrace();
836 | } finally {
837 | try {
838 | if (fileOutputStream != null)
839 | fileOutputStream.close();
840 | if (inputStream != null)
841 | inputStream.close();
842 | } catch (IOException e) {
843 | e.printStackTrace();
844 | }
845 | File deleteFile = new File(srcPath + "/" + fileName);
846 | if (isSucceed)
847 | isSucceed = deleteFile.delete();
848 | }
849 | return isSucceed;
850 | }
851 | }
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/HTML2Md.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util.html2markdown;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.net.MalformedURLException;
6 | import java.net.URL;
7 | import java.util.ArrayList;
8 | import java.util.Calendar;
9 | import java.util.List;
10 | import java.util.Map;
11 | import java.util.TreeMap;
12 |
13 | import com.github.csccoder.csdn2md.util.PropertiesUtil;
14 | import org.jsoup.Jsoup;
15 | import org.jsoup.nodes.Document;
16 | import org.jsoup.nodes.Element;
17 | import org.jsoup.nodes.Entities.EscapeMode;
18 | import org.jsoup.nodes.Node;
19 | import org.jsoup.nodes.TextNode;
20 | import org.jsoup.parser.Tag;
21 | import org.jsoup.safety.Cleaner;
22 | import org.jsoup.safety.Whitelist;
23 | import com.github.csccoder.csdn2md.util.FileUtil;
24 |
25 | /**
26 | * Convert Html to MarkDown
27 | */
28 | public class HTML2Md {
29 | private static int indentation = -1;
30 | private static boolean orderedList = false;
31 |
32 | public static String convert(String theHTML, String baseURL) {
33 | Document doc = Jsoup.parse(theHTML, baseURL);
34 |
35 | return parseDocument(doc);
36 | }
37 |
38 | public static String convert(URL url, int timeoutMillis) throws IOException {
39 | Document doc = Jsoup.parse(url, timeoutMillis);
40 |
41 | return parseDocument(doc);
42 | }
43 |
44 | public static String convertHtml(String html, String charset) throws IOException {
45 | Document doc = Jsoup.parse(html, charset);
46 |
47 | return parseDocument(doc);
48 | }
49 |
50 | public static String convertFile(File file, String charset) throws IOException {
51 | Document doc = Jsoup.parse(file, charset);
52 |
53 | return parseDocument(doc);
54 | }
55 |
56 | public static void htmlToJekyllMd(String htmlPath, String mdPath, String charset) {
57 | try {
58 | List fileList = FilesUtil.getAllFiles(htmlPath, "html");
59 | for (File file : fileList) {
60 | String mdName = file.getAbsolutePath().replace(htmlPath, mdPath).replace("html", "md");
61 | String hmPath = mdName.substring(0, mdName.lastIndexOf("/")) + "/";
62 | String separator = System.getProperty("line.separator");
63 | String head = "---" + separator +
64 | "layout: post" + separator +
65 | "title: \"" + file.getName() + "\"" + separator +
66 | "description: \"" + file.getName() + "\"" + separator +
67 | "category: pages\"" + separator +
68 | "tags: [blog]\"" + separator +
69 | "--- " + separator +
70 | "{% include JB/setup %}" + separator
71 | + separator;
72 | FilesUtil.isExist(hmPath);
73 | String parsedText = convertFile(file, charset);
74 | Calendar calendar = Calendar.getInstance();
75 | String dateName = DateUtil.dateToShortString(calendar.getTime());
76 | String newName = dateName + "-" + hmPath.replace(mdPath, "").replace("/", "-") + "-" + file.getName();
77 | String mmName = (hmPath + newName.replace("html", "md")).replaceAll("\\s*", "");
78 | FilesUtil.newFile(mmName, head + parsedText, charset);
79 | }
80 | } catch (MalformedURLException e) {
81 | e.printStackTrace();
82 | } catch (IOException e) {
83 | e.printStackTrace();
84 | }
85 | }
86 |
87 |
88 | public static void htmlToHexoMd(String htmlPath, String mdPath, String charset) {
89 | try {
90 | List fileList = FilesUtil.getAllFiles(htmlPath, "html");
91 | for (File file : fileList) {
92 | String mdName = file.getAbsolutePath().replace(htmlPath, mdPath).replace("html", "md");
93 | String hmPath = mdName.substring(0, mdName.lastIndexOf("/")) + "/";
94 | String separator = System.getProperty("line.separator");
95 | String[] strings = hmPath.replace(mdPath, "").split("/");
96 | Calendar calendar = Calendar.getInstance();
97 | String dateName = DateUtil.dateToShortString(calendar.getTime());
98 | String dateString = DateUtil.dateToLongString(calendar.getTime());
99 | StringBuilder blog = new StringBuilder();
100 | StringBuilder categories = new StringBuilder();
101 | Map stringMap = new TreeMap();
102 | for (String value : strings) {
103 | stringMap.put(value, value);
104 | }
105 | for (String tag : stringMap.keySet()) {
106 | blog.append(" - ").append(tag).append(separator);
107 | }
108 | categories.append(strings[0]);
109 | String head = "---" + separator +
110 | "layout: post" + separator +
111 | "title: \"" + file.getName().replace(".html", "").split("-")[0] + "\"" + separator +
112 | "date: " + dateString + separator +
113 | "categories: " + categories + separator +
114 | "tags: " + separator +
115 | blog.toString() +
116 | "--- " + separator +
117 | separator;
118 | FilesUtil.isExist(hmPath);
119 | String parsedText = HTML2Md.convertFile(file, "utf-8");
120 | String newName = dateName + "-" + hmPath.replace(mdPath, "").replace("/", "-") + "-" + file.getName();
121 | String mmName = (hmPath + newName.replace("html", "md")).replaceAll("\\s*", "");
122 | FilesUtil.newFile(mmName, head + parsedText, charset);
123 | }
124 | } catch (MalformedURLException e) {
125 | e.printStackTrace();
126 | } catch (IOException e) {
127 | e.printStackTrace();
128 | }
129 | }
130 |
131 | private static String parseDocument(Document dirtyDoc) {
132 | indentation = -1;
133 |
134 | String title = dirtyDoc.title();
135 |
136 | Whitelist whitelist = Whitelist.relaxed();
137 | Cleaner cleaner = new Cleaner(whitelist);
138 |
139 | Document doc = cleaner.clean(dirtyDoc);
140 | doc.outputSettings().escapeMode(EscapeMode.xhtml);
141 |
142 | if (!title.trim().equals("")) {
143 | return "# " + title + "\n\n" + getTextContent(doc);
144 | } else {
145 | return getTextContent(doc);
146 | }
147 | }
148 |
149 | private static String getTextContent(Element element) {
150 | ArrayList lines = new ArrayList();
151 |
152 | List children = element.childNodes();
153 | for (Node child : children) {
154 | if (child instanceof TextNode) {
155 | TextNode textNode = (TextNode) child;
156 | MDLine line = getLastLine(lines);
157 | if (line.getContent().equals("")) {
158 | if (!textNode.isBlank()) {
159 | line.append(textNode.text().replaceAll("#", "/#").replaceAll("\\*", "/\\*"));
160 | }
161 | } else {
162 | line.append(textNode.text().replaceAll("#", "/#").replaceAll("\\*", "/\\*"));
163 | }
164 |
165 | } else if (child instanceof Element) {
166 | Element childElement = (Element) child;
167 | processElement(childElement, lines);
168 | } else {
169 | System.out.println();
170 | }
171 | }
172 |
173 | int blankLines = 0;
174 | StringBuilder result = new StringBuilder();
175 | for (int i = 0; i < lines.size(); i++) {
176 | String line = lines.get(i).toString().trim();
177 | if (line.equals("")) {
178 | blankLines++;
179 | } else {
180 | blankLines = 0;
181 | }
182 | if (blankLines < 2) {
183 | result.append(line);
184 | if (i < lines.size() - 1) {
185 | result.append("\n");
186 | }
187 | }
188 | }
189 |
190 | return result.toString();
191 | }
192 |
193 | private static void processElement(Element element, ArrayList lines) {
194 | Tag tag = element.tag();
195 | String tagName = tag.getName();
196 | if (tagName.equals("div")) {
197 | div(element, lines);
198 | } else if (tagName.equals("p")) {
199 | p(element, lines);
200 | } else if (tagName.equals("br")) {
201 | br(lines);
202 | } else if (tagName.matches("^h[0-9]+$")) {
203 | h(element, lines);
204 | } else if (tagName.equals("strong") || tagName.equals("b")) {
205 | strong(element, lines);
206 | } else if (tagName.equals("em")) {
207 | em(element, lines);
208 | } else if (tagName.equals("hr")) {
209 | hr(lines);
210 | } else if (tagName.equals("a")) {
211 | a(element, lines);
212 | } else if (tagName.equals("img")) {
213 | img(element, lines);
214 | } else if (tagName.equals("code")) {
215 |
216 | Element e = (Element)element.parentNode();
217 | // ` ` 这种code
218 | if (e.tag().getName().equals("p")) {
219 | codeP(element, lines);
220 | } else {
221 | code(element, lines);
222 | }
223 | } else if (tagName.equals("ul")) {
224 | ul(element, lines);
225 | } else if (tagName.equals("ol")) {
226 | ol(element, lines);
227 | } else if (tagName.equals("li")) {
228 | li(element, lines);
229 | } else {
230 | MDLine line = getLastLine(lines);
231 | line.append(getTextContent(element));
232 | }
233 | }
234 |
235 | private static MDLine getLastLine(ArrayList lines) {
236 | MDLine line;
237 | if (lines.size() > 0) {
238 | line = lines.get(lines.size() - 1);
239 | } else {
240 | line = new MDLine(MDLine.MDLineType.None, 0, "");
241 | lines.add(line);
242 | }
243 |
244 | return line;
245 | }
246 |
247 | private static void div(Element element, ArrayList lines) {
248 | if (PropertiesUtil.getProperties("copyright").equals("false")) {
249 | if (!(element.text().indexOf("版权声明")>=0)){
250 | MDLine line = getLastLine(lines);
251 | String content = getTextContent(element);
252 | if (!content.equals("")) {
253 | if (!line.getContent().trim().equals("")) {
254 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
255 | lines.add(new MDLine(MDLine.MDLineType.None, 0, content));
256 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
257 | } else {
258 | if (!content.trim().equals(""))
259 | line.append(content);
260 | }
261 | }
262 | }
263 | }else {
264 | MDLine line = getLastLine(lines);
265 | String content = getTextContent(element);
266 | if (!content.equals("")) {
267 | if (!line.getContent().trim().equals("")) {
268 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
269 | lines.add(new MDLine(MDLine.MDLineType.None, 0, content));
270 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
271 | } else {
272 | if (!content.trim().equals(""))
273 | line.append(content);
274 | }
275 | }
276 | }
277 |
278 |
279 | }
280 |
281 | private static void p(Element element, ArrayList lines) {
282 | MDLine line = getLastLine(lines);
283 | if (!line.getContent().trim().equals(""))
284 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
285 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
286 | lines.add(new MDLine(MDLine.MDLineType.None, 0, getTextContent(element)));
287 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
288 | if (!line.getContent().trim().equals(""))
289 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
290 | }
291 |
292 | private static void br(ArrayList lines) {
293 | MDLine line = getLastLine(lines);
294 | if (!line.getContent().trim().equals(""))
295 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
296 | }
297 |
298 | private static void h(Element element, ArrayList lines) {
299 | MDLine line = getLastLine(lines);
300 | if (!line.getContent().trim().equals(""))
301 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
302 |
303 | int level = Integer.valueOf(element.tagName().substring(1));
304 | switch (level) {
305 | case 1:
306 | lines.add(new MDLine(MDLine.MDLineType.Head1, 0, getTextContent(element)));
307 | break;
308 | case 2:
309 | lines.add(new MDLine(MDLine.MDLineType.Head2, 0, getTextContent(element)));
310 | break;
311 | case 4:
312 | lines.add(new MDLine(MDLine.MDLineType.Head4, 0, getTextContent(element)));
313 | break;
314 | case 5:
315 | lines.add(new MDLine(MDLine.MDLineType.Head5, 0, getTextContent(element)));
316 | break;
317 | default:
318 | if (PropertiesUtil.getProperties("anchor").equals("false")){
319 | if (element.text().indexOf("文章目录")>0){
320 | lines.add(new MDLine(MDLine.MDLineType.Head3, 0, getTextContent(element)));
321 | }
322 | }else{
323 | lines.add(new MDLine(MDLine.MDLineType.Head3, 0, getTextContent(element)));
324 | }
325 |
326 | break;
327 | }
328 |
329 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
330 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
331 | }
332 |
333 | private static void strong(Element element, ArrayList lines) {
334 | MDLine line = getLastLine(lines);
335 | line.append("**");
336 | line.append(getTextContent(element));
337 | line.append("**");
338 | }
339 |
340 | private static void em(Element element, ArrayList lines) {
341 | MDLine line = getLastLine(lines);
342 | line.append("*");
343 | line.append(getTextContent(element));
344 | line.append("*");
345 | }
346 |
347 | private static void hr(ArrayList lines) {
348 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
349 | lines.add(new MDLine(MDLine.MDLineType.HR, 0, ""));
350 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
351 | }
352 |
353 | private static void a(Element element, ArrayList lines) {
354 | if (PropertiesUtil.getProperties("anchor").equals("false")) {
355 | if (!element.attr("href").equals("")){
356 | MDLine line = getLastLine(lines);
357 | line.append("[");
358 | line.append(getTextContent(element));
359 | line.append("]");
360 | line.append("(");
361 | String url = element.attr("href");
362 | line.append(url);
363 | String title = element.attr("title");
364 | if (!title.equals("")) {
365 | line.append(" \"");
366 | line.append(title);
367 | line.append("\"");
368 | }
369 | line.append(")");
370 | }
371 | }else{
372 | MDLine line = getLastLine(lines);
373 | line.append("[");
374 | line.append(getTextContent(element));
375 | line.append("]");
376 | line.append("(");
377 | String url = element.attr("href");
378 | line.append(url);
379 | String title = element.attr("title");
380 | if (!title.equals("")) {
381 | line.append(" \"");
382 | line.append(title);
383 | line.append("\"");
384 | }
385 | line.append(")");
386 | }
387 | }
388 |
389 | private static void img(Element element, ArrayList lines) {
390 | MDLine line = getLastLine(lines);
391 |
392 | line.append("![");
393 | String alt = element.attr("alt");
394 | line.append(alt);
395 | line.append("]");
396 | line.append("(");
397 | String url = element.attr("src");
398 | String fix = FileUtil.getPicture(url);
399 |
400 | String url_new = "../images/" + fix;
401 |
402 | line.append(url_new);
403 | String title = element.attr("title");
404 | if (!title.equals("")) {
405 | line.append(" \"");
406 | line.append(title);
407 | line.append("\"");
408 | }
409 | line.append(")");
410 | }
411 |
412 |
413 | private static void code(Element element, ArrayList lines) {
414 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
415 | MDLine line = new MDLine(MDLine.MDLineType.None, 0, "```js \n");
416 | // line.append(getTextContent(element).replace("\n", " "));
417 | line.append(element.text()+"\n");
418 | line.append("```");
419 | lines.add(line);
420 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
421 | }
422 |
423 | private static void codeP(Element element, ArrayList lines) {
424 | MDLine line = getLastLine(lines);
425 | line.append("`"+getTextContent(element)+"`");
426 | }
427 |
428 | private static void ul(Element element, ArrayList lines) {
429 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
430 | indentation++;
431 | orderedList = false;
432 | MDLine line = new MDLine(MDLine.MDLineType.None, 0, "");
433 | line.append(getTextContent(element));
434 | lines.add(line);
435 | indentation--;
436 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
437 | }
438 |
439 | private static void ol(Element element, ArrayList lines) {
440 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
441 | indentation++;
442 | orderedList = true;
443 | MDLine line = new MDLine(MDLine.MDLineType.None, 0, "");
444 | line.append(getTextContent(element));
445 | lines.add(line);
446 | indentation--;
447 | lines.add(new MDLine(MDLine.MDLineType.None, 0, ""));
448 | }
449 |
450 | private static void li(Element element, ArrayList lines) {
451 |
452 | if (PropertiesUtil.getProperties("anchor").equals("false")) {
453 | if (!element.children().attr("href").equals("")) {
454 |
455 | MDLine line;
456 | if (orderedList) {
457 | line = new MDLine(MDLine.MDLineType.Ordered, indentation,
458 | getTextContent(element));
459 | } else {
460 | line = new MDLine(MDLine.MDLineType.Unordered, indentation,
461 | getTextContent(element));
462 | }
463 | lines.add(line);
464 | }
465 | }else{
466 | MDLine line;
467 | if (orderedList) {
468 | line = new MDLine(MDLine.MDLineType.Ordered, indentation,
469 | getTextContent(element));
470 | } else {
471 | line = new MDLine(MDLine.MDLineType.Unordered, indentation,
472 | getTextContent(element));
473 | }
474 | lines.add(line);
475 | }
476 |
477 |
478 | }
479 | //
480 | // private static void pre(Element element, ArrayList lines) {
481 | //
482 | // lines.add(new MDLine(MDLine.MDLineType.None, 0, "```"));
483 | // MDLine line = new MDLine(MDLine.MDLineType.None, 0, "");
484 | // String in = getTextContent(element);
485 | // line.append(in);
486 | // lines.add(line);
487 | // lines.add(new MDLine(MDLine.MDLineType.None, 0, "```"));
488 | //
489 | // }
490 | }
491 |
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/MDLine.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util.html2markdown;
2 |
3 | public class MDLine {
4 | private int level = 0;
5 | private MDLineType type;
6 | private StringBuilder content;
7 |
8 | public MDLine(MDLineType type, int level, String content) {
9 | this.type = type;
10 | this.level = level;
11 | this.content = new StringBuilder(content);
12 | }
13 |
14 | public MDLine create(String line) {
15 | int spaces = 0;
16 | while ((spaces < line.length()) && (line.charAt(spaces) == ' ')) {
17 | spaces++;
18 | }
19 | String content = line.substring(spaces);
20 |
21 | int newLevel = spaces / 4;
22 |
23 | if (content.length() > 0) {
24 | if (content.matches("^[0-9]+\\.\\s.*")) {
25 | int c = 0;
26 | while ((c < content.length()) && (Character.isDigit(content.charAt(c)))) {
27 | c++;
28 | }
29 | return new MDLine(MDLineType.Ordered, newLevel, content.substring(c + 2));
30 | } else if (content.matches("^(\\*|\\+|\\-)\\s.*")) {
31 | return new MDLine(MDLineType.Unordered, newLevel, content.substring(2));
32 | } else if (content.matches("^[#]+.*")) {
33 | int c = 0;
34 | while ((c < content.length()) && (content.charAt(c) == '#')) {
35 | c++;
36 | }
37 | MDLineType headerType;
38 | switch (c) {
39 | case 1:
40 | headerType = MDLineType.Head1;
41 | break;
42 | case 2:
43 | headerType = MDLineType.Head2;
44 | break;
45 | case 4:
46 | headerType = MDLineType.Head4;
47 | break;
48 | case 5:
49 | headerType = MDLineType.Head5;
50 | break;
51 | default:
52 | headerType = MDLineType.Head3;
53 | break;
54 | }
55 |
56 | while ((c < content.length()) && (content.charAt(c) == ' ')) {
57 | c++;
58 | }
59 |
60 | return new MDLine(headerType, newLevel, content.substring(c));
61 | }
62 | }
63 |
64 | content = line.substring(4 * newLevel);
65 |
66 | return new MDLine(MDLineType.None, newLevel, content);
67 | }
68 |
69 | public MDLineType getListTypeName() {
70 | return type;
71 | }
72 |
73 | public int getLevel() {
74 | return level;
75 | }
76 |
77 | public void setLevel(int i) {
78 | level = Math.max(i, 0);
79 | }
80 |
81 | public String toString() {
82 | StringBuilder newLine = new StringBuilder();
83 | for (int j = 0; j < getLevel(); j++) {
84 | newLine.append(" ");
85 | }
86 |
87 | if (type.equals(MDLineType.Ordered)) {
88 | newLine.append(String.valueOf(1)).append(". ");
89 | } else if (type.equals(MDLineType.Unordered)) {
90 | newLine.append("* ");
91 | } else if (type.equals(MDLineType.Head1)) {
92 | newLine.append("# ");
93 | } else if (type.equals(MDLineType.Head2)) {
94 | newLine.append("## ");
95 | } else if (type.equals(MDLineType.Head3)) {
96 | newLine.append("### ");
97 | } else if (type.equals(MDLineType.Head4)) {
98 | newLine.append("#### ");
99 | } else if (type.equals(MDLineType.Head5)) {
100 | newLine.append("##### ");
101 | } else if (type.equals(MDLineType.HR)) {
102 | newLine.append("----");
103 | }
104 |
105 | String contentStr = getContent();
106 | if (type.equals(MDLineType.Unordered)) {
107 | contentStr = contentStr.replaceAll("^\n", "");
108 | }
109 | newLine.append(contentStr);
110 |
111 | return newLine.toString();
112 | }
113 |
114 | public String getContent() {
115 | return content.toString();
116 | }
117 |
118 | public void append(String appendContent) {
119 | if (content.length() == 0) {
120 | int i = 0;
121 | while (i < appendContent.length() && Character.isWhitespace(appendContent.charAt(i))) {
122 | i++;
123 | }
124 | content.append(appendContent.substring(i));
125 | } else {
126 | content.append(appendContent);
127 | }
128 | }
129 |
130 | @Override
131 | public boolean equals(Object o) {
132 | return o instanceof MDLine && ((MDLine) o).type.equals(this.type);
133 | }
134 |
135 | public boolean isList() {
136 | return (type.equals(MDLineType.Ordered) || type.equals(MDLineType.Unordered));
137 | }
138 |
139 | public void setListType(MDLineType type2) {
140 | type = type2;
141 | }
142 |
143 | public enum MDLineType {
144 | Ordered, Unordered, None, Head1, Head2, Head3, Head4, Head5, HR
145 | }
146 | }
--------------------------------------------------------------------------------
/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/Testbed.java:
--------------------------------------------------------------------------------
1 | package com.github.csccoder.csdn2md.util.html2markdown;
2 |
3 | import java.io.IOException;
4 | import java.net.MalformedURLException;
5 | import java.net.URL;
6 |
7 | public class Testbed {
8 | public static void main(String[] args) {
9 | URL url;
10 | try {
11 | url = new URL("http://jsoup.org/");
12 | String parsedText = HTML2Md.convert(url, 30000);
13 | System.out.println(parsedText);
14 |
15 | // test parse local html file
16 | // String pathFile = "test.html";
17 | // File f = new File(pathFile);
18 | // String parsedFileText = HTML2Md.convertFile(f, "gbk");
19 | // System.out.println(parsedFileText);
20 |
21 | System.out.println("done");
22 | } catch (MalformedURLException e) {
23 | e.printStackTrace();
24 | } catch (IOException e) {
25 | e.printStackTrace();
26 | }
27 | }
28 | }
--------------------------------------------------------------------------------
/src/main/resources/config.properties:
--------------------------------------------------------------------------------
1 | # 是否渲染hexo头部
2 | head=false
3 |
4 | # md文章头部配置
5 | #分类和标签逗号隔开
6 | title=
7 | author=Leesin.Dong
8 | tags=a,b,c
9 | categories=a,b,c
10 |
11 | # 文件命名规则 可选值:date title ,date根据日期命名,title根据文章名命名
12 | MdFileName_type=title
13 |
14 |
15 | # csdn host 这里如果没有自定义域名的话不用改
16 | csdn_host=https://blog.csdn.net
17 | # csdn用户名
18 | casn_name=dataiyangu
19 |
20 |
21 | # 文件保存的绝对路径,即img html post这三个文件夹的父文件夹
22 | file_Path=/Users/dzsb-002294/Desktop/hexo_blog_md
23 | # 设置下载的文件夹名字
24 | html_path=html
25 | image_path=images
26 | md_path=_posts
27 |
28 |
29 | # 设置下载的方式,默认是从第一页往后不断的下载的
30 | #可选的方式:
31 | # 1 默认轮询从第一页开始往后
32 | # 2 专栏方式
33 | # 3 指定某篇文章
34 | # 4 指定页数
35 | # 5 分类
36 | url_way=1
37 | # 具体的五种抓取方法的地址填写(视自己情况而定)
38 | # 比如 我的: https://blog.csdn.net/dataiyangu/article/category/8118370
39 | # 下面的几种情况只写 https://blog.csdn.net/dataiyangu 后面的内容即可,后面写什么自己手动点到相应的页面粘贴过来就行了
40 | url_way_1=/article/list/
41 | url_way_2=/column/info/32118
42 | url_way_3=/article/details/97544551
43 | url_way_4=/article/list/2
44 | url_way_5=/article/category/8118370
45 |
46 | #是否显示csdn中的锚点"文章目录"字样,以及下面具体的锚点 默认false
47 | anchor=false
48 | #是否开启版权声明 默认false
49 | copyright=false
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/src/main/resources/img/blog_csdn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/src/main/resources/img/blog_csdn.png
--------------------------------------------------------------------------------
/src/main/resources/img/blog_hexo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/src/main/resources/img/blog_hexo.png
--------------------------------------------------------------------------------
/src/main/resources/img/dir_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/src/main/resources/img/dir_0.png
--------------------------------------------------------------------------------
/src/main/resources/img/dir_html.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/src/main/resources/img/dir_html.png
--------------------------------------------------------------------------------
/src/main/resources/img/dir_md.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/src/main/resources/img/dir_md.png
--------------------------------------------------------------------------------
/src/main/resources/img/html_detail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/src/main/resources/img/html_detail.png
--------------------------------------------------------------------------------
/src/main/resources/img/markdown_detail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/src/main/resources/img/markdown_detail.png
--------------------------------------------------------------------------------
/src/main/resources/img/run_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/src/main/resources/img/run_out.png
--------------------------------------------------------------------------------
/src/main/resources/start.sh:
--------------------------------------------------------------------------------
#!/bin/zsh

# Usage: start.sh <tags> <categories> <title>
#
# Rewrites the tags=, categories= and title= entries of config.properties in
# the current directory, then runs the converter jar.
#
# `sed -i ""` is the BSD/macOS form of in-place editing. '#' is used as the
# sed delimiter, so arguments must not contain '#'.

sed -i "" "s#^tags=.*#tags=$1#g" config.properties
sed -i "" "s#^categories=.*#categories=$2#g" config.properties
# Write the third argument back as title=; the replacement used to be
# "tags=$3", which removed the title key and added a second tags line.
sed -i "" "s#^title=.*#title=$3#g" config.properties

java -jar csdn2hexo-1.0-SNAPSHOT.jar
8 |
9 |
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/Main.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/Main.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/model/Article.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/model/Article.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/paser/ArticlePaser.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/paser/ArticlePaser.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/paser/CorePaser.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/paser/CorePaser.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/FileUtil.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/FileUtil.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/HexoMdUtil.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/HexoMdUtil.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/HttpClientUtil$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/HttpClientUtil$1.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/HttpClientUtil.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/HttpClientUtil.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/PropertiesUtil.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/PropertiesUtil.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/ProxyIP.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/ProxyIP.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/RegexUtil.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/RegexUtil.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/html2markdown/DateUtil.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/DateUtil.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/html2markdown/FilesUtil.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/FilesUtil.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/html2markdown/HTML2Md.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/HTML2Md.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/html2markdown/MDLine$MDLineType.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/MDLine$MDLineType.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/html2markdown/MDLine.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/MDLine.class
--------------------------------------------------------------------------------
/target/classes/com/github/csccoder/csdn2md/util/html2markdown/Testbed.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/com/github/csccoder/csdn2md/util/html2markdown/Testbed.class
--------------------------------------------------------------------------------
/target/classes/config.properties:
--------------------------------------------------------------------------------
1 | # 是否渲染hexo头部
2 | head=false
3 |
4 | # md文章头部配置
5 | #分类和标签逗号隔开
6 | title=
7 | author=Leesin.Dong
8 | tags=a,b,c
9 | categories=a,b,c
10 |
11 | # 文件命名规则 可选值:date title ,date根据日期命名,title根据文章名命名
12 | MdFileName_type=title
13 |
14 |
15 | # csdn host 这里如果没有自定义域名的话不用改
16 | csdn_host=https://blog.csdn.net
17 | # csdn用户名
18 | casn_name=dataiyangu
19 |
20 |
21 | # 文件保存的绝对路径,即img html post这三个文件夹的父文件夹
22 | file_Path=/Users/dzsb-002294/Desktop/hexo_blog_md
23 | # 设置下载的文件夹名字
24 | html_path=html
25 | image_path=images
26 | md_path=_posts
27 |
28 |
29 | # 设置下载的方式,默认是从第一页往后不断的下载的
30 | #可选的方式:
31 | # 1 默认轮询从第一页开始往后
32 | # 2 专栏方式
33 | # 3 指定某篇文章
34 | # 4 指定页数
35 | # 5 分类
36 | url_way=1
37 | # 具体的五种抓取方法的地址填写(视自己情况而定)
38 | # 比如 我的: https://blog.csdn.net/dataiyangu/article/category/8118370
39 | # 下面的几种情况只写 https://blog.csdn.net/dataiyangu 后面的内容即可,后面写什么自己手动点到相应的页面粘贴过来就行了
40 | url_way_1=/article/list/
41 | url_way_2=/column/info/32118
42 | url_way_3=/article/details/97544551
43 | url_way_4=/article/list/2
44 | url_way_5=/article/category/8118370
45 |
46 | #是否显示csdn中的锚点"文章目录"字样,以及下面具体的锚点 默认false
47 | anchor=false
48 | #是否开启版权声明 默认false
49 | copyright=false
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/target/classes/img/blog_csdn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/blog_csdn.png
--------------------------------------------------------------------------------
/target/classes/img/blog_hexo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/blog_hexo.png
--------------------------------------------------------------------------------
/target/classes/img/dir_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/dir_0.png
--------------------------------------------------------------------------------
/target/classes/img/dir_html.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/dir_html.png
--------------------------------------------------------------------------------
/target/classes/img/dir_md.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/dir_md.png
--------------------------------------------------------------------------------
/target/classes/img/html_detail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/html_detail.png
--------------------------------------------------------------------------------
/target/classes/img/markdown_detail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/markdown_detail.png
--------------------------------------------------------------------------------
/target/classes/img/run_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/classes/img/run_out.png
--------------------------------------------------------------------------------
/target/classes/start.sh:
--------------------------------------------------------------------------------
#!/bin/zsh

# Usage: start.sh <tags> <categories> <title>
#
# Rewrites the tags=, categories= and title= entries of config.properties in
# the current directory, then runs the converter jar.
#
# `sed -i ""` is the BSD/macOS form of in-place editing. '#' is used as the
# sed delimiter, so arguments must not contain '#'.

sed -i "" "s#^tags=.*#tags=$1#g" config.properties
sed -i "" "s#^categories=.*#categories=$2#g" config.properties
# Write the third argument back as title=; the replacement used to be
# "tags=$3", which removed the title key and added a second tags line.
sed -i "" "s#^title=.*#title=$3#g" config.properties

java -jar csdn2hexo-1.0-SNAPSHOT.jar
8 |
9 |
--------------------------------------------------------------------------------
/target/config.properties:
--------------------------------------------------------------------------------
1 | # md文章头部配置
2 | #分类和标签逗号隔开
3 | title=
4 | author=Leesin.Dong
5 | tags=a,b,c
6 | categories=a,b,c
7 |
8 | # 文件命名规则 可选值:date title ,date根据日期命名,title根据文章名命名
9 | MdFileName_type=date
10 |
11 |
12 | # csdn host 这里如果没有自定义域名的话不用改
13 | csdn_host=https://blog.csdn.net
14 | # csdn用户名
15 | casn_name=dataiyangu
16 |
17 |
18 | # 文件保存的绝对路径,即img html post这三个文件夹的父文件夹
19 | file_Path=/Users/leesin/Desktop/hexo_blog_md
20 | # 设置下载的文件夹名字
21 | html_path=html
22 | image_path=images
23 | md_path=_posts
24 |
25 |
26 | # 设置下载的方式,默认是从第一页往后不断的下载的
27 | #可选的方式:
28 | # 1 默认轮询从第一页开始往后
29 | # 2 专栏方式
30 | # 3 指定某篇文章
31 | # 4 指定页数
32 | # 5 分类
33 | url_way=2
34 | # 具体的五种抓取方法的地址填写(视自己情况而定)
35 | # 比如 我的: https://blog.csdn.net/dataiyangu/article/category/8118370
36 | # 下面的几种情况只写 https://blog.csdn.net/dataiyangu 后面的内容即可,后面写什么自己手动点到相应的页面粘贴过来就行了
37 | url_way_1=/article/list/
38 | url_way_2=/column/info/32118
39 | url_way_3=/article/details/88525801
40 | url_way_4=/article/list/2
41 | url_way_5=/article/category/8118370
42 |
43 | #是否显示csdn中的锚点"文章目录"字样,以及下面具体的锚点 默认false
44 | anchor=false
45 | #是否开启版权声明 默认false
46 | copyright=false
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/target/csdn2hexo-1.0-SNAPSHOT.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/csdn2hexo-1.0-SNAPSHOT.jar
--------------------------------------------------------------------------------
/target/dependency-reduced-pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 4.0.0
4 | com.github.csccoder
5 | csdn2hexo
6 | 1.0-SNAPSHOT
7 |
8 |
9 |
10 | true
11 | src/main/java
12 |
13 | **/*.properties
14 | **/*.xml
15 | **/*.sh
16 |
17 |
18 |
19 | true
20 | src/main/resources
21 |
22 | **/*
23 | **/*
24 |
25 |
26 |
27 |
28 |
29 | maven-shade-plugin
30 | 1.2.1
31 |
32 |
33 | package
34 |
35 | shade
36 |
37 |
38 |
39 |
40 | com.github.csccoder.csdn2md.Main
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 | maven-resources-plugin
49 | 2.7
50 |
51 |
52 | copy-resources
53 | package
54 |
55 | copy-resources
56 |
57 |
58 | ${project.build.directory}
59 |
60 |
61 | src/main/resources
62 |
63 | **/*.xml
64 | **/*.conf
65 | **/*.properties
66 | **/*.sh
67 |
68 | true
69 |
70 |
71 | UTF-8
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
--------------------------------------------------------------------------------
/target/maven-archiver/pom.properties:
--------------------------------------------------------------------------------
1 | #Generated by Maven
2 | #Mon Mar 18 12:19:57 CST 2019
3 | version=1.0-SNAPSHOT
4 | groupId=com.github.csccoder
5 | artifactId=csdn2hexo
6 |
--------------------------------------------------------------------------------
/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst:
--------------------------------------------------------------------------------
1 | com/github/csccoder/csdn2md/util/html2markdown/HTML2Md.class
2 | com/github/csccoder/csdn2md/util/html2markdown/FilesUtil.class
3 | com/github/csccoder/csdn2md/util/HttpClientUtil$1.class
4 | com/github/csccoder/csdn2md/util/html2markdown/MDLine$MDLineType.class
5 | com/github/csccoder/csdn2md/util/html2markdown/MDLine.class
6 | com/github/csccoder/csdn2md/model/Article.class
7 | com/github/csccoder/csdn2md/util/ProxyIP.class
8 | com/github/csccoder/csdn2md/util/FileUtil.class
9 | com/github/csccoder/csdn2md/util/html2markdown/Testbed.class
10 | com/github/csccoder/csdn2md/util/HttpClientUtil.class
11 | com/github/csccoder/csdn2md/util/HexoMdUtil.class
12 | com/github/csccoder/csdn2md/util/RegexUtil.class
13 | com/github/csccoder/csdn2md/util/html2markdown/DateUtil.class
14 | com/github/csccoder/csdn2md/util/PropertiesUtil.class
15 | com/github/csccoder/csdn2md/paser/CorePaser.class
16 | com/github/csccoder/csdn2md/paser/ArticlePaser.class
17 | com/github/csccoder/csdn2md/Main.class
18 |
--------------------------------------------------------------------------------
/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst:
--------------------------------------------------------------------------------
1 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/paser/ArticlePaser.java
2 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/HTML2Md.java
3 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/HexoMdUtil.java
4 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/MDLine.java
5 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/HttpClientUtil.java
6 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/RegexUtil.java
7 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/FileUtil.java
8 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/FilesUtil.java
9 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/Main.java
10 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/Testbed.java
11 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/model/Article.java
12 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/html2markdown/DateUtil.java
13 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/paser/CorePaser.java
14 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/ProxyIP.java
15 | /Users/leesin/Desktop/csdn2hexo_github/csdn2hexo/src/main/java/com/github/csccoder/csdn2md/util/PropertiesUtil.java
16 |
--------------------------------------------------------------------------------
/target/original-csdn2hexo-1.0-SNAPSHOT.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LeesinDong/csdn2hexo/2875900f0e211e4381de1e923a918dbf354efd94/target/original-csdn2hexo-1.0-SNAPSHOT.jar
--------------------------------------------------------------------------------
/target/start.sh:
--------------------------------------------------------------------------------
#!/bin/zsh

# Usage: start.sh <tags> <categories> <title>
#
# Rewrites the tags=, categories= and title= entries of config.properties in
# the current directory, then runs the converter jar.
#
# `sed -i ""` is the BSD/macOS form of in-place editing. '#' is used as the
# sed delimiter, so arguments must not contain '#'.

sed -i "" "s#^tags=.*#tags=$1#g" config.properties
sed -i "" "s#^categories=.*#categories=$2#g" config.properties
# Write the third argument back as title=; the replacement used to be
# "tags=$3", which removed the title key and added a second tags line.
sed -i "" "s#^title=.*#title=$3#g" config.properties

java -jar csdn2hexo-1.0-SNAPSHOT.jar
8 |
9 |
--------------------------------------------------------------------------------