├── .gitignore
├── README.md
└── ZhihuDown
├── .classpath
├── .gitignore
├── .project
├── .settings
└── org.eclipse.jdt.core.prefs
└── src
├── controller
└── Spider.java
└── model
├── FileReaderWriter.java
└── Zhihu.java
/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 |
3 | # Package Files #
4 | *.jar
5 | *.war
6 | *.ear
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 知乎下巴
2 | ==========
3 |
4 | 笔记地址:
5 | [让我们一起来做一个知乎下巴噢耶](http://blog.csdn.net/pleasecallmewhy/article/details/17538809)
--------------------------------------------------------------------------------
/ZhihuDown/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/ZhihuDown/.gitignore:
--------------------------------------------------------------------------------
1 | /bin
2 |
--------------------------------------------------------------------------------
/ZhihuDown/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | ZhihuDown
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.jdt.core.javanature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/ZhihuDown/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
5 | org.eclipse.jdt.core.compiler.compliance=1.7
6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
11 | org.eclipse.jdt.core.compiler.source=1.7
12 |
--------------------------------------------------------------------------------
/ZhihuDown/src/controller/Spider.java:
--------------------------------------------------------------------------------
1 | package controller;
2 |
3 | import java.util.ArrayList;
4 |
5 | import org.apache.http.client.methods.CloseableHttpResponse;
6 | import org.apache.http.client.methods.HttpGet;
7 | import org.apache.http.impl.client.CloseableHttpClient;
8 | import org.apache.http.impl.client.HttpClients;
9 | import org.apache.http.util.EntityUtils;
10 | import org.jsoup.Jsoup;
11 | import org.jsoup.nodes.Document;
12 | import org.jsoup.nodes.Element;
13 | import org.jsoup.select.Elements;
14 |
15 | import model.FileReaderWriter;
16 | import model.Zhihu;
17 |
18 | public class Spider {
19 | public static void main(String[] args) {
20 |
21 | String url = "http://www.zhihu.com/explore/recommendations";
22 | // 获取知乎推荐首页
23 | String content = Spider.SendGet(url);
24 |
25 | // 获取推荐内容详细内容
26 | ArrayList myZhihu = Spider.GetRecommendations(content);
27 |
28 | // 写入文档
29 | for (Zhihu zhihu : myZhihu) {
30 | FileReaderWriter.writeIntoFile(zhihu.writeString(),
31 | "D:/知乎_编辑推荐.txt", true);
32 | }
33 | }
34 |
35 | //获取指定Url页面内容
36 | //采用http-client和http-core 4.3 jar包
37 | public static String SendGet(String url) {
38 |
39 | CloseableHttpClient client = HttpClients.createDefault();
40 | try{
41 | HttpGet request = new HttpGet(url);
42 | CloseableHttpResponse resp = client.execute(request);
43 |
44 | String result = EntityUtils.toString(resp.getEntity());
45 |
46 | return result;
47 | }catch(Exception e){
48 | e.printStackTrace();
49 | }finally{
50 | try{
51 | client.close();
52 | }catch(Exception e){
53 | e.printStackTrace();
54 | }
55 |
56 | }
57 |
58 | return null;
59 | }
60 |
61 | // 获取推荐内容详细内容url
62 | public static ArrayList GetRecommendations(String content) {
63 |
64 | ArrayList results = new ArrayList();
65 | Document doc = Jsoup.parse(content);
66 | Elements items = doc.getElementsByClass("zm-item"); //推荐内容元素
67 | for(Element item:items){
68 | Element h2TagEle = item.getElementsByTag("h2").first(); //推荐内容标题元素
69 | Element aTagEl = h2TagEle.getElementsByTag("a").first(); //推荐内容的Url超链接元素
70 | String href = aTagEl.attr("href"); //推荐内容url
71 | if(href.contains("question")){ //去除不规范url
72 | results.add(new Zhihu(href));
73 | }
74 | }
75 | return results;
76 | }
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/ZhihuDown/src/model/FileReaderWriter.java:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/callmewhy/ZhihuDown/4b8fee29cbffb45c8febe90fd74caa3365ec77fe/ZhihuDown/src/model/FileReaderWriter.java
--------------------------------------------------------------------------------
/ZhihuDown/src/model/Zhihu.java:
--------------------------------------------------------------------------------
1 | package model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.regex.Matcher;
5 | import java.util.regex.Pattern;
6 |
7 | import org.jsoup.Jsoup;
8 | import org.jsoup.nodes.Document;
9 | import org.jsoup.nodes.Element;
10 | import org.jsoup.select.Elements;
11 |
12 | import controller.Spider;
13 |
14 | public class Zhihu {
15 | public String question;// 问题
16 | public String questionDescription;// 问题描述
17 | public String zhihuUrl;// 链接地址
18 | public ArrayList answers;// 回答
19 |
20 | // 获取知乎内容
21 | public Zhihu(String url) {
22 | question = "";
23 | questionDescription = "";
24 | zhihuUrl = "";
25 | answers = new ArrayList();
26 |
27 | if (getRealUrl(url)) {
28 |
29 | // 获取推荐内容详细页面
30 | String content = Spider.SendGet(zhihuUrl);
31 | if(content != null){
32 | Document doc = Jsoup.parse(content);
33 | // 获取标题,即用户发布的问题
34 | question = doc.title();
35 |
36 | // 问题消息标书
37 | Element despElement = doc.getElementById("zh-question-detail");
38 | if(despElement != null){
39 | questionDescription = despElement.text();
40 | }
41 | // 解答
42 | Elements ansItems = doc.getElementsByClass("zm-item-answer");
43 | for(Element ansItem:ansItems){
44 | Element textElement = ansItem.getElementsByClass("zm-item-rich-text").first();
45 | if(despElement != null){
46 | answers.add(textElement.text());
47 | }
48 | }
49 | }else{
50 | System.out.println("content is null");
51 | }
52 | }
53 | }
54 |
55 | // ����url
56 | boolean getRealUrl(String url) {
57 | // 将http://www.zhihu.com/question/22355264/answer/21102139
58 | // 转换为http://www.zhihu.com/question/22355264
59 | Pattern pattern = Pattern.compile("question/(.*?)/");
60 | Matcher matcher = pattern.matcher(url);
61 | if (matcher.find()) {
62 | zhihuUrl = "http://www.zhihu.com/question/" + matcher.group(1);
63 | } else {
64 | return false;
65 | }
66 | return true;
67 | }
68 |
69 | public String writeString() {
70 | // 将html页面转换为字符串
71 | String result = "";
72 | result += "问题:" + question + "\r\n";
73 | result += "描述:" + questionDescription + "\r\n";
74 | result += "链接:" + zhihuUrl + "\r\n\r\n";
75 | for (int i = 0; i < answers.size(); i++) {
76 | result += "回答" + i + ":" + answers.get(i) + "\r\n\r\n\r\n";
77 | }
78 | result += "\r\n\r\n\r\n\r\n\r\n\r\n";
79 | // 替换html换行符和其他字符
80 | result = result.replaceAll("
", "\r\n");
81 | result = result.replaceAll("<.*?>", "");
82 | return result;
83 | }
84 |
85 | @Override
86 | public String toString() {
87 | String result = "";
88 | result += "问题:" + question + "\n";
89 | result += "描述:" + questionDescription + "\n";
90 | result += "链接:" + zhihuUrl + "\n";
91 | result += "回答:" + answers.size() + "\n";
92 | return result;
93 | }
94 | }
95 |
--------------------------------------------------------------------------------