├── .editorconfig ├── .gitignore ├── .jscsrc ├── .npmignore ├── .travis.yml ├── LICENSE ├── api ├── Answer.js ├── Collection.js ├── Post.js ├── Question.js ├── Topic.js ├── User.js └── index.js ├── config ├── api.js ├── commonModules.js ├── index.js └── query.js ├── index.js ├── package.json ├── readme.md └── test ├── Answer.test.js ├── Post.test.js ├── Question.test.js ├── Topic.test.js ├── User.test.js └── Zhuanlan.test.js /.editorconfig: -------------------------------------------------------------------------------- 1 | # editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = space 6 | indent_size = 2 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .idea 3 | 4 | *.swp 5 | .DS_Store 6 | 7 | temp 8 | -------------------------------------------------------------------------------- /.jscsrc: -------------------------------------------------------------------------------- 1 | { 2 | // 使用 jquery 编码风格规范 3 | "preset": "airbnb", 4 | "fix": true, 5 | "maxErrors": 50, 6 | "fileExtensions": [ 7 | ".js", 8 | ".jsx" 9 | ], 10 | "excludeFiles": [], 11 | // 改变 requireCurlyBraces 规则 12 | //"requireCurlyBraces": null // or false 13 | "requireDollarBeforejQueryAssignment": false 14 | } -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | examples 2 | test 3 | .idea 4 | .npm-debug.log 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "5" 4 | - "5.1" 5 | - "6.9.1" 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | This software is licensed under the MIT License. 3 | 4 | Copyright (c) [2016] all of contributors. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. -------------------------------------------------------------------------------- /api/Answer.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Ivan Jiang (iplus26) 3 | * @date 23 May 2016 4 | * @description 5 | * 6 | */ 7 | 'use strict'; 8 | const {cheerio, request, _} = require('../config/commonModules'); 9 | 10 | const config = require('../config/api'); 11 | const User = require('./User'); 12 | 13 | let _renderUrl = (answerId) => { 14 | let url = _.template(config.answer.voters)({answerId}); 15 | return url; 16 | }; 17 | 18 | /* 19 | * @param answerId Different from the string after "answer" in url, 20 | * the real answerId is not that obvious. For example, 21 | * "/question/28207685/answer/39974928", 22 | * the answerId of this post is "11382008" instead. 23 | */ 24 | let voters = (answerId) => { 25 | let url = _renderUrl(answerId); 26 | let options = { 27 | url 28 | }; 29 | 30 | return request(options).then(function (res) { 31 | let buffer = JSON.parse(res.body), 32 | voters = []; 33 | 34 | if (Array.isArray(buffer.payload)) { 35 | voters = buffer.payload.map(function (payload) { 36 | let $ = cheerio.load(payload), 37 | user = {}; 38 | 39 | let anchor = $('a[title]'), 40 | status = $('ul.status > li').children('a, span'); 41 | user.name = anchor.attr('title'); 42 | 43 | user.anonymous = !user.name; 44 | 45 | if (!user.anonymous) { 46 | user.profileUrl = anchor.attr('href'); 47 | user.sex = (function (str) { 48 | switch (str) { 49 | case '他': 50 | return 'male'; 51 | case '她': 52 | return 'female'; 53 | default: 54 | return undefined; 55 | } 56 | })($('.zg-btn-follow').text().slice(2)); 57 | 58 | } else { 59 | user.name = '匿名用户'; 60 | } 61 | 62 | user.avatar = $('.zm-item-img-avatar').attr('src'); 63 | user.like = parseInt(status.eq(0).text()); 64 | user.thank = parseInt(status.eq(1).text()); 65 | user.question = (function (el) { 66 | let href = el.attr('href'); 67 | if (href) { 68 | this.questionUrl = href; 69 | } 70 | return parseInt(el.text()); 71 | }).call(user, status.eq(2)); 72 | user.answer = (function (el) { 73 | let href = el.attr('href'); 74 | if (href) { 75 | this.answerUrl = href; 76 | } 77 | return parseInt(el.text()); 78 | }).call(user, status.eq(3)); 79 | 80 | return user; 81 | }); 82 | } 83 | return voters; 84 | }); 85 | }; 86 | 87 | module.exports = { 88 | voters 89 | }; 90 | -------------------------------------------------------------------------------- /api/Collection.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 Meizu bigertech, All rights reserved. 3 | * http://www.bigertech.com/ 4 | * @author liuxing 5 | * @date 15/10/14 6 | * @description 7 | * 8 | */ 9 | 'use strict'; 10 | 11 | const {cheerio, request, Promise, util: url} = require('../config/commonModules'); 12 | 13 | const config = require('../config'); 14 | const API = require('../config/api'); 15 | 16 | function getItems(body) { 17 | let $ = cheerio.load(body); 18 | let allZMItem = $('.zm-item'); 19 | let items = []; 20 | allZMItem.each(function (index, element) { 21 | let h2 = $(element).find('h2.zm-item-title a'); 22 | let href = h2.attr('href') || ''; 23 | let content = $(element).find('div.zm-item-fav div'); 24 | let user = content.find('.answer-head .zm-item-answer-author-wrap'); 25 | let answerID = parseInt($(element).find('.zm-item-fav .zm-item-answer ').attr('data-aid')); 26 | let atoken = parseInt($(element).find('.zm-item-fav .zm-item-answer ').attr('data-atoken')); 27 | let html = $(element).find('textarea.content').html(); 28 | let item = { 29 | aid: answerID, 30 | voter: parseInt($(element).find('.zm-item-vote a.zm-item-vote-count').text()), 31 | desc: content.find('div.zh-summary.summary').text(), 32 | content: html, 33 | atoken: atoken, 34 | question: { 35 | id: parseInt(href.match(/\d*?$/)[0]), 36 | title: h2.text(), 37 | url: config.zhihu + h2.attr('href'), 38 | }, 39 | user: { 40 | username: user.find('a').text(), 41 | userTitle: user.find('strong').text(), 42 | url: user.find('a').attr('href'), 43 | }, 44 | }; 45 | items.push(item); 46 | }); 47 | 48 | return items; 49 | } 50 | 51 | /** 52 | * 获取某一页的数据 53 | * @param url 54 | * @returns {*} 55 | */ 56 | function getDataByPage(url) { 57 | if (url.indexOf(API.collection.url) < 0) { 58 | throw new Error('Url not match!'); 59 | } 60 | 61 | let options = { 62 | url, 63 | headers: config.headers 64 | }; 65 | return request(options).then(function (body) { 66 | return getItems(body.body); 67 | }); 68 | } 69 | 70 | /** 71 | * 获取分页信息 72 | * @param url 73 | * @returns {*} 74 | */ 75 | function getPagination(url) { 76 | let options = { 77 | url, 78 | headers: config.headers 79 | }; 80 | return request(options).then(function (body) { 81 | let $ = cheerio.load(body.body); 82 | let pages = $('.zm-invite-pager span').eq(-2).text(); 83 | let currentPage = $('.zm-invite-pager span.zg-gray-normal').eq(-1).text(); 84 | return { 85 | pages: parseInt(pages), 86 | current: parseInt(currentPage), 87 | }; 88 | }); 89 | } 90 | 91 | /** 92 | * 获取所有页的数据, 93 | * 先查询分页,然后查询每一页的数据 94 | * @param url 95 | * @returns {*} 96 | */ 97 | function getAllPageData(url) { 98 | let formatUrl = util.parse(url); 99 | let realUrl = config.zhihu + formatUrl.pathname; 100 | let allItems = []; 101 | return getPagination(url).then(function (paginations) { 102 | let pages = []; 103 | for (let i = 1; i <= paginations.pages; i++) { 104 | pages.push(i); 105 | } 106 | 107 | //并发 108 | return Promise.map(pages, function (page) { 109 | let pageUrl = realUrl + '?page=' + page; 110 | return getDataByPage(pageUrl).then(function (items) { 111 | allItems = allItems.concat(items); 112 | }); 113 | }, {concurrency: 5}).then(function (total) { 114 | return total; 115 | }); 116 | }).then(function () { 117 | return allItems; 118 | }); 119 | } 120 | 121 | function getCollectionInfo(url) { 122 | if (url.indexOf(API.collection.url) < 0) { 123 | throw new Error('Url not match!'); 124 | } 125 | 126 | let cid = parseInt(url.match(/\d+/)[0]); 127 | let options = { 128 | url, 129 | headers: config.headers 130 | }; 131 | return request(options).then(function (body) { 132 | let $ = cheerio.load(body[1]); 133 | let title = $('#zh-fav-head-title').text(); 134 | let $user = $('#zh-single-answer-author-info .zm-list-content-title a'); 135 | let user = { 136 | img: $('a.zm-list-avatar-link .zm-list-avatar-medium').attr('src'), 137 | name: $user.text(), 138 | url: $user.attr('href'), 139 | }; 140 | return { 141 | cid, 142 | title, 143 | user 144 | }; 145 | }); 146 | } 147 | 148 | module.exports = { 149 | getAllPageData, 150 | getDataByPage, 151 | getPagination, 152 | getCollectionInfo 153 | }; 154 | -------------------------------------------------------------------------------- /api/Post.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2014 Meizu bigertech, All rights reserved. 3 | * http://www.bigertech.com/ 4 | * @author liuxing 5 | * @date 14-11-10 6 | * @description 7 | * 8 | */ 9 | 'use strict'; 10 | 11 | const {Promise, request, url, _, QUERY} = require('../config/commonModules'); 12 | 13 | const API = require('../config/api'); 14 | const User = require('./User'); 15 | 16 | 17 | function getRealUrl(apiUrl, postUrl) { 18 | let pathname = url.parse(postUrl).pathname; 19 | let paths = pathname.split('\/'); 20 | if (paths.length < 0) { 21 | throw new Error('Url error!'); 22 | } 23 | 24 | let data = { 25 | name: paths[1], 26 | postID: paths[2], 27 | }; 28 | return _.template(apiUrl)(data); 29 | } 30 | 31 | let getLikers = (postUrl, config) => { 32 | let url = getRealUrl(API.post.likers, postUrl); 33 | let query = config || QUERY.zhuanlan.likers; 34 | let data = { 35 | url, 36 | qs: { 37 | limit: query.limit, 38 | offset: query.offset 39 | } 40 | }; 41 | return request(data).then(function (content) { 42 | let users = content.body; 43 | return JSON.parse(users); 44 | }); 45 | }; 46 | /** 47 | * get full userinfo who stared post 48 | * @param postUrl post's url 49 | * @param config 50 | * @returns {*} User Object contain detail userinfo , number of question, number of answer etc 51 | */ 52 | let likersDetail = (postUrl, config) => { 53 | return getLikers(postUrl, config).then(function (users) { 54 | if (users.length > 0) { 55 | //并发 56 | return Promise.map(users, function (user) { 57 | //User.getUserByName参数是用户的slug值,不是直接的用户名 58 | return User.getUserByName(user.slug).then(function (result) { 59 | return result; 60 | }); 61 | }, { 62 | concurrency: 30, 63 | }).then(function (data) { 64 | //按follower数目逆序排列 65 | let pure_users = _.sortBy(data, 'follower').reverse(); 66 | return pure_users; 67 | }); 68 | } 69 | }); 70 | }; 71 | 72 | let articleInfo = (postUrl) => { 73 | let url = getRealUrl(API.post.info, postUrl); 74 | let options = { 75 | url, 76 | gzip: true, 77 | }; 78 | 79 | return request(options).then((content) => { 80 | return JSON.parse(content.body); 81 | }); 82 | }; 83 | 84 | let articleList = (name, config) => { 85 | let query = config || QUERY.zhuanlan.articleList; 86 | let data = { 87 | url: _.template(API.post.page)({name}), 88 | qs: { 89 | limit: query.limit, 90 | offset: query.offset 91 | } 92 | }; 93 | return request(data).then((content) => { 94 | return JSON.parse(content.body); 95 | }); 96 | }; 97 | 98 | let zhuanlanInfo = (zhuanlanName) => { 99 | let url = API.post.zhuanlan + zhuanlanName; 100 | let options = { 101 | url, 102 | gzip: true, 103 | }; 104 | return request(options).then((content) => { 105 | return JSON.parse(content.body); 106 | }); 107 | }; 108 | 109 | 110 | let comments = (postUrl, config) => { 111 | let url = getRealUrl(API.post.comments, postUrl); 112 | let query = config || QUERY.zhuanlan.comments; 113 | 114 | let options = { 115 | url, 116 | qs: { 117 | limit: query.limit, 118 | offset: query.offset 119 | } 120 | }; 121 | return request(options).then((content) => { 122 | return JSON.parse(content.body); 123 | }) 124 | }; 125 | 126 | 127 | module.exports = { 128 | likersDetail, 129 | comments, 130 | info: articleInfo, 131 | page: articleList, 132 | zhuanlanInfo 133 | }; 134 | -------------------------------------------------------------------------------- /api/Question.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | const {request, cheerio} = require('../config/commonModules') 3 | 4 | let answers = function (params) { 5 | 6 | if (typeof params === 'string') { 7 | params = { 8 | token: arguments[0], 9 | offset: arguments[1] || 0, 10 | // pagesize: arguments[2] || 10, 11 | }; 12 | } 13 | 14 | let opt = { 15 | uri: 'https://www.zhihu.com/node/QuestionAnswerListV2', 16 | form: { 17 | method: 'next', 18 | params: JSON.stringify({ 19 | 'url_token': params.token, 20 | 'pagesize': params.pagesize, 21 | 'offset': 0, // params.offset, 22 | }) 23 | }, 24 | method: 'POST', 25 | headers: { 26 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) ' + 27 | 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36', 28 | 'Referer': 'https://www.zhihu.com/question/' + params.token, 29 | }, 30 | }; 31 | 32 | return request(opt) 33 | .then(function (content) { 34 | let ret; 35 | try { 36 | let data = JSON.parse(content.body); 37 | if (Array.isArray(data.msg)) { 38 | ret = data.msg.map(function (payload) { 39 | let $ = cheerio.load(payload, { 40 | decodeEntities: false, 41 | }); 42 | 43 | let author = $('.zm-item-answer-author-info'), 44 | authorAnchor = author.find('.author-link'), 45 | voters = $('span.voters a'), 46 | content = $('.zm-editable-content'), 47 | ans = {}; 48 | 49 | if (authorAnchor.length) { 50 | ans.author = { 51 | name: authorAnchor.text(), 52 | profileUrl: authorAnchor.attr('href'), 53 | bio: author.find('span[title]').attr('title'), 54 | avatar: author.find('img').attr('src') 55 | }; 56 | } else { 57 | ans.author = { 58 | name: '匿名用户', 59 | }; 60 | } 61 | 62 | ans.voters = voters.length ? parseInt(voters.text()) : 0; 63 | ans.text = content.text(); 64 | ans.html = content.html(); 65 | 66 | return ans; 67 | }); 68 | } 69 | 70 | } catch (e) { 71 | } 72 | return ret; 73 | }); 74 | }; 75 | 76 | module.exports = { 77 | answers 78 | }; 79 | -------------------------------------------------------------------------------- /api/Topic.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 话题相关 3 | */ 4 | 5 | 'use strict'; 6 | 7 | const {request, cheerio} = require('../config/commonModules'); 8 | 9 | let API = require('../config/api'); 10 | 11 | let getTopicByID = (topicID, page = 1) => { 12 | let data = { 13 | url: API.topic_url + topicID + '/questions', 14 | qs: { 15 | page 16 | }, 17 | }; 18 | 19 | return request(data).then((content) => { 20 | let responseBody = content.body; 21 | let $ = cheerio.load(responseBody); 22 | let result = { 23 | name: $('.topic-info .topic-name h1').text(), 24 | }; 25 | 26 | let questions = {}; 27 | let index = 0; 28 | 29 | $('div.feed-item.feed-item-hook.question-item').each(function () { 30 | questions[index] = {}; 31 | questions[index].title = $('a.question_link', this).text(); 32 | questions[index].url = API.zhihu + 33 | $('a.question_link', this).attr('href'); 34 | questions[index].postTime = $('span.time', this).text(); 35 | index = index + 1; 36 | }); 37 | 38 | result.page = page; 39 | result.totalpage = Number($('div.zm-invite-pager span').last().prev().text()); 40 | result.questions = questions; 41 | return result; 42 | }); 43 | }; 44 | 45 | let getTopicTopAnswersByID = (topicID, page = 1) => { 46 | let data = { 47 | url: API.topic_url + topicID + '/top-answers', 48 | qs: { 49 | page 50 | } 51 | }; 52 | return request(data).then((content) => { 53 | let responseBody = content.body; 54 | let $ = cheerio.load(responseBody); 55 | let result = { 56 | name: $('.topic-info .topic-name h1').text(), 57 | }; 58 | 59 | let questions = {}; 60 | let index = 0; 61 | 62 | $('div.feed-item.feed-item-hook.folding').each(function () { 63 | questions[index] = {}; 64 | questions[index].title = $('a.question_link', this).text(); 65 | questions[index].url = API.zhihu + $('a.question_link', this).attr('href'); 66 | questions[index].upvotes = $('a.zm-item-vote-count', this).text(); 67 | questions[index].comment_count = $('a.toggle-comment', this).last().text().match(/\d+/g)[0]; 68 | questions[index].answer_url = API.zhihu + $('a.toggle-expand', this).attr('href'); 69 | questions[index].user = {}; 70 | questions[index].user.name = $('div.zm-item-answer-author-info a', this).text(); 71 | questions[index].user.url = API.zhihu 72 | + $('div.zm-item-answer-author-info a', this).attr('href'); 73 | index++; 74 | }); 75 | 76 | result.page = page; 77 | result.totalpage = Number($('div.zm-invite-pager span').last().prev().text()); 78 | result.questions = questions; 79 | return result; 80 | }); 81 | }; 82 | 83 | module.exports = { 84 | getTopicByID, 85 | getTopicTopAnswersByID 86 | }; 87 | -------------------------------------------------------------------------------- /api/User.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2014 Meizu bigertech, All rights reserved. 3 | * http://www.bigertech.com/ 4 | * @author liuxing 5 | * @date 14-11-10 6 | * @description 7 | * 8 | */ 9 | 'use strict'; 10 | const {request, cheerio} = require('../config/commonModules'); 11 | 12 | const config = require('../config'); 13 | const API = require('../config/api'); 14 | 15 | 16 | function formatFollowData(str) { 17 | if (str.indexOf('K') !== -1) { 18 | return parseInt(str) * 1000; 19 | } 20 | // if (str.indexOf('K') !== -1) { 21 | // return parseInt(str) * 10000; 22 | // } 23 | return parseInt(str); 24 | } 25 | 26 | /* 27 | * @param name The name of Zhihu user 28 | * @return A promise 29 | */ 30 | let info = (name) => { 31 | let data = { 32 | url: API.user.info, 33 | qs: { 34 | params: JSON.stringify({ 35 | 'url_token': name 36 | }), 37 | }, 38 | }; 39 | 40 | return request(data).then(function (content) { 41 | let responseBody = content.body; 42 | let $ = cheerio.load(responseBody); 43 | 44 | const tagline = $('.tagline').eq(0).text(); 45 | const workItem = $('.personal .info-wrap .item'); 46 | const company = workItem.eq(0).text(); 47 | const title = workItem.eq(1).text(); 48 | 49 | let values = $('span.value'); 50 | let result = { 51 | tagline, 52 | work: `${company} ${title}`, 53 | answer: formatFollowData(values.eq(0).text()), 54 | post: formatFollowData(values.eq(1).text()), 55 | follower: formatFollowData(values.eq(2).text()), 56 | }; 57 | result.profileUrl = config.zhihu + $('a.avatar-link').attr('href'); 58 | result.name = $('span.name').text(); 59 | let male = $('.icon-profile-female'); 60 | result.sex = male.length === 1 ? 'female' : 'male'; 61 | return result; 62 | }); 63 | }; 64 | 65 | let questions = (qID) => { 66 | }; 67 | 68 | let answers = (qID) => { 69 | }; 70 | 71 | let zhuanlansFocus = () => { 72 | }; 73 | 74 | let topic = () => { 75 | }; 76 | 77 | module.exports = { 78 | info, 79 | // TODO 80 | zhuanlansFocus, 81 | questions, 82 | answers, 83 | topic, 84 | 85 | // Deprecated 86 | getUserByName: info, 87 | }; 88 | -------------------------------------------------------------------------------- /api/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2014 Meizu bigertech, All rights reserved. 3 | * http://www.bigertech.com/ 4 | * @author liuxing 5 | * @date 14-11-11 6 | * @description 7 | * 8 | */ 9 | let zhuanlan = require('./Post'); 10 | let User = require('./User'); 11 | let Collection = require('./Collection'); 12 | let Topic = require('./Topic'); 13 | let Answer = require('./Answer'); 14 | let Question = require('./Question'); 15 | 16 | module.exports = { 17 | Post: zhuanlan, 18 | User, 19 | Topic, 20 | Collection, 21 | Answer, 22 | Question 23 | }; 24 | -------------------------------------------------------------------------------- /config/api.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2014 Meizu bigertech, All rights reserved. 3 | * http://www.bigertech.com/ 4 | * @author liuxing 5 | * @date 14-11-10 6 | * @description zhihu api url template ,知乎api 接口模板 7 | * 8 | */ 9 | 'use strict'; 10 | 11 | const zhihu = 'https://www.zhihu.com', 12 | zhuanlan = 'https://zhuanlan.zhihu.com'; 13 | 14 | module.exports = { 15 | zhihu: zhihu, 16 | zhuanlan: zhuanlan, 17 | topic_url: zhihu + '/topic/', 18 | post: { 19 | info: zhuanlan + '/api/posts/<%= postID%>', 20 | likers: zhuanlan + '/api/posts/<%=postID%>/likers', 21 | page: zhuanlan + '/api/columns/<%=name %>/posts', 22 | zhuanlan: zhuanlan + '/api/columns/', 23 | comments:zhuanlan+'/api/posts/<%=postID%>/comments' 24 | }, 25 | answer: { 26 | likers: zhihu + '/node/AnswerFullVoteInfoV2', 27 | voters: zhihu + '/answer/<%= answerId %>/voters_profile', 28 | }, 29 | user: { 30 | info: zhihu + '/node/MemberProfileCardV2', 31 | }, 32 | collection: { 33 | // full url: http://www.zhihu.com/collection/25547043?page=1 34 | url: zhihu + '/collection/', 35 | }, 36 | }; 37 | -------------------------------------------------------------------------------- /config/commonModules.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by 80920 on 2016/12/24. 3 | */ 4 | const url = require('url'); 5 | const cheerio = require('cheerio'); 6 | const Promise = require('bluebird'); 7 | const request = Promise.promisify(require('request')); 8 | const _ = require('lodash'); 9 | const QUERY = require('./query'); 10 | 11 | module.exports = { 12 | url, 13 | cheerio, 14 | Promise, 15 | request, 16 | _, 17 | QUERY 18 | }; 19 | -------------------------------------------------------------------------------- /config/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2015 Meizu bigertech, All rights reserved. 3 | * http://www.bigertech.com/ 4 | * @author liuxing 5 | * @date 15/10/14 6 | * @description 7 | * 8 | */ 9 | 10 | module.exports = { 11 | zhihu: 'https://www.zhihu.com', 12 | headers: { 13 | // 'accept': 'text/html,application/xhtml+xml', 14 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36', 15 | }, 16 | }; 17 | -------------------------------------------------------------------------------- /config/query.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by suncg on 2016/12/26. 3 | */ 4 | //全部接口的默认query集中管理,便于后期维护 5 | 6 | let query = { 7 | zhuanlan: { 8 | comments: { 9 | limit: 10, 10 | offset: 0 11 | }, 12 | likers: { 13 | limit: 10, 14 | offset: 0 15 | }, 16 | articleList: { 17 | limit: 10, 18 | offset: 0 19 | } 20 | }, 21 | 22 | 23 | }; 24 | 25 | module.exports = query; 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2014 Meizu bigertech, All rights reserved. 3 | * http://www.bigertech.com/ 4 | * @author liuxing 5 | * @date 14-11-11 6 | * @description 7 | * 8 | */ 9 | let api = require('./api'); 10 | 11 | module.exports = api; 12 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zhihu", 3 | "version": "1.4.2", 4 | "author": "kissliux ", 5 | "contributors": [ 6 | "Ivan Jiang ", 7 | "王祎 <451114984@qq.com>", 8 | "sunchenguang <809200299@qq.com>", 9 | "Li Han " 10 | ], 11 | "description": "Zhihu API, 获取知乎平台信息数据的接口", 12 | "main": "index.js", 13 | "engines": { 14 | "node": " >0.10.x" 15 | }, 16 | "dependencies": { 17 | "async": "^1.5.2", 18 | "bluebird": "^3.4.0", 19 | "cheerio": "^0.17.0", 20 | "lodash": "^4.13.1", 21 | "request": "^2.72.0", 22 | "request-promise": "^3.0.0" 23 | }, 24 | "devDependencies": { 25 | "should": "^4.3.0", 26 | "mocha": "*" 27 | }, 28 | "scripts": { 29 | "test": "./node_modules/mocha/bin/mocha --timeout 15000" 30 | }, 31 | "repository": { 32 | "type": "git", 33 | "url": "git+https://github.com/shanelau/zhihu.git" 34 | }, 35 | "bugs": { 36 | "url": "https://github.com/shanelau/zhihu/issues" 37 | }, 38 | "homepage": "https://github.com/shanelau/zhihu#readme", 39 | "directories": { 40 | "test": "test" 41 | }, 42 | "license": "MIT" 43 | } 44 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # 停止维护 2 | 3 | > 知乎已经更新为 https, 本项目 \< 1.0.0 不能再使用了. 请升级 4 | 5 | [![](https://nodei.co/npm/zhihu.png?downloads=true)](https://nodei.co/npm/zhihu/) 6 | 7 | [![](https://travis-ci.org/iplus26/zhihu.svg)](https://travis-ci.org/iplus26/zhihu/builds) 8 | 9 | 根据这些接口获取到知乎的数据,包括以下接口: 10 | 11 | * [User API](#user-api) (用户信息) 12 | * [Post API](#post-api) (专栏文章) 13 | * [Answer API](#answer-api) (答案) 14 | * [Question API](#question-api) (问题) 15 | * [Topic API](#topic-api) (话题) 16 | 17 | **欢迎贡献代码,一起完善知乎的接口** 18 | 19 | ## Getting Started 20 | 21 | [DEMO](https://tonicdev.com/shanelau/575696208b908f1300212fff) 22 | 23 | ```javascript 24 | let zhihu = require('zhihu'); 25 | 26 | let username = 'shanejs'; 27 | 28 | zhihu.User.info(username).then(function(user){ 29 | console.log(user); 30 | }); 31 | 32 | /* You'll get 33 | { 34 | answer: 14, 35 | post: 0, 36 | follower: 529, 37 | profileUrl: 'https://www.zhihu.com/people/shanejs', 38 | name: '狂飙蜗牛', 39 | sex: 'male' 40 | } 41 | */ 42 | ``` 43 | 44 | ## Usage 45 | ### User API 46 | #### User.info(username) 47 | 根据用户名获取到用户的简要信息,`username` 为用户的唯一标识,参见个人主页的 url,或者设置中的个性域名(只能修改一次)。 48 | 49 | 比如请求这个用户 shanelau ([zhihu.com/people/shanelau](https://www.zhihu.com/people/shanelau)): 50 | 51 | ```javascript 52 | let zhihu = require('zhihu'); 53 | let name = 'shanelau'; 54 | 55 | zhihu.User.info(name).then(function(user){ 56 | console.log(user); 57 | }); 58 | ``` 59 | 60 | 请求成功将会返回: 61 | 62 | ```javascript 63 | /* 64 | * answer (number) 答题数量 65 | * post (number) 文章数量 66 | * follower (number) 跟随者数量 67 | * profileUrl (string) 个人主页 68 | * name (string) 名字 69 | * sex (string) 性别 70 | */ 71 | 72 | { 73 | answer: 5, 74 | post: 0, 75 | follower: 456, 76 | profileUrl: 'https://www.zhihu.com/people/shanelau1021', 77 | name: '狂飙蜗牛', 78 | sex: 'male' 79 | } 80 | ``` 81 | 82 | 83 | 84 | #### User.questions 85 | 用户的提问列表 86 | @TODO 87 | 88 | #### User.answers 89 | 用户的回答列表 90 | @TODO 91 | 92 | #### User.zhuanlansFocus 93 | 用户关注的专栏 94 | @TODO 95 | 96 | #### User.topic 97 | 用户关注的话题信息 98 | @TODO 99 | 100 | ### Post API 101 | #### Post.info(postUrl) 102 | 获取专栏文章的详细信息 103 | 104 | * `postUrl` 文章的url地址 105 | 106 | **Example** 107 | 108 | ```javascript 109 | zhihu.Post.info(postUrl).then(function(data){ 110 | // do something 111 | }); 112 | ``` 113 | 114 | **Result** 115 | 116 | * Object 117 | 118 | [example][9] 119 | 120 | #### Post.page(name[, config]) 121 | 获取专栏文章列表 122 | 123 | * `name` 专栏的英文名字, 例如:'bigertech' 124 | * `config` 可选,{object} ,默认值如下 125 | 126 | ```javascript 127 | { 128 | limit: 10 // 记录数 129 | offset: 0 // 偏移量 130 | } 131 | ``` 132 | 133 | **Example** 134 | 135 | [demo][10] 136 | 137 | 138 | #### Post.likersDetail(postUrl[, config]) 139 | 获取专栏文章的点赞者的详细信息 140 | 141 | * `postUrl` 专栏文章的url地址 142 | * `config` 可选,配置对象,默认 `{limit:10, offset:10}` 143 | 144 | **Result** 145 | 146 | 用户数组。结构与User.info接口返回的一致。 147 | 148 | * `{Array}` //User 149 | 150 | 151 | #### Post.zhuanlanInfo(name) 152 | 获取专栏的信息 153 | 154 | * `name` 专栏的名字,比如 `bigertech` 155 | 156 | **Result** 157 | 158 | ```javascript 159 | { 160 | followersCount: 22614, 161 | description: '', 162 | creator: 163 | { bio: '魅族营销中心招募设计师', 164 | hash: '29c3654588fd4246bb90cbd345242d65', 165 | description: '', 166 | profileUrl: 'http://www.zhihu.com/people/linan', 167 | avatar: 168 | { id: '24f3a654b', 169 | template: 'http://pic2.zhimg.com/{id}\_{size}.jpg' }, 170 | slug: 'linan', 171 | name: '李楠' }, 172 | topics: [], 173 | href: '/api/columns/bigertech', 174 | acceptSubmission: true, 175 | slug: 'bigertech', 176 | name: '笔戈科技', 177 | url: '/bigertech', 178 | avatar: 179 | { id: 'a4bf61d95', 180 | template: 'http://pic3.zhimg.com/{id}\_{size}.jpg' }, 181 | commentPermission: 'anyone', 182 | following: false, 183 | postsCount: 173, 184 | canPost: false, 185 | activateAuthorRequested: false } 186 | ``` 187 | #### Post.comments(postUrl[, config]) 188 | 获取专栏文章的评论信息 189 | 190 | * `postUrl` 专栏文章的url地址 191 | * `config` 可选,配置对象,默认 `{limit:10, offset:10}` 192 | ``` 193 | zhihu.Post.comments(`https://zhuanlan.zhihu.com/p/24241616?refer=chenyuz`).then(function(comments){ 194 | console.log(comments); 195 | }); 196 | ``` 197 | **Result** 198 | 评论数组 199 | ``` 200 | [ 201 | { liked: false, 202 | inReplyToCommentId: 0, 203 | featured: false, 204 | href: '/api/posts/24241616/comments/199226760', 205 | reviewing: false, 206 | disliked: false, 207 | dislikesCount: 0, 208 | id: 199226760, 209 | author: 210 | { profileUrl: 'https://www.zhihu.com/people/xu-xing-62-43', 211 | bio: '', 212 | hash: '6954117908c91a1c2897e466fc0545af', 213 | uid: 647461616195604500, 214 | isOrg: false, 215 | description: '', 216 | isOrgWhiteList: false, 217 | slug: 'xu-xing-62-43', 218 | avatar: [Object], 219 | name: 'SP fan' }, 220 | content: 'ins即视感', 221 | createdTime: '2016-12-07T21:56:25+08:00', 222 | collapsed: false, 223 | likesCount: 1 224 | }, 225 | ... 226 | ... 227 | ] 228 | ``` 229 | 230 | ### Answer API 231 | #### Answer.voters(answerId) 232 | 233 | 用 `answerId` 获取这个回答的点赞者。注意 `answerId` 与 `url_token` 的区别,`answerId` 可以在 DOM Tree 中找到,具体的对应关系仍在探索中。知乎的一篇回答的 URL 结构一般是: 234 | 235 | ``` 236 | zhihu.com/question/12345/answer/67890 237 | ^^^^^ ^^^^^ 238 | question token answer token 239 | 240 | zhihu.com/answer/12306/voters_profile 241 | ^^^^^ 242 | answer id 243 | ``` 244 | 245 | @TODO 实现知乎支持的更多参数,比如 offset 等 246 | 247 | ### Question API 248 | #### Question.answers(token[, offset]) 249 | #### Question.answers(options) 250 | 获取该问题的回答列表 251 | 252 | ```javascript 253 | let Question = require('zhihu').Question; 254 | 255 | Question.answers('19557271'); 256 | Question.answers('19557271', 10); // start from 10 257 | Question.answers({token: '19557271', offset: 10}); 258 | ``` 259 | 260 | 261 | ### Collection API 262 | 问题的收藏列表 263 | 264 | `url : http://www.zhihu.com/collection/25547043?page=1` 265 | 266 | 267 | #### Collection.getAllPageData 268 | 获取所有的页面数据,遍历所有的页面 269 | 270 | ```javascript 271 | Collection.getAllPageData(url); 272 | ``` 273 | 274 | #### Collection.getDataByPage 275 | 获取某一页的页面数据 276 | 277 | ```javascript 278 | let url = 'http://www.zhihu.com/collection/25547043?page=1'; 279 | Collection.getDataByPage(url); 280 | ``` 281 | 282 | #### Collection.getPagination 283 | 获取改收藏列表的分页信息 284 | 285 | ``` 286 | { 287 | pages: 总页数, 288 | current: 当前页面 289 | } 290 | ``` 291 | 292 | ### Topic API 293 | 294 | #### Topic.getTopicByID(topicID[, page]) 295 | 根据话题id获取该话题下的问题,话题id为唯一标识,参见话题的url 296 | - `topicID` 话题的ID 297 | 298 | **Example** 299 | 300 | 请求这个话题:[生活、艺术、文化与活动][11] 301 | `topicID` 为 `19778317` 302 | 303 | ```javascript 304 | let topicID = '19778317'; 305 | zhihu.Topic.getTopicByID(topicID).then(function(result){ 306 | console.log(result); 307 | }); 308 | ``` 309 | 310 | 311 | **Result** 312 | 313 | 参数说明 314 | 315 | ```javascript 316 | /* You'll get 317 | * name: (string) 话题名称 318 | * page: (number) 当前页数 319 | * totalPage: (number) 该话题下问题总页数 320 | * questions: (object) 当页问题 321 | * - title: (string) 问题名字 322 | * - url: (string) 问题链接 323 | * - postTime:(string) 问题最近更新时间 324 | */ 325 | 326 | { 327 | name: '生活、艺术、文化与活动', 328 | page: 1, 329 | totalPage: 47242, 330 | questions: 331 | { '0': 332 | { title: '为什么很多人能接受有过长期恋爱经历,却不能接受有过婚姻的人?', 333 | url: 'http://www.zhihu.com/question/27816723', 334 | postTime: '41 秒前' }, 335 | '19': 336 | { title: '360卫士在C盘为什么不可以删掉?', 337 | url: 'http://www.zhihu.com/question/27816632', 338 | postTime: '5 分钟前' } 339 | } 340 | } 341 | ``` 342 | 343 | ## 贡献者 344 | 1. shanelau 345 | 2. Ivan Jiang (iplus26) 346 | 3. [sunchenguang](https://github.com/sunchenguang) 347 | 348 | ## 更新记录 349 | #### 2016.5.23 350 | 1. 修复 https 问题 351 | 2. 修改部分bug 352 | 3. 加入 jscs 格式化代码风格 353 | 354 | #### 2015.10.15 355 | 1. 新增收藏列表的数据抓取 356 | 2. 查询某个收藏下的所有数据和分页数据 357 | 358 | [8]: http://www.zhihu.com/people/shanelau1021 359 | [9]: https://zhuanlan.zhihu.com/api/columns/bigertech/posts/19885136 360 | [10]: https://zhuanlan.zhihu.com/api/columns/bigertech/posts?limit=1&offset=10 361 | [11]: http://www.zhihu.com/topic/19778317/questions 362 | 363 | [image-1]: https://nodei.co/npm/zhihu.png?downloads=true "NPM" 364 | [image-2]: https://travis-ci.org/iplus26/zhihu.svg 365 | 366 | ### LICENSE 367 | 368 | [MIT](./LICENSE) 369 | -------------------------------------------------------------------------------- /test/Answer.test.js: -------------------------------------------------------------------------------- 1 | /* 2 | * @author Ivan Jiang 3 | * @date 23 May 2016 4 | */ 5 | 6 | 'use strict'; 7 | 8 | let Answer = require('../index').Answer; 9 | let should = require('should'); 10 | 11 | describe('Answer', function () { 12 | describe('Voters', function () { 13 | it('should return voters of the answer', function (done) { 14 | let answerId = '35369006'; 15 | Answer.voters(answerId).then(function (data) { 16 | data.length.should.above(0); 17 | done(); 18 | }); 19 | }); 20 | }); 21 | }); 22 | -------------------------------------------------------------------------------- /test/Post.test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2014 Meizu bigertech, All rights reserved. 3 | * http://www.bigertech.com/ 4 | * @author liuxing 5 | * @date 14-11-10 6 | * @description 7 | * 8 | */ 9 | let Post = require('../').Post; 10 | let should = require('should'); 11 | 12 | describe('Post', function () { 13 | describe('#info', function () { 14 | it('should return post info object', function (done) { 15 | let postUrl = 'https://zhuanlan.zhihu.com/p/19888522'; 16 | Post.info(postUrl).then(function (data) { 17 | Object.keys(data).length.should.above(0); 18 | done(); 19 | }).catch(function (err) { 20 | console.error(err); 21 | }); 22 | 23 | }); 24 | }); 25 | 26 | describe('#zhuanlan', function () { 27 | it('should return zhuanlan info object', function (done) { 28 | let name = 'bigertech'; 29 | Post.zhuanlanInfo(name).then(function (data) { 30 | Object.keys(data).length.should.above(0); 31 | done(); 32 | }).catch(function (err) { 33 | console.error(err); 34 | }); 35 | 36 | }); 37 | }); 38 | describe('#comments', function () { 39 | it('should return zhuanlan article comments array', function (done) { 40 | let postUrl = 'https://zhuanlan.zhihu.com/p/19888522'; 41 | Post.comments(postUrl).then(function (data) { 42 | data.length.should.above(0); 43 | done(); 44 | }).catch(function (err) { 45 | console.error(err); 46 | }); 47 | 48 | }); 49 | }); 50 | 51 | }); 52 | -------------------------------------------------------------------------------- /test/Question.test.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | let should = require('should'); 4 | let Question = require('../').Question; 5 | 6 | describe('Question', function () { 7 | it('should return question object by settings', function () { 8 | return Question.answers({ 9 | token: '19557271', 10 | offset: 0, 11 | // pagesize: 5 12 | }).then(function (data) { 13 | Object.keys(data).length.should.above(0); 14 | }); 15 | }); 16 | 17 | it('should return question object, from 0 - 9 by default', function () { 18 | return Question.answers('19557271') 19 | .then(function (data) { 20 | Object.keys(data).length.should.above(0); 21 | }) 22 | }) 23 | }); 24 | -------------------------------------------------------------------------------- /test/Topic.test.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | let Topic = require('../').Topic; 4 | let should = require('should'); 5 | 6 | describe('Topic', function () { 7 | describe('#info', function () { 8 | it('should return topic info object', function (done) { 9 | let topicID = '19550461'; 10 | 11 | // http://www.zhihu.com/topic/19550461/questions 12 | // if page? http://www.zhihu.com/topic/19550461/questions?page=2 13 | 14 | Topic.getTopicTopAnswersByID(topicID).then(function (data) { 15 | Object.keys(data).length.should.above(0); 16 | done(); 17 | }).catch(function (err) { 18 | console.error(err); 19 | }); 20 | }); 21 | }); 22 | }); 23 | 24 | -------------------------------------------------------------------------------- /test/User.test.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2014 Meizu bigertech, All rights reserved. 3 | * http://www.bigertech.com/ 4 | * @author liuxing 5 | * @date 14-11-10 6 | * @description 7 | * 8 | */ 9 | 'use strict'; 10 | 11 | let User = require('../').User; 12 | let should = require('should'); 13 | 14 | let shouldReturn = function () { 15 | return this.then(function (value) { 16 | // console.log(value); 17 | Object.keys(value).length.should.above(0); 18 | }); 19 | }; 20 | 21 | let shouldParseBigV = function () { 22 | return this.then(function (value) { 23 | // console.log(value); 24 | value.follower.should.above(1000); 25 | }); 26 | }; 27 | 28 | let promise1 = User.info('iplus26'); 29 | let promise2 = User.info('fenng'); 30 | let promise3 = User.info('magie'); 31 | 32 | describe('User', function () { 33 | describe('#info', function () { 34 | 35 | it('should return user info object', 36 | shouldReturn.bind(promise1)); 37 | 38 | /* 39 | fenng 40 | followed by 293,993 users, following 1891 users up to 24 May, 2016 41 | */ 42 | it('should return user info object (fenng)', 43 | shouldReturn.bind(promise2)); 44 | it('should recongize users followed by thousands (fenng)', 45 | shouldParseBigV.bind(promise2)); 46 | 47 | /* 48 | magie 49 | followed by 538,958 users, following 570 users up to 24 May, 2016 50 | */ 51 | it('should return user info object (magie)', 52 | shouldReturn.bind(promise3)); 53 | it('should recongize users followed by thousands (magie)', 54 | shouldParseBigV.bind(promise3)); 55 | 56 | }); 57 | }); 58 | -------------------------------------------------------------------------------- /test/Zhuanlan.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shanelau/zhihu/09ecde3d79dd9cea821b3c79e53c808646414631/test/Zhuanlan.test.js --------------------------------------------------------------------------------