├── .gitignore ├── LICENSE ├── README.md ├── app.js ├── config ├── global.js └── weixin.js ├── demo ├── home.png └── search.png ├── lib ├── cookiePool.js ├── counter.js ├── factory.js ├── redis.js ├── render.js ├── request.js ├── rss │ ├── weibo.js │ └── weixin.js ├── sogouEncrypt.js └── sogouEncryptSalt.js ├── package.json ├── task └── cookiePool.js └── views ├── home.html └── weixin.html /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Directory for instrumented libs generated by jscoverage/JSCover 11 | lib-cov 12 | 13 | # Coverage directory used by tools like istanbul 14 | coverage 15 | 16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 17 | .grunt 18 | 19 | # node-waf configuration 20 | .lock-wscript 21 | 22 | # Compiled binary addons (http://nodejs.org/api/addons.html) 23 | build/Release 24 | 25 | # Dependency directory 26 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git 27 | node_modules 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 wlwr 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rss 2 | 3 | 4 | **搜狗接口变更,目前失效中** 5 | 6 | 7 | ---- 8 | 9 | 提供微信公众号RSS订阅接口,基于nodejs koajs开发 10 | 11 | 演示地址:[http://rss.wlwr.net](http://rss.wlwr.net) 12 | 13 | 注意:因演示地址访问量过多,服务器IP被搜狗加入黑名单,故不定期关闭演示地址。建议取代码搭建在自己服务器上。 14 | 15 | ## 更新日志 16 | 17 | - 2015.04.26 搜狗接口变更 (已修复) 18 | 1. 去掉 `phantomjs` 依赖,不再需要定时生成cookie池 (好消息) 19 | 2. 以前搜狗的openid标识失效,改用微信号ID作为标识 (坏消息) 20 | 21 | - 2015.10.20 搜狗调整加密请求方式 (已修复) 22 | 23 | - 2015.08.11 增加微信账号查询功能 24 | 25 | - 2015.06.28 cookie池采集采用 `phantomjs`,及 加密盐值采集 26 | 27 | - 2015.05.22 搜狗微信接口做了加密处理,导致采集失败。(已修复) 28 | 29 | --- 30 | 31 | ## 搭建 32 | 33 | - 安装 `io.js` 或 `Node.js 0.11` 以上版本,才支持 ES6相关语法 34 | 35 | - 安装 `redis-server` 端,默认端口是 `6379` 36 | 37 | 38 | 进入项目根目录, `npm install`,然后 `node --harmony app.js` 即可启动 39 | 40 | ---- 41 | 42 | ## 截图: 43 | 44 | ![查询](demo/search.png) 45 | 46 | -------------------------------------------------------------------------------- /app.js: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * Module dependencies. 4 | */ 5 | 6 | var logger = require('koa-logger'); 7 | var route = require('koa-route'); 8 | var parse = require('co-body'); 9 | var koa = require('koa'); 10 | var render = require('./lib/render'); 11 | var factory = require('./lib/factory'); 12 | var counter = require('./lib/counter'); 13 | 14 | 15 | var app = koa(); 16 | // "database" 17 | 18 | 19 | // middleware 20 | 21 | app.use(logger()); 22 | 23 | // route middleware 24 | 25 | app.use(route.get('/', home)); 26 | app.use(route.get('/weixin/:id', weixin)); 27 | app.use(route.get('/search/:keyword', search)); 28 | 29 | 30 | // route definitions 31 | 32 | /** 33 | * home 34 | */ 35 | 36 | function *home() { 37 | var count = yield counter.get(); 38 | this.body = yield render('home', {count : count}); 39 | } 40 | 41 | 42 | /** 43 | * weixin-rss builder 44 | */ 45 | 46 | function *weixin(id) { 47 | try { 48 | var handler = factory.create('weixin', id); 49 | var data = yield handler.getData(); 50 | yield counter.incr(); 51 | this.type = 'text/xml; charset=UTF-8'; 52 | this.body = yield render('weixin', {rss : data}); 53 | } catch (err) { 54 | console.log(err); 55 | this.body = err.message; 56 | } 57 | } 58 | 59 | function *search(keyword) { 60 | try { 61 | var handler = factory.create('weixin'); 62 | var html = yield handler.searchNameProxy(keyword); 63 | var result = {error : false, data : html, message : 'success'}; 64 | this.body = JSON.stringify(result); 65 | } catch (err) { 66 | console.log(err); 67 | this.body = {error : true, data : null, message : err.message}; 68 | } 69 | } 70 | 71 | 72 | // listen 73 | app.listen(3000); 74 | console.log('listening on port 3000'); 75 | -------------------------------------------------------------------------------- /config/global.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | 3 | 4 | proxyEnable: false, 5 | 6 | redis: { 7 | host: '127.0.0.1', 8 | port: '6379', 9 | }, 10 | 11 | proxy: 'http://114.36.105.161:8088' 12 | 13 | } -------------------------------------------------------------------------------- /config/weixin.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | 3 | imageProxy : 'http://read.html5.qq.com/image?src=forum&q=5&r=0&imgflag=7&imageUrl=%s', 4 | 5 | cacheEnable: true, 6 | 7 | api: { 8 | getList: 'http://weixin.sogou.com/gzhjs?cb=sogou.weixin.gzhcb&openid=%s&ext=%s&page=1&t=%d', 9 | getCookie: 'http://weixin.sogou.com/weixin?query=123', 10 | searchName: 'http://weixin.sogou.com/weixin?query=%s' 11 | }, 12 | 13 | 14 | listCachePolicy: { 15 | lifeTime: 86400, 16 | cacheId: 'rss:list:%s' 17 | }, 18 | 19 | contentCachePolicy: { 20 | lifeTime: 864000, 21 | cacheId: 'rss:content:%s' 22 | }, 23 | 24 | searchCachePolicy: { 25 | lifeTime: 864000, 26 | cacheId: 'rss:search:%s' 27 | }, 28 | 29 | //options : request || phantom 30 | cookieCollectEngine : 'phantom', 31 | 32 | cookieNum : 10, 33 | 34 | cookieCachePolicy: { 35 | lifeTime: 43200, 36 | cacheId: 'rss:cookies' 37 | } 38 | } -------------------------------------------------------------------------------- /demo/home.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wlwr/rss/714e8a3521e2bcd806ea43adc76eacb965635712/demo/home.png -------------------------------------------------------------------------------- /demo/search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wlwr/rss/714e8a3521e2bcd806ea43adc76eacb965635712/demo/search.png -------------------------------------------------------------------------------- /lib/cookiePool.js: -------------------------------------------------------------------------------- 1 | var request = require('./request'); 2 | var redis = require('./redis'); 3 | var util = require('util'); 4 | var config = require('../config/weixin'); 5 | var phantom = require('phantom'); 6 | 7 | function CookiePool() { 8 | this.cacheId = config.cookieCachePolicy.cacheId; 9 | this.lifeTime = config.cookieCachePolicy.lifeTime; 10 | this.num = config.cookieNum; 11 | this.engine = config.cookieCollectEngine; 12 | } 13 | 14 | CookiePool.prototype.collectAll = function*() { 15 | var cookies = []; 16 | for (var i = 0, l = this.num; i < l; i++) { 17 | var cookie = yield this.collect(); 18 | if (cookie) 19 | cookies.push(cookie); 20 | }; 21 | return cookies; 22 | } 23 | 24 | CookiePool.prototype.save = function*(cookies) { 25 | yield redis.sadd(this.cacheId, cookies); 26 | yield redis.expire(this.cacheId, this.lifeTime); 27 | } 28 | 29 | CookiePool.prototype.collect = function*() { 30 | var actionName = this.engine + 'Collect'; 31 | var apiUrl = util.format(config.api.getCookie); 32 | var cookie = yield this[actionName](apiUrl); 33 | console.log(cookie); 34 | return cookie; 35 | } 36 | 37 | CookiePool.prototype.requestCollect = function*(url) { 38 | var result = yield request(url, { 39 | proxyEnable: false 40 | }); 41 | if (!result.error && result.statusCode == 200) { 42 | var cookieStr = result.headers['set-cookie'].join(';'); 43 | return this.parseCookie(cookieStr); 44 | } 45 | } 46 | 47 | CookiePool.prototype.phantomCollect = function(url) { 48 | var me = this; 49 | return function(callback) { 50 | phantom.create(function (ph) { 51 | ph.createPage(function (page) { 52 | page.set('onResourceRequested', function(requestData, request) { 53 | if ((/http:\/\/.+?\.css/gi).test(requestData['url']) || requestData['Content-Type'] == 'text/css') { 54 | // console.log('The url of the request is matching. Aborting: ' + requestData['url']); 55 | request['abort()']; 56 | } 57 | }); 58 | page.open(url, function (status) { 59 | page.evaluate(function () { return document.cookie; }, function (result) { 60 | callback(null, me.parseCookie(result + ';')); 61 | ph.exit(); 62 | }); 63 | }); 64 | }); 65 | }, {parameters: {'load-images': 'no'}}); 66 | } 67 | } 68 | 69 | CookiePool.prototype.parseCookie = function(cookieStr) { 70 | var SNUID = cookieStr.match(/(SNUID=\S+?);/); 71 | var SUID = cookieStr.match(/(SUID=\S+?);/)[1]; 72 | if (SNUID) 73 | SNUID = SNUID[1]; 74 | var SUV = 'SUV=' + ((new Date()).getTime())*1000+Math.round(Math.random()*1000); 75 | var cookie = [SNUID, SUID, SUV].join(';'); 76 | return cookie; 77 | } 78 | 79 | CookiePool.prototype.getRandom = function*() { 80 | return yield redis.srandmember(this.cacheId); 81 | } 82 | 83 | CookiePool.prototype.destroy = function*() { 84 | return yield redis.del(this.cacheId); 85 | } 86 | 87 | CookiePool.prototype.getAll = function*() { 88 | return yield redis.smembers(this.cacheId); 89 | } 90 | 91 | module.exports = new CookiePool(); 92 | -------------------------------------------------------------------------------- /lib/counter.js: -------------------------------------------------------------------------------- 1 | var redis = require('./redis'); 2 | 3 | function Counter() { 4 | this.id = 'rss:counter'; 5 | } 6 | 7 | Counter.prototype.incr = function* () { 8 | return yield redis.incr(this.id); 9 | } 10 | 11 | Counter.prototype.get = function* () { 12 | return yield redis.get(this.id); 13 | } 14 | 15 | module.exports = new Counter(); 16 | -------------------------------------------------------------------------------- /lib/factory.js: -------------------------------------------------------------------------------- 1 | function factory() { 2 | 3 | } 4 | 5 | factory.create = function(source, id) { 6 | var Handler = require('./rss/' + source + '.js'); 7 | return new Handler(id); 8 | } 9 | 10 | module.exports = factory; -------------------------------------------------------------------------------- /lib/redis.js: -------------------------------------------------------------------------------- 1 | var co = require('co'); 2 | var config = require('../config/global'); 3 | var redisClient = require('redis').createClient(config.redis.port, config.redis.host); 4 | redisClient.on("error", function (err) { 5 | console.log("Error " + err); 6 | }); 7 | var wrapper = require('co-redis'); 8 | var redisCo = wrapper(redisClient); 9 | module.exports = redisCo; -------------------------------------------------------------------------------- /lib/render.js: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * Module dependencies. 4 | */ 5 | 6 | var views = require('co-views'); 7 | 8 | // setup views mapping .html 9 | // to the swig template engine 10 | 11 | module.exports = views(__dirname + '/../views', { 12 | map: { html: 'swig' } 13 | }); -------------------------------------------------------------------------------- /lib/request.js: -------------------------------------------------------------------------------- 1 | var config = require('../config/global'); 2 | var _request = require('request'); 3 | 4 | 5 | function request(uri, options) { 6 | return function(callback) { 7 | if (options && options.proxyEnable) { 8 | _request = _request.defaults({proxy: config.proxy}); 9 | } 10 | _request(uri, options, function(error, response, body) { 11 | callback(error, response); 12 | }) 13 | } 14 | } 15 | 16 | module.exports = request; -------------------------------------------------------------------------------- /lib/rss/weibo.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wlwr/rss/714e8a3521e2bcd806ea43adc76eacb965635712/lib/rss/weibo.js -------------------------------------------------------------------------------- /lib/rss/weixin.js: -------------------------------------------------------------------------------- 1 | var request = require('../request'); 2 | var redis = require('../redis'); 3 | var xmlParser = require('xml2json'); 4 | var util = require('util'); 5 | var config = require('../../config/weixin'); 6 | 7 | function Weixin(id) { 8 | this.id = id; 9 | } 10 | 11 | Weixin.prototype.getData = function*() { 12 | var rss = {}; 13 | var now = new Date(); 14 | 15 | //rss-cache 16 | var rssCacheId = util.format(config.listCachePolicy.cacheId, this.id); 17 | var rssCache = yield redis.get(rssCacheId); 18 | if (config.cacheEnable && rssCache) { 19 | return JSON.parse(rssCache); 20 | } 21 | 22 | //query-list-url 23 | var users = yield this.searchNameProxy(this.id); 24 | if (users.length == 0) 25 | throw Error('不存在此微信号'); 26 | var rss = users[0]; 27 | var apiUrl = rss.url; 28 | 29 | //list-collect 30 | rss.items = yield this.getListData(apiUrl); 31 | for (var i = 0, l = rss.items.length; i < l; i++) { 32 | var item = rss.items[i]; 33 | now.setTime(item.lastModified*1000); 34 | item.formatDate = now.toUTCString(); 35 | var content = yield this.getContentDataProxy(item); 36 | if (content) { 37 | item.fullContent = content.content; 38 | } 39 | }; 40 | if (rss.items.length == 0) 41 | throw Error('无相关文章'); 42 | 43 | rss = this.getRssInfo(rss); 44 | yield redis.setex(rssCacheId, config.listCachePolicy.lifeTime, JSON.stringify(rss)); 45 | return rss; 46 | }; 47 | 48 | 49 | Weixin.prototype.getRssInfo = function(rss) { 50 | var now = new Date(); 51 | var recentItem = rss.items[0]; 52 | rss.id = this.id; 53 | rss.title = recentItem.sourcename; 54 | rss.lastModified = recentItem.lastModified; 55 | now.setTime(rss.lastModified*1000); 56 | rss.formatDate = now.toUTCString(); 57 | rss.author = recentItem.sourcename; 58 | return rss; 59 | }; 60 | 61 | 62 | Weixin.prototype.getListData = function*(apiUrl) { 63 | var items = []; 64 | var response = 65 | yield request(apiUrl, { 66 | headers: yield this.getRequestHeaders() 67 | }); 68 | if (response.error) 69 | throw new Error(response.error); 70 | if (response.statusCode != 200) 71 | throw new Error('response statusCode: ' + response.statusCode); 72 | 73 | var body = response.body; 74 | var match = body.match(/var\smsgList\s=\s'([\s\S]*?)';/); 75 | if (!match || !match[1]) { 76 | throw new Error('无法获取列表数据'); 77 | } 78 | var dataJson = JSON.parse(match[1].html()); 79 | if (dataJson.list.length === 0) 80 | throw new Error(this.id + ': 无相关文章'); 81 | dataJson.list.forEach(function(item) { 82 | var _item = { 83 | 'docid' : item.comm_msg_info.id, 84 | 'lastModified' : item.comm_msg_info.datetime, 85 | 'title' : item.app_msg_ext_info.title, 86 | 'content' : item.app_msg_ext_info.digest, 87 | 'url' : 'http://mp.weixin.qq.com' + item.app_msg_ext_info.content_url.substr(1).html(), 88 | 'sourcename' : item.app_msg_ext_info.author 89 | }; 90 | items.push(_item); 91 | //subList 92 | var subList = item.app_msg_ext_info.multi_app_msg_item_list; 93 | if (subList.length > 0) { 94 | subList.forEach(function(subItem) { 95 | var _item = { 96 | 'docid' : subItem.fileid, 97 | 'lastModified' : item.comm_msg_info.datetime, 98 | 'title' : subItem.title, 99 | 'content' : subItem.digest, 100 | 'url' : 'http://mp.weixin.qq.com' + subItem.content_url.substr(1).html(), 101 | 'sourcename' : subItem.author 102 | }; 103 | items.push(_item); 104 | }); 105 | } 106 | }); 107 | return items; 108 | } 109 | 110 | 111 | Weixin.prototype.getContentDataProxy = function*(item) { 112 | //content-cache 113 | var contentCacheId = util.format(config.contentCachePolicy.cacheId, item.docid); 114 | var content = yield redis.get(contentCacheId); 115 | if (!config.cacheEnable || !content || !content.content) { 116 | //content-collect 117 | content = yield this.getContentData(item); 118 | if (content) { 119 | yield redis.setex(contentCacheId, config.contentCachePolicy.lifeTime, JSON.stringify(content)); 120 | } 121 | } else { 122 | content = JSON.parse(content); 123 | } 124 | return content; 125 | }; 126 | 127 | 128 | Weixin.prototype.getContentData = function*(item) { 129 | var url = item.url; 130 | var result = 131 | yield request(url, { 132 | headers: yield this.getRequestHeaders() 133 | }); 134 | if (!result.error && result.statusCode == 200) { 135 | var match = result.body.match(/id=\"js_content\">([\s\S]*?)<\/div>/); 136 | var cover = this.getCover(result.body); 137 | if (match && match[1]) { 138 | content = match[1].replace(/data-src/g, 'src'); 139 | var coverHtml = cover ? '

' : ''; 140 | content = coverHtml + content; 141 | return {'content' : content, 'source' : result.request.href}; 142 | } 143 | } 144 | } 145 | 146 | Weixin.prototype.getCover = function(html) { 147 | var match = html.match(/var\scover\s=\s\"([\s\S]*?)\";/); 148 | if (match && match[1]) { 149 | return match[1]; 150 | } 151 | }; 152 | 153 | Weixin.prototype.getRequestHeaders = function*() { 154 | return { 155 | 'Accept': '*/*', 156 | 'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6', 157 | 'Cache-Control': 'max-age=0', 158 | 'Connection': 'keep-alive', 159 | 'Cookie': 'sd_userid=74761436088560498; sd_cookie_crttime=1436088560498; 3g_guest_id=-9124332030488707072; ts_refer=www.baidu.com/link; ts_uid=1816741185; eas_sid=f1w4I5H7w224X4z3w9J27844g9; _ga=GA1.2.1140860462.1438155424; pgv_pvid=1467256065; o_cookie=527114214; noticeLoginFlag=1; ptui_loginuin=527114214; ptcz=2a4b878f712cb71f3eeb0fda5a3d488fd4688d292820e1485fa04ee62c2c9500; pt2gguin=o0527114214; uin=o0527114214; skey=@k3f0ouWCY; qm_username=527114214; qm_sid=705e43ed71261bdea26e56d389d4d334,cpfgcNUS3Cs4.; ptisp=ctc', 160 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36', 161 | 'Referer' : 'http://weixin.sogou.com/weixin?type=1&query=%E4%B8%80%E5%85%9C%E7%B3%96&ie=utf8&_sug_=y&_sug_type_=&w=01019900&sut=2112&sst0=1468836385188&lkt=1%2C1468836385080%2C1468836385080' 162 | } 163 | } 164 | 165 | Weixin.prototype.searchNameProxy = function*(keyword) { 166 | var searchCacheId = util.format(config.searchCachePolicy.cacheId, keyword); 167 | var result = yield redis.get(searchCacheId); 168 | if (!config.cacheEnable || !result) { 169 | var result = yield this.searchName(keyword); 170 | yield redis.setex(searchCacheId, config.searchCachePolicy.lifeTime, JSON.stringify(result)); 171 | } else { 172 | result = JSON.parse(result); 173 | } 174 | return result; 175 | }; 176 | 177 | Weixin.prototype.searchName = function*(keyword) { 178 | var apiUrl = util.format(config.api.searchName, encodeURIComponent(keyword)); 179 | var response = yield request(apiUrl, { 180 | headers: yield this.getRequestHeaders() 181 | }); 182 | if (response.error) 183 | throw new Error(response.error); 184 | if (response.statusCode != 200) 185 | throw new Error('response statusCode: ' + response.statusCode); 186 | var regexp = //g; 187 | var matches = response.body.match(regexp); 188 | if (!matches) 189 | throw new Error('查询不到相关账号'); 190 | var result = []; 191 | for (var i = 0, l = matches.length; i < l; i++) { 192 | var item = matches[i]; 193 | var match = item.match(/href="([^"]*)"[\s\S]*(.*)<\/h3>[\s\S]*微信号: