├── .gitignore ├── Cakefile ├── LICENSE ├── README.md ├── build └── weibo.js ├── cli.js ├── package.json └── src └── weibo.coffee /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (http://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # Typescript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | -------------------------------------------------------------------------------- /Cakefile: -------------------------------------------------------------------------------- 1 | {print} = require 'util' 2 | {spawn} = require 'child_process' 3 | 4 | build = () -> 5 | os = require 'os' 6 | if os.platform() == 'win32' 7 | coffeeCmd = 'coffee.cmd' 8 | else 9 | coffeeCmd = 'coffee' 10 | coffee = spawn coffeeCmd, ['-c', '-o', 'build', 'src'] 11 | coffee.stderr.on 'data', (data) -> 12 | process.stderr.write data.toString() 13 | coffee.stdout.on 'data', (data) -> 14 | print data.toString() 15 | coffee.on 'exit', (code) 
-> 16 | if code != 0 17 | process.exit code 18 | 19 | task 'build', 'Build ./ from src/', -> 20 | build() 21 | 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 joyqi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # weibo-request 2 | 3 | 这个库可以解析任意合法的微博状态URL,并将它转化为标准的数据格式输出。 4 | 5 | ## 安装 6 | 7 | ``` 8 | npm install weibo-request 9 | ``` 10 | 11 | ## 使用 12 | 13 | ```javascript 14 | var weibo = require('weibo-request'); 15 | 16 | weibo('https://weibo.com/2036070420/FyGnYmrxE', function (err, data) { 17 | if (err) { 18 | return console.log(err); 19 | } 20 | 21 | console.log(data); 22 | }); 23 | ``` 24 | 25 | ## 数据范例 26 | 27 | ```json 28 | { 29 | "id": "4182660865349090", 30 | "title": "SSH 登录流程分析", 31 | "text": "【SSH 登录流程分析】网页链接,作者:JayChen(网页链接

写一篇短文,介绍 ssh 密钥登录远程服务器流程和注意事项。
- 登录流程
- 生成密钥对
- 首次 ssh 登录
- 中间人攻击
- known_hosts 文件
- config 配置 ​", 32 | "plainText": "【SSH 登录流程分析】网页链接,作者:JayChen(网页链接

写一篇短文,介绍 ssh 密钥登录远程服务器流程和注意事项。
- 登录流程
- 生成密钥对
- 首次 ssh 登录
- 中间人攻击
- known_hosts 文件
- config 配置 ​", 33 | "date": "2017-12-08T04:32:03.000Z", 34 | "url": "https://weibo.com/2036070420/FyGnYmrxE", 35 | "thumbnail": "http://wx2.sinaimg.cn/thumbnail/795bf814gy1fm96qh5j2sj20m80duglv.jpg", 36 | "pics": [ 37 | [ 38 | "https://wx2.sinaimg.cn/orj360/795bf814gy1fm96qh5j2sj20m80duglv.jpg", 39 | "https://wx2.sinaimg.cn/large/795bf814gy1fm96qh5j2sj20m80duglv.jpg" 40 | ] 41 | ], 42 | "reposts": 6, 43 | "comments": 1, 44 | "likes": 4, 45 | "reads": 2040, 46 | "user": { 47 | "name": "SegmentFault", 48 | "url": "https://weibo.com/u/2036070420", 49 | "avatar": "https://ww2.sinaimg.cn/orj480/795bf814jw1e8qgp5bmzyj2050050aa8.jpg" 50 | } 51 | } 52 | ``` 53 | 54 | ## 使用范例 55 | 56 | 源码包下的 `cli.js` 可以用来测试数据格式 57 | 58 | ![screen](http://wx2.sinaimg.cn/large/6828cfabgy1fm99mq0mixj20r20h643c.jpg) 59 | -------------------------------------------------------------------------------- /build/weibo.js: -------------------------------------------------------------------------------- 1 | // Generated by CoffeeScript 1.12.2 2 | (function() { 3 | var Cheerio, Request, URL, VM, WeiboID, 4 | slice = [].slice; 5 | 6 | WeiboID = require('weibo-ids'); 7 | 8 | Cheerio = require('cheerio'); 9 | 10 | VM = require('vm2').VM; 11 | 12 | Request = require('request'); 13 | 14 | URL = require('url'); 15 | 16 | module.exports = function(url, cb, cookie) { 17 | var headers, id, matches, promise, reject, resolve, scheme; 18 | if (cb == null) { 19 | cb = null; 20 | } 21 | if (cookie == null) { 22 | cookie = null; 23 | } 24 | promise = null; 25 | if (cb == null) { 26 | resolve = null; 27 | reject = null; 28 | promise = new Promise(function(res, rej) { 29 | resolve = res; 30 | return reject = rej; 31 | }); 32 | cb = function(err, data) { 33 | if (data == null) { 34 | data = null; 35 | } 36 | if (err != null) { 37 | reject(err); 38 | } else { 39 | resolve(data); 40 | } 41 | return promise; 42 | }; 43 | } 44 | scheme = URL.parse(url); 45 | if (!scheme) { 46 | return cb(new Error('Url is not correct.')); 
47 | } 48 | id = null; 49 | if (scheme.host === 'weibo.com') { 50 | matches = scheme.pathname.match(/^\/[0-9]+\/([0-9a-z]+)$/i); 51 | if (!matches) { 52 | return cb(new Error(scheme.path + " is not a valid path.")); 53 | } 54 | id = matches[1]; 55 | } else if (scheme.host === 'm.weibo.cn') { 56 | matches = scheme.pathname.match(/^\/(detail|status)\/([0-9a-z]+)$/i); 57 | if (!matches) { 58 | return cb(new Error(scheme.path + " is not a valid path.")); 59 | } 60 | id = matches[2]; 61 | } 62 | if (id == null) { 63 | return cb(new Error('Url is not correct.')); 64 | } 65 | headers = { 66 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36' 67 | }; 68 | if (cookie != null) { 69 | headers.Cookie = cookie; 70 | } 71 | Request({ 72 | uri: 'https://m.weibo.cn/status/' + id, 73 | timeout: 5000, 74 | headers: headers 75 | }, function(err, response, body) { 76 | var $, data, e, i, len, pic, pics, ref, script, status, text, vm; 77 | if (err != null) { 78 | return cb(err); 79 | } 80 | $ = Cheerio.load(body); 81 | script = ($('script')).eq(1).html() + ';$render_data'; 82 | vm = new VM({ 83 | timeout: 1000, 84 | sandbox: { 85 | location: { 86 | href: '' 87 | } 88 | } 89 | }); 90 | try { 91 | data = vm.run(script); 92 | if (typeof data.status === 'undefined') { 93 | return cb(new Error('Status has no data.')); 94 | } 95 | status = data.status; 96 | pics = []; 97 | text = status.text.replace(/<(\/?)([_a-z0-9-]+)(\s+[^>]+)?>/ig, function() { 98 | var all, matches; 99 | matches = 1 <= arguments.length ? 
slice.call(arguments, 0) : []; 100 | if (matches[2] !== 'a') { 101 | return ''; 102 | } 103 | if (matches[1] === '/') { 104 | return matches[0]; 105 | } 106 | url = '#'; 107 | if (matches[3] != null) { 108 | if (!!(all = matches[3].match(/\s+href="([^"]+)"/i))) { 109 | url = all[1]; 110 | } 111 | if (!!(all = matches[3].match(/\s+data\-url="([^"]+)"/i))) { 112 | url = all[1]; 113 | } 114 | } 115 | return ""; 116 | }); 117 | if (status.pics != null) { 118 | ref = status.pics; 119 | for (i = 0, len = ref.length; i < len; i++) { 120 | pic = ref[i]; 121 | pics.push([pic.url, pic.large.url]); 122 | } 123 | } 124 | return cb(null, { 125 | id: status.id, 126 | bid: status.bid, 127 | title: status.status_title, 128 | text: status.text, 129 | plainText: text, 130 | date: new Date(status.created_at), 131 | url: 'https://weibo.com/' + status.user.id + '/' + status.bid, 132 | thumbnail: status.thumbnail_pic, 133 | pics: pics, 134 | reposts: status.reposts_count, 135 | comments: status.comments_count, 136 | likes: status.attitudes_count, 137 | reads: status.reads, 138 | user: { 139 | name: status.user.screen_name, 140 | url: 'https://weibo.com/u/' + status.user.id, 141 | avatar: status.user.avatar_hd 142 | } 143 | }); 144 | } catch (error) { 145 | e = error; 146 | return cb(e); 147 | } 148 | }); 149 | return promise; 150 | }; 151 | 152 | }).call(this); 153 | -------------------------------------------------------------------------------- /cli.js: -------------------------------------------------------------------------------- 1 | weibo = require('./build/weibo'); 2 | args = process.argv.slice(2); 3 | 4 | if (args.length == 0) { 5 | console.log('Please provide a valid Weibo URL.'); 6 | } else { 7 | weibo(args[0], function (err, data) { 8 | if (err) { 9 | return console.log(err); 10 | } 11 | 12 | console.log(JSON.stringify(data, null, 4)); 13 | }); 14 | } 15 | -------------------------------------------------------------------------------- /package.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "weibo-request", 3 | "version": "1.1.5", 4 | "description": "Read public data via m.weibo.cn", 5 | "main": "build/weibo.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/joyqi/weibo-request.git" 12 | }, 13 | "keywords": [ 14 | "weibo", 15 | "request", 16 | "http", 17 | "api" 18 | ], 19 | "author": "joyqi", 20 | "license": "MIT", 21 | "bugs": { 22 | "url": "https://github.com/joyqi/weibo-request/issues" 23 | }, 24 | "homepage": "https://github.com/joyqi/weibo-request#readme", 25 | "dependencies": { 26 | "cheerio": "^1.0.0-rc.2", 27 | "request": "^2.83.0", 28 | "vm2": "^3.5.2", 29 | "weibo-ids": "^0.2.1" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/weibo.coffee: -------------------------------------------------------------------------------- 1 | WeiboID = require 'weibo-ids' 2 | Cheerio = require 'cheerio' 3 | {VM} = require 'vm2' 4 | Request = require 'request' 5 | URL = require 'url' 6 | 7 | module.exports = (url, cb = null, cookie = null) -> 8 | promise = null 9 | 10 | if not cb? 11 | resolve = null 12 | reject = null 13 | 14 | promise = new Promise (res, rej) -> 15 | resolve = res 16 | reject = rej 17 | 18 | cb = (err, data = null) -> 19 | if err? 20 | reject err 21 | else 22 | resolve data 23 | 24 | promise 25 | 26 | 27 | # 解析URL 28 | scheme = URL.parse url 29 | return cb new Error 'Url is not correct.' if not scheme 30 | 31 | id = null 32 | 33 | if scheme.host is 'weibo.com' 34 | # web版 35 | matches = scheme.pathname.match /^\/[0-9]+\/([0-9a-z]+)$/i 36 | return cb new Error "#{scheme.path} is not a valid path." 
if not matches 37 | 38 | id = matches[1] 39 | else if scheme.host is 'm.weibo.cn' 40 | # mobile版 41 | matches = scheme.pathname.match /^\/(detail|status)\/([0-9a-z]+)$/i 42 | return cb new Error "#{scheme.path} is not a valid path." if not matches 43 | 44 | id = matches[2] 45 | 46 | return cb new Error 'Url is not correct.' if not id? 47 | 48 | headers = 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36' 49 | headers.Cookie = cookie if cookie? 50 | 51 | # 请求移动版地址 52 | Request 53 | uri: 'https://m.weibo.cn/status/' + id 54 | timeout: 5000 55 | headers: headers 56 | , (err, response, body) -> 57 | return cb err if err? 58 | $ = Cheerio.load body 59 | 60 | script = ($ 'script').eq 1 61 | .html() + ';$render_data' 62 | 63 | # 执行VM 64 | vm = new VM 65 | timeout: 1000 66 | sandbox: 67 | location: 68 | href: '' 69 | 70 | try 71 | data = vm.run script 72 | return cb new Error 'Status has no data.' if typeof data.status is 'undefined' 73 | 74 | status = data.status 75 | pics = [] 76 | 77 | text = status.text.replace /<(\/?)([_a-z0-9-]+)(\s+[^>]+)?>/ig, (matches...) -> 78 | return '' if matches[2] isnt 'a' 79 | return matches[0] if matches[1] is '/' 80 | url = '#' 81 | 82 | if matches[3]? 83 | if !!(all = matches[3].match /\s+href="([^"]+)"/i) 84 | url = all[1] 85 | 86 | if !!(all = matches[3].match /\s+data\-url="([^"]+)"/i) 87 | url = all[1] 88 | 89 | return "" 90 | 91 | if status.pics? 
92 | pics.push [pic.url, pic.large.url] for pic in status.pics 93 | 94 | cb null, 95 | id: status.id 96 | bid: status.bid 97 | title: status.status_title 98 | text: status.text 99 | plainText: text 100 | date: new Date status.created_at 101 | url: 'https://weibo.com/' + status.user.id + '/' + status.bid 102 | thumbnail: status.thumbnail_pic 103 | pics: pics 104 | reposts: status.reposts_count 105 | comments: status.comments_count 106 | likes: status.attitudes_count 107 | reads: status.reads 108 | user: 109 | name: status.user.screen_name 110 | url: 'https://weibo.com/u/' + status.user.id 111 | avatar: status.user.avatar_hd 112 | catch e 113 | cb e 114 | 115 | promise 116 | 117 | --------------------------------------------------------------------------------