├── .gitignore
├── Cakefile
├── LICENSE
├── README.md
├── build
└── weibo.js
├── cli.js
├── package.json
└── src
└── weibo.coffee
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 |
8 | # Runtime data
9 | pids
10 | *.pid
11 | *.seed
12 | *.pid.lock
13 |
14 | # Directory for instrumented libs generated by jscoverage/JSCover
15 | lib-cov
16 |
17 | # Coverage directory used by tools like istanbul
18 | coverage
19 |
20 | # nyc test coverage
21 | .nyc_output
22 |
23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
24 | .grunt
25 |
26 | # Bower dependency directory (https://bower.io/)
27 | bower_components
28 |
29 | # node-waf configuration
30 | .lock-wscript
31 |
32 | # Compiled binary addons (http://nodejs.org/api/addons.html)
33 | build/Release
34 |
35 | # Dependency directories
36 | node_modules/
37 | jspm_packages/
38 |
39 | # Typescript v1 declaration files
40 | typings/
41 |
42 | # Optional npm cache directory
43 | .npm
44 |
45 | # Optional eslint cache
46 | .eslintcache
47 |
48 | # Optional REPL history
49 | .node_repl_history
50 |
51 | # Output of 'npm pack'
52 | *.tgz
53 |
54 | # Yarn Integrity file
55 | .yarn-integrity
56 |
57 | # dotenv environment variables file
58 | .env
59 |
60 |
--------------------------------------------------------------------------------
/Cakefile:
--------------------------------------------------------------------------------
1 | {print} = require 'util'
2 | {spawn} = require 'child_process'
3 |
# Compile the CoffeeScript sources under src/ into build/ by spawning the
# `coffee` command-line compiler. The compiler's stdout and stderr are
# forwarded to this process, and any failure ends this process with a
# non-zero exit status.
build = () ->
  os = require 'os'
  # Windows uses the `coffee.cmd` launcher name instead of `coffee`.
  coffeeCmd = if os.platform() == 'win32' then 'coffee.cmd' else 'coffee'
  coffee = spawn coffeeCmd, ['-c', '-o', 'build', 'src']

  coffee.stderr.on 'data', (data) ->
    process.stderr.write data.toString()

  # util.print is no longer available in current Node.js releases, so the
  # compiler output is written to stdout directly.
  coffee.stdout.on 'data', (data) ->
    process.stdout.write data.toString()

  # A compiler that cannot be started is reported through 'error'; without a
  # listener that would surface as an uncaught exception.
  coffee.on 'error', (err) ->
    process.stderr.write "Unable to run #{coffeeCmd}: #{err.message}\n"
    process.exit 1

  coffee.on 'exit', (code) ->
    # `code` is null when the compiler was terminated by a signal; that must
    # not be reported as success.
    if code != 0
      process.exit(code ? 1)
18 |
# Register the `build` task; the description names build/ because that is the
# output directory handed to the compiler by build().
task 'build', 'Build build/ from src/', ->
  build()
21 |
22 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 joyqi
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # weibo-request
2 |
3 | 这个库可以解析任意合法的微博状态URL,并将它转化为标准的数据格式输出。
4 |
5 | ## 安装
6 |
7 | ```
8 | npm install weibo-request
9 | ```
10 |
11 | ## 使用
12 |
13 | ```javascript
14 | var weibo = require('weibo-request');
15 |
16 | weibo('https://weibo.com/2036070420/FyGnYmrxE', function (err, data) {
17 | if (err) {
18 | return console.log(err);
19 | }
20 |
21 | console.log(data);
22 | });
23 | ```
24 |
25 | ## 数据范例
26 |
27 | ```json
28 | {
29 | "id": "4182660865349090",
30 | "title": "SSH 登录流程分析",
31 | "text": "【SSH 登录流程分析】
网页链接,作者:JayChen(
网页链接)
写一篇短文,介绍 ssh 密钥登录远程服务器流程和注意事项。
- 登录流程
- 生成密钥对
- 首次 ssh 登录
- 中间人攻击
- known_hosts 文件
- config 配置 ",
32 | "plainText": "【SSH 登录流程分析】网页链接,作者:JayChen(网页链接)
写一篇短文,介绍 ssh 密钥登录远程服务器流程和注意事项。
- 登录流程
- 生成密钥对
- 首次 ssh 登录
- 中间人攻击
- known_hosts 文件
- config 配置 ",
33 | "date": "2017-12-08T04:32:03.000Z",
34 | "url": "https://weibo.com/2036070420/FyGnYmrxE",
35 | "thumbnail": "http://wx2.sinaimg.cn/thumbnail/795bf814gy1fm96qh5j2sj20m80duglv.jpg",
36 | "pics": [
37 | [
38 | "https://wx2.sinaimg.cn/orj360/795bf814gy1fm96qh5j2sj20m80duglv.jpg",
39 | "https://wx2.sinaimg.cn/large/795bf814gy1fm96qh5j2sj20m80duglv.jpg"
40 | ]
41 | ],
42 | "reposts": 6,
43 | "comments": 1,
44 | "likes": 4,
45 | "reads": 2040,
46 | "user": {
47 | "name": "SegmentFault",
48 | "url": "https://weibo.com/u/2036070420",
49 | "avatar": "https://ww2.sinaimg.cn/orj480/795bf814jw1e8qgp5bmzyj2050050aa8.jpg"
50 | }
51 | }
52 | ```
53 |
54 | ## 使用范例
55 |
56 | 源码包下的 `cli.js` 可以用来测试数据格式
57 |
58 | 
59 |
--------------------------------------------------------------------------------
/build/weibo.js:
--------------------------------------------------------------------------------
1 | // Generated by CoffeeScript 1.12.2
2 | (function() {
3 | var Cheerio, Request, URL, VM, WeiboID,
4 | slice = [].slice;
5 |
6 | WeiboID = require('weibo-ids');
7 |
8 | Cheerio = require('cheerio');
9 |
10 | VM = require('vm2').VM;
11 |
12 | Request = require('request');
13 |
14 | URL = require('url');
15 |
16 | module.exports = function(url, cb, cookie) {
17 | var headers, id, matches, promise, reject, resolve, scheme;
18 | if (cb == null) {
19 | cb = null;
20 | }
21 | if (cookie == null) {
22 | cookie = null;
23 | }
24 | promise = null;
25 | if (cb == null) {
26 | resolve = null;
27 | reject = null;
28 | promise = new Promise(function(res, rej) {
29 | resolve = res;
30 | return reject = rej;
31 | });
32 | cb = function(err, data) {
33 | if (data == null) {
34 | data = null;
35 | }
36 | if (err != null) {
37 | reject(err);
38 | } else {
39 | resolve(data);
40 | }
41 | return promise;
42 | };
43 | }
44 | scheme = URL.parse(url);
45 | if (!scheme) {
46 | return cb(new Error('Url is not correct.'));
47 | }
48 | id = null;
49 | if (scheme.host === 'weibo.com') {
50 | matches = scheme.pathname.match(/^\/[0-9]+\/([0-9a-z]+)$/i);
51 | if (!matches) {
52 | return cb(new Error(scheme.path + " is not a valid path."));
53 | }
54 | id = matches[1];
55 | } else if (scheme.host === 'm.weibo.cn') {
56 | matches = scheme.pathname.match(/^\/(detail|status)\/([0-9a-z]+)$/i);
57 | if (!matches) {
58 | return cb(new Error(scheme.path + " is not a valid path."));
59 | }
60 | id = matches[2];
61 | }
62 | if (id == null) {
63 | return cb(new Error('Url is not correct.'));
64 | }
65 | headers = {
66 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
67 | };
68 | if (cookie != null) {
69 | headers.Cookie = cookie;
70 | }
71 | Request({
72 | uri: 'https://m.weibo.cn/status/' + id,
73 | timeout: 5000,
74 | headers: headers
75 | }, function(err, response, body) {
76 | var $, data, e, i, len, pic, pics, ref, script, status, text, vm;
77 | if (err != null) {
78 | return cb(err);
79 | }
80 | $ = Cheerio.load(body);
81 | script = ($('script')).eq(1).html() + ';$render_data';
82 | vm = new VM({
83 | timeout: 1000,
84 | sandbox: {
85 | location: {
86 | href: ''
87 | }
88 | }
89 | });
90 | try {
91 | data = vm.run(script);
92 | if (typeof data.status === 'undefined') {
93 | return cb(new Error('Status has no data.'));
94 | }
95 | status = data.status;
96 | pics = [];
97 | text = status.text.replace(/<(\/?)([_a-z0-9-]+)(\s+[^>]+)?>/ig, function() {
98 | var all, matches;
99 | matches = 1 <= arguments.length ? slice.call(arguments, 0) : [];
100 | if (matches[2] !== 'a') {
101 | return '';
102 | }
103 | if (matches[1] === '/') {
104 | return matches[0];
105 | }
106 | url = '#';
107 | if (matches[3] != null) {
108 | if (!!(all = matches[3].match(/\s+href="([^"]+)"/i))) {
109 | url = all[1];
110 | }
111 | if (!!(all = matches[3].match(/\s+data\-url="([^"]+)"/i))) {
112 | url = all[1];
113 | }
114 | }
115 | return "";
116 | });
117 | if (status.pics != null) {
118 | ref = status.pics;
119 | for (i = 0, len = ref.length; i < len; i++) {
120 | pic = ref[i];
121 | pics.push([pic.url, pic.large.url]);
122 | }
123 | }
124 | return cb(null, {
125 | id: status.id,
126 | bid: status.bid,
127 | title: status.status_title,
128 | text: status.text,
129 | plainText: text,
130 | date: new Date(status.created_at),
131 | url: 'https://weibo.com/' + status.user.id + '/' + status.bid,
132 | thumbnail: status.thumbnail_pic,
133 | pics: pics,
134 | reposts: status.reposts_count,
135 | comments: status.comments_count,
136 | likes: status.attitudes_count,
137 | reads: status.reads,
138 | user: {
139 | name: status.user.screen_name,
140 | url: 'https://weibo.com/u/' + status.user.id,
141 | avatar: status.user.avatar_hd
142 | }
143 | });
144 | } catch (error) {
145 | e = error;
146 | return cb(e);
147 | }
148 | });
149 | return promise;
150 | };
151 |
152 | }).call(this);
153 |
--------------------------------------------------------------------------------
/cli.js:
--------------------------------------------------------------------------------
// Command-line helper: fetch the status behind the URL passed as the first
// argument and print the parsed data as indented JSON.
// The bindings are declared explicitly so they do not become implicit globals.
var weibo = require('./build/weibo');
var args = process.argv.slice(2);

if (args.length === 0) {
    // Usage problems go to stderr and are reflected in the exit status.
    console.error('Please provide a valid Weibo URL.');
    process.exitCode = 1;
} else {
    weibo(args[0], function (err, data) {
        if (err) {
            console.error(err);
            process.exitCode = 1;
            return;
        }

        console.log(JSON.stringify(data, null, 4));
    });
}
15 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "weibo-request",
3 | "version": "1.1.5",
4 | "description": "Read public data via m.weibo.cn",
5 | "main": "build/weibo.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1"
8 | },
9 | "repository": {
10 | "type": "git",
11 | "url": "git+https://github.com/joyqi/weibo-request.git"
12 | },
13 | "keywords": [
14 | "weibo",
15 | "request",
16 | "http",
17 | "api"
18 | ],
19 | "author": "joyqi",
20 | "license": "MIT",
21 | "bugs": {
22 | "url": "https://github.com/joyqi/weibo-request/issues"
23 | },
24 | "homepage": "https://github.com/joyqi/weibo-request#readme",
25 | "dependencies": {
26 | "cheerio": "^1.0.0-rc.2",
27 | "request": "^2.83.0",
28 | "vm2": "^3.5.2",
29 | "weibo-ids": "^0.2.1"
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/src/weibo.coffee:
--------------------------------------------------------------------------------
1 | WeiboID = require 'weibo-ids'
2 | Cheerio = require 'cheerio'
3 | {VM} = require 'vm2'
4 | Request = require 'request'
5 | URL = require 'url'
6 |
# Pull the status identifier out of a weibo.com or m.weibo.cn status URL.
# Throws an Error when `url` is not a string, when its host is not one of the
# two supported hosts, or when its path does not match the expected shape.
extractStatusId = (url) ->
  # URL.parse throws a TypeError on non-strings; report that as a bad URL.
  throw new Error 'Url is not correct.' if typeof url isnt 'string'

  # Parse the URL
  scheme = URL.parse url
  # pathname may be null when the URL carries no path; keep match() safe.
  pathname = scheme.pathname ? ''

  if scheme.host is 'weibo.com'
    # Web version: a numeric segment followed by the identifier
    matches = pathname.match /^\/[0-9]+\/([0-9a-z]+)$/i
    throw new Error "#{scheme.path} is not a valid path." if not matches
    return matches[1]

  if scheme.host is 'm.weibo.cn'
    # Mobile version: /detail/<id> or /status/<id>
    matches = pathname.match /^\/(detail|status)\/([0-9a-z]+)$/i
    throw new Error "#{scheme.path} is not a valid path." if not matches
    return matches[2]

  throw new Error 'Url is not correct.'

# Remove every HTML tag from `html` except anchors. Opening anchors are reduced
# to a bare `<a href="...">` whose target is the `data-url` attribute if
# present, otherwise `href`, otherwise `#`.
keepAnchors = (html) ->
  html.replace /<(\/?)([_a-z0-9-]+)(\s+[^>]+)?>/ig, (tag, closing, name, attrs) ->
    return '' if name isnt 'a'
    return tag if closing is '/'
    href = '#'

    if attrs?
      if (found = attrs.match /\s+href="([^"]+)"/i)
        href = found[1]

      # data-url is checked last so that it wins over href when both exist.
      if (found = attrs.match /\s+data\-url="([^"]+)"/i)
        href = found[1]

    # Emit a real opening tag so the closing </a> kept above stays balanced.
    "<a href=\"#{href}\">"

# Evaluate the second <script> element of the page inside a vm2 sandbox and
# return the `status` member of the `$render_data` value it defines.
# Throws when the script fails to run or yields no status.
readStatus = (body) ->
  $ = Cheerio.load body

  # Appending `;$render_data` makes that variable the result of vm.run.
  script = ($ 'script').eq(1).html() + ';$render_data'

  # Run the script in the VM
  vm = new VM
    timeout: 1000
    sandbox:
      location:
        href: ''

  data = vm.run script
  throw new Error 'Status has no data.' if not data?.status?
  data.status

# Convert the raw status record into the object handed to callers.
buildResult = (status) ->
  pics = []
  if Array.isArray status.pics
    pics.push [pic.url, pic.large.url] for pic in status.pics

  result =
    id: status.id
    bid: status.bid
    title: status.status_title
    text: status.text
    plainText: keepAnchors status.text
    date: new Date status.created_at
    url: 'https://weibo.com/' + status.user.id + '/' + status.bid
    thumbnail: status.thumbnail_pic
    pics: pics
    reposts: status.reposts_count
    comments: status.comments_count
    likes: status.attitudes_count
    reads: status.reads
    user:
      name: status.user.screen_name
      url: 'https://weibo.com/u/' + status.user.id
      avatar: status.user.avatar_hd

  result

# Fetch a status through https://m.weibo.cn/status/<id> and report it in a
# normalised shape.
#
# url    - a weibo.com or m.weibo.cn status URL
# cb     - optional Node-style callback (err, data); when omitted a Promise is
#          returned instead
# cookie - optional value for the Cookie request header
#
# Returns a Promise when no callback was supplied. With a callback it returns
# null once the request has been started, or the callback's own return value
# when the URL is rejected up front.
module.exports = (url, cb = null, cookie = null) ->
  promise = null

  if not cb?
    resolve = null
    reject = null

    promise = new Promise (res, rej) ->
      resolve = res
      reject = rej

    # No callback given: settle the promise through an internal callback.
    cb = (err, data = null) ->
      if err?
        reject err
      else
        resolve data

      promise

  try
    id = extractStatusId url
  catch e
    return cb e

  headers = 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
  headers.Cookie = cookie if cookie?

  # Request the mobile version of the page
  Request
    uri: 'https://m.weibo.cn/status/' + id
    timeout: 5000
    headers: headers
  , (err, response, body) ->
    return cb err if err?

    try
      result = buildResult readStatus body
    catch e
      return cb e

    # Called outside the try so an exception thrown by the caller's own
    # callback is not caught here and fed back into the callback again.
    cb null, result

  promise
116 |
117 |
--------------------------------------------------------------------------------