├── .gitignore
├── doc
├── es6.png
├── pp.png
├── sf-jj.gif
├── ali-pay.png
├── es6-pdf.png
├── pp-trace.png
├── pp-trace2.png
└── wx-pay.jpeg
├── data
├── sf-juejin
│ ├── err.png
│ ├── sf.png
│ └── done.png
├── zhentaoo
│ └── zhentaoo.png
└── monitor
│ └── success
│ ├── ZT-2017-9-19T21:17:10.png
│ ├── ZT-2017-9-19T21:18:50.png
│ └── ZT-2017-9-19T21:23:43.png
├── src
├── monitor
│ ├── views
│ │ ├── index.jade
│ │ ├── error.jade
│ │ └── layout.jade
│ ├── public
│ │ └── stylesheets
│ │ │ └── style.css
│ ├── routes
│ │ ├── index.js
│ │ └── monitor.js
│ ├── scripts
│ │ └── monitor.js
│ ├── app.js
│ └── bin
│ │ └── www
├── trace.js
├── zhentaoo.js
├── es6-crawl.js
├── sf-juejin.js
└── shuabi.js
├── package.json
├── tools
└── tools.js
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 |
--------------------------------------------------------------------------------
/doc/es6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/doc/es6.png
--------------------------------------------------------------------------------
/doc/pp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/doc/pp.png
--------------------------------------------------------------------------------
/doc/sf-jj.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/doc/sf-jj.gif
--------------------------------------------------------------------------------
/doc/ali-pay.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/doc/ali-pay.png
--------------------------------------------------------------------------------
/doc/es6-pdf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/doc/es6-pdf.png
--------------------------------------------------------------------------------
/doc/pp-trace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/doc/pp-trace.png
--------------------------------------------------------------------------------
/doc/pp-trace2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/doc/pp-trace2.png
--------------------------------------------------------------------------------
/doc/wx-pay.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/doc/wx-pay.jpeg
--------------------------------------------------------------------------------
/data/sf-juejin/err.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/data/sf-juejin/err.png
--------------------------------------------------------------------------------
/data/sf-juejin/sf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/data/sf-juejin/sf.png
--------------------------------------------------------------------------------
/data/sf-juejin/done.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/data/sf-juejin/done.png
--------------------------------------------------------------------------------
/data/zhentaoo/zhentaoo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/data/zhentaoo/zhentaoo.png
--------------------------------------------------------------------------------
/src/monitor/views/index.jade:
--------------------------------------------------------------------------------
extends layout

block content
  //- Landing page body: a heading with the page title and a greeting line.
  h1= title
  p Welcome to #{title}
--------------------------------------------------------------------------------
/src/monitor/views/error.jade:
--------------------------------------------------------------------------------
extends layout

block content
  //- Error page body: the error message, its status code and its stack trace.
  h1= message
  h2= error.status
  pre #{error.stack}
--------------------------------------------------------------------------------
/data/monitor/success/ZT-2017-9-19T21:17:10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/data/monitor/success/ZT-2017-9-19T21:17:10.png
--------------------------------------------------------------------------------
/data/monitor/success/ZT-2017-9-19T21:18:50.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/data/monitor/success/ZT-2017-9-19T21:18:50.png
--------------------------------------------------------------------------------
/data/monitor/success/ZT-2017-9-19T21:23:43.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhentaoo/puppeteer-deep/HEAD/data/monitor/success/ZT-2017-9-19T21:23:43.png
--------------------------------------------------------------------------------
/src/monitor/public/stylesheets/style.css:
--------------------------------------------------------------------------------
/* Page-wide spacing and default font. */
body {
  padding: 50px;
  font: 14px "Lucida Grande", Helvetica, Arial, sans-serif;
}

/* Link colour. */
a {
  color: #00B7FF;
}
--------------------------------------------------------------------------------
/src/monitor/views/layout.jade:
--------------------------------------------------------------------------------
doctype html
//- Shared page skeleton: views that extend this layout fill the `content` block.
html
  head
    title= title
    link(rel='stylesheet', href='/stylesheets/style.css')
  body
    block content
--------------------------------------------------------------------------------
/src/monitor/routes/index.js:
--------------------------------------------------------------------------------
/**
 * Router for the application root.
 */
const express = require('express');

const router = express.Router();

/** GET / - render the `index` view with the title "Monitor". */
router.get('/', (req, res) => {
  const locals = { title: 'Monitor' };
  res.render('index', locals);
});

module.exports = router;
--------------------------------------------------------------------------------
/src/monitor/routes/monitor.js:
--------------------------------------------------------------------------------
/**
 * Router for the monitor endpoint.
 */
const express = require('express');

const router = express.Router();

/**
 * GET / on this router - render the `index` view with the title "Monitor"
 * followed by the `img` query parameter.
 *
 * Only a plain string value is appended. A missing parameter used to be
 * interpolated as the literal text "undefined"; it now yields the bare
 * title "Monitor".
 */
router.get('/', (req, res) => {
  const { img } = req.query;
  const suffix = typeof img === 'string' ? img : '';

  res.render('index', { title: `Monitor${suffix}` });
});

module.exports = router;
--------------------------------------------------------------------------------
/src/trace.js:
--------------------------------------------------------------------------------
const puppeteer = require('puppeteer');

/**
 * Records a Chrome trace while loading http://www.zhentaoo.com and writes it
 * to ./data/trace/trace.json (relative to the current working directory).
 *
 * NOTE(review): this script does not create ./data/trace - confirm the
 * directory exists before running.
 */
(async () => {
  const browser = await puppeteer.launch({headless: false});

  try {
    const page = await browser.newPage();

    await page.tracing.start({path: './data/trace/trace.json'});
    await page.goto('http://www.zhentaoo.com');
    await page.tracing.stop();
  } finally {
    // Close the launched browser whether or not the trace succeeded;
    // previously it was never closed.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled promise rejection.
  console.error('trace failed:', err);
  process.exitCode = 1;
});
--------------------------------------------------------------------------------
/src/zhentaoo.js:
--------------------------------------------------------------------------------
const puppeteer = require('puppeteer');

/**
 * Opens http://www.zhentaoo.com in a visible browser window, waits for the
 * network to go idle and saves a PNG screenshot to
 * ./data/zhentaoo/zhentaoo.png. The browser is left open afterwards (the
 * close call is commented out).
 */
async function captureHomepage() {
  const launchOptions = {headless: false};
  const browser = await puppeteer.launch(launchOptions);
  const page = await browser.newPage();

  await page.goto('http://www.zhentaoo.com', {waitUntil: 'networkidle'});
  await page.screenshot({path: './data/zhentaoo/zhentaoo.png', type: 'png'});

  // browser.close();
}

captureHomepage();
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "puppeteer-deep",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "index.js",
6 | "scripts": {
7 | "start": "node src/zhentaoo.js",
8 | "test": "npm run sf-juejin",
9 | "sf-juejin": "node src/sf-juejin.js",
10 | "zhentaoo": "node src/zhentaoo.js",
11 | "es6": "rm -rf ./data/es6-pdf/* && node src/es6-crawl.js",
12 | "trace": "node src/trace.js",
13 | "monitor": "node src/monitor/bin/www",
14 | "shuabi": "node src/shuabi.js",
15 | "shuabi0": "node src/shuabi0.js"
16 | },
17 | "author": "",
18 | "license": "ISC",
19 | "dependencies": {
20 | "body-parser": "~1.15.2",
21 | "cookie-parser": "~1.4.3",
22 | "debug": "~2.2.0",
23 | "express": "~4.14.0",
24 | "jade": "~1.11.0",
25 | "morgan": "~1.7.0",
26 | "puppeteer": "^0.9.0",
27 | "request": "^2.81.0",
28 | "request-promise": "^4.2.1",
29 | "serve-favicon": "~2.3.0",
30 | "shelljs": "^0.7.8"
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/tools/tools.js:
--------------------------------------------------------------------------------
1 | class Tools {
2 | static timeout(delay) {
3 | return new Promise((resolve, reject) => {
4 | setTimeout(() => {
5 | try {
6 | resolve(1)
7 | } catch (e) {
8 | reject(0)
9 | }
10 | }, delay)
11 | })
12 | }
13 |
14 | /**
15 | * [TimeTools description]
16 | * @param {[type]} timestamp 12312312312312
17 | * @param {[type]} formatStr Y年M月D日
18 | *
19 | * M: month 1~12
20 | * Y: year 2017
21 | * D: date 0 ~ 31
22 | */
23 | static moment(formatStr, timestamp) {
24 | let date = new Date(timestamp || new Date().getTime())
25 |
26 | let M = date.getMonth() + 1
27 |
28 | let Y = date.getFullYear()
29 |
30 | let D = date.getDate()
31 |
32 | let h = date.getHours()
33 |
34 | let m = date.getMinutes()
35 |
36 | let s = date.getSeconds()
37 |
38 | return formatStr.replace('M', M).replace('Y', Y).replace('D', D).replace('h', h).replace('m', m).replace('s', s)
39 | }
40 | }
41 |
42 | module.exports = Tools;
43 |
--------------------------------------------------------------------------------
/src/es6-crawl.js:
--------------------------------------------------------------------------------
const puppeteer = require('puppeteer');
const {timeout} = require('../tools/tools.js');

// Fixed pause after each navigation before the page is read or printed.
const RENDER_DELAY_MS = 2000;

/**
 * Crawls http://es6.ruanyifeng.com: collects every link matched by
 * `ol li a` on the landing page and prints one PDF per link into
 * ./data/es6-pdf/, named after the link text.
 *
 * NOTE(review): link text is used verbatim as the file name and the output
 * directory is not created here - confirm both are safe for the target
 * file system.
 */
puppeteer.launch().then(async browser => {
  try {
    let page = await browser.newPage();

    await page.goto('http://es6.ruanyifeng.com/#README');
    await timeout(RENDER_DELAY_MS);

    const aTags = await page.evaluate(() => {
      const as = [...document.querySelectorAll('ol li a')];
      return as.map((a) => {
        return {
          href: a.href.trim(),
          name: a.text
        }
      });
    });

    if (aTags.length === 0) {
      // Previously this case crashed on `aTags[0].name`.
      console.error('es6-crawl: no links matched "ol li a", nothing to print');
      await page.close();
      return;
    }

    // The landing page is already loaded, so it is printed under the first
    // link's name without navigating again.
    await page.pdf({path: `./data/es6-pdf/${aTags[0].name}.pdf`});
    await page.close();

    // Promise.all would also work here, but it may overload the CPU - use with care.
    for (const a of aTags.slice(1)) {
      page = await browser.newPage();

      try {
        await page.goto(a.href);
        await timeout(RENDER_DELAY_MS);
        await page.pdf({path: `./data/es6-pdf/${a.name}.pdf`});
      } finally {
        // Release the tab even when this chapter fails.
        await page.close();
      }
    }
  } finally {
    // Always shut the browser down, including on errors.
    await browser.close();
  }
}).catch((err) => {
  console.error('es6-crawl failed:', err);
  process.exitCode = 1;
});
--------------------------------------------------------------------------------
/src/monitor/scripts/monitor.js:
--------------------------------------------------------------------------------
const puppeteer = require('puppeteer');
const {timeout, moment} = require('../../../tools/tools.js');
const rp = require('request-promise');

/**
 * Run one check of http://www.zhentaoo.com/.
 *
 * Loads the home page, waits three seconds and reads the text of every
 * `.post-title` element. If no title is found, or any title is empty, a
 * screenshot is stored under ./data/monitor/err/ and the local monitor
 * endpoint is called with the screenshot name. Otherwise a screenshot is
 * stored under ./data/monitor/success/. The browser is closed in every case.
 *
 * @returns {Promise<void>} Settles when the check has finished; failures are
 *   logged and never rejected, so the scheduled interval keeps running.
 */
function monitor() {
  return puppeteer.launch().then(async browser => {
    try {
      const page = await browser.newPage();
      const date = moment("Y-M-DTh:m:s");

      // After opening the site, wait three seconds
      await page.goto('http://www.zhentaoo.com/');
      await timeout(3000);

      // Collect the post titles shown on the home page
      const info = await page.evaluate(() => {
        const post = [...document.querySelectorAll('.post-title')];
        return post.map((a) => a.innerText );
      });

      // A page without any title is treated as a failure too: previously an
      // empty list skipped the check and was recorded as a success.
      const renderFailed = info.length === 0 || info.some((title) => !title);

      if (renderFailed) {
        await page.screenshot({path: `./data/monitor/err/ZT-${date}.png`, type: 'png'});

        const options = {
          uri: 'http://127.0.0.1:3000/monitor',
          qs: {
            img: `ZT-${date}.png`
          }
        };

        try {
          // The original called an undefined `rq`; the module is bound to `rp`.
          await rp(options);
        } catch (err) {
          console.error('monitor: failed to report the error screenshot:', err.message);
        }

        // End this run here: one report per check, and no success screenshot.
        return;
      }

      // Everything looks fine: take a screenshot and finish
      await page.screenshot({path: `./data/monitor/success/ZT-${date}.png`, type: 'png'});
    } finally {
      await browser.close();
    }
  }).catch((err) => {
    console.error('monitor: check failed:', err);
  });
}

monitor();
setInterval(monitor, 1000 * 60 * 5);
--------------------------------------------------------------------------------
/src/monitor/app.js:
--------------------------------------------------------------------------------
const express = require('express');
const path = require('path');
const favicon = require('serve-favicon');
const logger = require('morgan');
const cookieParser = require('cookie-parser');
const bodyParser = require('body-parser');

const index = require('./routes/index');
const monitor = require('./routes/monitor');
const child_process = require('child_process');

/**
 * Run the script that periodically captures the www.zhentaoo.com home page.
 *
 * The script path is resolved from this file instead of the working
 * directory, and the child shares this process's stdio so its log output
 * and crashes are visible instead of being discarded.
 */
const monitorScript = path.join(__dirname, 'scripts', 'monitor.js');
const monitorProcess = child_process.spawn(process.execPath, [monitorScript], {
  stdio: 'inherit'
});

// Without an 'error' listener a failed spawn would crash the server.
monitorProcess.on('error', function(err) {
  console.error('Failed to start the monitor script:', err);
});

/**
 * Create the express instance
 */
const app = express();

// view engine setup
app.set('views', path.join(__dirname, 'views'));
app.set('view engine', 'jade');

// uncomment after placing your favicon in /public
//app.use(favicon(path.join(__dirname, 'public', 'favicon.ico')));
app.use(logger('dev'));
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({ extended: false }));
app.use(cookieParser());
app.use(express.static(path.join(__dirname, 'public')));

app.use('/', index);
app.use('/monitor', monitor);

// catch 404 and forward to error handler
app.use(function(req, res, next) {
  const err = new Error('Not Found');
  err.status = 404;
  next(err);
});

// error handler
app.use(function(err, req, res, next) {
  // set locals, only providing error in development
  res.locals.message = err.message;
  res.locals.error = req.app.get('env') === 'development' ? err : {};

  // render the error page
  res.status(err.status || 500);
  res.render('error');
});

module.exports = app;
--------------------------------------------------------------------------------
/src/monitor/bin/www:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | /**
4 | * Module dependencies.
5 | */
6 |
7 | var app = require('../app');
8 | var debug = require('debug')('xxx:server');
9 | var http = require('http');
10 |
/**
 * Read the port from the PORT environment variable (default '3000') and
 * store it on the Express app.
 */
const port = normalizePort(process.env.PORT || '3000');
app.set('port', port);

/**
 * Create the HTTP server that hands requests to the Express app.
 */
const server = http.createServer(app);

/**
 * Register the event listeners, then listen on the provided port.
 */
server.on('error', onError);
server.on('listening', onListening);
server.listen(port);
31 |
/**
 * Turn a port setting into a number, a string, or false.
 *
 * @param {string} val - Raw port value.
 * @returns {number|string|boolean} The parsed port when it is a non-negative
 *   integer, the original value when it does not parse as a number (named
 *   pipe), otherwise false.
 */
function normalizePort(val) {
  const parsed = Number.parseInt(val, 10);

  // named pipe
  if (Number.isNaN(parsed)) {
    return val;
  }

  // port number, or false for a negative value
  return parsed >= 0 ? parsed : false;
}
51 |
/**
 * Listener for the HTTP server "error" event.
 *
 * Errors that do not come from the `listen` call, and listen errors with an
 * unrecognised code, are rethrown. EACCES and EADDRINUSE print a friendly
 * message and exit the process with status 1.
 *
 * @param {Error} error - The error emitted by the server.
 */
function onError(error) {
  if (error.syscall !== 'listen') {
    throw error;
  }

  const bind = (typeof port === 'string' ? 'Pipe ' : 'Port ') + port;

  // friendly messages for the listen errors handled specifically
  const friendly = {
    EACCES: ' requires elevated privileges',
    EADDRINUSE: ' is already in use'
  };

  if (!Object.prototype.hasOwnProperty.call(friendly, error.code)) {
    throw error;
  }

  console.error(bind + friendly[error.code]);
  process.exit(1);
}
79 |
/**
 * Listener for the HTTP server "listening" event: writes the bound pipe or
 * port to the debug log.
 */
function onListening() {
  const addr = server.address();
  const bind = typeof addr === 'string' ? `pipe ${addr}` : `port ${addr.port}`;

  debug(`Listening on ${bind}`);
}
91 |
--------------------------------------------------------------------------------
/src/sf-juejin.js:
--------------------------------------------------------------------------------
const puppeteer = require('puppeteer')
const {timeout} = require('../tools/tools.js');

const delay = 1000

// NOTE(review): the account below is committed in plain text. The
// JUEJIN_ACCOUNT / JUEJIN_PASSWORD environment variables override it; the
// literals remain only as a backward-compatible fallback and should be
// removed once callers supply their own credentials.
const account = process.env.JUEJIN_ACCOUNT || '18516697699@163.com'
const accountPassword = process.env.JUEJIN_PASSWORD || 'aaa123456'

/**
 * UI automation demo in three steps:
 *   1. read the article list from https://segmentfault.com/news/frontend
 *   2. log in to https://juejin.im
 *   3. share one randomly chosen article through the juejin share form
 *
 * Screenshots are written to ./data/sf-juejin/ (sf.png, err.png, done.png).
 * Each step logs its own failure and the script carries on, as before.
 * The browser window is left open at the end (the close calls stay disabled).
 */
// Demonstrated against juejin with a private test account
// puppeteer.launch().then(async browser => {
puppeteer.launch({headless: false}).then(async browser => {
  const page = await browser.newPage()
  await page.setViewport({width: 1200, height: 600})

  // Stays an empty list when step 1 fails, so step 3 can report that
  // clearly instead of failing on `undefined`.
  let SfFeArticleList = []

  /** 1. Fetch the latest front-end articles from sf **/
  try {
    await page.goto('https://segmentfault.com/news/frontend')
    await timeout(delay)

    SfFeArticleList = await page.evaluate(() => {
      var list = [...document.querySelectorAll('.news__list .news__item-title a')]

      return list.map(el => {
        return {href: el.href.trim(), title: el.innerText}
      })
    })

    console.log('SfFeArticleList:', SfFeArticleList);

    await page.screenshot({path: './data/sf-juejin/sf.png', type: 'png'});
  } catch (e) {
    console.log('sf err:', e);
  }

  /** 2. Log in to juejin **/
  try {
    await timeout(3000)
    await page.goto('https://juejin.im')
    await timeout(3000)

    const login = await page.$('.login')
    await login.click()

    const loginPhoneOrEmail = await page.$('[name=loginPhoneOrEmail]')
    console.log('loginPhoneOrEmail:', loginPhoneOrEmail);
    await loginPhoneOrEmail.click()
    await page.type(account, {delay: 20})

    const password = await page.$('[placeholder=请输入密码]')
    console.log('password:', password);
    await password.click()
    await page.type(accountPassword, {delay: 20})

    const authLogin = await page.$('.panel .btn')
    console.log('authLogin:', authLogin);
    await authLogin.click()

  } catch (e) {
    // Previously swallowed silently, which hid why step 3 failed.
    console.log('juejin login err:', e);
  }

  /** 3. Share one random article taken from sf on juejin **/
  try {
    await timeout(2500)

    if (SfFeArticleList.length === 0) {
      throw new Error('no SegmentFault articles were collected')
    }

    // Choose among the first 30 articles, but never beyond the end of the
    // list (a fixed range of 30 could select a missing entry).
    const seed = Math.floor(Math.random() * Math.min(30, SfFeArticleList.length))
    const theArtile = SfFeArticleList[seed]

    const add = await page.$('.main-nav .more')
    await add.click()

    const addLink = await page.$('.more-list .item')
    await addLink.click()

    await timeout(2500)

    const shareUrl = await page.$('.entry-form-input .url-input')
    await shareUrl.click()
    await page.type(theArtile.href, {delay: 20})

    await page.press('Tab')
    await page.type(theArtile.title, {delay: 20})

    await page.press('Tab')
    await page.type(theArtile.title, {delay: 20})

    await page.evaluate(() => {
      let li = [...document.querySelectorAll('.category-list-box .category-list .item')]
      li.forEach(el => {
        if (el.innerText == '前端')
          el.click()
      })
    })

    const submitBtn = await page.$('.submit-btn')
    await submitBtn.click()

  } catch (e) {
    console.log('juejin share err:', e);
    await page.screenshot({path: './data/sf-juejin/err.png', type: 'png'});
  }

  await page.screenshot({path: './data/sf-juejin/done.png', type: 'png'});
  // await page.close()
  // browser.close()
}).catch(e => {
  // Launch failures, or a failed final screenshot, are no longer unhandled.
  console.log('sf-juejin err:', e);
})
--------------------------------------------------------------------------------
/src/shuabi.js:
--------------------------------------------------------------------------------
const puppeteer = require('puppeteer');
const {timeout} = require('../tools/tools.js');

// NOTE(review): this script repeatedly submits generated values to a
// third-party campaign page. Confirm that running it is permitted before
// using or extending it.

// var fxUrl = 'https://yeecall.gl.yeecall.com/activity/share/5a59a9d14cc2562b0fd32ec5'
// var meUrl = 'https://yeecall.gl.yeecall.com/activity/share/5a599e2664325570dd5b6c25'

// One share page per browser tab.
const shareUrls = [
  'https://yeecall.gl.yeecall.com/activity/share?t=5a5a2507edea6a2010998444',
  'https://yeecall.gl.yeecall.com/activity/share?t=5a5a24deebf2136b273c05ce',
  'https://yeecall.gl.yeecall.com/activity/share?t=5a5a297debf2136b273c26ab',
  'https://yeecall.gl.yeecall.com/activity/share?t=5a5a2986ebf2136b273c26f0',
  'https://yeecall.gl.yeecall.com/activity/share?t=5a5a29add6ab05313934d7f6',
  'https://yeecall.gl.yeecall.com/activity/share?t=5a5a29c4ebf2136b273c2851',
  'https://yeecall.gl.yeecall.com/activity/share?t=5a5a2ec6edea6a201099cf70',
  'https://yeecall.gl.yeecall.com/activity/share?t=5a5a2ecdd6ab0531393500ad',
  'https://yeecall.gl.yeecall.com/activity/share?t=5a5a2edad6ab05313935010a',
  'https://yeecall.gl.yeecall.com/activity/share?t=5a5a2ee2edea6a201099d040',
];

// Prefixes that rdToken() extends with four random characters.
const token = [
  '0x1AA59c01fa169fB6A4a2E2D7DB9D02db9A9e',
  '0x1BD55G01fa169fkjA7a2E2D7SBG8R0dgy928',
  '0x1TG85G01faII2fkjA7a3G2DD76BG8D10dgyO',
  '0x1TON15GMUXaIO3BkjA7a3M8UD78YB8D1B3Gy',
  '0x100im57UNXaIa7bkjA7a3M8UD78YB8D1B3Gy',
  '0x5197fa565CED81cf5d599169e73754F0EDdC',
  '0xD3Da927d3698832fA68568152E43E9824b30',
  '0x743fFa1891640d0eA108d69362a287ea1063',
  '0x56B86E766860D8866B42e87B041BBa8065AE',
];

// The 62 characters a random suffix is drawn from.
const SEED_CHARS = '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

/**
 * Build a string from one randomly chosen prefix in `token` followed by
 * four random characters of SEED_CHARS.
 *
 * @returns {string} The generated string.
 */
function rdToken() {
  let suffix = '';
  for (let i = 0; i < 4; i++) {
    suffix += SEED_CHARS[Math.floor( Math.random() * 62 )];
  }

  const prefix = token[Math.floor( Math.random() * 9 )];

  return prefix + suffix;
}

let count = 0

puppeteer.launch().then(async browser => {
// puppeteer.launch({headless: false}).then(async browser => {
  // Open one tab per share URL, one after the other.
  const pages = [];
  for (let i = 0; i < shareUrls.length; i++) {
    pages.push(await browser.newPage());
  }

  // Start a visit on every tab without waiting for any of them to finish.
  const visitAll = () => {
    pages.forEach((page, i) => {
      oneVisit(page, shareUrls[i]);
    });
  };

  visitAll();
  rdLoop();

  /** Repeat visitAll() forever, pausing a random 5-44 seconds between rounds. */
  function rdLoop() {
    const time = Math.floor( Math.random() * 40 ) + 5;
    console.log('time:', time);

    setTimeout(() => {
      visitAll();

      console.log('time:', time);
      rdLoop();
    }, time * 1000);
  }

  /**
   * Load `url` in `page`, type a generated string into the first input,
   * click the first button, then clear the ICO_TOKEN entry from
   * localStorage and reload when that entry is present.
   */
  async function oneVisit(page, url) {
    await page.goto(url);

    const input = await page.$('input');
    await input.click();
    await page.type(rdToken(), {delay: 20});
    await timeout(500);

    const submit = await page.$('button');
    await submit.click();
    await timeout(500);

    await page.evaluate(() => {
      if (localStorage.ICO_TOKEN) {
        delete localStorage.ICO_TOKEN
        location.reload()
      }
      return localStorage.ICO_TOKEN
    });
  }
});
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## 首先介绍Puppeteer
2 | - Puppeteer是一个node库,它提供了一组用来操纵Chrome的API,理论上使用它可以做任何Chrome可以做的事
3 | - 有点类似于PhantomJS,但Puppeteer由Chrome官方团队进行维护,前景更好
4 | - Puppeteer的应用场景会非常多,就爬虫领域来说,远比一般的爬虫工具功能更丰富,性能分析、自动化测试也不在话下
5 | - [Puppeteer官方文档请猛戳这里](https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md#puppeteerlaunchoptions)
6 |
7 | ## 本项目会针对Puppeteer应用场景做几个可用的DEMO
8 | 1. 高级爬虫(有别于传统爬虫:使用Puppeteer可以拿到渲染后的效果,而传统爬虫只能拿到http response,再对字符串进行解析)
9 | 2. UI自动化测试(使用Puppeteer可以模拟用户操作,模拟表单填写)
10 | 3. 页面性能分析 (使用chrome的timeline,也就是Puppeteer提供的trace API)
11 |
12 | ## 项目Repo && Usage
13 | 1. git clone https://github.com/zhentaoo/puppeteer-deep
14 | 2. npm install (puppeteer在win下100+M、mac下70+M,请耐心等候)
15 |
16 | - npm run sf-juejin (推荐segmentfault的热门文章到掘金)
17 | - npm run monitor (前端监控、报警)
18 | - npm run es6 (爬取了阮一峰老师的《ES6标准入门》并打印PDF)
19 | - npm run zhentaoo (打印 www.zhentaoo.com 首页的图片)
20 | - npm run trace (生成 www.zhentaoo.com 的trace.json,并分析性能)
21 |
22 | ## 一、 UI自动化测试--自动推荐segmentfault的热门文章到掘金
23 | #### 1. 废话不多说,先上动图/视频看效果
24 | GIF图片比较大,如果不能加载成功,也可以到微博看下录制的视频
25 | http://weibo.com/tv/v/FiHMz7dcq?fid=1034:dcc08a8eee118263f6071fb6fafcc9a9
26 |
27 |
28 |
29 | #### 2. 开始介绍,第一步,爬取 segmentfault 前30篇热门文章
30 | - 跳转到https://segmentfault.com/news/frontend
31 | - 接着分析SF首页的Dom结构,爬取每篇文章的链接
32 | - 然后取出每篇文章最重要的 href,title 等信息
33 | - 具体代码如下:
34 | ```js
35 | await page.goto('https://segmentfault.com/news/frontend')
36 |
37 | var SfFeArticleList = await page.evaluate(() => {
38 | var list = [...document.querySelectorAll('.news__list .news__item-title a')]
39 | return list.map(el => {
40 | return {href: el.href.trim(), title: el.innerText}
41 | })
42 | })
43 |
44 | await page.screenshot({path: './sf-juejin/sf.png', type: 'png'});
45 | ```
46 |
47 | #### 3. 登录掘金 (这里我事先注册了个测试账号,大家可以替换成自己的)
48 | - 跳转到掘金,模拟点击登录按钮
49 | - 接着,会弹出一个登录dialog,模拟输入用户名密码
50 | - 模拟点击登录,稍等....嗯...掘金应该把cookie写好了....
51 | - 代码如下:
52 | ```js
53 | await page.goto('https://juejin.im')
54 |
55 | var login = await page.$('.login')
56 | await login.click()
57 |
58 | var loginPhoneOrEmail = await page.$('[name=loginPhoneOrEmail]')
59 | await loginPhoneOrEmail.click()
60 | await page.type('18516697699@163.com', {delay: 20})
61 |
62 | var password = await page.$('[placeholder=请输入密码]')
63 | await password.click()
64 | await page.type('123456', {delay: 20})
65 |
66 | var authLogin = await page.$('.panel .btn')
67 | await authLogin.click()
68 | ```
69 | #### 4.推荐文章(使用第一步从SF爬取的文章信息)
70 | - 模拟点击推荐文章 按钮 “+”
71 | - 这时从SF拿到的文章信息就派上用场了,随机取出一篇: Math.floor(Math.random() * 30)
72 | - 模拟填写推荐表单,点击发布
73 | - 嗯,有时会提示该文章已被分享,那就换一篇吧,再执行一次。
74 | - 代码如下
75 | ```js
76 | var seed = Math.floor(Math.random() * 30)
77 | var theArtile = SfFeArticleList[seed]
78 |
79 | var add = await page.$('.main-nav .ion-android-add')
80 | await add.click()
81 |
82 | var shareUrl = await page.$('.entry-form-input .url-input')
83 | await shareUrl.click()
84 | await page.type(theArtile.href, {delay: 20})
85 |
86 | await page.press('Tab')
87 | await page.type(theArtile.title, {delay: 20})
88 |
89 | await page.press('Tab')
90 | await page.type(theArtile.title, {delay: 20})
91 |
92 | await page.evaluate(() => {
93 | let li = [...document.querySelectorAll('.category-list-box .category-list .item')]
94 | li.forEach(el => {
95 | if (el.innerText == '前端')
96 | el.click()
97 | })
98 | })
99 |
100 | var submitBtn = await page.$('.submit-btn')
101 | await submitBtn.click()
102 | ```
103 |
104 | ## 二、 前端监控系统
105 |
106 | #### 代码 https://github.com/zhentaoo/hawk-eye
107 |
108 | #### 1. 为什么要有前端监控系统?
109 | > 目前市面上以及各大公司流行的监控系统,都是API层的监控,包括调用量、数据、响应时长.....
110 | > 似乎只要接口没问题,整个系统就是稳定运行的,一切皆大欢喜
111 | > 但事实并非如此,CDN、DNS、Webview等等这些条件,都可能导致前端渲染失败、白屏
112 | > 离用户最近的一层--前端,却迟迟没有被加入监控列表,无形中流失多少用户.....
113 |
114 | #### 2. Node Express Server
115 | - 使用 `express monitor` 命令,生成express项目模版
116 | - 安装并启动mongodb,推荐 robomongo 可视化工具
117 | - 提供两个接口:(1) 当Puppeteer发现网页渲染有异常时调用;(2) 获取系统监控状态
118 |
119 | #### 3. 定时脚本
120 | - 设置定时脚本,每隔5分钟,访问 www.zhentaoo.com,抓取关键信息,并生成截图
121 | - 如果信息获取失败,则将截图保存至err目录,并记入数据库中
122 | ```js
123 | function monitor() {
124 | puppeteer.launch().then(async browser => {
125 | let page = await browser.newPage();
126 |
127 | await page.goto('http://www.zhentaoo.com/');
128 | await timeout(2000);
129 |
130 | let aTags = await page.evaluate(() => {
131 | let as = [...document.querySelectorAll('ol li a')];
132 | return as.map((a) =>{
133 | return {
134 | href: a.href.trim(),
135 | name: a.text
136 | }
137 | });
138 | });
139 |
140 | await page.screenshot({path: './data/zhentaoo/zhentaoo.png', type: 'png'});
141 | browser.close();
142 | });
143 | }
144 |
145 | monitor();
146 | setInterval(monitor, 1000 * 60 * 5);
147 | ```
148 |
149 | #### 4. 进阶:与Chrome插件集成
150 | > 如果单纯的监控系统,每每需要点击,然后去看监控的情况,想必也有些麻烦
151 | > 那么为何不做个Chrome插件,显示监控状态呢?
152 | > 好吧,可以看我的另一个repo,https://github.com/zhentaoo/bitcoin-price,学习如何写一个chrome插件
153 | > 然后监控系统提供API给Chrome插件使用
154 |
155 | ## 三、高级爬虫--爬取《ES6标准入门》并打印成PDF
156 |
157 | #### 1. 运行Puppeteer,使用launch
158 | ```js
159 | puppeteer.launch().then(async browser => {
160 | ......
161 | what you want
162 | ......
163 | })
164 | ```
165 |
166 | #### 2. 跳转至 [阮一峰老师的ES6博客](http://es6.ruanyifeng.com/#README),使用goto
167 | ```js
168 | let page = await browser.newPage();
169 | await page.goto('http://es6.ruanyifeng.com/#README');
170 | ```
171 |
172 | #### 3. 分析博客左侧导航栏的dom结构,并拿到所有链接的href、title信息
173 | ```js
174 | let as = [...document.querySelectorAll('ol li a')];
175 | return as.map((a) =>{
176 | return {
177 | href: a.href.trim(),
178 | name: a.text
179 | }
180 | });
181 | ```
182 |
183 | #### 4. 使用Puppeteer打印当前页面的PDF,使用pdf
184 | ```js
185 | await page.pdf({path: `./es6-pdf/${aTags[0].name}.pdf`});
186 | ```
187 |
188 | #### 5. 最终结果,将20多页博客打印成PDF
189 |
190 |
191 |
192 | ## 四、性能分析--Puppeteer Trace API
193 |
194 | #### 1. 简单介绍 Trace API
195 | > Trace API其实很简单,主要是使用Chrome Performance,生成当前页面的 性能追踪 文件,
196 | 然后将该文件上传给Chrome,就可以利用Chrome的开发者工具分析火焰图、各种数据参数
197 |
198 | #### 2. API: 使用 tracing start,stop生成trace.json
199 | ```js
200 | await page.tracing.start({path: './data/trace/trace.json'});
201 | await page.goto('http://www.zhentaoo.com');
202 | await page.tracing.stop();
203 | ```
204 |
205 |
206 | #### 3. 将trace.json上传给chrome,如下图
207 |
208 |
209 | #### 4. Chrome Performance/Timeline 使用教程
210 | 关于Chrome Performance/Timeline的使用又是一个大篇幅,这里提供一个教程
211 | - [Chrome 开发者工具](https://developers.google.com/web/tools/chrome-devtools/?hl=zh-cn)
212 | - [如何查看性能](https://developers.google.com/web/tools/chrome-devtools/?hl=zh-cn)
213 | - [分析运行时性能](https://developers.google.com/web/tools/chrome-devtools/evaluate-performance/timeline-tool?hl=zh-cn)
214 | - [诊断强制的同步布局](https://developers.google.com/web/tools/chrome-devtools/rendering-tools/forced-synchronous-layouts?hl=zh-cn)
215 |
216 |
217 | ## 结语
218 | 1. 为了效果展示,这里使用的是 headless: false 模式,实际使用时可以同时开n个page模拟操作,大家可以尝试改改,也可以给我提PR
219 | 2. 目前已经带领大家,使用Puppeteer完成爬虫 和 UI自动化测试,接下来可能会出第三篇,应该会是关于前端性能分析
220 | 3. 其实Puppeteer的应用场景远不止这些,大家也可以使用它在各自的领域大放异彩!!!
221 | 4. 希望掘金小编不会打我....
222 |
223 | ## 赞助
224 | 如果你觉得该项目对你有用,欢迎打赏作者,你的打赏是开源的强大动力~~~
225 |
226 |
227 |
--------------------------------------------------------------------------------