├── .gitignore ├── README.md ├── config ├── config.article.js ├── config.mail.js └── config.mail.sample.js ├── index.js ├── package.json └── src ├── config.js ├── crawler.js └── mailer.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | config/config.mail.js -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Techweekly 2 | 3 | 高可配的技术周报邮件推送工具。 4 | 5 |  6 | 7 | ## 快速入门 8 | 9 | 第一步,下载代码,安装依赖: 10 | ```shell 11 | $ git clone https://github.com/xiongwilee/Techweekly.git 12 | $ cd Techweekly && npm install --registry=https://registry.npm.taobao.org 13 | ``` 14 | 15 | 第二步,修改邮件配置`config/config.mail.js`: 16 | ```javascript 17 | module.exports = { 18 | "sender": { 19 | "host": "邮箱服务器host", 20 | "port": "邮箱服务器端口号", 21 | "auth": { 22 | "user": "邮箱地址", 23 | "pass": "邮箱密码" 24 | } 25 | }, 26 | "subject": "邮件主题", 27 | "from": "你的名字 <邮箱地址>", 28 | "to": ["收件人邮箱地址"] 29 | } 30 | ``` 31 | 32 | 或者,你也可以直接使用默认的邮箱配置`config.mail.sample.js`:将`config.mail.sample.js`重命名为`config.mail.js`即可。 33 | 34 | 第三步,发送周报邮件: 35 | ```shell 36 | $ node index.js 37 | ``` 38 | 39 | 40 | **FYI:** 41 | 42 | 如果你需要定时发送邮件,推荐使用`crontab`(例如每周五 10:00 发送一次): 43 | ```shell 44 | 0 10 * * 5 cd /your/project/path/ && node index.js 45 | ``` 46 | 47 | 48 | ## 贡献 49 | 50 | Techweekly默认支持[fex](https://github.com/zenany/weekly/tree/master/software/)和[75team](https://weekly.75team.com/)两个周报源,你可以根据自己的需求配置周报来源: 51 | ```javascript 52 | "源ID(可以配置任意字符)": { 53 | /** 54 | * 页面链接,可以是一个string, 也可以是function,如果是function则: 55 | * @return {String} 页面URL 56 | */ 57 | url: function() {}, 58 | 59 | /** 60 | * 通过url获取文章内容URL的方法 61 | * @param {string} html 通过页面链接爬取到的页面html 62 | * @return {String} 从html中解析到的文章内容的链接 63 | */ 64 | getLink: function(html) {}, 65 | 66 | /** 67 | * 通过文章内容的链接爬取到文章主体 68 | * @param {String} html 通过文章内容的链接爬取到文章的html 69 | * @return {String} 文章主体部分的html 70 
| */ 71 | getContent: function(html) {} 72 | } 73 | ``` 74 | 75 | **FYI:** 76 | 77 | 在`getLink`和`getContent`方法里,你可以直接使用[cheerio](https://github.com/cheeriojs/cheerio#cheerio)来解析DOM。 78 | 79 | ## 作者 80 | 81 | * [xiongwilee](https://github.com/xiongwilee) 82 | 83 | -------------------------------------------------------------------------------- /config/config.article.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 文章源配置文件 3 | * @author xiongwilee 4 | */ 5 | 6 | 'use strict'; 7 | 8 | const cheerio = require("cheerio"); 9 | const url_opera = require('url'); 10 | 11 | module.exports = { 12 | "fex": { 13 | /** 14 | * 页面连接,可以是一个string, 也可以是function 15 | * @return {String} 页面URL 16 | */ 17 | url: function() { 18 | let year = new Date().getFullYear(); 19 | return `https://github.com/zenany/weekly/blob/master/software/${year}/` 20 | }, 21 | 22 | /** 23 | * 通过url获取文章内容URL的方法 24 | * @param {string} html 通过页面连接爬取到的页面html 25 | * @return {String} 从html中解析到的文章内容的链接 26 | */ 27 | getLink: function(html) { 28 | try { 29 | let curLink = 'https://github.com/'; 30 | 31 | let $ = cheerio.load(html); 32 | let links = $('table.files .content a'); 33 | for (let i = links.length; i > 0; i--) { 34 | let url = $(links[i - 1]).attr('href'); 35 | 36 | // 匹配这种类型的URL: /zenany/weekly/blob/master/software/2017/0220.md 37 | let urlReg = /.\/[\d]+\.md/g; 38 | if (/.\/[\d]+\.md/g.test(url)) return url_opera.resolve(curLink , url); 39 | } 40 | } catch (err) { 41 | return; 42 | } 43 | }, 44 | 45 | /** 46 | * 通过文章内容的链接爬取到文章主体 47 | * @param {String} html 通过文章内容的链接爬取到文章的html 48 | * @return {String} 文章主体部分的html 49 | */ 50 | getContent: function(html) { 51 | let $ = cheerio.load(html); 52 | try { 53 | let html = $('.entry-content').html(); 54 | html = html.replace('
-- THE END --
', ''); 55 | return html; 56 | } catch (err) { 57 | return; 58 | } 59 | } 60 | }, 61 | "75team": { 62 | url: "https://weekly.75team.com/", 63 | getLink: function(html) { 64 | try { 65 | let curLink = 'https://weekly.75team.com/'; 66 | 67 | let urlMatch = html.match(/href\=\'(issue\d+\.html)/); 68 | if (urlMatch) { 69 | return url_opera.resolve(curLink , urlMatch[1]) 70 | } else { 71 | return; 72 | } 73 | /* 这个页面下的html注释写成了 导致cheerio不识别,改用正则 74 | let $ = cheerio.load(html); 75 | return curLink + $('.issue-list li:first-child a').attr('href'); 76 | */ 77 | } catch (err) { 78 | return; 79 | } 80 | }, 81 | getContent: function(html) { 82 | let $ = cheerio.load(html); 83 | try { 84 | let contentDom = $('#main #content>ul'); 85 | return contentDom.html(); 86 | } catch (err) { 87 | return; 88 | } 89 | } 90 | } 91 | } -------------------------------------------------------------------------------- /config/config.mail.js: -------------------------------------------------------------------------------- 1 | /** 2 | * 邮箱配置文件 3 | * @author xiongwilee 4 | */ 5 | 6 | module.exports = { 7 | "sender": { 8 | "host": "smtp.163.com", 9 | "port": 465, 10 | "auth": { 11 | "user": "wileetest04@163.com", 12 | "pass": "123qwe" 13 | } 14 | }, 15 | "subject": "每周技术文章推荐", 16 | "from": "xiongwilee▼ 来源: ${article.contentLink}
`; 57 | htmlContent += article.articleHtml; 58 | htmlContent +=`该技术周报由Techweekly强力驱动
`; 62 | 63 | return htmlContent 64 | } 65 | 66 | /** 67 | * 通过页面连接获取文档内容的HTML 68 | * @param {Object} article 文档列表页 69 | * @return {Object} Promise 70 | */ 71 | function getContentPromise(article) { 72 | if (!article.linkBody) return; 73 | 74 | let contentLink = article.getLink(article.linkBody); 75 | if (!contentLink) return; 76 | 77 | return new Promise((resolve, reject) => { 78 | request(contentLink, (err, res, body) => { 79 | if (err) { console.error(`抓取内容失败:${contentLink}`, err) } 80 | resolve(Object.assign(article, { 81 | articleBody: body, 82 | contentLink: contentLink 83 | })) 84 | }) 85 | }) 86 | } 87 | 88 | /** 89 | * 获取所有文档列表页面的页面HTML 90 | * @param {Object} articleConfig 文章列表配置 91 | * @return {Obejct} Promise 92 | */ 93 | function allArticle(articleConfig) { 94 | let promiseList = []; 95 | 96 | for (let key in articleConfig) { 97 | let article = articleConfig[key]; 98 | let url = typeof article.url == 'function' ? article.url() : article.url; 99 | 100 | promiseList.push(new Promise((resolve, reject) => { 101 | request(url, (err, res, body) => { 102 | if (err) { console.error(`抓取列表失败:${url}`, err) } 103 | resolve(Object.assign(article, { 104 | linkBody: body 105 | })); 106 | }) 107 | })) 108 | } 109 | 110 | return Promise.all(promiseList) 111 | } 112 | 113 | module.exports = crawler; 114 | -------------------------------------------------------------------------------- /src/mailer.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const nodemailer = require("nodemailer"); 4 | 5 | //配置邮件服务信息 6 | let smtpTransport; 7 | 8 | 9 | /** 10 | * 发送邮件 11 | * @param {Object} mailConfig 邮箱配置 12 | * @param {String} html 邮件内容 13 | * @return {Undefined} 14 | */ 15 | function sendMail(mailConfig, html) { 16 | smtpTransport = smtpTransport || nodemailer.createTransport(mailConfig.sender); 17 | 18 | smtpTransport.sendMail({ 19 | subject: mailConfig.subject, 20 | from: mailConfig.from, 21 | to: 
mailConfig.to.join(','), 22 | html: html 23 | }, (error, response) => { 24 | if (error) { 25 | console.error(error); 26 | } else { 27 | console.log(response); 28 | } 29 | smtpTransport.close(); 30 | }); 31 | } 32 | 33 | exports.sendMail = sendMail; --------------------------------------------------------------------------------