├── .gitignore ├── README.md ├── index.js ├── lib └── ibd.js └── package.json /.gitignore: -------------------------------------------------------------------------------- 1 | books 2 | node_modules 3 | .idea -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ituring-downloader 图灵阅读书籍下载器 2 | 3 | 4 | 一键下载 [图灵阅读社区书架上](http://www.ituring.com.cn/user/shelf) 的书籍。 5 | 6 | ## 安装 7 | 8 | ```shell 9 | git clone https://github.com/laispace/ituring-downloader.git 10 | // 或者 11 | // npm install ituring-downloader 12 | 13 | cd ituring-downloader 14 | 15 | npm install 16 | 17 | ``` 18 | 19 | ## 使用方法 20 | 21 | > 需要使用 v7.6.0 或更高版本的 Node. 22 | 23 | ``` 24 | node lib/ibd username password pathToSave bookUrls 25 | ``` 26 | 27 | - `username:String` 账号名字 28 | 29 | - `password:String` 账号密码 30 | 31 | - `pathToSave:String` 书籍保存到本地的路径 32 | 33 | - `bookUrls:Array` 需要下载的书籍,缺省则表示下载所有数据 34 | 35 | 36 | ### 下载指定的多本书籍 37 | ``` 38 | // 下载《算法图解》和《图解设计模式》 39 | ibd MY_USERNAME MY_PASSWORD ./books http://www.ituring.com.cn/book/1864 http://www.ituring.com.cn/book/1811 40 | ``` 41 | 42 | ### 下载书架上所有书籍 43 | ``` 44 | // 下载所有书架上的书籍,默认保存到 './books' 目录 45 | ibd MY_USERNAME MY_PASSWORD 46 | ``` 47 | > 注意,该脚本是抓取指定账号在图灵社区购买的正版电子书,只能抓取已购买书籍,下载后仅供自己阅读,请勿肆意传播,否则后果自负! 
/**
 * Download e-books from the iTuring reading community (ituring.com.cn)
 * and save every chapter and sub-chapter as a PDF file.
 */
const puppeteer = require('puppeteer');
const mkdirp = require('mkdirp');
const path = require('path');
const fs = require('fs');

const BASE_URL = 'http://www.ituring.com.cn';
const SHELF_URL = `${BASE_URL}/user/shelf`;
const LOGIN_URL = `http://account.ituring.com.cn/log-in?returnUrl=${encodeURIComponent(SHELF_URL)}`;

// One viewport size shared by every page that gets opened.
const VIEWPORT = { width: 1376, height: 768 };
// Navigation and launch calls disable puppeteer's timeout.
const NO_TIMEOUT = { timeout: 0 };

/**
 * Turn a link taken from the site (or passed by the user) into an absolute URL.
 * Absolute http(s) URLs are returned untouched; anything else is joined to
 * BASE_URL with exactly one slash, whether or not it starts with one.
 *
 * @param {string} href - absolute URL or site-relative path.
 * @returns {string} absolute URL.
 */
const toAbsoluteUrl = (href) => {
  if (/^https?:\/\//i.test(href)) {
    return href;
  }
  return `${BASE_URL}/${href.replace(/^\/+/, '')}`;
};

/**
 * Make a chapter title usable as a file name by replacing every "/" with "、".
 *
 * @param {string} title - chapter or sub-chapter title.
 * @returns {string} title without path separators.
 */
const toFileNamePart = title => title.replace(/\//g, '、');

/**
 * Open a new page at `url`, run `task` on it and always close the page
 * afterwards, even when `task` throws.
 *
 * @param {object} browser - puppeteer browser instance.
 * @param {string} url - address to navigate to.
 * @param {function(object): Promise<*>} task - work to do with the loaded page.
 * @returns {Promise<*>} whatever `task` resolves to.
 */
const withPage = async (browser, url, task) => {
  const page = await browser.newPage();
  try {
    await page.setViewport(VIEWPORT);
    await page.goto(url, NO_TIMEOUT);
    // `await` here so the page is closed only after the task has finished.
    return await task(page);
  } finally {
    await page.close();
  }
};

/**
 * Wait for the article content and hide the site header, side bar, footer and
 * "to top" button. Elements that are not present are skipped.
 *
 * @param {object} page - puppeteer page showing a chapter or sub-chapter.
 * @returns {Promise<void>}
 */
const prepareContentPage = async (page) => {
  await page.waitForSelector('.article-detail');
  // This callback runs inside the browser, so it must be self-contained.
  await page.$eval('body', (body) => {
    ['.layout-head', '.book-page .side', '#footer', '#toTop'].forEach((selector) => {
      const element = body.querySelector(selector);
      if (element) {
        element.style.display = 'none';
      }
    });
  });
};

/**
 * Report whether `filePath` already exists, logging the "skipped" message if so.
 *
 * @param {string} filePath - target PDF path.
 * @param {string} label - "章节" or "子章节", used in the log message.
 * @returns {boolean} true when the file is already on disk.
 */
const isAlreadySaved = (filePath, label) => {
  if (!fs.existsSync(filePath)) {
    return false;
  }
  console.log(`${label}已存在,跳过: ${filePath}`);
  return true;
};

/**
 * Print `page` to `filePath` as an A4 PDF using the screen media type.
 *
 * @param {object} page - the page to print.
 * @param {string} filePath - target PDF path; its directory is created if needed.
 * @param {string} label - "章节" or "子章节", used in the log message.
 * @returns {Promise<void>}
 */
const savePdf = async (page, filePath, label) => {
  mkdirp.sync(path.dirname(filePath));
  // Media emulation must be applied to the page that is actually printed.
  await page.emulateMedia('screen');
  await page.pdf({
    path: filePath,
    format: 'A4',
  });
  console.log(`保存${label}成功: ${filePath}`);
};

/**
 * Log in and decide which books to download: the explicitly requested ones,
 * or every book linked from the shelf page when none were requested.
 *
 * @param {object} browser - puppeteer browser instance.
 * @param {string} userName - account name.
 * @param {string} password - account password.
 * @param {string[]} [targetBookUrls] - books requested by the caller.
 * @returns {Promise<string[]>} book links (absolute or site-relative).
 */
const logInAndListBooks = async (browser, userName, password, targetBookUrls) => {
  console.log('打开新页面');
  const page = await browser.newPage();
  try {
    await page.setViewport(VIEWPORT);

    console.log('输入登录地址');
    await page.goto(LOGIN_URL, NO_TIMEOUT);
    await page.waitForSelector('#loginForm');

    console.log('输入用户名和密码');
    await page.focus('#Email');
    await page.type('#Email', userName);
    await page.focus('#Password');
    await page.type('#Password', password);
    await page.click('#loginForm input[type="submit"]');

    // LOGIN_URL carries the shelf as returnUrl; wait for the shelf list to show up.
    await page.waitForSelector('.block-items');

    if (Array.isArray(targetBookUrls) && targetBookUrls.length) {
      console.log(`准备下载指定的${targetBookUrls.length}本书`);
      return targetBookUrls;
    }

    const books = await page.$eval('.block-items', shelf => Array
      .from(shelf.querySelectorAll('.block-item'))
      .map(item => item.querySelector('.book-img a'))
      .filter(Boolean)
      .map(a => a.getAttribute('href')));
    console.log(`准备下载书架上找到的所有${books.length}本书`);
    return books;
  } finally {
    await page.close();
  }
};

/**
 * Save one chapter as a PDF, then each of its sub-chapters.
 *
 * @param {object} browser - puppeteer browser instance.
 * @param {{href: string, title: string}} article - chapter link and title.
 * @param {string} bookDir - directory that receives the book's PDFs.
 * @returns {Promise<void>}
 */
const downloadArticle = async (browser, article, bookDir) => {
  const articleName = toFileNamePart(article.title);
  const articlePath = path.join(bookDir, `${articleName}.pdf`);

  // The chapter page is loaded even when its PDF exists, because the
  // sub-chapter list is read from its navigation block.
  const sections = await withPage(browser, toAbsoluteUrl(article.href), async (articlePage) => {
    await prepareContentPage(articlePage);
    if (!isAlreadySaved(articlePath, '章节')) {
      await savePdf(articlePage, articlePath, '章节');
    }

    await articlePage.waitForSelector('.book-nav');
    // Entries whose style attribute is exactly "text-indent: 1em" are taken as sub-chapters.
    return articlePage.$eval('.book-nav', nav => Array
      .from(nav.querySelectorAll('li'))
      .filter(li => li.getAttribute('style') === 'text-indent: 1em')
      .map(li => li.querySelector('a'))
      .filter(Boolean)
      .map(a => ({
        href: a.getAttribute('href'),
        title: a.innerText.trim(),
      })));
  });

  for (const section of sections) {
    const sectionPath = path.join(bookDir, `${articleName}-${toFileNamePart(section.title)}.pdf`);
    // Check first so an already saved sub-chapter does not cost a page load.
    if (isAlreadySaved(sectionPath, '子章节')) {
      continue;
    }
    await withPage(browser, toAbsoluteUrl(section.href), async (sectionPage) => {
      await prepareContentPage(sectionPage);
      await savePdf(sectionPage, sectionPath, '子章节');
    });
  }
};

/**
 * Read a book's title and table of contents, then download every chapter.
 *
 * @param {object} browser - puppeteer browser instance.
 * @param {string} bookUrl - absolute URL of the book page.
 * @param {string} saveDir - root directory for downloads.
 * @returns {Promise<void>}
 */
const downloadBook = async (browser, bookUrl, saveDir) => {
  const book = await withPage(browser, bookUrl, async (bookPage) => {
    await bookPage.waitForSelector('.bookmenu');
    const title = await bookPage.$eval('.book-title h2', element => element.innerText);
    // Rows without a link are skipped instead of raising inside the browser.
    const articles = await bookPage.$eval('.bookmenu table tbody', tbody => Array
      .from(tbody.querySelectorAll('tr'))
      .map(row => row.querySelector('td a'))
      .filter(Boolean)
      .map(a => ({
        href: a.getAttribute('href'),
        title: a.innerText.trim(),
      })));
    return { title, articles };
  });

  const bookDir = path.join(saveDir, book.title);
  for (const article of book.articles) {
    await downloadArticle(browser, article, bookDir);
  }
};

/**
 * Log in to iTuring and save books as PDFs under `saveDir/<book title>/`.
 * Chapters and sub-chapters whose PDF already exists are skipped, so an
 * interrupted run can be resumed.
 *
 * Errors raised while downloading are logged with console.error and not
 * rethrown; the browser is closed in every case.
 *
 * @param {string} userName - account name (required).
 * @param {string} password - account password (required).
 * @param {string} [saveDir='./books/'] - directory that receives the books.
 * @param {string[]} [targetBookUrls] - books to download, as absolute URLs or
 *   site-relative paths; when empty or omitted, every book on the shelf is used.
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when userName or password is missing.
 */
const downloadIturingBooks = async (userName, password, saveDir = './books/', targetBookUrls) => {
  if (!userName) {
    throw new Error('请输入用户名');
  }
  if (!password) {
    throw new Error('请输入密码');
  }
  try {
    console.log('启动浏览器');
    // Left in the default headless mode: the original author's note says
    // Page.pdf needs headless mode to be kept on.
    const browser = await puppeteer.launch(NO_TIMEOUT);
    try {
      const books = await logInAndListBooks(browser, userName, password, targetBookUrls);
      for (const book of books) {
        await downloadBook(browser, toAbsoluteUrl(book), saveDir);
      }
    } finally {
      // Close the browser on failure too, so its process does not linger.
      await browser.close();
    }
  } catch (e) {
    console.error(e);
  }
};

// Run as a command-line tool only when this file is the entry point, not when
// it is loaded through require() (index.js re-exports it).
if (require.main === module) {
  const USER = process.argv[2];
  const PASSWORD = process.argv[3];
  const SAVE_DIR = process.argv[4];
  const BOOK_URLS = process.argv.slice(5);
  if (!USER || !PASSWORD) {
    console.log('无效账号明或密码');
    // Non-zero status so shells and scripts can detect the invalid invocation.
    process.exit(1);
  }
  downloadIturingBooks(USER, PASSWORD, SAVE_DIR, BOOK_URLS);
}

module.exports = downloadIturingBooks;
"https://github.com/laispace/ituring-downloader#readme", 20 | "dependencies": { 21 | "mkdirp": "^0.5.1", 22 | "puppeteer": "0.12.0" 23 | } 24 | } 25 | --------------------------------------------------------------------------------