├── src ├── lib │ ├── limited-queue.js │ ├── verbose.js │ ├── str.js │ ├── LANG.js │ ├── logger.js │ └── scrapy.js └── index.js ├── .vscode └── launch.json ├── LICENSE ├── .eslintrc ├── .gitignore ├── package.json └── README.md /src/lib/limited-queue.js: -------------------------------------------------------------------------------- 1 | module.exports = class LimitedInQueue { 2 | constructor(capacity) { 3 | this.capacity = capacity 4 | this._data = [] 5 | } 6 | 7 | get size() { 8 | return this._data.length 9 | } 10 | 11 | get first() { 12 | return this._data[0] 13 | } 14 | 15 | get last() { 16 | return this._data[this.size - 1] 17 | } 18 | 19 | push(v) { 20 | if (this.size < this.capacity) { 21 | this._data.push(v) 22 | } 23 | else { 24 | this._data.forEach((_v, i, d) => d[i] = d[i + 1]) 25 | this._data[this.capacity - 1] = v 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // 使用 IntelliSense 了解相关属性。 3 | // 悬停以查看现有属性的描述。 4 | // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "node", 9 | "request": "launch", 10 | "name": "Debug-Key", 11 | "skipFiles": [ 12 | "/**" 13 | ], 14 | "program": "${workspaceFolder}/src/index.js", 15 | "args": ["-k", "ph5dd68958332dd", "--verbose"] 16 | }, 17 | { 18 | "type": "node", 19 | "request": "launch", 20 | "name": "Debug-Search", 21 | "skipFiles": [ 22 | "/**" 23 | ], 24 | "program": "${workspaceFolder}/src/index.js", 25 | "args": ["-s", "妹妹", "--verbose"] 26 | }, 27 | { 28 | "type": "node", 29 | "request": "launch", 30 | "name": "启动程序", 31 | "skipFiles": [ 32 | "/**" 33 | ], 34 | "program": "${workspaceFolder}/src/index.js" 35 | } 36 | ] 37 | } -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 RoyShen12 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/lib/verbose.js: -------------------------------------------------------------------------------- 1 | const chalk = require('chalk').default 2 | const { performance } = require('perf_hooks') 3 | 4 | /** 5 | * @param {string} c 6 | */ 7 | const vblog = c => { 8 | if (global.cli.flags.verbose) { 9 | c = c.replace(/\[(\w+)\]/, chalk.cyanBright('[$1]')) 10 | c = c.replace(/<([\w\s]+)>/, chalk.magentaBright('<$1>')) 11 | c = c.replace(/(\sentered[\s,]?)/, chalk.green('$1')) 12 | c = c.replace(/(\sexits[\s,]?)/, chalk.red('$1')) 13 | c = chalk.gray('(debug) ') + c 14 | console.log(c) 15 | } 16 | } 17 | 18 | /** 19 | * @type {Map} 20 | */ 21 | vblog.watches = new Map() 22 | 23 | /** 24 | * @param {string} name 25 | * @param {boolean} start 26 | */ 27 | vblog.stopWatch = (name, start) => { 28 | if (!global.cli.flags.verbose) return 0 29 | if (start) { 30 | vblog.watches.set(name, performance.now()) 31 | } 32 | else { 33 | return performance.now() - vblog.watches.get(name) 34 | } 35 | // if (vblog.watches.has(name)) { 36 | // const tm = performance.now() - vblog.watches.get(name) 37 | // vblog.watches.delete(name) 38 | // return tm 39 | // } 40 | // else { 41 | // vblog.watches.set(name, performance.now()) 42 | // } 43 | } 44 | 45 | module.exports = vblog 46 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "es6": true, 4 | "node": true 5 | }, 6 | "parserOptions": { 7 | "ecmaVersion": 8 8 | }, 9 | "rules": { 10 | 11 | "no-undef": "error", 12 | "array-bracket-spacing": [ 13 | 2, 14 | "never" 15 | ], 16 | "block-scoped-var": 2, 17 | "computed-property-spacing": [ 18 | 2, 19 | "never" 20 | ], 21 | "eol-last": 2, 22 | "eqeqeq": [ 23 | 2, 24 | "smart" 25 | ], 26 | "max-depth": [ 27 | 1, 28 | 5 29 | ], 30 | "max-len": [ 31 | 1, 32 | 260 33 | ], 
34 | "max-statements": [ 35 | 1, 36 | 150 37 | ], 38 | "new-cap": "off", 39 | "no-extend-native": 2, 40 | "no-mixed-spaces-and-tabs": 2, 41 | "no-trailing-spaces": 2, 42 | "no-unused-vars": 1, 43 | "no-use-before-define": [ 44 | 2, 45 | "nofunc" 46 | ], 47 | "object-curly-spacing": [ 48 | 2, 49 | "always" 50 | ], 51 | "quotes": [ 52 | 2, 53 | "single", 54 | "avoid-escape" 55 | ], 56 | "keyword-spacing": [ 57 | 2, 58 | { 59 | "before": true, 60 | "after": true 61 | } 62 | ], 63 | "space-unary-ops": 2 64 | } 65 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (http://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # Typescript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | # by blackmatch 61 | downloads 62 | debug 63 | .cache 64 | dist-mac 65 | dist-win 66 | 67 | .DS_Store 68 | 69 | pack.sh 70 | dlist.txt 71 | 
search.log 72 | config.json 73 | 74 | *.sh 75 | *.gz 76 | *.tgz 77 | *.zip -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pornhub-downloader", 3 | "version": "1.1.1", 4 | "description": "video downloader for pornhub", 5 | "main": "src/index.js", 6 | "bin": "src/index.js", 7 | "files": [ 8 | "src", 9 | ".eslintrc", 10 | "LICENSE", 11 | "package-lock.json", 12 | "package.json", 13 | "README.md" 14 | ], 15 | "scripts": { 16 | "start": "node src", 17 | "pkg": "pkg . -t macos-x64 --out-path=dist-mac/ && cp node_modules/**/*.node dist-mac/ && pkg . -t win-x64 --out-path=dist-win/" 18 | }, 19 | "repository": { 20 | "type": "git", 21 | "url": "git+https://github.com/RoyShen12/pornhub-downloader" 22 | }, 23 | "keywords": [ 24 | "crawler", 25 | "node.js", 26 | "pornhub" 27 | ], 28 | "author": "Roy Shen", 29 | "bugs": { 30 | "url": "https://github.com/RoyShen12/pornhub-downloader/issues" 31 | }, 32 | "homepage": "https://github.com/RoyShen12/pornhub-downloader#readme", 33 | "license": "MIT", 34 | "dependencies": { 35 | "chalk": "^2.4.2", 36 | "cheerio": "^1.0.0-rc.3", 37 | "diskusage": "^1.1.3", 38 | "human-size": "^1.1.0", 39 | "imgcat": "^2.3.0", 40 | "lodash": "^4.17.19", 41 | "make-fetch-happen": "^8.0.7", 42 | "meow": "^7.0.1", 43 | "os-locale": "^5.0.0", 44 | "pretty": "^2.0.0", 45 | "pretty-ms": "^5.1.0", 46 | "progress": "^2.0.3", 47 | "progress-stream": "^2.0.0", 48 | "throttle": "^1.0.3" 49 | }, 50 | "devDependencies": { 51 | "eslint": "^5.16.0", 52 | "pkg": "^4.4.8" 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/lib/str.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | randomStr(bits) { 3 | let ret = '' 4 | for (let index = 0; index < bits; index++) { 5 | ret += ((Math.random() * 16 | 0) & 
0xf).toString(16) 6 | } 7 | return ret 8 | }, 9 | transferBadSymbolOnFileName(fn) { 10 | return fn.replace(/[\\/\*:"\?<>|\s@!$%]/g, '_') 11 | }, 12 | transferBadSymbolOnPathName(pn) { 13 | return pn.replace(/[\*:"\?<>|\s@!$%]/g, '_') 14 | }, 15 | fileNameToTitle(fn) { 16 | return fn.replace(/^\d*_/, '').replace(/_\d{3,}P_ph[0-9a-f]+\.mp4$/, '') 17 | }, 18 | DateTimeToFileString(DateObj, needHour = false, needMinute = false, needSecond = false, needMillisecond = false) { 19 | const year = DateObj.getFullYear() 20 | const month = ((DateObj.getMonth() + 1) + '').padStart(2, '0') 21 | const day = (DateObj.getDate() + '').padStart(2, '0') 22 | const hour = needHour ? (DateObj.getHours() + '').padStart(2, '0') : '' 23 | const minute = needMinute ? (DateObj.getMinutes() + '').padStart(2, '0') : '' 24 | const second = needSecond ? (DateObj.getSeconds() + '').padStart(2, '0') : '' 25 | const millisecond = needMillisecond ? (DateObj.getMilliseconds() + '').padStart(3, '0') : '' 26 | return `${year}-${month}-${day} ${hour}_${minute}_${second}.${millisecond}` 27 | }, 28 | WideStr: class { 29 | constructor(s) { 30 | this._str = [] 31 | s.split('').forEach(ch => { 32 | const length = /[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff66-\uff9f]/.test(ch) ? 
2 : 1 33 | this._str.push({ ch, length }) 34 | }) 35 | } 36 | get length() { 37 | return this._str.reduce((pv, v) => pv + v.length, 0) 38 | } 39 | substr(start, length) { 40 | length = Math.min(length, this._str.length - start) 41 | let ret = '' 42 | let sumL = 0 43 | for (let i = start;; i++) { 44 | if (sumL === length || (sumL === length - 1 && this._str[i].length === 2)) break 45 | ret += this._str[i].ch 46 | sumL += this._str[i].length 47 | } 48 | return ret 49 | } 50 | toString() { 51 | return this._str.map(ls => ls.ch).join('') 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # pornhub-DL 3 | 4 | Download highest quality videos from [pornhub](https://pornhub.com). 5 | 下载 [pornhub](https://pornhub.com) 上的高清视频。 6 | 7 | ## statement 8 | 9 | **This repo is just for studying, not for other purpose.** 10 | **本项目仅供学习交流,如作他用所承受的法律风险概与作者无关** 11 | 12 | ## Features 13 | 14 | * Support http or socks5 proxy. 支持 http 和 socks5 代理 15 | 16 | * Always select highest quality video. 永远选择最高清的画质选项 17 | 18 | * Support keyword searching. 关键词搜索下载 19 | 20 | * Show progress. 显示下载进度条 21 | 22 | * Skip repeat & previously downloaded file. 跳过重复/已下载过的文件 23 | 24 | * Support Keyword filtering. 支持关键词过滤 25 | 26 | * Support direct downloading from PH viewkey. 支持根据ph代码直接下载视频 27 | 28 | ## Requirement 29 | 30 | * Node.js 8.3.0+. 31 | 32 | * Internet that can access [pornhub.com](https://www.pornhub.com). 33 | 34 | ## Usage 使用方法 35 | 36 | ```shell 37 | git clone https://github.com/RoyShen12/pornhub-downloader.git 38 | npm install 39 | node src -s 40 | ``` 41 | 42 | ##### npm install failed on Linux 43 | 44 | 1. install pip, command on Ubuntu 16/18 is `apt install python-pip -y` 45 | 46 | 2. 
`pip install ast` 47 | 48 | ### Example 例子 49 | 50 | #### Multi Keywords Searching 多关键词搜索 51 | 52 | ```shell 53 | node src -s Lesbian+muscle 54 | ``` 55 | 56 | #### Keywords Searching With Keywords Excluding 搜索并过滤部分结果 57 | 58 | ```shell 59 | node src -s Lesbian -e japanese,jav 60 | ``` 61 | 62 | #### ~~Jumping First Four ADs 跳过前4个推广视频~~ Ads are now skipped automatically 现在支持自动跳过推广视频 63 | 64 | ```shell 65 | node src -s --skip 4 66 | ``` 67 | 68 | #### Preview Videos Without Downloading (only on Mac with iTerm > 2.9) 仅预览视频缩略图,并不实际下载(需要Mac与iTerm版本>2.9) 69 | 70 | ```shell 71 | node src -s -f --preview --preview-size 50 72 | # or 73 | node src -s --list-only --preview --preview-size 50 74 | ``` 75 | 76 | ## More Options 77 | 78 | Run with the `--help` argument to see all options. 79 | 更多功能和选项见 --help 帮助内容。 80 | 81 | ## Configuration 82 | 83 | Configuration is read from `config.json`. 84 | 配置文件 `config.json` 存放一些不常改动的设置。 85 | 86 | - `proxyUrl`: the proxy address including the port, for example `http://127.0.0.1:1087` or `socks5://127.0.0.1:1080`. 87 | If you don't need a proxy, just keep it empty. 88 | 89 | - `timeout`: request timeout in seconds. 90 | 91 | - `downloadDir`: the directory for saving videos. 92 | 93 | - `httpChunkSizeKB`: the size (in KB) of each chunk a video is split into while downloading. A freshly generated `config.json` sets 5120; when the key is missing the program falls back to 2048. 
94 | 95 | ## LICENCE 96 | 97 | MIT 98 | -------------------------------------------------------------------------------- /src/lib/LANG.js: -------------------------------------------------------------------------------- 1 | const _ = require('lodash') 2 | const osLocale = require('os-locale') 3 | 4 | const Locale = osLocale.sync().toLowerCase() 5 | 6 | let lang = 'en' 7 | 8 | if (Locale.includes('cn') && Locale.includes('zh')) lang = 'sc' 9 | 10 | if ((Locale.includes('tw') || Locale.includes('hk')) && Locale.includes('zh')) lang = 'tc' 11 | 12 | const STRINGS = { 13 | downloading: { 14 | sc: '下载', 15 | tc: '下載' 16 | }, 17 | EAT: { 18 | sc: '剩余', 19 | tc: '剩余' 20 | }, 21 | Piece: { 22 | sc: '块', 23 | tc: '塊' 24 | }, 25 | retrying: { 26 | sc: '重试', 27 | tc: '重試' 28 | }, 29 | 'Using Proxy': { 30 | sc: '使用代理', 31 | tc: '使用代理' 32 | }, 33 | 'process will shutdown after current download finish.': { 34 | sc: '当前下载完成后,进程将关闭。', 35 | tc: '當前下載完成後,該進程將關閉。' 36 | }, 37 | 'netword limitation': { 38 | sc: '网速限制', 39 | tc: '網絡速度限制' 40 | }, 41 | 'The program cannot run if neither --search nor --key is provided!': { 42 | sc: '--search 或 --key 参数均未提供,程序无法运行', 43 | tc: '未提供--search或--key,程序無法運行' 44 | }, 45 | 'task finished.': { 46 | sc: '任务完成', 47 | tc: '任務完成' 48 | }, 49 | 'Invalid number value': { 50 | sc: '不是有效的数字', 51 | tc: '無效的數值类型' 52 | }, 53 | 'type "stop" and enter, and this program will be terminated after the current download task finished.': { 54 | sc: '输入"stop"并回车,程序将在当前下载任务结束后自动退出', 55 | tc: '鍵入“停止”並輸入,當前下載任務完成後,該程式將自動終止。' 56 | } 57 | } 58 | 59 | _.forOwn(STRINGS, (v, k, o) => { 60 | o[k] = new Proxy(v, { 61 | get(t, p, r) { 62 | if (p === 'en') return k 63 | else return Reflect.get(t, p, r) 64 | } 65 | }) 66 | }) 67 | 68 | module.exports = { 69 | downloading: STRINGS.downloading[lang], 70 | EAT: STRINGS.EAT[lang], 71 | Piece: STRINGS.Piece[lang], 72 | retrying: STRINGS.retrying[lang], 73 | 'Using Proxy': STRINGS['Using Proxy'][lang], 74 | 'process will shutdown after 
current download finish.': STRINGS['process will shutdown after current download finish.'][lang], 75 | 'netword limitation': STRINGS['netword limitation'][lang], 76 | 'The program cannot run if neither --search nor --key is provided!': STRINGS['The program cannot run if neither --search nor --key is provided!'][lang], 77 | 'task finished.': STRINGS['task finished.'][lang], 78 | 'Invalid number value': STRINGS['Invalid number value'][lang], 79 | 'type "stop" and enter, and this program will be terminated after the current download task finished.': STRINGS['type "stop" and enter, and this program will be terminated after the current download task finished.'][lang] 80 | } 81 | -------------------------------------------------------------------------------- /src/lib/logger.js: -------------------------------------------------------------------------------- 1 | try { 2 | Error.stackTraceLimit = 20 3 | } catch (e) {} 4 | 5 | const chalk = require('chalk').default 6 | 7 | const getTraceInfo = (fx) => { 8 | const obj = {} 9 | Error.captureStackTrace(obj, fx || getTraceInfo) 10 | return obj.stack 11 | } 12 | 13 | /** 14 | * @typedef {keyof logLevel} LogType 15 | */ 16 | 17 | /** 18 | * @type {Map void>} 19 | */ 20 | const loggersCached = new Map() 21 | 22 | /** 23 | * @type {Map} 24 | */ 25 | const filesOfLogger = new Map() // only store unziped files 26 | 27 | const logLevel = new Proxy({ 28 | verbose: 'VERBOSE ', // -1 各种冗长而不重要的输出 29 | debug: 'DEBUG ', // 0 调试信息的日志,日志信息最多 30 | suc: 'SUCCESS ', // 1 重要的运行时成功信息 31 | info: 'INFO ', // 2 一般信息的日志,最常用 32 | notice: 'NOTICE ', // 3 最具有重要性的普通条件的信息 33 | warn: 'WARNING ', // 4 警告级别 34 | err: 'ERROR ', // 5 错误级别,阻止某个功能或者模块不能正常工作的信息 35 | crit: 'CRIT ', // 6 严重级别,阻止整个系统或者整个软件不能正常工作的信息 36 | alert: 'ALERT ', // 7 需要立刻修改的信息 37 | fatal: 'FATAL ', // 8 崩溃等严重信息 38 | get error() { return this.err }, 39 | get success() { return this.suc }, 40 | get warning() { return this.warn }, 41 | get inf() { return this.info }, 42 | get information() { 
return this.info }, 43 | get dbg() { return this.debug } 44 | }, { 45 | get: function (target, property, receiver) { 46 | return Reflect.get(target, property, receiver) || target.info 47 | } 48 | }) 49 | 50 | const levelNumberMap = new Map([ 51 | ['VERBOSE ', -1], 52 | ['DEBUG ', 0], 53 | ['SUCCESS ', 1], 54 | ['INFO ', 2], 55 | ['NOTICE ', 3], 56 | ['WARNING ', 4], 57 | ['ERROR ', 5], 58 | ['CRIT ', 6], 59 | ['ALERT ', 7], 60 | ['FATAL ', 8] 61 | ]) 62 | 63 | const levelColorMap = new Map([ 64 | [-1, chalk.gray], 65 | [0, chalk.white], 66 | [1, chalk.greenBright], 67 | [2, chalk.whiteBright], 68 | [3, chalk.blueBright], 69 | [4, chalk.yellowBright], 70 | [5, chalk.redBright], 71 | [6, chalk.bgYellowBright], 72 | [7, chalk.bgMagentaBright], 73 | [8, chalk.bgRedBright] 74 | ]) 75 | 76 | /** 77 | * @param {string} level 78 | */ 79 | const logLevelToColor = level => levelColorMap.get(levelNumberMap.get(logLevel[level])) 80 | 81 | function timeBasedLogHead(bc) { 82 | const DateObj = new Date() 83 | const year = DateObj.getFullYear() 84 | const month = ((DateObj.getMonth() + 1) + '').padStart(2, '0') 85 | const day = (DateObj.getDate() + '').padStart(2, '0') 86 | const hour = (DateObj.getHours() + '').padStart(2, '0') 87 | const minute = (DateObj.getMinutes() + '').padStart(2, '0') 88 | const second = (DateObj.getSeconds() + '').padStart(2, '0') 89 | const msecond = (DateObj.getMilliseconds() + '').padStart(3, '0') 90 | let blank = ''.padEnd(bc) 91 | return `${blank}${year}-${month}-${day} ${hour}:${minute}:${second}.${msecond}` 92 | } 93 | 94 | /** 95 | * @param {string} loggerName 96 | * @param {string} logfilePath 97 | * @param {string} logFileNameHead 98 | * @param {string} logFileNameTail 99 | * @param {boolean} zipOldFiles 100 | * @param {(type: LogType, logLine: string) => void | () => void} onLoggingHook 101 | * @returns {void} 102 | */ 103 | function initNewLogger(loggerName, onLoggingHook = () => {}) { 104 | 105 | if (loggerName === 'debug') return null 106 | 
107 | if (loggersCached.has(loggerName)) return null 108 | 109 | filesOfLogger.set(loggerName, []) // init file record map 110 | 111 | function _inner_logger_(type, message, trace = false) { 112 | 113 | const timeH = timeBasedLogHead() 114 | 115 | const logLine = trace ? 116 | (timeH + ' ' + logLevel[type] + ' ' + message.toString() + '\n' + getTraceInfo(_inner_logger_)) : 117 | (timeH + ' ' + logLevel[type] + ' ' + message.toString()) 118 | 119 | onLoggingHook(type, logLine) 120 | } 121 | 122 | loggersCached.set(loggerName, _inner_logger_) 123 | } 124 | 125 | /** 126 | * - 暴露给外部的获取 Logger 的函数 127 | * - 如果无 [loggerName] 对应的 Logger 128 | * - 则回退到 console.log 129 | * @param {string} loggerName 130 | * @returns {(type: LogType, message: string, trace?: boolean) => void} 131 | */ 132 | const getLogger = loggerName => loggersCached.get(loggerName) || ((...args) => console.log(...args)) 133 | 134 | module.exports = { 135 | initNewLogger, 136 | getLogger, 137 | logLevelToColor 138 | } 139 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | process.env.TZ = 'Asia/Shanghai' 2 | // process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0' 3 | 4 | const fs = require('fs') 5 | const path = require('path') 6 | 7 | fs.existsSync('./config.json') || fs.writeFileSync('./config.json', JSON.stringify({ 8 | proxyUrl: '', 9 | timeout: 0, 10 | downloadDir: './downloads/', 11 | cacheDir: './cache/', 12 | httpChunkSizeKB: 5120 13 | }, null, 2)) 14 | 15 | /** 16 | * @type {{ proxyUrl: string, timeout: number, downloadDir: string, httpChunkSizeKB: number, aria2: any }} 17 | */ 18 | const config = JSON.parse(fs.readFileSync(path.join(process.cwd(), 'config.json')).toString()) 19 | 20 | // const util = require('util') 21 | const { performance } = require('perf_hooks') 22 | const fetch = require('make-fetch-happen').defaults() 23 | const hs = require('human-size') 24 | const 
// Command line definition. Value flags are declared without a type and are
// validated / converted to numbers where they are used.
const cli = meow(`
  Usage
    $ node src -s <> [options]
    $ node src -k <> [options]

  Options
    -s, --search       Searching key word
                       搜索关键词下载

    -k, --key          Sprightly download target video from given key (or muitl keys sepreted by commas)
                       直接下载ph号

    -p, --parallel     (ex)Enable parallel downloading to accelerate download speed
                       多线程下载(实验性功能)

    -e, --exclude      Excluding key word (or muitl words sepreted by commas) using for title filter
                       关键词过滤,对视频的标题进行过滤,多个关键词请用英文逗号连接

    -a, --amount       Only download specified amount of files, default is Infinity
                       仅下载指定数量的视频后结束任务

    --limit-speed      Limit download speed to specified rate (KB)
                       限制下载速度为指定值,单位是 KB

    -l, --limit        Limitation of the downloading content (MB), default is Infinity
                       指定下载的总大小,到达指定大小后结束任务,单位是 MB

    -f, --fakerun      Fake running (Dry run), won't actually download anything
                       干运行,不会实际下载视频、写入dlist

    --force            Force downloading, even the file is already downloaded or exists
                       强制下载,无视 dlist 记录和本地已存在文件

    --skip             Skip the first few videos
                       跳过前 N 个视频

    --skipless         Skipping file smaller than the given size (MB)
                       跳过小于指定大小的视频,单位是 MB

    --skipmore         Skipping file larger than the given size (MB)
                       跳过大于指定大小的视频,单位是 MB

    --rebuild-dlist    Rebuild the dlist.txt by searching the download path

    --list-only        Only list keys from searching key word
                       仅列出搜索结果的ph号,并不会实际下载

    -d, --dir          Specify storage directory to cover the config file option
                       指定存储目录,覆盖配置文件的项

    --preview          Show preview image of each video before downloading
                       显示缩略图预览(需要Mac与iTerm版本>2.9)

    --preview-size     Preview image height for iTerm2 only (show while --list-only or --verbose flag is on), default is 40px
                       缩略图显示尺寸,默认是40px

    --verbose          Make the process more talkative
`, {
  flags: {
    search: {
      alias: 's'
    },
    key: {
      alias: 'k'
    },
    parallel: {
      alias: 'p'
    },
    exclude: {
      alias: 'e'
    },
    limitSpeed: {},
    limit: {
      alias: 'l',
      default: 'Infinity'
    },
    amount: {
      alias: 'a',
      default: 'Infinity'
    },
    fakerun: {
      type: 'boolean',
      alias: 'f'
    },
    force: {
      type: 'boolean'
    },
    dir: {
      alias: 'd'
    },
    skip: {
      default: '0'
    },
    skipless: {
    },
    skipmore: {
    },
    rebuildDlist: {
      type: 'boolean'
    },
    verbose: {
      type: 'boolean'
    },
    listOnly: {
      type: 'boolean'
    },
    preview: {
      type: 'boolean'
    },
    previewSize: {
      default: '40px'
    }
  }
})

// The 'main' logger prints every line to stdout, coloured by its level.
logger.initNewLogger('main', (t, m) => console.log(logger.logLevelToColor(t)(m)))
const log = logger.getLogger('main')

/**
 * Stop the program when an optional flag was given a value that does not
 * convert to a number. Flags that were not provided are accepted.
 *
 * @param {string} label flag name as shown to the user
 * @param {*} value raw flag value
 */
const rejectNonNumericFlag = (label, value) => {
  if (value && isNaN(+value)) {
    console.log(`${label}: ${LANGS['Invalid number value']} '${value}'`)
    process.exit(0)
  }
}

rejectNonNumericFlag('--skipless', cli.flags.skipless)
// This message used to be labelled "--skipless: :", pointing at the wrong flag.
rejectNonNumericFlag('--skipmore', cli.flags.skipmore)
rejectNonNumericFlag('--parallel (-p)', cli.flags.parallel)
rejectNonNumericFlag('--limitSpeed', cli.flags.limitSpeed)

if (cli.flags.limitSpeed) {
  // The flag is given in KB/s; human-size expects bytes.
  log('info', `${LANGS['netword limitation']}: ${hs(cli.flags.limitSpeed * 1024)}/s`)
}
// Expose the parsed command line to the library modules; this must happen
// before ./lib/scrapy is loaded, which reads global.cli at require time.
global.cli = cli

fs.existsSync('./dlist.txt') || fs.writeFileSync('./dlist.txt', '')

const scrapy = require('./lib/scrapy')

// Set to true once the user typed "stop"; checked between downloads.
let processShutdownToken = false

const stdin = process.stdin

stdin.setEncoding('utf8')
stdin.on('readable', function() {
  const chunk = process.stdin.read()
  //
  // Request a graceful shutdown when the user inputs "stop"
  //
  if (chunk !== null) {
    if (chunk === 'stop\n' || chunk === 'stop\r\n') {
      log('alert', LANGS['process will shutdown after current download finish.'])
      processShutdownToken = true
    }
    else if (chunk === 'skip\n' || chunk === 'skip\r\n') {
      // Only acknowledged here; nothing in this file reacts to it.
      log('alert', 'skip this task !')
    }
  }
})

if (cli.flags.verbose) {
  fs.existsSync('./debug') || fs.mkdirSync('./debug')
}

// Pause between two attempts to fetch the download info of one key.
const INFO_RETRY_DELAY_MS = 1000

/**
 * Fetch the download info of one video key, retrying until it succeeds.
 * The log line always said "waiting for retry" but the loop used to retry
 * immediately, hammering the site on a persistent failure; it now pauses
 * between attempts.
 *
 * @param {string} videoKey
 * @returns {Promise<any>} whatever scrapy.findDownloadInfo resolves with
 */
const fetchDownloadInfo = async videoKey => {
  for (;;) {
    try {
      const info = await scrapy.findDownloadInfo(videoKey)
      if (info) return info
    } catch (error) {
      log('error', 'error occured while getting download info, waiting for retry')
      log('error', error, true)
    }
    await new Promise(resolve => setTimeout(resolve, INFO_RETRY_DELAY_MS))
  }
}

/**
 * Program entry point: downloads the keys given with --key, or walks the
 * search result pages of --search until a size / amount limit is reached,
 * the user asks to stop, or a page yields no keys (which throws).
 */
const run = async () => {
  vblog('[main run] entered')

  fs.existsSync(config.downloadDir) || fs.mkdirSync(config.downloadDir)
  fs.existsSync('./dlist.txt') || fs.writeFileSync('./dlist.txt', '')

  /**
   * start from 1
   */
  let page = 1

  /**
   * @type {{ search: string, key: string }}
   */
  const { search, key } = cli.flags


  if (!search && !key) {
    console.log(LANGS['The program cannot run if neither --search nor --key is provided!'])
    process.exit(0)
  }

  // Key Mode
  if (key) {
    const keyList = key.split(',')
    for (const k of keyList) {
      try {
        const info = await fetchDownloadInfo(k)

        if (!info.title || info.title.trim().length === 0) {
          log('warn', `cannot find the video title, skipping ${k}.`)
          continue
        }

        const result = await scrapy.downloadVideo(info, '', undefined, cli.flags.parallel)
        log('suc', result[0])
      } catch (error) {
        console.error(error)
      }
    }

    log('suc', LANGS['task finished.'])
    process.exit(0)
  }
  // Search Mode
  else {
    const limit = +cli.flags.limit
    const amountLimit = +cli.flags.amount

    let skip = +cli.flags.skip

    if (isNaN(limit)) {
      console.log(`--limit (-l): ${LANGS['Invalid number value']} '${cli.flags.limit}'`)
      process.exit(0)
    }

    if (isNaN(amountLimit)) {
      console.log(`--amount (-a): ${LANGS['Invalid number value']} '${cli.flags.amount}'`)
      process.exit(0)
    }

    if (isNaN(skip)) {
      console.log(`--skip: ${LANGS['Invalid number value']} '${cli.flags.skip}'`)
      process.exit(0)
    }

    // Logged only after validation; it used to print "skipping first NaN
    // results" right before rejecting the value.
    log('notice', `skipping first ${skip} results`)

    const limitBytes = limit * 1024 ** 2

    let downloadedSize = 0

    log('info', `set Maximum download size: ${chalk.blueBright(limit + '')} MB, Maximum download amount: ${chalk.blueBright(amountLimit + '')}`)
    log('info', `set search keyword: ${chalk.blueBright(search)}`)
    log('notice', LANGS['type "stop" and enter, and this program will be terminated after the current download task finished.'])

    fs.writeFileSync('./search.log', (new Date().toLocaleString() + ' ') + search + '\n', {
      flag: 'a+', encoding: 'utf-8'
    })

    let downloadCount = 0

    // --- download loop ---
    while (downloadedSize <= limitBytes && downloadCount < amountLimit && !processShutdownToken) {

      const opts = {
        page,
        search
      }

      vblog('[main download] while loop entered')

      const keys = await scrapy.findKeys(opts)

      if (!Array.isArray(keys) || keys.length === 0) {
        throw new Error('scrapy.findKeys: find nothing!')
      }

      if (cli.flags.listOnly) {
        vblog('[main download] skip key loop (listOnly)')
        page += 1
        continue
      }

      if (skip > 0) {
        // Drop the keys that fall into the skip window and carry the rest
        // of the window over to the next page.
        const dropped = Math.min(keys.length, skip)
        keys.splice(0, dropped)
        skip -= dropped
      }

      // --- one page loop ---
      for (const key of keys) {
        vblog(`[main download] for...of loop entered, key=${key}`)

        if (downloadedSize > limitBytes || downloadCount >= amountLimit || processShutdownToken) {
          break
        }

        const info = await fetchDownloadInfo(key)
        let result = null

        if (!info.title || info.title.trim().length === 0) {
          log('warn', `cannot find the video title, skipping ${key}.`)
          continue
        }

        downloadCount++

        let sizeOfDl = -1
        let fileStoreName = ''

        try {
          result = await scrapy.downloadVideo(info, search, downloadCount, cli.flags.parallel)
          sizeOfDl = +result[1]
          fileStoreName = result[2]
        } catch (error) {
          log('error', 'error occured while downloading the video')
          log('error', error, true)
          // Errors mentioning "disk" end the whole run with exit code 22.
          if (error.toString().includes('disk')) {
            process.exit(22)
          } else {
            continue
          }
        }

        if (sizeOfDl > 0) {
          downloadedSize += sizeOfDl
        }

        log('suc', result[0])
        log('verbose', `downloading size statistic (this/total/limitation): ${hs(sizeOfDl, 2)} / ${hs(downloadedSize, 2)} / ${limit} MB`)

        // Optionally hand the stored file over to a remote aria2 server.
        // Fire-and-forget: failures are logged and never stop the loop.
        if (config.aria2 && config.aria2.address && fileStoreName) {
          fetch(config.aria2.address, {
            method: 'POST',
            headers: {
              'Content-Type': 'application/json'
            },
            body: JSON.stringify({
              jsonrpc: '2.0',
              method: 'aria2.addUri',
              id: strTools.randomStr(48),
              params: [
                'token:',
                [`${config.aria2.localPrefix}/${strTools.transferBadSymbolOnFileName(search)}/${fileStoreName}`],
                {}
              ]
            })
          }).then(res => {
            return res.json()
          }).then(data => {
            log('suc', `remote aria2 server: ${data.id}-${data.jsonrpc}-${data.result}`)
          }).catch(err => {
            log('err', 'send command to remote aria2 server failed: ' + err.toString(), true)
          })
        }
      }
      // --- endof one page loop ---

      page += 1
    }
    // --- endof download loop ---

    log('suc', `One situation has been satisfied, process will auto quit.
    total time cost: ${prettyMilliseconds(performance.now(), { verbose: true })}
    total download size: ${hs(downloadedSize, 1)}`)

    // Short delay before exiting so pending output can be written.
    setTimeout(process.exit, 200, 0)
  }

}

if (cli.flags.rebuildDlist) {
  // Rebuild dlist.txt from the titles of the .mp4 files found one directory
  // level below the download directory, keeping the entries already listed.
  const older = new Set(
    fs.readFileSync(path.join(process.cwd(), './dlist.txt'))
      .toString()
      .split('\n')
      // the trailing newline must not become an empty title
      .filter(line => line.length > 0)
  )
  fs.readdirSync(config.downloadDir).forEach(dp => {
    const dpath = path.resolve(config.downloadDir, dp)
    const dstat = fs.statSync(dpath)
    if (dstat.isDirectory()) {
      fs.readdirSync(dpath).forEach(fp => {
        const fpath = path.resolve(dpath, fp)
        const fstat = fs.statSync(fpath)
        if (fp.includes('.mp4') && fstat.isFile()) {
          const title = strTools.fileNameToTitle(fp)
          older.add(title)
        }
      })
    }
  })
  fs.writeFileSync('./dlist.txt', Array.from(older).join('\n') + '\n')
  process.exit(0)
}
else {
  // A failure inside run() (for example a result page without keys) used to
  // surface only as an unhandled rejection while the stdin listener kept the
  // process alive; report it and exit instead.
  run().catch(error => {
    log('fatal', error, true)
    process.exit(1)
  })
}

process.on('unhandledRejection', (reason, p) => {
  console.log('unhandled promise rejection:', reason, p)
})
-------------------------------------------------------------------------------- /src/lib/scrapy.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs') 2 | const fsp = fs.promises 3 | const path = require('path') 4 | const os = require('os') 5 | const vm = require('vm') 6 | const util = require('util') 7 | // eslint-disable-next-line no-unused-vars 8 | const sysUtil = util 9 | 10 | async function sleep(ms) { 11 | return new Promise(resolve => setTimeout(resolve, ms)) 12 | } 13 | 14 | const tempDir = path.resolve(os.tmpdir(), 'ph-dler/') 15 | fs.existsSync(tempDir) || fs.mkdirSync(tempDir) 16 | 17 | /** 18 | * @type {{ proxyUrl: string, timeout: number, downloadDir: string, httpChunkSizeKB: number }} 19 | */ 20 | const config = JSON.parse(fs.readFileSync(path.join(process.cwd(), 'config.json')).toString()) 21 | 22 | const targetDir = global.cli.flags.dir || config.downloadDir 23 | const cachedir = global.cli.flags.dir || config.cacheDir 24 | 25 | const _ = require('lodash') 26 | 27 | const chalk = require('chalk').default 28 | 29 | const imgcat = require('imgcat') 30 | 31 | const disk = require('diskusage') 32 | 33 | const cheerio = require('cheerio') 34 | 35 | // const request = require('request') 36 | const makeFetchHappen = require('make-fetch-happen') 37 | 38 | const Throttle = require('throttle') 39 | 40 | const hs = require('human-size') 41 | const pretty = require('pretty') 42 | const prettyMilliseconds = require('pretty-ms') 43 | const ProgressBar = require('progress') 44 | const progressStream = require('progress-stream') 45 | 46 | const LANGS = require('./LANG') 47 | 48 | const downloadText = LANGS.downloading 49 | const eatText = LANGS.EAT 50 | const pieceText = LANGS.Piece 51 | 52 | const { performance } = require('perf_hooks') 53 | const perf = performance 54 | 55 | const log = require('./logger').getLogger('main') 56 | 57 | const LimitedQueue = require('./limited-queue') 58 | 59 | // in 
windows, file name should not contain these symbols 60 | // * : " * ? < > | 61 | // here is the method to transfer these symbol to leagal ones 62 | const { 63 | transferBadSymbolOnFileName, 64 | transferBadSymbolOnPathName, 65 | fileNameToTitle, 66 | randomStr, 67 | WideStr, 68 | DateTimeToFileString 69 | } = require('./str') 70 | 71 | const vblog = require('./verbose') 72 | 73 | const domain = 'cn.pornhub.com' 74 | const baseUrl = `https://${domain}` 75 | 76 | const customHeaders = { 77 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 78 | 'Accept-Encoding': 'gzip, deflate, br', 79 | 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7,zh-TW;q=0.6', 80 | // 'Cache-Control': 'max-age=0', 81 | // 'Connection': 'keep-alive', 82 | // 'Cookie': '', 83 | 'DNT': '1', 84 | // 'Host': domain, 85 | // 'Referer': baseUrl, 86 | // 'Upgrade-Insecure-Requests': '1', 87 | // 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15' 88 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36' 89 | } 90 | 91 | const baseFetchOptions = { 92 | // cacheManager: './.cache', 93 | headers: customHeaders, 94 | retry: 5, 95 | onRetry() { 96 | log('warn', `[Fetch] ${LANGS.retrying}...`) 97 | } 98 | } 99 | 100 | // proxy 101 | if (config.proxyUrl.trim().length > 0) { 102 | log('notice', `${LANGS['Using Proxy']}: ${chalk.yellowBright(config.proxyUrl.trim())}`) 103 | baseFetchOptions.proxy = config.proxyUrl.trim() 104 | } 105 | 106 | // timeout 107 | if (config.timeout > 0) { 108 | baseFetchOptions.timeout = config.timeout * 1000 109 | } 110 | 111 | const fetch = makeFetchHappen.defaults(baseFetchOptions) 112 | 113 | const httpChunkBytes = (config.httpChunkSizeKB || 2048) * 1024 114 | const timeout = httpChunkBytes / 20480/* 20kb/s */ * 1000 115 | 116 | async function findKeys(opts) { 
117 | vblog.stopWatch('findKeys-requests', true) 118 | vblog(`[findKeys] entered, opt=${util.inspect(opts, false, Infinity, true)}`) 119 | 120 | const url = `${baseUrl}/video/search?search=${encodeURIComponent(opts.search.trim())}&page=${opts.page}` // &suggestion=true 121 | vblog(`[findKeys] requests to ${chalk.greenBright(url)}`) 122 | const res = await fetch(url) 123 | // console.log(res) 124 | /** 125 | * @type {string} 126 | */ 127 | const text = await res.text() 128 | // console.log(text) 129 | if (global.cli.flags.verbose) fs.writeFileSync(`./debug/search-${DateTimeToFileString(new Date(), true, true, true, true)}.html`, pretty(text)) 130 | const $ = cheerio.load(text) 131 | /** 132 | * @type {string[]} 133 | */ 134 | const allKeys = [] 135 | /** 136 | * @type {Map} 137 | */ 138 | const previews = new Map() 139 | 140 | $('.videoblock.videoBox').each((_idx, element) => { 141 | const key = element.attribs['_vkey'] 142 | vblog(`[findKeys] working on .videoblock.videoBox Node, key=${chalk.greenBright(key)}`) 143 | const $$ = cheerio.load($(element).html()) 144 | const previewImg = $$('img') 145 | const alt = previewImg.attr('alt') 146 | const imgUrl = previewImg.attr('data-thumb_url') 147 | 148 | previews.set(key, { name: alt, img: imgUrl }) 149 | allKeys.push(key) 150 | }) 151 | 152 | const skipKeys = [] 153 | $('.dropdownHottestVideos .videoblock.videoBox').each((idx, element) => { 154 | const key = element.attribs['_vkey'] 155 | vblog(`[findKeys] working on .dropdownHottestVideos .videoblock.videoBox Node, exclude key=${chalk.greenBright(key)}`) 156 | skipKeys.push(key) 157 | }) 158 | 159 | $('.dropdownReccomendedVideos .videoblock.videoBox').each((idx, element) => { 160 | const key = element.attribs['_vkey'] 161 | vblog(`[findKeys] working on .dropdownReccomendedVideos .videoblock.videoBox Node, exclude key=${chalk.greenBright(key)}`) 162 | skipKeys.push(key) 163 | }) 164 | 165 | const retKeys = allKeys.filter(k => !skipKeys.includes(k)) 166 | 167 | const tm 
= chalk.redBright(prettyMilliseconds(vblog.stopWatch('findKeys-requests', false), { verbose: true })) 168 | vblog(`[findKeys] exits with ret=${util.inspect(retKeys, false, Infinity, true)}, time cost ${tm}`) 169 | 170 | if (global.cli.flags.preview) { 171 | for (const rk of retKeys) { 172 | try { 173 | const { name, img } = previews.get(rk) 174 | // console.log('downloading image', img) 175 | let imgBuf = await (await fetch(img)).buffer() 176 | // console.log('download ok.') 177 | const imgTfName = `${randomStr(16)}.jpg` 178 | const imgTfPath = path.resolve(tempDir, imgTfName) 179 | await fsp.writeFile(imgTfPath, imgBuf) 180 | // console.log(`key: ${rk} name: ${name} preview: ${imgTfPath}`) 181 | /** 182 | * @type {string} 183 | */ 184 | const image = await imgcat(imgTfPath, { height: global.cli.flags.previewSize, preserveAspectRatio: true }) 185 | console.log(image + ` <- thumb of ${chalk.blue(name)}, key=${chalk.greenBright(rk)}`) 186 | } catch (error) { 187 | console.error(error) 188 | } 189 | } 190 | } 191 | 192 | if (global.cli.flags.listOnly) { 193 | console.log(retKeys) 194 | } 195 | 196 | return retKeys 197 | } 198 | 199 | /** 200 | * @param {string} bodyStr 201 | */ 202 | function findTitle(bodyStr) { 203 | vblog.stopWatch('findTitle', true) 204 | vblog(`[findTitle] entered, (bodyStr length=${bodyStr.length})`) 205 | 206 | const $ = cheerio.load(bodyStr) 207 | const title = $('title').text() 208 | 209 | vblog(`[findTitle] gets raw title=${title}`) 210 | 211 | const ret = title.replace(/\s-\sPornhub\.com.*/,'').trim() 212 | 213 | const tm = chalk.redBright(prettyMilliseconds(vblog.stopWatch('findTitle', false), { verbose: true })) 214 | vblog(`[findTitle] exits with ret=${ret}, time cost ${tm}`) 215 | 216 | return ret 217 | } 218 | 219 | /** 220 | * @param {string} bodyStr 221 | */ 222 | function parseDownloadInfo(bodyStr) { 223 | vblog.stopWatch('parseDIF', true) 224 | vblog(`[parseDownloadInfo] entered, (bodyStr length=${bodyStr.length})`) 225 | 226 | if 
(global.cli.flags.verbose) fs.writeFileSync(`./debug/video-${DateTimeToFileString(new Date(), true, true, true, true)}.html`, pretty(bodyStr)) 227 | 228 | let info 229 | const idx = bodyStr.indexOf('mediaDefinitions') 230 | 231 | if (idx < 0) { 232 | vblog('[parseDownloadInfo] exits with wrong section !') 233 | return info 234 | } 235 | 236 | const $ = cheerio.load(bodyStr) 237 | const scripts = $('script').toArray() 238 | // console.log(sysUtil.inspect(scripts, false, 2, true)) 239 | const inlineScripts = scripts.filter(sc => sc.children.length > 0) 240 | // inlineScripts.forEach(is => is.children.length > 0 ? console.log(is.children[0].data) : '---') 241 | const inlineScriptText = inlineScripts.map(is => is.children.map(ch => ch.data).join('\n')) 242 | // console.log(inlineScriptText.length, inlineScriptText) 243 | // console.log(sysUtil.inspect(inlineScripts, false, 3, true)) 244 | // process.exit(0) 245 | //const inlineScriptText = inlineScripts.map(sc => sc.data) 246 | const jsline = inlineScriptText.find(ist => ist.includes('var flashvars')) 247 | // console.log(jsline) 248 | 249 | //const jsline = bodyStr.split('\n').find(l => l.includes('var flashvars')).trim().replace(/^var\s?flashvars\S{1}\d+\s?=\s?/, 'k = ') 250 | if (!jsline) { 251 | vblog('[parseDownloadInfo] exits with wrong jsline !') 252 | return info 253 | } 254 | 255 | try { 256 | // eslint-disable-next-line 257 | const c = vm.createContext({ playerObjList: { } }) 258 | vm.runInContext(jsline, c) 259 | // console.log(sysUtil.inspect(c, false, 4, true)) 260 | for (const k in c) { 261 | if (/flashvars_\d+/.test(k) && c[k].mediaDefinitions) { 262 | // console.log(sysUtil.inspect(c[k].mediaDefinitions, false, 3, true)) 263 | const arr = c[k].mediaDefinitions 264 | .filter(s => s.videoUrl.length > 0) 265 | .sort((a, b) => { 266 | return a.quality !== b.quality ? 
(+b.quality) - (+a.quality) : b.format.localeCompare(a.format) 267 | }) 268 | // console.log(arr) 269 | // process.exit(0) 270 | const ret = arr[0] 271 | ret.title = findTitle(bodyStr) 272 | 273 | const tm = chalk.redBright(prettyMilliseconds(vblog.stopWatch('parseDIF', false), { verbose: true })) 274 | // vblog(`[parseDownloadInfo] exits with ret=${util.inspect(ret, false, Infinity, true)}, time cost ${tm}`) 275 | vblog(`[parseDownloadInfo] exits, time cost ${tm}`) 276 | 277 | return ret 278 | } 279 | } 280 | // console.log(sysUtil.inspect(c, false, 3, true)) 281 | // process.exit(0) 282 | } catch (error) { 283 | console.error(error) 284 | return '' 285 | } 286 | } 287 | 288 | async function findDownloadInfo(key) { 289 | vblog.stopWatch('findDF', true) 290 | vblog(`[findDownloadInfo] entered with key=${key}`) 291 | 292 | // let finalKey = key 293 | const url = `https://www.pornhub.com/view_video.php?viewkey=${key}` 294 | vblog(`[findDownloadInfo] requests to ${chalk.greenBright(url)}`) 295 | const res = await fetch(url) 296 | /** 297 | * @type {string} 298 | */ 299 | const text = await res.text() 300 | 301 | const ditem = parseDownloadInfo(text) 302 | if (ditem) { 303 | ditem.key = key 304 | } 305 | 306 | const tm = chalk.redBright(prettyMilliseconds(vblog.stopWatch('findDF', false), { verbose: true })) 307 | vblog(`[findDownloadInfo] exits with ret=${util.inspect(ditem, false, Infinity, true)}, time cost ${tm}`) 308 | 309 | return ditem 310 | } 311 | 312 | /** 313 | * @param {{ title: string, quality: string, key: string, videoUrl: string }} ditem 314 | * @param {string} folderName 315 | * @param {number} downloadCount 316 | * @param {number} parallel 317 | */ 318 | async function downloadVideo(ditem, folderName, downloadCount, parallel) { 319 | vblog.stopWatch('scrapy.js-downloadVideo', true) 320 | vblog(`[downloadVideo] entered, folderName=${chalk.yellowBright(folderName)}, downloadCount=${chalk.greenBright(downloadCount)}`) 321 | 322 | const title = 
ditem.title.trim() 323 | 324 | const _wide_title = new WideStr(title) 325 | 326 | const shortTitle = _wide_title.length <= 20 ? title : (_wide_title.substr(0, 17) + '...') 327 | 328 | const transferedTitle = transferBadSymbolOnFileName(title) 329 | const filename = `${transferedTitle}_${ditem.quality}P_${ditem.key}.mp4` 330 | // const transferedFilename = transferBadSymbolOnFileName(filename) 331 | const filenameWithRank = downloadCount === undefined ? filename : `${(downloadCount + '').padStart(4, '0')}_${filename}` 332 | const transferedFilenameWithRank = transferBadSymbolOnFileName(filenameWithRank) 333 | 334 | // const dir = path.resolve(targetDir, transferBadSymbolOnFileName(folderName)) 335 | const dir = targetDir; 336 | vblog(`[Debug] Desternation folder : ${dir}`); 337 | 338 | if (!global.cli.flags.fakerun) { 339 | fs.existsSync(dir) || fs.mkdirSync(dir) 340 | } 341 | 342 | const dst = path.join(dir, transferBadSymbolOnPathName(filename)) 343 | const dstWithRank = path.join(dir, transferBadSymbolOnPathName(filenameWithRank)) 344 | 345 | const transferedDst = dst 346 | const transferedDstWithRank = dstWithRank 347 | 348 | vblog(`[downloadVideo] generated safe title: ${chalk.cyan(transferedTitle)} in safe path: ${chalk.cyanBright(transferedDst)}`) 349 | 350 | if (global.cli.flags.exclude) { 351 | /** 352 | * @type {string[]} 353 | */ 354 | const excludes = global.cli.flags.exclude.split(',') 355 | if (excludes.some(ex => title.includes(ex))) { 356 | const resWords = global.cli.flags.verbose ? 
`title ${title} excluded by user flag ${global.cli.flags.exclude}` : 'skip a video by title filter' 357 | return [resWords, 0] 358 | } 359 | } 360 | 361 | if (!global.cli.flags.force && fs.existsSync(transferedDst) && downloadCount !== undefined) { 362 | log('warn', `rename to -> ${filenameWithRank}`) 363 | fs.renameSync(transferedDst, transferedDstWithRank) 364 | return [`${title} already exists in dl path and has been renamed into new style!`, 0] 365 | } 366 | 367 | // check new file 368 | const thisFolderFiles = global.cli.flags.fakerun ? [] : fs.readdirSync(dir).filter(f => f[0] !== '.') 369 | if (!global.cli.flags.force && thisFolderFiles.some(oldf => fileNameToTitle(oldf) === transferedTitle)) { 370 | return [`${title} already exists in dl path!`, 0] 371 | } 372 | 373 | // check dl list 374 | const oldFiles = fs.readFileSync(path.join(process.cwd(), './dlist.txt'), 'utf-8').toString().split('\n') 375 | if (!global.cli.flags.force && oldFiles.includes(transferedTitle)) { 376 | return [`${title} already exists in dlist.txt!`, 0] 377 | } 378 | 379 | log('notice', `start downloading > ${filename}`) 380 | vblog(`[downloadVideo] requests to ${chalk.greenBright(ditem.videoUrl)}`) 381 | 382 | const res = await fetch(ditem.videoUrl) 383 | 384 | if (res.status !== 200) { 385 | throw new Error('cannot access to video file, response status ' + chalk.redBright(res.status)) 386 | } 387 | vblog(`[downloadVideo] getting Code=${chalk.redBright(res.status)}, Header ${util.inspect(res.headers, false, Infinity, true)}`) 388 | 389 | const contentTotalLength = +res.headers.get('content-length') 390 | vblog(`[downloadVideo] getting content-length: ${chalk.bold(contentTotalLength)} (${chalk.bold(chalk.greenBright(hs(contentTotalLength, 3)))})`) 391 | 392 | if (global.cli.flags.fakerun) return ['fake downloaded!', contentTotalLength] 393 | 394 | if (global.cli.flags.skipless && contentTotalLength < global.cli.flags.skipless * 1024 ** 2) { 395 | return ['skip this video (size too 
small for --skipless)', 0] 396 | } 397 | 398 | if (global.cli.flags.skipmore && contentTotalLength > global.cli.flags.skipmore * 1024 ** 2) { 399 | return ['skip this video (size too large for --skipmore)', 0] 400 | } 401 | 402 | // stop tasks while disk is full 403 | const diskusage = await disk.check(/*os.platform() === 'win32' ? 'c:' : '/'*/targetDir) 404 | if (diskusage.free < contentTotalLength * 2.5) { 405 | throw new Error('skip this video (no free disk space remains)') 406 | } 407 | else { 408 | log('verbose', `disk free space: ${hs(diskusage.free, 2)}\n`) 409 | } 410 | 411 | /** 412 | * @type { { start: number, end: number }[] } 413 | */ 414 | const ranges = [] 415 | 416 | const _chunkCount = Math.floor(contentTotalLength / httpChunkBytes) 417 | const _mod = contentTotalLength % httpChunkBytes 418 | 419 | for (let i = 0; i < _chunkCount; i++) { 420 | ranges.push({ 421 | start: i * httpChunkBytes, 422 | end: (i + 1) * httpChunkBytes - 1 423 | }) 424 | } 425 | 426 | if (_mod > 0) { 427 | ranges.push({ 428 | start: _chunkCount * httpChunkBytes, 429 | end: contentTotalLength - 1 430 | }) 431 | } 432 | 433 | if (global.cli.flags.verbose) { 434 | const rl = ranges.length 435 | const rll = (rl + '').length 436 | const vblogRanges = ranges.map((r, i) => ` piece: ${((i + 1) + '').padStart(rll)}/${rl}, range: ${chalk.yellowBright(r.start)} - ${chalk.yellowBright(r.end)}${i !== rl - 1 ? 
',' : ''}`).join('\n') 437 | 438 | vblog(`[downloadVideo] generated ranges=\n${vblogRanges}`) 439 | } 440 | 441 | /** 442 | * Download Start time 443 | */ 444 | // const timeStart = perf.now() 445 | 446 | /** 447 | * Total downloaded size 448 | */ 449 | let downloadedBytes = 0 450 | 451 | // [11]1[20]1[bar]1[spd]1[5]1[piece]7[3][EAT] 452 | // 78 + [bar] + [piece] + [progress] 453 | // 极限情况 454 | // SC: 455 | // 下载 title [bar] 582.9KB/s 116.42MB/996.42MB 块:116/997 100% 剩余:4400.0s 456 | // 86+bar 457 | const progressBar = new ProgressBar(`${downloadText} ${shortTitle} [:bar] :spd/s :prog ${pieceText}::piece :percent ${eatText}::etas`, { 458 | incomplete: ' ', 459 | complete: '-', 460 | width: process.stdout.columns - 95, 461 | total: contentTotalLength 462 | }) 463 | 464 | const files = [] 465 | let idx = 0 466 | 467 | const analyzingSteps = 12 468 | const dlTimeQueue = new LimitedQueue(analyzingSteps) 469 | const dlChunkQueue = new LimitedQueue(analyzingSteps) 470 | 471 | dlTimeQueue.push(perf.now()) 472 | dlChunkQueue.push(0) 473 | 474 | for (const item of ranges) { 475 | vblog.stopWatch('scrapy.js-downloadVideo-piece', true) 476 | vblog(`[downloadVideo] for...of at range=(${chalk.bold(item.start)}, ${chalk.bold(item.end)})`) 477 | 478 | const tmpFilename = transferBadSymbolOnPathName(ditem.key + idx); 479 | const file = path.join(cachedir, `${tmpFilename}`) 480 | // const file = dir 481 | 482 | files.push(file) 483 | 484 | const standardFile = file 485 | vblog(`[Chunk] Downloading ${standardFile}`) 486 | 487 | if (fs.existsSync(standardFile)) { 488 | const tmpStat = fs.statSync(standardFile) 489 | vblog(`[downloadVideo] for...of check file piece(${idx + 1}/${ranges.length}) ${chalk.greenBright('(Exists)')} (Size: ${chalk.blueBright(tmpStat.size)})`) 490 | if (tmpStat.size === httpChunkBytes) { 491 | log('warn', `detect file ${file} (piece ${idx + 1}/${ranges.length}) already downloaded, skip it`) 492 | idx += 1 493 | downloadedBytes += httpChunkBytes 494 | 
progressBar.tick(httpChunkBytes, { 495 | prog: chalk.bold(`${hs(downloadedBytes, 2)}/${hs(contentTotalLength, 2)}`) 496 | }) 497 | continue 498 | } 499 | else { 500 | vblog(`file ${file} (piece ${idx + 1}/${ranges.length}) exists but ${chalk.yellowBright('Incomplete')}, redownload it`) 501 | } 502 | } 503 | 504 | // ----- Download the file frags ----- 505 | const bdOpt = { 506 | headers: Object.assign(_.cloneDeep(customHeaders), { 507 | Accept: '*/*', 508 | 'Accept-Encoding': 'identity', 509 | Range: `bytes=${item.start}-${item.end}`, 510 | Pragma: 'no-cache', 511 | 'Cache-Control': 'no-cache' 512 | }), 513 | retry: 5, 514 | onRetry() { 515 | log('warn', `[Fetch] ${LANGS['retrying']}...`) 516 | } 517 | } 518 | if (config.proxyUrl.trim().length > 0) { 519 | bdOpt.proxy = config.proxyUrl.trim() 520 | } 521 | // console.log(util.inspect(bdOpt.headers.Range, false, Infinity, true)) 522 | const bytesFetch = makeFetchHappen.defaults(bdOpt) 523 | 524 | /** 525 | * @type { boolean | null } 526 | */ 527 | let oneFile = null 528 | 529 | while (!oneFile) { 530 | vblog(`[downloadVideo] for...of while loop for file piece(${idx + 1}/${ranges.length}) entered`) 531 | 532 | try { 533 | const res = await bytesFetch(ditem.videoUrl) 534 | vblog(`[downloadVideo] for...of Request for file piece(${idx + 1}/${ranges.length}) responed with 535 | Code=${res.status} 536 | Header=${util.inspect(res.headers, false, 2, true)}`) 537 | 538 | if (res.status !== 206) { 539 | throw new Error(`error code ${chalk.redBright(res.status)} while downloading piece`) 540 | } 541 | 542 | // oneFile = await res.buffer() 543 | 544 | // console.log(`Downloaded bytes ${oneFile.length}, Speed ${hs(oneFile.length / (perf.now() - timeStart) * 1000, 1)}`) 545 | // process.exit(0) 546 | 547 | // const timePE = perf.now() 548 | // downloadedBytes += oneFile.length 549 | // const avgSpeed = hs(downloadedBytes / (timePE - timeStart) * 1000, 1) 550 | // progressBar.tick(oneFile.length, { 551 | // spd: avgSpeed, 552 | // 
piece: `${idx + 1}/${ranges.length}` 553 | // }) 554 | 555 | // await fsp.writeFile(standardFile, oneFile, { encoding: 'binary' }) 556 | 557 | // idx += 1 558 | 559 | oneFile = await Promise.race([ 560 | new Promise((res, rej) => { 561 | setTimeout(() => { 562 | downloadedBytes = httpChunkBytes * idx 563 | rej('timeout !') 564 | }, timeout) 565 | }), 566 | new Promise((resolve, reject) => { 567 | let OriginStream = res.body 568 | 569 | if (global.cli.flags.limitSpeed) { 570 | OriginStream = OriginStream.pipe(new Throttle(global.cli.flags.limitSpeed * 1024)) 571 | } 572 | 573 | OriginStream.pipe(progressStream({ time: 17, speed: Infinity })) 574 | .on('error', err => { 575 | reject(err) 576 | }) 577 | .on('progress', innerProgress => { 578 | const progressTime = perf.now() 579 | 580 | downloadedBytes += innerProgress.delta 581 | 582 | dlTimeQueue.push(progressTime) 583 | dlChunkQueue.push(downloadedBytes) 584 | 585 | progressBar.tick(innerProgress.delta, { 586 | // spd: hs(downloadedBytes / (progressTime - timeStart) * 1000, 1), 587 | spd: hs((dlChunkQueue.last - dlChunkQueue.first) / (dlTimeQueue.last - dlTimeQueue.first) * 1000, 1), 588 | piece: `${idx + 1}/${ranges.length}`, 589 | prog: chalk.bold(`${hs(downloadedBytes, 2)}/${hs(contentTotalLength, 2)}`) 590 | }) 591 | }) 592 | .pipe(fs.createWriteStream(standardFile, { encoding: 'binary', highWaterMark: Math.round(httpChunkBytes * 1.25) })) 593 | .on('error', err => { 594 | reject(err) 595 | }) 596 | .on('close', () => { 597 | // console.log('\n', downloadedBytes, httpChunkBytes * (idx + 1)) 598 | if (idx < ranges.length - 1 && downloadedBytes !== httpChunkBytes * (idx + 1)) { 599 | console.log(chalk.bold(chalk.yellowBright('\nbad Close !'))) 600 | downloadedBytes = httpChunkBytes * idx 601 | reject('bad Close !') 602 | } 603 | else { 604 | vblog(`[downloadVideo] for...of Request for file piece(${idx + 1}/${ranges.length}) ended, Stream closed`) 605 | idx += 1 606 | resolve(true) 607 | } 608 | }) 609 | }) 610 | 
]) 611 | } catch (error) { 612 | oneFile = null 613 | log('err', error, true) 614 | log('alert', 'downloading chunk fails, waiting for retry') 615 | await sleep(500) 616 | } 617 | } // ----- end of while 618 | const tmr = vblog.stopWatch('scrapy.js-downloadVideo-piece', false) 619 | const tmc = chalk.yellowBright(tmr.toFixed(1)) 620 | const avs = chalk.redBright(hs(httpChunkBytes / tmr * 1000, 1)) 621 | vblog(`[downloadVideo] for...of piece(${idx}/${ranges.length}) exits, time cost ${tmc} ms, speed ${avs}/s`) 622 | } 623 | 624 | // log('info', 'all pieces have been downloaded, now concat pieces...') 625 | 626 | const ws = fs.createWriteStream(transferedDstWithRank, { flags: 'a', highWaterMark: 32 * 1024 ** 2 }) // 32 MB write cache 627 | 628 | for (const file of files) { 629 | vblog(`[downloadVideo] for...of at file=${file}`) 630 | 631 | const standardFile = file 632 | 633 | const tmpRead = fs.createReadStream(standardFile, { flags: 'r', highWaterMark: httpChunkBytes }) 634 | 635 | await new Promise((__res, __rej) => { 636 | vblog(`[downloadVideo] for...of pipes file to ${transferedDstWithRank}`) 637 | 638 | tmpRead.pipe(ws, { end: false }) 639 | tmpRead.on('end', () => { 640 | __res() 641 | }) 642 | tmpRead.on('error', e => { 643 | __rej(e) 644 | }) 645 | }) 646 | 647 | vblog('[downloadVideo] for...of deletes file') 648 | // await fsp.unlink(standardFile) 649 | fsp.unlink(standardFile) 650 | } 651 | ws.end() 652 | 653 | vblog('[downloadVideo] piping ended, appending dlist.txt') 654 | 655 | // comment while debug 656 | fs.writeFileSync('./dlist.txt', transferedTitle + '\n', { flag: 'a+', encoding: 'utf-8' }) 657 | 658 | const ret = [`${dst} downloaded!`, contentTotalLength, transferedFilenameWithRank] 659 | 660 | // vblog(`[downloadVideo] exits with ret=${util.inspect(ret, false, Infinity, true)}`) 661 | vblog(`[downloadVideo] time cost ${chalk.yellowBright(prettyMilliseconds(vblog.stopWatch('scrapy.js-downloadVideo', false), { verbose: true }))}`) 662 | 663 | 
return ret 664 | } 665 | 666 | module.exports = { 667 | findKeys, 668 | findDownloadInfo, 669 | downloadVideo 670 | } 671 | --------------------------------------------------------------------------------