├── .tern-project ├── .gitignore ├── package.json ├── README.md └── index.js /.tern-project: -------------------------------------------------------------------------------- 1 | { 2 | } 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | *.swp 3 | *.m4a 4 | *.mp3 5 | .vscode 6 | download/ 7 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wing", 3 | "version": "1.0.2", 4 | "description": "喜马拉雅专辑批量下载爬虫", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "node index.js" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/wing-ho/ximalaya.git" 12 | }, 13 | "keywords": [ 14 | "喜马拉雅", 15 | "爬虫" 16 | ], 17 | "author": "wing", 18 | "license": "MIT", 19 | "bugs": { 20 | "url": "https://github.com/wing-ho/ximalaya/issues" 21 | }, 22 | "homepage": "https://github.com/wing-ho/ximalaya#readme", 23 | "dependencies": { 24 | "crypto-js": "^4.2.0" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 喜马拉雅专辑批量下载小工具 2 | 3 | ## 程序的编写思路 4 | 把需要下载的对象抽象为有限状态机,包含音频文件信息的json文件、m4a和mp3文件之间存在着一种转化关系 5 | ![思路](https://www.callmewing.com/2017/08/09/%E6%89%B9%E9%87%8F%E4%B8%8B%E8%BD%BD%E5%96%9C%E9%A9%AC%E6%8B%89%E9%9B%85%E7%9A%84%E5%85%8D%E8%B4%B9%E4%B8%93%E8%BE%91/state.png) 6 | 7 | [具体设计思路👇](https://www.callmewing.com/2017/08/09/%E6%89%B9%E9%87%8F%E4%B8%8B%E8%BD%BD%E5%96%9C%E9%A9%AC%E6%8B%89%E9%9B%85%E7%9A%84%E5%85%8D%E8%B4%B9%E4%B8%93%E8%BE%91/) 8 | 9 | ## 前提 10 | 11 | 安装 NodeJS 12 | 13 | https://nodejs.org/en/ 14 | 15 | ## 使用 16 | 17 | 命令行 18 | 19 | ```bash 20 | npm i 21 | node index.js https://www.ximalaya.com/album/4264862 目录(可选) 22 | ``` 23 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const https = require("https"); 2 | const http = require("http"); 3 | const { access, accessSync, mkdirSync, createWriteStream, unlinkSync, exists } = require("fs"); 4 | const { spawn } = require("child_process"); 5 | const readline = require("readline"); 6 | const util = require("util"); 7 | const URL = require('url'); 8 | const path = require("path"); 9 | const EventEmitter = require("events"); 10 | var CryptoJS = require("crypto-js"); 11 | const { info } = require("console"); 12 | 13 | const dest = process.argv[3] || path.resolve("download") 14 | 15 | let queue; 16 | let pagesize = 20; 17 | let pageId = 1; 18 | let quality = 24 19 | let isAsc = false; 20 | let albumOrTrack = "album" 21 | let url; 22 | 23 | function getTrackBaseInfo(trackId) { 24 | let ts = Date.now(); 25 | return "https://www.ximalaya.com/mobile-playpage/track/v3/baseInfo/" + ts + "?device=www2&trackId=" + trackId; 26 | // return `https://www.ximalaya.com/mobile-playpage/track/v3/baseInfo/${ts}?device=www2&trackId=${trackId}&trackQualityLevel=2`; 27 | } 28 | 29 | function getAlbumInfo(albumId, pageId) { 30 | let ts = Date.now(); 31 | // return "https://mobile.ximalaya.com/mobile/v1/album/track/ts-" + ts + "?albumId=" + albumId + "&device=web&isAsc=true&pageId=" + pageId + "&pageSize="+pagesize+"&pre_page=0" 32 | return "https://mobile.ximalaya.com/mobile/v1/album/track/ts-" + ts + "?albumId=" + albumId + "&device=android&isAsc=" + isAsc + "&isQueryInvitationBrand=true&pageId=" + pageId + "&pageSize=" + pagesize + "&pre_page=0" 33 | // return `https://mobile.ximalaya.com/mobile/v1/album/track/ts-${ts}?albumId=${albumId}&device=android&isAsc=${isAsc}&isQueryInvitationBrand=true&pageId=${pageId}&pageSize=${pagesize}&pre_page=0` 34 | } 35 | function decryptUrl(encryptedUrl) { 36 | const o = new Uint8Array([183, 174, 108, 16, 131, 159, 250, 5, 239, 110, 193, 202, 153, 137, 251, 176, 119, 150, 47, 204, 97, 237, 1, 71, 177, 42, 88, 218, 166, 82, 87, 94, 14, 195, 69, 127, 215, 240, 225, 197, 238, 142, 123, 44, 219, 50, 190, 29, 181, 186, 169, 98, 139, 185, 152, 13, 141, 76, 6, 157, 200, 132, 182, 49, 20, 116, 136, 43, 155, 194, 101, 231, 162, 242, 151, 213, 53, 60, 26, 134, 211, 56, 28, 223, 107, 161, 199, 15, 229, 61, 96, 41, 66, 158, 254, 21, 165, 253, 103, 89, 3, 168, 40, 246, 81, 95, 58, 31, 172, 78, 99, 45, 148, 187, 222, 124, 55, 203, 235, 64, 68, 149, 180, 35, 113, 207, 118, 111, 91, 38, 247, 214, 7, 212, 209, 189, 241, 18, 115, 173, 25, 236, 121, 249, 75, 57, 216, 10, 175, 112, 234, 164, 70, 206, 198, 255, 140, 230, 12, 32, 83, 46, 245, 0, 62, 227, 72, 191, 156, 138, 248, 114, 220, 90, 84, 170, 128, 19, 24, 122, 146, 80, 39, 37, 8, 34, 22, 11, 93, 130, 63, 154, 244, 160, 144, 79, 23, 133, 92, 54, 102, 210, 65, 67, 27, 196, 201, 106, 143, 52, 74, 100, 217, 179, 48, 233, 126, 117, 184, 226, 85, 171, 167, 86, 2, 147, 17, 135, 228, 252, 105, 30, 192, 129, 178, 120, 36, 145, 51, 163, 77, 205, 73, 4, 188, 125, 232, 33, 243, 109, 224, 104, 208, 221, 59, 9]); 37 | const a = new Uint8Array([204, 53, 135, 197, 39, 73, 58, 160, 79, 24, 12, 83, 180, 250, 101, 60, 206, 30, 10, 227, 36, 95, 161, 16, 135, 150, 235, 116, 242, 116, 165, 171]); 38 | 39 | function decodeBase64(input) { 40 | return Uint8Array.from(atob(input), c => c.charCodeAt(0)); 41 | } 42 | 43 | function xorBytes(buffer, offset, key) { 44 | const length = Math.min(buffer.length - offset, key.length); 45 | for (let i = 0; i < length; i++) { 46 | buffer[offset + i] ^= key[i]; 47 | } 48 | } 49 | 50 | try { 51 | const decodedUrl = encryptedUrl.replace(/_/g, "/").replace(/-/g, "+"); 52 | const encryptedData = decodeBase64(decodedUrl); 53 | if (encryptedData.length < 16) { 54 | return encryptedUrl; 55 | } 56 | 57 | const data = encryptedData.slice(0, -16); 58 | const iv = encryptedData.slice(-16); 59 | 60 | for (let i = 0; i < data.length; i++) { 61 | data[i] = o[data[i]]; 62 | } 63 | 64 | for (let i = 0; i < data.length; i += 16) { 65 | xorBytes(data, i, iv); 66 | } 67 | 68 | for (let i = 0; i < data.length; i += 32) { 69 | xorBytes(data, i, a); 70 | } 71 | 72 | return new TextDecoder().decode(data); 73 | } catch (error) { 74 | console.warn(error, "Decryption failed"); 75 | return ""; 76 | } 77 | } 78 | // 存在只有m4a格式,没有mp3格式的情况 79 | function getURLFromEncodeDataList(playUrlList, quality) { 80 | let qualities = ["64", "128", "32", "24"] 81 | // let types = ["M4A","MP3"] 82 | let types = ["MP3", "M4A", "AAC"] 83 | let item 84 | loop: 85 | for (let i = 0; i < types.length; i++) { 86 | for (let j = 0; j < qualities.length; j++) { 87 | let type = types[i].concat('_').concat(qualities[j]) 88 | for (let k = 0; k < playUrlList.length; k++) { 89 | item = playUrlList[k] 90 | if (item.type == type) { 91 | break loop; 92 | } 93 | } 94 | } 95 | } 96 | return decryptUrl(item.url) 97 | // return CryptoJS.AES.decrypt({ ciphertext: CryptoJS.enc.Base64url.parse(item.url) }, 98 | // CryptoJS.enc.Hex.parse("aaad3e4fd540b0f79dca95606e72bf93"), 99 | // { mode: CryptoJS.mode.ECB, padding: CryptoJS.pad.Pkcs7 }).toString(CryptoJS.enc.Utf8); 100 | } 101 | 102 | 103 | 104 | function main() { 105 | url = process.argv[2] 106 | if (!url) { 107 | url = "https://www.ximalaya.com/album/4264862" 108 | // usage(); 109 | // return; 110 | } 111 | access(dest, (err) => { 112 | if (err) { 113 | mkdirSync(dest) 114 | } 115 | }) 116 | queue = new Queue() 117 | let albumId; 118 | let trackId; 119 | let groups = url.match(/(album|sound)\/([0-9]+)/); 120 | albumOrTrack = groups[1]; 121 | if (albumOrTrack == "sound") { 122 | trackId = groups[2] 123 | url = getTrackBaseInfo(trackId) 124 | let file = new File(url) 125 | file.type = "track" 126 | queue.enqueue(file); 127 | } else { 128 | albumId = groups[2] 129 | url = getAlbumInfo(albumId, pageId) 130 | let page = new File(url) 131 | page.type = "page" 132 | queue.enqueue(page); 133 | page.on("downloaded", function () { 134 | let resData = JSON.parse(this.content); 135 | if ("data" in resData && "maxPageId" in resData.data) { 136 | let maxPageId = resData.data.maxPageId; 137 | for (let pageId = 2; pageId <= maxPageId; pageId++) { 138 | let url = getAlbumInfo(albumId, pageId) 139 | var page = new File(url); 140 | queue.enqueue(page); 141 | } 142 | } 143 | }) 144 | } 145 | 146 | queue.showProcess(); 147 | 148 | process.on('SIGINT', function () { 149 | queue.end(); 150 | process.exit(0); 151 | }); 152 | } 153 | function usage() { 154 | console.log("Usage: node index.js url dest_folder?") 155 | console.log("Example: node index.js https://www.ximalaya.com/album/4264862 目录(可选)") 156 | } 157 | if (!String.prototype.format) { 158 | String.prototype.format = function() { 159 | var args = arguments; 160 | return this.replace(/{(\d+)}/g, function(match, number) { 161 | return typeof args[number] != 'undefined' 162 | ? args[number] 163 | : match 164 | ; 165 | }); 166 | }; 167 | } 168 | 169 | class File extends EventEmitter { 170 | constructor(url) { 171 | super() 172 | this.url = url 173 | this.filename = path.basename(new URL.URL(url).pathname); 174 | this.size = 0; 175 | this.speed = 0; 176 | this.downloaded = 0 177 | this.content = ""; 178 | this.contentType = ""; 179 | this.type = "page"; // page track mp3 180 | this.state = "create";// create enqueue response data downloeaded 181 | } 182 | get percent() { 183 | return Math.ceil(this.downloaded / this.size * 100); 184 | } 185 | set title(title) { 186 | let extname = path.extname(this.filename) 187 | if (extname != "") { 188 | this.filename = title.replace(/[\/:*?"<>|]/g, "") + extname 189 | this.path = path.join(dest, this.filename) 190 | } 191 | } 192 | isBinaryFile() { 193 | return this.type == "mp3" 194 | } 195 | toString() { 196 | let result = ""; 197 | switch (this.state) { 198 | case "create": 199 | case "enqueue": 200 | result = util.format("%s开始下载!", this.filename); 201 | break; 202 | case "data": 203 | result = util.format("%s下载完成%d%", this.filename, this.percent); 204 | break; 205 | case "downloaded": 206 | result = util.format("%s下载已完成!", this.filename); 207 | break; 208 | } 209 | return result; 210 | } 211 | download() { 212 | this.state = "enqueue" 213 | let request = http 214 | if (this.url.startsWith("https")) { 215 | request = https 216 | } 217 | 218 | let options = URL.parse(this.url) 219 | 220 | options = Object.assign(options, { 221 | headers: { 222 | 'Referer': this.url, 223 | 'Accept': "application/json, text/javascript, */*; q=0.01", 224 | 'Content-Type': "application/json;", 225 | 'Connection': "keep-alive", 226 | // set vip cookie 227 | // 'Cookie':"1&_token=395507195&AB994A60240N47B8F9FEFF650563C85C670E05E8A120DBBB4D858643B39208BD494D49F9C92887MA29988DDA1902F0_", 228 | 'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36" 229 | } 230 | }) 231 | 232 | request.get(options, (res) => { 233 | this.state = "response"; 234 | var chunks = []; 235 | this.size = res.headers["content-length"] || 0; 236 | this.contentType = res.headers["content-type"]; 237 | if (res.statusCode == 301) { 238 | this.url = res.req.protocol + "//" + res.req.host + res.headers.location; 239 | this.state = "create"; 240 | return 241 | } 242 | 243 | if (this.isBinaryFile()) { 244 | try { 245 | accessSync(this.path) 246 | this.state = "end" 247 | queue.dequeue(this) 248 | return 249 | } catch (e) { 250 | this.writeStream = createWriteStream(this.path); 251 | } 252 | } 253 | 254 | res.on("data", (chunk) => { 255 | this.state = "data"; 256 | let len = chunk.length; 257 | this.downloaded += len; 258 | this.speed = Math.ceil(len / 1024); 259 | if (this.writeStream) { 260 | this.writeStream.write(chunk); 261 | } else { 262 | chunks.push(chunk); 263 | } 264 | }).on("end", () => { 265 | this.state = "downloaded"; 266 | this.size = this.downloaded; 267 | if (!this.writeStream) { 268 | var buf = Buffer.concat(chunks, this.size); 269 | this.content = buf.toString(); 270 | } 271 | this.emit("downloaded"); 272 | this.onDownloaded() 273 | queue.dequeue() 274 | }) 275 | }) 276 | } 277 | 278 | getTrack() { 279 | let resData = JSON.parse(this.content); 280 | if ("data" in resData && "list" in resData.data) { 281 | let tracks = resData.data.list; 282 | let total = resData.data.totalCount; 283 | let pageId = resData.data.pageId; 284 | let pageSize = resData.data.pageSize; 285 | let padlen = String(total).length; 286 | for (let i = 0; i < tracks.length; i++) { 287 | let track = tracks[i] 288 | let index = String((pageId - 1) * pageSize + i + 1).padStart(padlen, '0') + "-" 289 | // 如果音频的标题自带编号,则取消下一行注释,使用标题自带的编号 290 | // index = "" 291 | // if (track.isPaid) { 292 | let url = getTrackBaseInfo(track.trackId) 293 | console.log("getTrackBaseInfo", url) 294 | let payTrack = new File(url) 295 | payTrack.type = "track" 296 | payTrack.index = index 297 | queue.enqueue(payTrack); 298 | // } else { 299 | // let mp3 = new File(track.playUrl64) 300 | // mp3.type = "mp3" 301 | // mp3.title = index + track.title; 302 | // queue.enqueue(mp3); 303 | // } 304 | } 305 | } 306 | } 307 | 308 | getMediaFile() { 309 | let resData = JSON.parse(this.content); 310 | if ("trackInfo" in resData && "playUrlList" in resData.trackInfo) { 311 | let track = resData.trackInfo 312 | let url = getURLFromEncodeDataList(track.playUrlList, quality) 313 | let mp3 = new File(url); 314 | mp3.type = "mp3" 315 | mp3.title = this.index + track.title 316 | queue.enqueue(mp3); 317 | } 318 | } 319 | onDownloaded() { 320 | switch (this.type) { 321 | case "page": 322 | this.getTrack(); 323 | this.end() 324 | break; 325 | case "track": 326 | this.getMediaFile(); 327 | this.end() 328 | break; 329 | case "mp3": 330 | this.end() 331 | break; 332 | } 333 | } 334 | 335 | get isEnd() { 336 | return this.state === "end"; 337 | } 338 | end() { 339 | if (this.type == "mp3" && this.state == "data") { 340 | unlinkSync(this.path); 341 | } 342 | this.state = "end"; 343 | } 344 | 345 | } 346 | class Queue { 347 | MAX_THREADS = 1; 348 | current_threads = 0 349 | get idle_threads() { 350 | return this.MAX_THREADS - this.current_threads; 351 | } 352 | constructor() { 353 | this.queue = []; 354 | this.timer = 0; 355 | this.head = 0 356 | this.tail = 0; 357 | this.cursorDx = 0; 358 | this.cursorDy = 0; 359 | } 360 | 361 | showProcess() { 362 | this.timer = setInterval(() => { 363 | this.toString(); 364 | if (this.head == this.tail) { 365 | clearInterval(this.timer); 366 | } 367 | }, 200); 368 | } 369 | enqueue(file) { 370 | this.queue.push(file) 371 | while (this.tail < this.queue.length && this.idle_threads > 0) { 372 | this.queue[this.tail].download() 373 | this.tail++ 374 | this.current_threads++ 375 | } 376 | } 377 | dequeue() { 378 | while (this.head < this.tail) { 379 | if (!this.queue[this.head].isEnd) { 380 | break 381 | } 382 | this.head++ 383 | } 384 | if (this.tail < this.queue.length) { 385 | this.queue[this.tail].download() 386 | this.tail++ 387 | } 388 | } 389 | toString() { 390 | var content = "", stdout = process.stdout; 391 | for (var i = this.head; i < this.tail; i++) { 392 | var file = this.queue[i]; 393 | if (!file.isEnd) { 394 | content += file.toString() 395 | if (i + 1 < this.tail) { 396 | content += "\n" 397 | } 398 | } 399 | } 400 | readline.moveCursor(stdout, this.cursorDx, this.cursorDy); 401 | readline.clearScreenDown(stdout); 402 | stdout.write(content); 403 | var rec = this.getDisplayRectangle(content); 404 | this.cursorDx = -1 * rec.width; 405 | this.cursorDy = -1 * rec.height; 406 | } 407 | 408 | end() { 409 | for (var i = this.head; i < this.tail; i++) { 410 | var file = this.queue[i]; 411 | file.end(); 412 | } 413 | } 414 | getDisplayRectangle(str) { 415 | var width = 0, height = 0, maxWidth = 0, len = str.length, charCode = -1; 416 | for (var i = 0; i < len; i++) { 417 | charCode = str.charCodeAt(i); 418 | if (charCode === 10) { 419 | if (width > maxWidth) { 420 | maxWidth = width; 421 | } 422 | height += Math.floor(width / process.stdout.columns); 423 | width = 0; 424 | height++; 425 | } 426 | if (charCode >= 0 && charCode <= 255) { 427 | width += 1; 428 | } else { 429 | width += 2; 430 | } 431 | } 432 | return { 433 | width: maxWidth || width, 434 | height: height 435 | } 436 | } 437 | } 438 | main() 439 | --------------------------------------------------------------------------------