├── README.md └── weread-scraper.js /README.md: -------------------------------------------------------------------------------- 1 | # WeRead-Scraper 2 | Scrape WeRead books and save them as HTML files 3 | 4 | This repo is an archive of the [Sec-ant project](https://github.com/Sec-ant/weread-scraper). 5 | 6 | I have not modified any of the code, so rest assured. 7 | 8 | --- 9 | 10 | This project has not worked since Oct. 22, 2023. 11 | 12 | Please find another way to get WeRead ebooks. 13 | 14 | If you would like to take over and continue development, feel free to use the code in this repo directly. 15 | -------------------------------------------------------------------------------- /weread-scraper.js: -------------------------------------------------------------------------------- 1 | // ==UserScript== 2 | // @name WeRead Scraper 3 | // @namespace https://github.com/Sec-ant/weread-scraper 4 | // @version 1.4.3 5 | // @author Ze-Zheng Wu 6 | // @description Scrape WeRead books and save them as HTML files 7 | // @license MIT 8 | // @icon https://weread.qq.com/favicon.ico 9 | // @homepage https://github.com/Sec-ant/weread-scraper 10 | // @homepageURL https://github.com/Sec-ant/weread-scraper 11 | // @source https://github.com/Sec-ant/weread-scraper.git 12 | // @supportURL https://github.com/Sec-ant/weread-scraper/issues 13 | // @match https://weread.qq.com/web/reader/* 14 | // @match https://weread.qq.com/web/book/read* 15 | // @require https://fastly.jsdelivr.net/npm/minify-html-wasm@0.1.1/dist/no-modules/index.min.js 16 | // @require https://fastly.jsdelivr.net/npm/zustand@4.4.3/umd/vanilla.production.js 17 | // @require https://fastly.jsdelivr.net/npm/zustand@4.4.3/umd/middleware.production.js 18 | // @require https://fastly.jsdelivr.net/npm/@sec-ant/gm-fetch@1.2.0/dist/index.iife.js 19 | // @connect fastly.jsdelivr.net 20 | // @connect weread.qq.com 21 | // @connect tencent-cloud.com 22 | // @connect * 23 | // @grant GM_deleteValue 24 | // @grant GM_getValue 25 | // @grant 
// @grant GM_registerMenuCommand
// @grant GM_setValue
// @grant GM_unregisterMenuCommand
// @grant GM_webRequest
// @grant GM_xmlhttpRequest
// @grant unsafeWindow
// @run-at document-start
// ==/UserScript==

/**
 * WeRead Scraper userscript body.
 *
 * While the session flag `scraping` is set, the script watches the reader page,
 * collects every rendered page into an off-document <html> tree, clicks the
 * "next page" button, and finally offers the collected tree as an HTML download.
 *
 * @param gmFetch     fetch-like function provided by @sec-ant/gm-fetch
 * @param vanilla     zustand vanilla build (provides `createStore`)
 * @param middleware  zustand middleware build (`persist`, `subscribeWithSelector`, ...)
 * @param init        wasm_bindgen entry of minify-html-wasm; called with the fetched
 *                    wasm response to initialise, and exposes `init.minify`
 */
(function (gmFetch, vanilla, middleware, init) {
  'use strict';

  // Resolve each userscript-manager API once; a missing grant yields `undefined`
  // instead of a ReferenceError at the point of use.
  const _GM_deleteValue = /* @__PURE__ */ (() => typeof GM_deleteValue != "undefined" ? GM_deleteValue : void 0)();
  const _GM_getValue = /* @__PURE__ */ (() => typeof GM_getValue != "undefined" ? GM_getValue : void 0)();
  const _GM_registerMenuCommand = /* @__PURE__ */ (() => typeof GM_registerMenuCommand != "undefined" ? GM_registerMenuCommand : void 0)();
  const _GM_setValue = /* @__PURE__ */ (() => typeof GM_setValue != "undefined" ? GM_setValue : void 0)();
  const _GM_unregisterMenuCommand = /* @__PURE__ */ (() => typeof GM_unregisterMenuCommand != "undefined" ? GM_unregisterMenuCommand : void 0)();
  const _GM_webRequest = /* @__PURE__ */ (() => typeof GM_webRequest != "undefined" ? GM_webRequest : void 0)();
  const _unsafeWindow = /* @__PURE__ */ (() => typeof unsafeWindow != "undefined" ? unsafeWindow : void 0)();

  /**
   * Identity template tag: returns the template with its substitutions
   * concatenated, using the raw (unprocessed) string parts.
   */
  const concatenateTemplateLiteralTag = (raw, ...substitutions) => String.raw({ raw }, ...substitutions);
  const any = concatenateTemplateLiteralTag;

  // Used to replace the page's `localStorage` getter while scraping (see scrapingOn).
  const windowDefineGetter = Object.prototype.__defineGetter__.bind(_unsafeWindow);
  let windowLocalStorage = void 0;
  try {
    windowLocalStorage = _unsafeWindow.localStorage;
  } catch (_) {
    // Reading localStorage can throw (e.g. when storage access is denied);
    // in that case there is simply nothing to restore later.
  }

  // NOTE: the CSS comments inside the two templates below are part of the string
  // content that is emitted at runtime, so they are kept exactly as authored.
  const stylePreset = any`
    @font-face {
      font-family: "汉仪旗黑50S";
      src: url("https://fastly.jsdelivr.net/gh/Sec-ant/weread-scraper/public/fonts/HYQiHei_50S.woff2");
    }
    @font-face {
      font-family: "汉仪旗黑65S";
      src: url("https://fastly.jsdelivr.net/gh/Sec-ant/weread-scraper/public/fonts/HYQiHei_65S.woff2");
    }
    @font-face {
      font-family: "汉仪楷体";
      src: url("https://fastly.jsdelivr.net/gh/Sec-ant/weread-scraper/public/fonts/HYKaiTiS.woff2");
    }
    @font-face {
      font-family: "方正仿宋";
      src: url("https://fastly.jsdelivr.net/gh/Sec-ant/weread-scraper/public/fonts/FZFSJW.woff2");
    }
    @font-face {
      font-family: "PingFang SC";
      src: url("https://fastly.jsdelivr.net/gh/Sec-ant/weread-scraper/public/fonts/PingFang-SC-Regular.woff2");
    }
    .readerChapterContent {
      break-after: page;
      /* 支持旧版本浏览器 */
      page-break-after: always;
    }
  `;
  const annotationStyle = any`
    /* 携带注释信息的元素,下面的样式用来让它显示为一个黑色的圆 */
    span.reader_footer_note {
      text-indent: 0; /* 避免继承段落的缩进样式 */
      text-align: left; /* 文字左对齐 */
      position: relative; /* 用来给伪元素做定位参照 */
      display: inline-block; /* 使宽度和高度指定有效 */
      width: 1em; /* 设定宽度 */
      height: 1em; /* 设定高度 */
      background-color: black; /* 设定背景为黑色 */
      border-radius: 50%; /* 圆角化为圆形 */
      cursor: pointer; /* 光标样式改为手指 */
    }
    /* before 伪元素用来显示“注”这个字 */
    span.reader_footer_note:before {
      position: absolute; /* 绝对位置,基准为 span.reader_footer_note */
      content: "注"; /* 显示“注”字 */
      color: white; /* 字颜色为白色 */
      left: 0.15em; /* 微调字的位置 */
      top: 0.1em; /* 微调字的位置 */
      font-size: 0.75em; /* 设定文字大小 */
      font-family: "汉仪楷体"; /* 设定字体 */
    }
    /* after 伪元素用来显示注释内容,只在光标移至“注”上方时才显示 */
    span.reader_footer_note:hover:after {
      position: fixed; /* 相对于视窗的位置 */
      content: attr(data-wr-footernote); /* 获取并设置注释内容 */
      left: 0; /* 设定相对于视窗的位置 */
      bottom: 0; /* 设定相对于视窗的位置 */
      margin: 1em; /* 设定背景气泡与视窗边缘预留的空间 */
      background: black; /* 设定背景气泡为黑色 */
      border-radius: 0.25em; /* 背景气泡圆角 */
      color: white; /* 设定文字为白色 */
      padding: 0.5em; /* 设定文字内容与背景气泡边缘预留的空间 */
      font-size: 1em; /* 设定文字大小 */
      font-family: "汉仪楷体"; /* 设定字体 */
      z-index: 1; /* 避免被其它元素遮挡 */
    }
  `;

  // Off-document tree that accumulates the scraped book: <html><head><style/></head><body/></html>.
  const htmlElement = document.createElement("html");
  const headElement = document.createElement("head");
  const styleElement = document.createElement("style");
  const bodyElement = document.createElement("body");
  // NOTE(review): this template is empty in the text under review; it may
  // originally have held head markup that was lost in extraction - confirm upstream.
  headElement.insertAdjacentHTML("beforeend", any``);
  headElement.append(styleElement);
  htmlElement.append(headElement, bodyElement);

  const encoder = new TextEncoder();
  const decoder = new TextDecoder();
  // Fetch the minifier's wasm binary and hand the response to the wasm_bindgen initialiser.
  const wasmInitPromise = gmFetch("https://fastly.jsdelivr.net/npm/minify-html-wasm@0.1.1/dist/no-modules/index_bg.wasm").then(init);

  // Index of each entry in `booleanOptions`; the order is fixed by scraperGMInitialState.
  const INLINE_IMAGES_OPTION = 0;
  const DISPLAY_ANNOTATIONS_OPTION = 1;

  const HTML_ESCAPES = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;"
  };

  /** Escapes text so it can be interpolated into an HTML string as plain text. */
  function escapeHtml(text) {
    return text.replace(/[&<>"']/g, (character) => HTML_ESCAPES[character]);
  }

  /**
   * Returns the trimmed text content of the first element matching `selector`
   * in the live document, or `undefined` when there is no such element or its
   * text is empty.
   */
  function queryTrimmedText(selector) {
    const element = document.querySelector(selector);
    const text = element === null ? null : element.textContent;
    return text == null ? void 0 : (text.trim() || void 0);
  }

  /** Minifies an HTML string with minify-html-wasm once the wasm module is ready. */
  async function minifyHtml(html, options) {
    await wasmInitPromise;
    return decoder.decode(init.minify(encoder.encode(html), options));
  }

  // Whenever the DOM changes, snapshot the visible pre-render container
  // (the one without an inline `style` attribute) if it already holds content.
  const preRenderContainerObserver = new MutationObserver(() => {
    const preRenderContainer = document.querySelector(
      ".preRenderContainer:not([style])"
    );
    if (!preRenderContainer) {
      return;
    }
    if (!preRenderContainer.querySelector("#preRenderContent")) {
      return;
    }
    scraperPageStore.setState({
      // Clone so later page mutations cannot alter the snapshot.
      preRenderContainer: preRenderContainer.cloneNode(true)
    });
  });

  // ---- Session store: survives the reload triggered by "Start Scraping" ----
  const scraperSessionInitialState = {
    scraping: false,
    chapterLevelList: {}
  };
  const scraperSessionStore = vanilla.createStore()(
    middleware.subscribeWithSelector(
      middleware.persist(() => scraperSessionInitialState, {
        name: "scraper-session-storage",
        storage: middleware.createJSONStorage(() => sessionStorage)
      })
    )
  );

  // ---- GM store: user options persisted through the userscript manager ----
  const GMStorage = {
    getItem: (name) => _GM_getValue(name),
    setItem: (name, value) => {
      _GM_setValue(name, value);
    },
    removeItem: (name) => {
      _GM_deleteValue(name);
    }
  };
  const scraperGMInitialState = {
    clickInterval: 0,
    booleanOptions: [
      {
        name: "Inline Images",
        value: false
      },
      {
        name: "Display Annotations",
        value: false
      }
    ]
  };
  const scraperGMStore = vanilla.createStore()(
    middleware.subscribeWithSelector(
      middleware.persist(() => scraperGMInitialState, {
        name: "scraper-gm-storage",
        storage: middleware.createJSONStorage(() => GMStorage),
        // Keep the option list defined in code as the source of truth and only
        // take over persisted entries whose name still exists.
        merge: (persistedState, currentState) => {
          // Nothing may have been persisted yet, or the stored shape may be
          // stale; treat both as "no persisted options" instead of throwing.
          const persistedBooleanOptions = persistedState != null && Array.isArray(persistedState.booleanOptions) ? persistedState.booleanOptions : [];
          return {
            ...currentState,
            ...persistedState,
            booleanOptions: currentState.booleanOptions.map(
              (currentBooleanOption) => persistedBooleanOptions.find(
                (option) => option != null && option.name === currentBooleanOption.name
              ) || currentBooleanOption
            )
          };
        }
      })
    )
  );

  // ---- Page store: transient per-page scraping progress ----
  const scraperPageInitialState = {
    preRenderContainer: null,
    pageContentLoaded: false,
    isNewChapter: false,
    timeout: 0,
    pageContentLoadedCleanUp: () => {
    }
  };
  const scraperPageStore = vanilla.createStore()(
    middleware.subscribeWithSelector(() => scraperPageInitialState)
  );

  /**
   * Resets the per-page progress fields before turning to the next page.
   * `pageContentLoadedCleanUp` is deliberately left alone: resetting it to the
   * initial no-op would make scrapingOff() unable to unsubscribe or to clear
   * the pending click timer.
   */
  function resetPageProgress() {
    scraperPageStore.setState({
      preRenderContainer: null,
      pageContentLoaded: false,
      isNewChapter: false,
      timeout: 0
    });
  }

  /** Installs the request hooks, DOM observer and page subscription used while scraping. */
  function scrapingOn() {
    // Make the page see `localStorage` as undefined while scraping.
    // NOTE(review): presumably keeps the reader from persisting state during the
    // automated page turns - the reason is not stated in this file; confirm.
    windowDefineGetter("localStorage", () => void 0);
    _GM_webRequest(
      [
        // Block WeRead's reading-progress request so that page turns made while
        // scraping are not recorded as reading progress.
        // The request being issued also signals that the page has finished loading.
        {
          selector: "https://weread.qq.com/web/book/read*",
          action: "cancel"
        },
        // Subscribe to WeRead's chapter-content request.
        // The request being issued means the content is a new chapter; otherwise
        // the page is a continuation of the current chapter.
        // chapter/e_* is the epub format, chapter/t_* is the txt format.
        // Redirecting the request to a URL that is not listed in @match lets the
        // request go out normally while the callback is still triggered.
        {
          selector: "https://weread.qq.com/web/book/chapter/*",
          action: {
            redirect: "https://chapter.invalid"
          }
        }
      ],
      (info) => {
        switch (info) {
          case "cancel":
            scraperPageStore.setState({
              pageContentLoaded: true
            });
            break;
          case "redirect":
            scraperPageStore.setState({
              isNewChapter: true
            });
            break;
        }
      }
    );
    preRenderContainerObserver.observe(document.documentElement, {
      childList: true,
      subtree: true
    });
    const unsub = subscribePageContentLoaded();
    scraperPageStore.setState({
      pageContentLoadedCleanUp: getPageContentLoadedCleanUpFunction(unsub)
    });
  }

  /** Undoes everything scrapingOn() installed. Also runs once at startup when not scraping. */
  function scrapingOff() {
    scraperPageStore.getState().pageContentLoadedCleanUp();
    preRenderContainerObserver.disconnect();
    // Replace the rule list with an empty one.
    _GM_webRequest([], () => {
    });
    windowDefineGetter("localStorage", () => windowLocalStorage);
  }

  scraperSessionStore.subscribe(
    (state) => state.scraping,
    (scraping) => {
      if (scraping) {
        scrapingOn();
      } else {
        scrapingOff();
      }
    },
    {
      fireImmediately: true
    }
  );

  /**
   * Subscribes to `pageContentLoaded`. Each time it becomes true the current
   * snapshot is fed into the output tree, then the next-page button is clicked
   * after the configured interval; when the book's ending marker is present
   * instead of a button, scraping stops and the result is saved.
   *
   * @returns the unsubscribe function returned by the store
   */
  function subscribePageContentLoaded() {
    return scraperPageStore.subscribe(
      (state) => state.pageContentLoaded,
      async (pageContentLoaded) => {
        if (!pageContentLoaded) {
          return;
        }
        const { preRenderContainer } = scraperPageStore.getState();
        if (preRenderContainer) {
          await feed(preRenderContainer, queryTrimmedText(".chapterTitle"));
        } else {
          console.warn("Failed to find .preRenderContainer element.");
        }
        if (!document.querySelector(".readerFooter_button")) {
          if (document.querySelector(".readerFooter_ending")) {
            stopScrapingAndSave();
          }
          return;
        }
        // Wait for the click interval; the timer id is stored so that the
        // clean-up function can cancel the pending click.
        await new Promise((resolve) => {
          scraperPageStore.setState({
            timeout: setTimeout(resolve, scraperGMStore.getState().clickInterval)
          });
        });
        resetPageProgress();
        // Query again: the button may have been replaced during the wait.
        const nextPageButton = document.querySelector(".readerFooter_button");
        if (nextPageButton) {
          nextPageButton.dispatchEvent(
            new MouseEvent("click", {
              clientX: 1,
              clientY: 1
            })
          );
        }
      }
    );
  }

  /**
   * Builds the clean-up callback stored in the page store: it unsubscribes,
   * cancels a pending next-page click and resets the page store.
   */
  function getPageContentLoadedCleanUpFunction(unsub) {
    return () => {
      unsub();
      clearTimeout(scraperPageStore.getState().timeout);
      scraperPageStore.setState(scraperPageInitialState);
    };
  }

  /** Fills the output <style> element once, from the presets and the page's own style. */
  async function ensureOutputStyles(preRenderContainer) {
    if (styleElement.childNodes.length !== 0) {
      return;
    }
    const preRenderStyleElement = preRenderContainer.querySelector("style") || styleElement;
    styleElement.append(stylePreset, preRenderStyleElement.innerHTML);
    if (scraperGMStore.getState().booleanOptions[DISPLAY_ANNOTATIONS_OPTION].value) {
      styleElement.prepend(annotationStyle);
    }
    // Assigning outerHTML swaps the element inside <head> for the minified
    // markup; `styleElement` itself keeps its children, so the emptiness check
    // above stays false on later calls.
    styleElement.outerHTML = await minifyHtml(styleElement.outerHTML, {
      minify_css: true
    });
  }

  /** Replaces <img> sources and inline background images with base64 data URLs. */
  async function inlineImages(preRenderContainer) {
    const fetchImagePromises = [];
    // Built with the constructor exactly as in the original build output.
    const backgroundImageRegExp = new RegExp("(?<=background-image:url\\().+?(?=\\))");
    for (const image of preRenderContainer.querySelectorAll("img")) {
      const url = image.getAttribute("data-src") ?? image.src;
      if (!url) {
        continue;
      }
      fetchImagePromises.push(
        (async () => {
          try {
            const resp = await gmFetch(url);
            if (resp.ok) {
              image.src = await blobToBase64(await resp.blob());
            }
          } catch (e) {
            console.warn(`Failed to fetch image (${url}): ${e}`);
          }
        })()
      );
    }
    for (const element of preRenderContainer.querySelectorAll(
      '[style*="background-image:url("]'
    )) {
      const styleAttribute = element.getAttribute("style");
      if (!styleAttribute) {
        continue;
      }
      const match = styleAttribute.match(backgroundImageRegExp);
      const url = match === null ? void 0 : match[0];
      if (!url) {
        continue;
      }
      fetchImagePromises.push(
        (async () => {
          try {
            const resp = await gmFetch(url);
            if (resp.ok) {
              const imageDataUrl = await blobToBase64(await resp.blob());
              element.setAttribute(
                "style",
                // Replacer function: the data URL is inserted literally, with no
                // `$`-pattern interpretation.
                styleAttribute.replace(backgroundImageRegExp, () => imageDataUrl)
              );
            }
          } catch (e) {
            console.warn(`Failed to fetch background image (${url}): ${e}`);
          }
        })()
      );
    }
    await Promise.all(fetchImagePromises);
  }

  /**
   * Adds one scraped page to the output tree.
   *
   * @param preRenderContainer  cloned `.preRenderContainer` snapshot of the page
   * @param chapterTitle        visible chapter title, or `undefined` if none was found
   */
  async function feed(preRenderContainer, chapterTitle) {
    await ensureOutputStyles(preRenderContainer);
    const preRenderContent = preRenderContainer.querySelector(
      "#preRenderContent"
    );
    if (scraperGMStore.getState().booleanOptions[INLINE_IMAGES_OPTION].value) {
      await inlineImages(preRenderContainer);
    } else {
      // Keep remote images, but point `src` at the real URL held in `data-src`.
      for (const image of preRenderContainer.querySelectorAll("img")) {
        image.src = image.getAttribute("data-src") ?? image.src;
      }
    }
    recursivelyRemoveDataAttr(preRenderContent);
    collapseSpans(preRenderContent);

    if (!scraperPageStore.getState().isNewChapter) {
      const continuationHtml = await minifyHtml(preRenderContent.innerHTML, {});
      const currentChapter = bodyElement.lastElementChild;
      if (currentChapter !== null) {
        currentChapter.insertAdjacentHTML("beforeend", continuationHtml);
        return;
      }
      // No chapter container exists yet, so there is nothing to append to.
      // Start one rather than silently dropping the page.
      console.warn("No chapter to continue; starting a new chapter container.");
    }

    preRenderContent.removeAttribute("id");
    preRenderContent.classList.add("readerChapterContent");
    const dataChapterTitle = queryTrimmedText("span.readerTopBar_title_chapter") ?? "";
    preRenderContent.setAttribute("data-chapter-title", dataChapterTitle);
    preRenderContent.setAttribute(
      "data-chapter-level",
      scraperSessionStore.getState().chapterLevelList[dataChapterTitle] || "1"
    );
    if (typeof chapterTitle === "string") {
      // The title is page text, so escape it before it is parsed as HTML.
      // NOTE(review): the template below carries no wrapping tag in the text
      // under review; heading markup may have been lost in extraction - confirm upstream.
      preRenderContent.insertAdjacentHTML(
        "afterbegin",
        any`

${escapeHtml(chapterTitle)}

`
      );
    }
    preRenderContent.innerHTML = await minifyHtml(preRenderContent.innerHTML, {});
    bodyElement.insertAdjacentElement("beforeend", preRenderContent);
  }

  _GM_registerMenuCommand("Start Scraping", startScraping);
  /**
   * Records each catalogue entry's level (keyed by its title), flags the session
   * as scraping and reloads so the hooks are active from the start of page load.
   */
  function startScraping() {
    const chapterLevelRegExp = new RegExp("(?<=chapterItem_level)\\d+");
    scraperSessionStore.setState({
      scraping: true,
      chapterLevelList: Object.fromEntries(
        [...document.querySelectorAll(".chapterItem_link")].map((e) => {
          const text = e.textContent;
          const levelMatch = e.className.match(chapterLevelRegExp);
          return [
            (text == null ? void 0 : text.trim()) || "",
            (levelMatch === null ? void 0 : levelMatch[0]) || "1"
          ];
        })
      )
    });
    window.location.reload();
  }

  _GM_registerMenuCommand("Cancel Scraping", cancelScraping);
  /** Stops scraping and discards everything collected so far. */
  function cancelScraping() {
    scraperSessionStore.setState({ scraping: false, chapterLevelList: {} });
    styleElement.innerHTML = "";
    bodyElement.innerHTML = "";
  }

  _GM_registerMenuCommand("Stop Scraping & Save", stopScrapingAndSave);
  /** Stops scraping, downloads the collected document and clears the buffers. */
  async function stopScrapingAndSave() {
    scraperSessionStore.setState({
      scraping: false,
      chapterLevelList: {}
    });
    saveContent(
      // NOTE(review): this template is empty in the text under review; it may
      // originally have held a doctype that was lost in extraction - confirm upstream.
      any`` + htmlElement.outerHTML,
      // `undefined` (missing or empty title) selects saveContent's default name.
      queryTrimmedText(".readerCatalog_bookInfo_title_txt")
    );
    styleElement.innerHTML = "";
    bodyElement.innerHTML = "";
  }

  _GM_registerMenuCommand("Set Click Interval", setClickInterval);
  /** Prompts for the delay before each next-page click; invalid input keeps the old value. */
  function setClickInterval() {
    const prevClickInterval = scraperGMStore.getState().clickInterval;
    let newClickInterval = parseFloat(
      window.prompt("Click interval (ms): ", prevClickInterval.toString()) || ""
    );
    if (!Number.isFinite(newClickInterval) || newClickInterval < 0) {
      newClickInterval = prevClickInterval;
    }
    scraperGMStore.setState({
      clickInterval: newClickInterval
    });
  }

  // One menu entry per boolean option; entries are re-registered on every
  // change so the check/cross mark in the label stays current.
  scraperGMStore.subscribe(
    (state) => state.booleanOptions,
    (() => {
      const menuIds = [];
      return (booleanOptions) => {
        for (let i = 0; i < booleanOptions.length; ++i) {
          if (typeof menuIds[i] !== "undefined") {
            _GM_unregisterMenuCommand(menuIds[i]);
          }
          menuIds[i] = _GM_registerMenuCommand(
            `${booleanOptions[i].name} ${booleanOptions[i].value ? "✔" : "✘"}`,
            () => {
              toggleBooleanOptions(i);
            }
          );
        }
      };
    })(),
    {
      fireImmediately: true
    }
  );

  /** Flips the option at `index` without mutating the objects held by the store. */
  function toggleBooleanOptions(index) {
    const nextBooleanOptions = scraperGMStore.getState().booleanOptions.map(
      (option, i) => i === index ? { ...option, value: !option.value } : option
    );
    scraperGMStore.setState({
      booleanOptions: nextBooleanOptions
    });
  }

  /** Removes the `data-wr-id` and `data-wr-co` attributes from `element` and all its descendants. */
  function recursivelyRemoveDataAttr(element) {
    const carriers = [element, ...element.querySelectorAll("[data-wr-id], [data-wr-co]")];
    for (const carrier of carriers) {
      // removeAttribute is a no-op when the attribute is absent.
      carrier.removeAttribute("data-wr-id");
      carrier.removeAttribute("data-wr-co");
    }
  }

  /** True for a <span> with no attributes whose innerHTML is at most one character long. */
  function isSimpleSpan(element) {
    if (element == null) {
      return false;
    }
    return element.tagName === "SPAN" && element.attributes.length === 0 && element.innerHTML.length <= 1;
  }

  /** Merges each run of adjacent simple spans into the first span of the run. */
  function collapseSpans(element) {
    for (const span of element.querySelectorAll("span")) {
      if (!isSimpleSpan(span)) {
        continue;
      }
      let nextElementSibling = span.nextElementSibling;
      while (isSimpleSpan(nextElementSibling)) {
        span.append(nextElementSibling.textContent ?? "");
        nextElementSibling.remove();
        nextElementSibling = span.nextElementSibling;
      }
    }
  }

  /**
   * Offers `content` as an HTML file download.
   *
   * @param content   the HTML document text
   * @param fileName  base name without extension; defaults to "微信读书"
   */
  function saveContent(content, fileName = "微信读书") {
    const contentBlob = new Blob([content], {
      type: "text/html;charset=utf-8"
    });
    const objectUrl = URL.createObjectURL(contentBlob);
    const dummyLink = document.createElement("a");
    dummyLink.href = objectUrl;
    dummyLink.download = `${fileName}.html`;
    document.body.appendChild(dummyLink);
    try {
      dummyLink.click();
    } finally {
      document.body.removeChild(dummyLink);
      // Revoke later rather than synchronously, in case the browser has not
      // yet started reading the blob when click() returns.
      setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
    }
  }

  /**
   * Reads a Blob into a base64 data URL.
   * Rejects with the reader's error when the read fails, so callers' `catch`
   * blocks run instead of receiving a `null` result.
   */
  function blobToBase64(blob) {
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(reader.result);
      reader.onerror = () => reject(reader.error);
      reader.readAsDataURL(blob);
    });
  }

})(gmFetch, zustandVanilla, zustandMiddleware, wasm_bindgen);