├── LICENSE ├── README.md └── wereadScraper.user.js /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Ze-Zheng Wu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # weread-scraper 2 | Export Weread books to html files 3 | 4 | 5 | 6 | ## Steps 7 | 8 | - Add [this userscript](https://greasyfork.org/zh-CN/scripts/450169-weread-scraper) in [Tampermonkey](https://www.tampermonkey.net/) 9 | - Select the book you want to save in Weread, e.g. https://weread.qq.com/web/bookDetail/f6432a905b73c0f64797a8d 10 | - Browse the first page of this book or any other page where you want to start to save, e.g. 
https://weread.qq.com/web/reader/f6432a905b73c0f64797a8dkc81322c012c81e728d9d180 11 | - Left-click the Tampermonkey icon and then click the "Start Scraping" button in the pop-up menu 12 | - Wait for scraping to complete; an HTML file will be automatically generated and downloaded 13 | - You can cancel an ongoing scraping process by clicking "Cancel Scraping" 14 | - You can stop an ongoing scraping process and save the contents scraped so far by clicking "Stop Scraping & Save" 15 | - You can set a next-page-click interval by clicking "Set Click Interval" and entering the desired value (in milliseconds) 16 | - That's it! 17 | 18 | ## Notes 19 | 20 | - Chrome and MS Edge are recommended for running this script 21 | - Firefox is recommended for printing the downloaded HTML to PDF 22 | - Many of the books use these fonts; install them for a better reading experience: [汉仪旗黑 50S](https://www.hanyi.com.cn/productdetail?id=831), [汉仪旗黑 65S](https://www.hanyi.com.cn/productdetail.php?id=834) and [汉仪楷体S](https://www.hanyi.com.cn/productdetail.php?id=814). There might be more fonts, but I didn't check them all. If you know of others, feel free to open an issue. By the way, `PingFang SC` would be a good choice as a fallback font 23 | - Scraping books with a very large number of pages can crash your browser, because I didn't use any streaming, chunking or garbage-collection mechanism. Further work can be done to fix this 24 | 25 | Enjoy, and please don't use this script for pirating books and selling them!
// ==UserScript==
// @name         Weread Scraper
// @namespace    https://github.com/Sec-ant/weread-scraper
// @version      0.4
// @description  Export Weread books to html file
// @author       Secant
// @match        https://weread.qq.com/web/reader/*
// @icon         https://weread.qq.com/favicon.ico
// @grant        GM_registerMenuCommand
// @grant        GM_setValue
// @grant        GM_getValue
// @run-at       document-start
// ==/UserScript==

/**
 * Weread Scraper.
 *
 * While the session flag "scrapeFlag" is "true", a MutationObserver watches
 * the reader page, copies each rendered page into an off-document <html>
 * tree, clicks the "next page" button after a configurable delay, and, once
 * the ending marker shows up, downloads the collected tree as an HTML file.
 */
(function () {
  "use strict";

  // How long to keep the blob URL alive after triggering the download, so the
  // browser has had a chance to start reading it before it is revoked.
  const REVOKE_DELAY_MS = 10000;

  // File name (without extension) used when no book title can be found.
  const FALLBACK_TITLE = "weread-book";

  // interactive: menu entries registered with the userscript manager
  GM_registerMenuCommand("Start Scraping", startScraping);
  GM_registerMenuCommand("Cancel Scraping", cancelScraping);
  GM_registerMenuCommand("Stop Scraping & Save", stopScrapingAndSave);
  GM_registerMenuCommand("Set Click Interval", setClickInterval);

  // construct the detached html root that accumulates the exported book
  const rootElement = document.createElement("html");
  const headElement = document.createElement("head");
  const charsetElement = document.createElement("meta");
  const styleElement = document.createElement("style");
  const bodyElement = document.createElement("body");
  // Declare the encoding inside the file itself: the Blob's MIME charset is
  // not stored with the downloaded file.
  charsetElement.setAttribute("charset", "utf-8");
  headElement.append(charsetElement, styleElement);
  rootElement.append(headElement, bodyElement);

  // initialize flags
  // Only this script writes the flag, and only as "true" / "false".
  const scrapeFlag = sessionStorage.getItem("scrapeFlag") === "true";
  let contentFound = false; // current page has been copied, next click is due
  let timeoutIsSet = false; // a delayed next-page click is pending
  let abortTimeout = false; // the pending click must be dropped when it wakes

  /**
   * Resolve after `ms` milliseconds.
   * @param {number} ms - delay in milliseconds
   * @returns {Promise<void>}
   */
  function sleep(ms) {
    return new Promise((resolve) => {
      setTimeout(resolve, ms);
    });
  }

  /**
   * Copy the book stylesheet (once) and the currently rendered page into the
   * detached export tree. Sets `contentFound` when a page was appended.
   */
  function collectRenderedContent() {
    // NOTE(review): presumably the container Weread fills with the rendered
    // page once it is ready (no inline style) — confirm against the site.
    const content = document.querySelector(".preRenderContainer:not([style])");
    if (contentFound || !content) {
      return;
    }

    // define styles (only while the export stylesheet is still empty)
    if (styleElement.childNodes.length === 0) {
      const contentStyle = content.querySelector("style");
      if (contentStyle?.childNodes.length) {
        // Retarget the rules at the exported class and normalize font names:
        // insert a space between 汉仪旗黑 and a following digit, and append
        // "S" to 汉仪楷体 when it is missing.
        styleElement.innerHTML = contentStyle.innerHTML
          .replaceAll(".readerChapterContent", ".preRenderContent")
          .replaceAll(/汉仪旗黑(?=\d)/g, "汉仪旗黑 ")
          .replaceAll(/汉仪楷体(?!S)/g, "汉仪楷体S");
        styleElement.append(".preRenderContent { page-break-after: always; }");
      }
    }

    // append contents
    const contentDiv = content.querySelector("#preRenderContent");
    if (!contentDiv) {
      return;
    }
    // Drop the id on the live node so the same page is not matched again.
    contentDiv.removeAttribute("id");
    for (const img of contentDiv.querySelectorAll("img")) {
      // Prefer the "data-src" attribute over the current src when present.
      img.src = img.getAttribute("data-src") || img.src;
    }
    bodyElement.append(contentDiv.cloneNode(true));
    contentFound = true;
  }

  // define observer handler: runs on every observed DOM mutation
  const contentObserver = new MutationObserver(async () => {
    collectRenderedContent();

    // turn to next page
    const nextPage = document.querySelector(".readerFooter_button");
    if (contentFound && nextPage && !timeoutIsSet) {
      contentFound = false;
      timeoutIsSet = true;
      // sleep for {click interval} ms
      await sleep(getClickInterval());
      timeoutIsSet = false;
      if (abortTimeout) {
        // Scraping was stopped while we slept: the buffers are already
        // cleared, so neither click nor re-run the ending check (which could
        // otherwise trigger a second, empty download).
        abortTimeout = false;
        return;
      }
      nextPage.dispatchEvent(
        new MouseEvent("click", {
          clientX: 1,
          clientY: 1,
        })
      );
    }

    // complete
    if (document.querySelector(".readerFooter_ending")) {
      stopScrapingAndSave();
    }
  });

  // start observation
  if (scrapeFlag) {
    contentObserver.observe(document.documentElement, {
      childList: true,
      subtree: true,
    });
  }

  /**
   * Read the stored next-page click interval.
   * @returns {number} interval in milliseconds, 0 when nothing is stored
   */
  function getClickInterval() {
    return GM_getValue("clickInterval", 0);
  }

  // menu functions

  /**
   * Stop observing, download everything collected so far as
   * "<book title>.html", then reset the buffers and flags.
   */
  function stopScrapingAndSave() {
    sessionStorage.setItem("scrapeFlag", "false");
    contentObserver.disconnect();

    const docBlob = new Blob([rootElement.outerHTML], {
      type: "text/html;charset=utf-8;",
    });
    const blobUrl = URL.createObjectURL(docBlob);

    // The title element may be missing; never lose the scraped content over a
    // file name.
    const titleElement = document.querySelector(
      ".readerCatalog_bookInfo_title_txt"
    );
    const bookTitle =
      titleElement?.textContent.trim() ||
      document.title.trim() ||
      FALLBACK_TITLE;

    const dummyLink = document.createElement("a");
    dummyLink.href = blobUrl;
    dummyLink.download = `${bookTitle}.html`;
    document.body.appendChild(dummyLink);
    dummyLink.click();
    document.body.removeChild(dummyLink);
    // Revoke later rather than immediately after click().
    setTimeout(() => URL.revokeObjectURL(blobUrl), REVOKE_DELAY_MS);

    styleElement.innerHTML = "";
    bodyElement.innerHTML = "";
    contentFound = false;
    // Only a pending delayed click needs to be told to abort.
    abortTimeout = timeoutIsSet;
    timeoutIsSet = false;
  }

  /** Set the session flag and reload so observation starts at document-start. */
  function startScraping() {
    sessionStorage.setItem("scrapeFlag", "true");
    window.location.reload();
  }

  /** Clear the session flag and reload, discarding anything collected. */
  function cancelScraping() {
    sessionStorage.setItem("scrapeFlag", "false");
    window.location.reload();
  }

  /**
   * Prompt for a new click interval and store it. A cancelled prompt or any
   * input that is not a finite, non-negative number keeps the previous value.
   */
  function setClickInterval() {
    const prevClickInterval = getClickInterval();
    const answer = window.prompt("Click interval (ms):", prevClickInterval);
    const parsed = Number.parseFloat(answer);
    const newClickInterval =
      Number.isFinite(parsed) && parsed >= 0 ? parsed : prevClickInterval;
    GM_setValue("clickInterval", newClickInterval);
  }
})();