├── .editorconfig
├── .eslintignore
├── .eslintrc.yml
├── .gitignore
├── .husky
    └── pre-commit
├── .mocharc.yml
├── .npmrc
├── .vscode
    └── launch.json
├── CHANGELOG.md
├── Jakefile.ts
├── LICENSE
├── NOTES.md
├── README.md
├── assets
    └── templates
    │   ├── cover.html
    │   ├── default.css
    │   ├── default.html
    │   └── epub
    │       ├── META-INF
    │           ├── com.apple.ibooks.display-options.xml
    │           └── container.xml
    │       └── OEBPS
    │           ├── content.opf
    │           ├── content.xhtml
    │           ├── cover.xhtml
    │           ├── nav.xhtml
    │           └── toc.ncx
├── bak
    ├── index.main.js
    └── index.worker.js
├── bin
    ├── .dev
    └── weread-spy.js
├── dev-notes
    └── 2023-10-21.md
├── docs
    └── epub.md
├── package.json
├── pnpm-lock.yaml
├── pre-generated
    ├── link.txt
    └── 红楼梦(全集).epub
├── prettier.config.cjs
├── scripts
    └── build.ts
├── src
    ├── bin.ts
    ├── commands
    │   ├── check.ts
    │   ├── download.ts
    │   ├── gen.ts
    │   ├── info.ts
    │   ├── launch.ts
    │   └── one.ts
    ├── common
    │   ├── books-map.ts
    │   └── index.ts
    ├── typings
    │   └── prettier-config.js
    └── utils
    │   ├── Book.ts
    │   ├── EpubModel
    │       └── index.ts
    │   ├── epub-img.ts
    │   ├── epub.ts
    │   ├── epubcheck.ts
    │   ├── pptr-anti-spider
    │       ├── bak.txt
    │       └── index.ts
    │   ├── pptr.ts
    │   └── processContent
    │       ├── example-start-info.json
    │       ├── index.ts
    │       └── worker
    │           ├── index.main.ts
    │           └── index.worker.ts
├── test
    ├── .gitkeep
    └── mocha.opts
├── tsconfig.json
└── tsup.config.ts


/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | indent_style = space
 5 | indent_size = 2
 6 | charset = utf-8
 7 | trim_trailing_whitespace = true
 8 | insert_final_newline = true
 9 | 
10 | [*.md]
11 | trim_trailing_whitespace = false
12 | 


--------------------------------------------------------------------------------
/.eslintignore:
--------------------------------------------------------------------------------
1 | /lib
2 | /dist
3 | 


--------------------------------------------------------------------------------
/.eslintrc.yml:
--------------------------------------------------------------------------------
 1 | # root
 2 | root: true
 3 | 
 4 | parser: '@typescript-eslint/parser'
 5 | 
 6 | plugins:
 7 |   - '@typescript-eslint'
 8 | 
 9 | extends:
10 |   - '@magicdawn'
11 |   - 'plugin:@typescript-eslint/recommended'
12 |   - prettier
13 | 
14 | rules:
15 |   '@typescript-eslint/ban-ts-comment': off
16 |   '@typescript-eslint/no-extra-semi': off
17 |   '@typescript-eslint/no-unused-vars': off
18 |   '@typescript-eslint/explicit-module-boundary-types': off
19 |   '@typescript-eslint/ban-types': off
20 |   '@typescript-eslint/no-explicit-any': off
21 |   prefer-const: warn
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *-debug.log
 2 | *-error.log
 3 | *.log
 4 | .DS_Store
 5 | .nyc_output
 6 | /.nyc_output
 7 | /dist
 8 | /lib
 9 | /package-lock.json
10 | /tmp
11 | coverage
12 | coverage.*
13 | data/
14 | logs
15 | node_modules
16 | npm-debug.log*
17 | yarn.lock
18 | 
19 | *.tsbuildinfo
20 | 
21 | # yarn2
22 | .yarn/*
23 | !.yarn/patches
24 | !.yarn/releases
25 | !.yarn/plugins
26 | !.yarn/sdks
27 | !.yarn/versions
28 | .pnp.*
29 | 
30 | # ts
31 | **/*.tsbuildinfo
32 | 
33 | # lib
34 | /lib-test
35 | 
36 | # for dev
37 | /example
38 | 


--------------------------------------------------------------------------------
/.husky/pre-commit:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | . "$(dirname "$0")/_/husky.sh"
3 | 
4 | pnpm lint-staged
5 | 


--------------------------------------------------------------------------------
/.mocharc.yml:
--------------------------------------------------------------------------------
1 | require: [should]
2 | reporter: spec
3 | timeout: 5000
4 | recursive: true
5 | 


--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
1 | registry=https://registry.npmmirror.com/
2 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "configurations": [
 3 |     {
 4 |       "type": "node",
 5 |       "request": "launch",
 6 |       "name": "bin.ts",
 7 |       "runtimeArgs": ["-r", "ts-node/register"],
 8 |       "args": ["${workspaceFolder}/src/bin.ts"],
 9 |       "autoAttachChildProcesses": false
10 |     },
11 |     {
12 |       "type": "node",
13 |       "request": "launch",
14 |       "name": "gen-epub",
15 |       "runtimeArgs": ["-r", "ts-node/register"],
16 |       "args": ["${workspaceFolder}/src/bin.ts", "gen-epub", "910419"],
17 |       "autoAttachChildProcesses": false
18 |     }
19 |   ]
20 | }
21 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # CHANGELOG
 2 | 
 3 | ## v0.7.6
 4 | 
 5 | - fix not existing issue when success
 6 | 
 7 | ## v0.7.5 2023-10-21
 8 | 
 9 | - update deps
10 | - use `process.env` to fix `__vue__` exposure
11 | 
12 | ## v0.7.4 2023-08-26
13 | 
14 | - update deps
15 | 
16 | ## v0.7.3 2023-07-21
17 | 
18 | - chore: update puppeteer, proxy-agent related
19 | 
20 | ## v0.7.2 2023-07-19
21 | 
22 | - fix: use store.subscribe, cause pptr modify fails.
23 | 
24 | ## v0.7.1 2023-07-19
25 | 
26 | - feat: use epubcheck-assets
27 | - feat: rm execa, use plain `child_process.execSync`
28 | 
29 | ## v0.7.0 2023-07-19
30 | 
31 | - fix `__vue__` exposure
32 | 
33 | ## v0.6.0 2023-06-20
34 | 
35 | - 支持多页
36 | 
37 | ### v0.5.2 2023-04-26
38 | 
39 | - 升级 w3c/epubcheck
40 | 
41 | ### v0.5.1 2023-04-26
42 | 
43 | - 清理更多没有使用的依赖
44 | 
45 | ### v0.5.0 2023-04-26
46 | 
47 | - 添加 `weread-spy info` 命令
48 | - 添加 `DEBUG_PROCESS_CONTENT=1` 支持, 不开启 workers process content
49 | - 添加 `weread-spy gen -D/--debug/--decompress <book-id-or-url-or-title>`, `-D` 解压缩 `.ePub` 文件, 方便 debug
50 | - 修复 cheerio, xml + cjk + pre/code 的处理, see https://github.com/cheeriojs/cheerio/issues/1198
51 | - 移除 gulp, 移除 globby, 直接用 fast-glob 更好
52 | 
53 | ### v0.4.0 2023-03-15
54 | 
55 | - 修复 htchapterContentHtml 抓取
56 | 
57 | ### v0.3.0 2022-12-25
58 | 
59 | - 强制打开微信读书 `__vue__` 属性的使用
60 | 
61 | ### v0.2.0 2022-09-03
62 | 
63 | - `one` / `dl` 命令新增 `--interval <毫秒数>` 切换章节间隔
64 | 
65 | ### v0.1.1 2022-07-09
66 | 
67 | - map.json 结构调整, 数据文件夹命名调整
68 | 
69 | ### v0.1.0 2022-07-09
70 | 
71 | - first publish on npm
72 | 
73 | ### v0.0.1 2020-09-12
74 | 
75 | - first release
76 | 


--------------------------------------------------------------------------------
/Jakefile.ts:
--------------------------------------------------------------------------------
 1 | import { execSync } from 'child_process'
 2 | import fse from 'fs-extra'
 3 | import path from 'path'
 4 | import { version } from './package.json'
 5 | 
 6 | const exec = (cmd: string) => { // log a shell command, then run it synchronously
 7 |   console.log('[exec]: %s', cmd)
 8 |   execSync(cmd, { stdio: 'inherit' }) // child shares this process's stdin/stdout/stderr
 9 | }
10 | 
11 | desc('show available tasks')
12 | task('default', () => {
13 |   exec('jake -t') // the default task only shells out to `jake -t`
14 | })
15 | 
16 | namespace('build', () => {
17 |   desc('build executable via pkg')
18 |   task('pkg', () => {
19 |     const dir = path.join(__dirname, 'dist', 'v' + version) // versioned output folder: dist/v<version>
20 |     fse.emptyDirSync(dir)
21 | 
22 |     // puppeteer's `.local-chromium` directory, copied next to the executable
23 |     fse.copySync(__dirname + '/node_modules/puppeteer/.local-chromium/', dir + '/puppeteer')
24 | 
25 |     // %1: node_modules/sharp/build/Release
26 |     // %2: path-to-executable/sharp/build/Release
27 |     fse.copySync(__dirname + '/node_modules/sharp/build/Release', dir + '/sharp/build/Release')
28 | 
29 |     // %1: node_modules/sharp/vendor/<version>/lib
30 |     // %2: path-to-executable/sharp/vendor/<version>/lib
31 |     fse.copySync(__dirname + '/node_modules/sharp/vendor', dir + '/sharp/vendor')
32 | 
33 |     // /node_modules/nunjucks/node_modules/chokidar/node_modules/fsevents/fsevents.node
34 |     // fsevents.node
35 |     // fse.copySync(
36 |     //   __dirname +
37 |     //     '/node_modules/nunjucks/node_modules/chokidar/node_modules/fsevents/fsevents.node',
38 |     //   dir + '/fsevents.node'
39 |     // )
40 | 
41 |     // epubcheck
42 |     fse.copySync(__dirname + '/assets/lib', dir + '/assets/lib')
43 |     fse.copySync(__dirname + '/assets/epubcheck.jar', dir + '/assets/epubcheck.jar')
44 | 
45 |     // build ts
46 |     exec('pnpm build')
47 | 
48 |     // pkg; the output path is quoted so a checkout path containing spaces stays one shell argument
49 |     exec(`pnpm dlx pkg -t node16-mac --out-path "${dir}" .`)
50 |   })
51 | })
52 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2018 Magicdawn(magicdawn@qq.com)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/NOTES.md:
--------------------------------------------------------------------------------
1 | ## Todos
2 | 
3 | - [ ] map.json 结构
4 | - [ ] download id.json 重命名
5 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # weread-spy
 2 | 
 3 | > 使用微信读书 Web 版生成 ePub 电子书 (需要无限卡权限, 或已购买的书)
 4 | 
 5 | [![npm version](https://img.shields.io/npm/v/weread-spy.svg?style=flat-square)](https://www.npmjs.com/package/weread-spy)
 6 | [![npm downloads](https://img.shields.io/npm/dm/weread-spy.svg?style=flat-square)](https://www.npmjs.com/package/weread-spy)
 7 | [![npm license](https://img.shields.io/npm/l/weread-spy.svg?style=flat-square)](http://magicdawn.mit-license.org)
 8 | 
 9 | > [!CAUTION]
10 | > 项目未维护, 可能会 [导致封号](https://github.com/magicdawn/weread-spy/issues/44#issuecomment-1810076252)
11 | 
12 | ## 声明
13 | 
14 | 本项目仅供技术研究使用, 请勿用于商业用途!<br/>
15 | 本项目仅供技术研究使用, 请勿用于商业用途!<br/>
16 | 本项目仅供技术研究使用, 请勿用于商业用途!<br/>
17 | 
18 | ## 安装
19 | 
20 | ```sh
21 | $ pnpm add weread-spy -g
22 | ```
23 | 
24 | ### 或者使用源码
25 | 
26 | - `git clone` 此项目
27 | - `pnpm i`
28 | - `pnpm link --global`
29 | 
30 | 这样就可以使用 `weread-spy` 命令了
31 | 
32 | ## epub 规范 & 阅读器
33 | 
34 | - 本项目使用 ePub v3 规范, 且使用 epubcheck lint, 如果有 lint 报错的地方, 请添加 issue
35 | - ePub 阅读器推荐 `Koodo Reader` or `Apple Books`
36 | 
37 | ## 一站式操作 `weread-spy one`
38 | 
39 | - 运行此命令, 会自动打开 puppeteer 浏览器
40 | - 扫码登录
41 | - 浏览自己想下载的书, 返回命令行. 监控到 url 像是一本书, 输入 `y` 开始生成
42 | 
43 | ### 注意事项
44 | 
45 | - 需要安装 `Java`, epub check 依赖 java, 可以认为是 ePub 文件的 lint 工具
46 | - 数据文件在 `~/Library/Application Support/weread-spy/` 目录下
47 | - 生成 epub 文件在当前目录下, 或者使用 `weread-spy one -d some-dir` 指定输出目录
48 | 
49 | #### Options
50 | 
51 | | flag         | desc                   | default |
52 | | ------------ | ---------------------- | ------- |
53 | | `-d,--dir`   | 最终 ePub 文件输出目录 | pwd     |
54 | | `--interval` | 切换章节间隔, 毫秒     | 0       |
55 | 
56 | ## 其他分步的命令
57 | 
58 | - `weread-spy dl -u <url>` 下载电子书信息
59 | - `weread-spy gen -u <url>` 根据下载的信息, 生成电子书
60 | - `weread-spy check` 跑 epub check
61 | 
62 | ## 更新日志
63 | 
64 | see CHANGELOG.md
65 | 
66 | ## License
67 | 
68 | the MIT License http://magicdawn.mit-license.org
69 | 


--------------------------------------------------------------------------------
/assets/templates/cover.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html class="type--{{filetype}}" style="height: 100vh;">
 3 | 	<head>
 4 | 		<meta charset="utf-8" />
 5 | 		<style type="text/css">
 6 | 			{{ style }}
 7 | 		</style>
 8 | 	</head>
 9 | 	<body style="height: 100vh;">
10 | 		<div class="cover">
11 | 			<div class="cover__content">
12 | 				<h1 class="cover__title">{{ title }}</h1>
13 | 				<p class="cover__subtitle">
14 | 					<time class="cover__date" datetime="{{ date }}"
15 | 						>{{ date }}</time
16 | 					>
17 | 				</p>
18 | 			</div>
19 | 		</div>
20 | 	</body>
21 | </html>
22 | 


--------------------------------------------------------------------------------
/assets/templates/default.css:
--------------------------------------------------------------------------------
  1 | :root {
  2 | 	--main-font: Palatino, 'Palatino Linotype', 'Times New Roman', 'Droid Serif',
  3 | 		Times, 'Source Serif Pro', serif, 'Apple Color Emoji', 'Segoe UI Emoji',
  4 | 		'Segoe UI Symbol';
  5 | 	--alt-font: 'helvetica neue', ubuntu, roboto, noto, 'segoe ui', arial,
  6 | 		sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';
  7 | 	--code-font: Menlo, Consolas, monospace;
  8 | 	--accent-color: black;
  9 | }
 10 | 
 11 | @page {
 12 | 	size: A5 portrait;
 13 | 	margin: 1cm 1cm 2cm;
 14 | }
 15 | 
 16 | html {
 17 | 	font-size: 12pt;
 18 | 	line-height: 1.3;
 19 | 	font-family: var(--main-font);
 20 | 	-webkit-print-color-adjust: exact;
 21 | }
 22 | 
 23 | h1,
 24 | h2,
 25 | h3,
 26 | h4,
 27 | h5,
 28 | h6 {
 29 | 	margin-bottom: 0.5em;
 30 | 	font-family: var(--alt-font);
 31 | 	font-weight: bold;
 32 | 	page-break-after: avoid;
 33 | }
 34 | 
 35 | a {
 36 | 	color: inherit;
 37 | 	text-decoration: underline;
 38 | }
 39 | 
 40 | /*
 41 | 	Going on a limb here,
 42 | 	but a.anchor in heading elements
 43 | 	is most likely a '#' or '§' anchor
 44 | 	we don't want to display in the PDF.
 45 |  */
 46 | h1 a.anchor,
 47 | h2 a.anchor,
 48 | h3 a.anchor,
 49 | h4 a.anchor,
 50 | h5 a.anchor,
 51 | h6 a.anchor {
 52 | 	visibility: hidden;
 53 | 	position: absolute;
 54 | }
 55 | 
 56 | th {
 57 | 	font-family: var(--alt-font);
 58 | }
 59 | 
 60 | code,
 61 | pre {
 62 | 	font-size: 0.85em;
 63 | }
 64 | 
 65 | pre code {
 66 | 	font-size: 1em;
 67 | }
 68 | 
 69 | /*
 70 | 	Don't display hidden elements
 71 |  */
 72 | [hidden],
 73 | [aria-hidden] {
 74 | 	display: none;
 75 | }
 76 | 
 77 | /*
 78 | 	Table of Contents page
 79 | 	----------------------------------------------------
 80 |  */
 81 | 
 82 | .toc {
 83 | 	page-break-before: always;
 84 | 	page-break-after: always;
 85 | }
 86 | 
 87 | /*
 88 | 	Article formatting
 89 | 	----------------------------------------------------
 90 |  */
 91 | 
 92 | article {
 93 | 	font-size: 1em;
 94 | }
 95 | 
 96 | article:not(:last-of-type) {
 97 | 	page-break-after: always;
 98 | }
 99 | 
100 | /*
101 | 	Article Header
102 | 	--------------
103 |  */
104 | 
105 | .article__header {
106 | 	margin: 0 0 1.3em;
107 | }
108 | 
109 | .article__title {
110 | 	font-size: 2.4em;
111 | 	margin: 0 0 0.25em;
112 | 	letter-spacing: -0.03em;
113 | 	line-height: 1.1;
114 | }
115 | 
116 | .article__url {
117 | 	font-style: italic;
118 | 	font-size: 0.9em;
119 | }
120 | 
121 | /*
122 | 	Article Content
123 | 	---------------
124 |  */
125 | 
126 | .article__content img {
127 | 	max-width: 100%;
128 | 	display: block;
129 | 	margin: 0 auto;
130 | }
131 | 
132 | .article__content figure {
133 | 	display: block;
134 | 	margin: 1.5em 0;
135 | 	padding: 0;
136 | 	text-align: center;
137 | }
138 | 
139 | .article__content figcaption {
140 | 	font-size: 0.8em;
141 | 	font-family: var(--alt-font);
142 | 	margin: 0.81em 0;
143 | 	line-height: 1.625;
144 | }
145 | 
146 | .article__content figure blockquote,
147 | .article__content figure pre {
148 | 	text-align: left;
149 | }
150 | 
151 | .article__content table,
152 | .article__content figure {
153 | 	page-break-inside: avoid;
154 | }
155 | 
156 | .article__content pre,
157 | .article__content code {
158 | 	font-family: var(--code-font);
159 | }
160 | 
161 | .article__content pre {
162 | 	border: 0.25pt solid #000;
163 | 	padding: 0.75em;
164 | 	font-size: 0.9em;
165 | 	white-space: pre-wrap;
166 | 	word-wrap: break-word;
167 | }
168 | 
169 | .article__content kbd,
170 | .article__content var,
171 | .article__content samp {
172 | 	padding: 0 0.5em;
173 | 	box-shadow: 2pt 2pt 0 #ccc;
174 | 	border: 0.5pt solid #000;
175 | 	border-radius: 0.25em;
176 | 	font-size: 0.9em;
177 | }
178 | 
179 | .article__content p {
180 | 	margin: 0;
181 | 	orphans: 3;
182 | 	widows: 3;
183 | }
184 | 
185 | /*
186 | 	Indent all subsequent paragraphs.
187 |  */
188 | .article__content p + p {
189 | 	text-indent: 2em;
190 | }
191 | 
192 | /*
193 | 	Fixes the text indent for images
194 | 	that get wrapped in a <p> tag
195 | 	by Readability.
196 | 
197 | 	Reference:
198 | 	https://github.com/danburzo/percollate/issues/48
199 |  */
200 | .article__content p + p > img:only-child {
201 | 	margin-left: -2em;
202 | }
203 | 
204 | .article__content hr {
205 | 	border: none;
206 | 	height: 0.5pt;
207 | 	margin: 1.3em 0;
208 | 	background: #000;
209 | }
210 | 
211 | .article__content blockquote {
212 | 	font-size: 0.9em;
213 | 	line-height: 1.44;
214 | 	padding-left: 2em;
215 | 	border-left: 3pt solid #000;
216 | 	margin-left: 0;
217 | }
218 | 
219 | .article__content table {
220 | 	width: 100%;
221 | 	border-collapse: collapse;
222 | 	page-break-inside: auto;
223 | 	font-size: 0.9em;
224 | 	line-height: 1.44;
225 | 	margin: 1.44em 0;
226 | }
227 | 
228 | .article__content th {
229 | }
230 | 
231 | .article__content th,
232 | .article__content td {
233 | 	text-align: left;
234 | 	vertical-align: top;
235 | 	padding: 0.36em 1em 0.36em 0;
236 | }
237 | 
238 | .article__content tr {
239 | 	border-bottom: 0.25pt solid #000;
240 | 	page-break-inside: avoid;
241 | 	page-break-after: auto;
242 | }
243 | 
244 | .article__content dt {
245 | 	font-weight: bold;
246 | }
247 | 
248 | .article__content ol,
249 | .article__content ul {
250 | 	padding-left: 2em;
251 | 	list-style-position: outside;
252 | 	margin: 0.65em 0;
253 | }
254 | 
255 | .article__content aside {
256 | 	font-family: var(--alt-font);
257 | 	font-size: 0.9em;
258 | 	line-height: 1.44;
259 | 	padding-left: 2em;
260 | }
261 | 
262 | .article__content details {
263 | 	margin: 0.65em 0;
264 | }
265 | 
266 | .article__content details > summary {
267 | 	font-weight: bold;
268 | 	font-size: 0.9em;
269 | 	font-family: var(--alt-font);
270 | }
271 | 
272 | /*
273 | 	Page header / footer
274 | 	--------------------
275 | 
276 | 	These are extracted when generating the PDF
277 | 	and are not subject to the page's CSS cascade.
278 | 
279 | 	They're just placed here for easier style coordination
280 |  */
281 | 
282 | .header-template {
283 | }
284 | 
285 | .footer-template {
286 | 	font-size: 10pt;
287 | 	font-weight: bold;
288 | }
289 | 
290 | /*
291 | 	Cover page
292 | 	----------
293 |  */
294 | 
295 | .cover {
296 | 	color: var(--accent-color);
297 | 	border: 0.5em solid;
298 | 	font-family: var(--cover-font, var(--alt-font));
299 | 	padding: 2em;
300 | }
301 | 
302 | .cover__title {
303 | 	font-size: 2.4em;
304 | 	margin: 0;
305 | 	line-height: 1.1;
306 | }
307 | 
308 | .cover__subtitle {
309 | 	margin: 1em 0;
310 | }
311 | 
312 | .cover__date {
313 | 	font-weight: bold;
314 | }
315 | 
316 | /*
317 | 	Filetype specific
318 | 	-----------------
319 |  */
320 | 
321 | .type--pdf body {
322 | 	margin: 0;
323 | 	padding: 0;
324 | }
325 | 
326 | .type--pdf a:not(.no-href):after {
327 | 	content: ' → ' attr(href) '';
328 | 	font-size: 0.8em;
329 | 	word-break: break-all;
330 | 	word-wrap: break-word;
331 | 	font-family: var(--alt-font);
332 | }
333 | 
334 | .type--pdf .cover,
335 | .type--epub .cover {
336 | 	position: absolute;
337 | 	overflow: hidden;
338 | }
339 | 
340 | .type--pdf .cover {
341 | 	top: 0;
342 | 	left: 0;
343 | 	right: 0;
344 | 	bottom: 0;
345 | }
346 | 
347 | .type--epub .cover {
348 | 	top: 2em;
349 | 	left: 2em;
350 | 	right: 2em;
351 | 	bottom: 2em;
352 | }
353 | 
354 | .type--pdf .cover__content,
355 | .type--epub .cover__content {
356 | 	position: absolute;
357 | 	top: 30%;
358 | 	left: 2em;
359 | 	right: 2em;
360 | 	transform: translate(0, -50%);
361 | }
362 | 
363 | .type--pdf .cover__sentinel {
364 | 	page-break-after: always;
365 | }
366 | 


--------------------------------------------------------------------------------
/assets/templates/default.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html class="type--{{filetype}}">
 3 | 	<head>
 4 | 		<meta charset="utf-8" />
 5 | 		<title>{{ title }}</title>
 6 | 
 7 | 		<style type="text/css">
 8 | 			{{style }}
 9 | 		</style>
10 | 	</head>
11 | 	<body>
12 | 		{% if options.use_cover %}
13 | 		<div class="cover">
14 | 			<div class="cover__content">
15 | 				<h1 class="cover__title">{{ title }}</h1>
16 | 				<p class="cover__subtitle">
17 | 					<time class="cover__date" datetime="{{ date }}"
18 | 						>{{ date }}</time
19 | 					>
20 | 				</p>
21 | 			</div>
22 | 		</div>
23 | 		<div class="cover__sentinel">&nbsp;</div>
24 | 		{% endif %} {% if options.use_toc %}
25 | 		<nav class="toc">
26 | 			<h1 class="toc__title">Table of Contents</h1>
27 | 			<ol class="toc__list">
28 | 				{% for item in items %}
29 | 				<li class="toc__list-item">
30 | 					<a href="#{{ item.id }}" class="no-href"
31 | 						>{{ item.title }}</a
32 | 					>
33 | 				</li>
34 | 				{% endfor %}
35 | 			</ol>
36 | 		</nav>
37 | 		{% endif %} {% for item in items %}
38 | 		<article id="{{ item.id }}" class="article">
39 | 			<header class="article__header">
40 | 				<h1 class="article__title">
41 | 					{{ item.title }}
42 | 				</h1>
43 | 				{% if item.byline %}
44 | 				<p class="article__byline">By <span>{{ item.byline }}</span></p>
45 | 				{% endif %}
46 | 				<p class="article__url">
47 | 					Source:
48 | 					<a class="no-href" href="{{item.url}}">{{item.url}}</a>
49 | 				</p>
50 | 			</header>
51 | 
52 | 			<div class="article__content">
53 | 				{{ item.content }}
54 | 			</div>
55 | 		</article>
56 | 		{% endfor %}
57 | 
58 | 		<!-- Template to use for page footer -->
59 | 		<template class="footer-template">
60 | 			<div class="text center">
61 | 				<span class="pageNumber"></span>
62 | 			</div>
63 | 		</template>
64 | 	</body>
65 | </html>
66 | 


--------------------------------------------------------------------------------
/assets/templates/epub/META-INF/com.apple.ibooks.display-options.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <display_options>
3 | 	<platform name="*">
4 | 		<option name="specified-fonts">true</option>
5 | 	</platform>
6 | </display_options>


--------------------------------------------------------------------------------
/assets/templates/epub/META-INF/container.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
3 | 	<rootfiles>
4 | 		<rootfile 
5 | 			full-path="OEBPS/content.opf" 
6 | 			media-type="application/oebps-package+xml"
7 | 		/>
8 | 	</rootfiles>
9 | </container>


--------------------------------------------------------------------------------
/assets/templates/epub/OEBPS/content.opf:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="weread-book-id">
 3 | 	<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
 4 |     <dc:identifier id="weread-book-id">{{ bookId }}</dc:identifier>
 5 | 		<dc:title>{{ title | e }}</dc:title>
 6 | 		<dc:date>{{ date }}</dc:date>
 7 | 		<meta property="dcterms:modified">{{ date }}</meta>
 8 |     <dc:creator>{{ creator | e }}</dc:creator>{# free-text fields are escaped like the title so & and < keep the XML well-formed #}
 9 |     <dc:publisher>{{ publisher | e }}</dc:publisher>
10 |     <dc:description>{{ description | e }}</dc:description>
11 | 		<dc:language>{{ lang | default('en-US')}}</dc:language>
12 | 		{% if cover -%}
13 | 			<meta name="cover" content="{{ cover.id }}" />
14 | 		{%- endif %}
15 | 	</metadata>
16 | 
17 | 	<manifest>
18 | 		{% for item in manifest -%}
19 | 			<item
20 |         id="{{ item.id }}"
21 |         href="{{ item.filename }}"
22 |         media-type="{{ item.mimetype }}"
23 |         {% if item.properties %}properties="{{ item.properties }}"{% endif %}
24 |       />
25 | 		{% endfor %}
26 | 	</manifest>
27 | 
28 | 	<spine toc="ncx">
29 |     {% for item in spine -%}
30 |       <itemref idref="{{ item.id }}" />
31 | 		{% endfor %}
32 | 	</spine>
33 | 
34 | 	<guide>
35 | 		<reference type="toc" title="{{ title | e }}" href="nav.xhtml" />
36 | 	</guide>
37 | </package>
38 | 


--------------------------------------------------------------------------------
/assets/templates/epub/OEBPS/content.xhtml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <html xmlns="http://www.w3.org/1999/xhtml">
 3 | 	<head>
 4 | 		<meta charset="utf-8" />
 5 | 		<title>{{ item.title | e }}</title>
 6 | 		<link rel="stylesheet" type="text/css" href="./style.css" />
 7 | 	</head>
 8 | 	<body>
 9 | 		<header class="article__header">
10 | 			<h1 class="article__title">{{ item.title | e }}</h1>
11 | 			{% if item.byline %}
12 | 			<p class="article__byline">By <span>{{ item.byline | e }}</span></p>
13 | 			{% endif %}
14 | 			<p class="article__url">
15 | 				Source:{# the URL is escaped in both places so a query string containing & stays valid XHTML #}
16 | 				<a class="no-href" href="{{ item.url | e }}">{{ item.url | e }}</a>
17 | 			</p>
18 | 		</header>
19 | 		<div class="article__content">
20 | 			{{ item.content }}
21 | 		</div>
22 | 	</body>
23 | </html>
24 | 


--------------------------------------------------------------------------------
/assets/templates/epub/OEBPS/cover.xhtml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh">
 3 |   <head>
 4 |     <title>封面</title>
 5 |   </head>
 6 |   <body>
 7 |     <div>
 8 |       <img src="{{ cover.filename }}" alt="" />
 9 |     </div>
10 |   </body>
11 | </html>
12 | 


--------------------------------------------------------------------------------
/assets/templates/epub/OEBPS/nav.xhtml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE html>
 3 | <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
 4 |   <head>
 5 |     <meta charset="utf-8" />
 6 |     <title>{{ title | e }}</title>
 7 |   </head>
 8 |   <body>
 9 |     <nav epub:type="toc" id="toc">
10 |       <ol>
11 |         {% for item in navItems %}
12 |         <li>
13 |           <a href="{{item.filename}}">{{ item.title | e }}</a>
14 |           {% if item.children %}
15 |           <ol>
16 |             {% for subitem in item.children %}
17 |             <li>
18 |               <a href="{{subitem.filename}}">{{ subitem.title | e }}</a>
19 |             </li>
20 |             {%- endfor %}
21 |           </ol>
22 |           {%- endif %}
23 |         </li>
24 |         {%- endfor %}
25 |       </ol>
26 |     </nav>
27 |   </body>
28 | </html>
29 | 


--------------------------------------------------------------------------------
/assets/templates/epub/OEBPS/toc.ncx:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
 3 | 	<head>
 4 | 		<meta name="dtb:uid" content="{{ bookId }}"/>
 5 | 		<meta name="dtb:depth" content="{{ maxNavDepth | default(1) }}"/>
 6 | 		<meta name="dtb:totalPageCount" content="0"/>
 7 | 		<meta name="dtb:maxPageNumber" content="0"/>
 8 | 	</head>
 9 | 	<docTitle>{# title is escaped here the same way content.opf and nav.xhtml escape it #}
10 | 		<text>{{ title | e }}</text>
11 | 	</docTitle>
12 | 	<navMap>{# two-level table of contents; labels are escaped so & and < in chapter titles keep the NCX well-formed #}
13 | 		{% for item in navItems %}
14 | 		<navPoint id="navPoint-for-{{item.id}}" playOrder="{{item.playOrder}}">
15 | 			<navLabel>
16 | 				<text>{{ item.title | e }}</text>
17 | 			</navLabel>
18 | 			<content src="{{item.filename}}" />
19 |       {% for subitem in item.children %}
20 |   		<navPoint id="navPoint-for-{{subitem.id}}" playOrder="{{subitem.playOrder}}">
21 |   			<navLabel>
22 |   				<text>{{ subitem.title | e }}</text>
23 |   			</navLabel>
24 |   			<content src="{{subitem.filename}}" />
25 |   		</navPoint>
26 |   		{%- endfor %}
27 | 		</navPoint>
28 | 		{%- endfor %}
29 | 	</navMap>
30 | </ncx>
31 | 


--------------------------------------------------------------------------------
/bak/index.main.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 如果使用 ts-node
 3 |  * 开发使用 __dirname + /index.worker.js
 4 |  * 内容是 require ts-node/register + require index.worker.ts
 5 |  *
 6 |  * ts 编译后,
 7 |  * ./index.worker.ts -> dist/index.worker.js
 8 |  * ./index.worker.js 不参与 ts 编译
 9 |  *
10 |  * esbuild bundle 后
11 |  */
12 | 
13 | // const workerFile = process.env.ESBUILD_BUNDLE
14 | //   ? __dirname + '/processContent.worker.js'
15 | //   : __dirname + '/index.worker.js'
16 | 


--------------------------------------------------------------------------------
/bak/index.worker.js:
--------------------------------------------------------------------------------
1 | /* eslint-disable @typescript-eslint/no-var-requires */
2 | const path = require('path')
3 | require('ts-node').register({ // enable on-the-fly TypeScript loading for this process
4 |   project: path.join(__dirname, '/../../../tsconfig.json'), // tsconfig.json three directories above this file
5 | })
6 | require(__dirname + '/index.worker.ts') // load the TypeScript worker that sits beside this shim
7 | 


--------------------------------------------------------------------------------
/bin/.dev:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/magicdawn/weread-spy/13e70510a0b7343689fe1bbf76c5f83096396bc3/bin/.dev


--------------------------------------------------------------------------------
/bin/weread-spy.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | 
 3 | /* eslint-disable @typescript-eslint/no-var-requires */
 4 | 
 5 | let isDev = require('fs').existsSync(__dirname + '/.dev') // dev mode is signalled by a `.dev` marker file beside this script
 6 | 
 7 | // force use dist
 8 | // FIXME: comment this before publish
 9 | // isDev = false
10 | 
11 | if (isDev) { // dev: register ts-node and run the TypeScript entry directly
12 |   require('ts-node').register({
13 |     project: __dirname + '/../tsconfig.json',
14 |   })
15 |   require('../src/bin')
16 | } else { // otherwise run the built output in dist
17 |   require('../dist/bin')
18 | }
19 | 


--------------------------------------------------------------------------------
/dev-notes/2023-10-21.md:
--------------------------------------------------------------------------------
1 | # 2023-10-21 16:22:26
2 | 
3 | prev version
4 | 
5 | - 修改 js, 暴露 `__vue__`
6 | - `$store.subscribe` 获取 html
7 | 


--------------------------------------------------------------------------------
/docs/epub.md:
--------------------------------------------------------------------------------
 1 | # ePub
 2 | 
 3 | ## doc
 4 | 
 5 | - http://www.theheratik.net/books/tech-epub/
 6 | 
 7 | ## toc
 8 | 
 9 | ePub 2 & 3
10 | 
11 | ### 3
12 | 
13 | - 3 使用 nav.xhtml
14 | - 2 使用 toc.ncx
15 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "name": "weread-spy",
  3 |   "description": "weread spy",
  4 |   "version": "0.7.6",
  5 |   "author": "magicdawn",
  6 |   "type": "module",
  7 |   "main": "dist/bin.js",
  8 |   "bin": {
  9 |     "weread-spy": "dist/bin.js"
 10 |   },
 11 |   "files": [
 12 |     "dist",
 13 |     "assets",
 14 |     "CHANGELOG.md"
 15 |   ],
 16 |   "bugs": {
 17 |     "url": "https://github.com/magicdawn/weread-spy/issues"
 18 |   },
 19 |   "engines": {
 20 |     "node": ">=18"
 21 |   },
 22 |   "homepage": "https://github.com/magicdawn/weread-spy#readme",
 23 |   "keywords": [
 24 |     "download",
 25 |     "weread"
 26 |   ],
 27 |   "scripts": {
 28 |     "prepare": "husky install",
 29 |     "_dev": "tsc -w --incremental",
 30 |     "_build:tsc": "rm -rf lib; rm tsconfig.tsbuildinfo; tsc",
 31 |     "_build": "./scripts/build.ts",
 32 |     "dev": "NODE_ENV=development tsup --watch",
 33 |     "build": "NODE_ENV=production tsup",
 34 |     "typecheck": "tsc --noEmit",
 35 |     "test": "mocha",
 36 |     "test-cover": "nyc --reporter=lcov --reporter=text mocha",
 37 |     "prepublishOnly": "pnpm build"
 38 |   },
 39 |   "repository": {
 40 |     "type": "git",
 41 |     "url": "git+ssh://git@github.com/magicdawn/weread-spy.git"
 42 |   },
 43 |   "dependencies": {
 44 |     "@magicdawn/prettier-config": "^0.0.2",
 45 |     "adm-zip": "^0.5.10",
 46 |     "cheerio": "1.0.0-rc.12",
 47 |     "clipanion": "^3.2.1",
 48 |     "comlink": "^4.4.1",
 49 |     "debug": "^4.3.4",
 50 |     "delay": "^6.0.0",
 51 |     "dl-vampire": "^2.0.0",
 52 |     "env-paths": "^3.0.0",
 53 |     "epubcheck-assets": "^5.1.0",
 54 |     "escape-string-regexp": "^5.0.0",
 55 |     "esm-utils": "^4.1.2",
 56 |     "fast-glob": "^3.3.1",
 57 |     "filenamify": "^6.0.0",
 58 |     "fs-extra": "^11.1.1",
 59 |     "inquirer": "9",
 60 |     "jszip": "^3.10.1",
 61 |     "lodash-es": "^4.17.21",
 62 |     "mime": "^3.0.0",
 63 |     "mimetype": "^0.0.8",
 64 |     "moment": "^2.29.4",
 65 |     "ms": "^2.1.3",
 66 |     "nunjucks": "^3.2.4",
 67 |     "pkg-up": "^4.0.0",
 68 |     "prettier": "^3.0.3",
 69 |     "promise.map": "^0.5.0",
 70 |     "puppeteer": "^21.4.0",
 71 |     "puppeteer-intercept-and-modify-requests": "^1.2.2",
 72 |     "sharp": "^0.32.6",
 73 |     "tslib": "^2.6.2",
 74 |     "type-fest": "^4.5.0",
 75 |     "urijs": "^1.19.11"
 76 |   },
 77 |   "devDependencies": {
 78 |     "@magicdawn/eslint-config": "^0.1.0",
 79 |     "@types/adm-zip": "^0.5.3",
 80 |     "@types/debug": "^4.1.10",
 81 |     "@types/fs-extra": "^11.0.3",
 82 |     "@types/inquirer": "^9.0.6",
 83 |     "@types/jake": "^0.0.35",
 84 |     "@types/lodash-es": "^4.17.10",
 85 |     "@types/mime": "^3.0.3",
 86 |     "@types/mocha": "^10.0.3",
 87 |     "@types/ms": "^0.7.33",
 88 |     "@types/node": "^20.8.7",
 89 |     "@types/nunjucks": "^3.2.5",
 90 |     "@types/prettier": "^2.7.3",
 91 |     "@types/sharp": "^0.31.1",
 92 |     "@types/urijs": "^1.19.22",
 93 |     "@typescript-eslint/eslint-plugin": "^6.8.0",
 94 |     "@typescript-eslint/parser": "^6.8.0",
 95 |     "esbuild": "^0.19.5",
 96 |     "eslint": "^8.52.0",
 97 |     "eslint-config-prettier": "^9.0.0",
 98 |     "husky": "^8.0.3",
 99 |     "lint-staged": "^15.0.2",
100 |     "mocha": "^10.2.0",
101 |     "should": "^13.2.3",
102 |     "tsup": "^7.2.0",
103 |     "typescript": "^5.2.2",
104 |     "why-is-node-running": "^2.2.2"
105 |   },
106 |   "pkg": {
107 |     "assets": [
108 |       "assets/templates/"
109 |     ],
110 |     "scripts": [
111 |       "lib/**/*.worker.js"
112 |     ]
113 |   },
114 |   "lint-staged": {
115 |     "*.{js,jsx,ts,tsx,less,md}": [
116 |       "prettier --write"
117 |     ]
118 |   },
119 |   "publishConfig": {
120 |     "registry": "https://registry.npmjs.org"
121 |   },
122 |   "packageManager": "pnpm@9.14.4+sha512.c8180b3fbe4e4bca02c94234717896b5529740a6cbadf19fa78254270403ea2f27d4e1d46a08a0f56c89b63dc8ebfd3ee53326da720273794e6200fcf0d184ab"
123 | }
124 | 


--------------------------------------------------------------------------------
/pre-generated/link.txt:
--------------------------------------------------------------------------------
1 | 红楼梦（全集）
2 | https://weread.qq.com/web/reader/41432f705de453414ca0b4akc81322c012c81e728d9d180
3 | 


--------------------------------------------------------------------------------
/pre-generated/红楼梦(全集).epub:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/magicdawn/weread-spy/13e70510a0b7343689fe1bbf76c5f83096396bc3/pre-generated/红楼梦(全集).epub


--------------------------------------------------------------------------------
/prettier.config.cjs:
--------------------------------------------------------------------------------
1 | // https://github.com/prettier/prettier/issues/12701
2 | module.exports = require('@magicdawn/prettier-config')
3 | 


--------------------------------------------------------------------------------
/scripts/build.ts:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ts-node
 2 | 
 3 | import esbuild from 'esbuild'
 4 | import path from 'path'
 5 | 
 6 | const projectHome = path.join(__dirname, '..')
 7 | 
 8 | esbuild.buildSync({
 9 |   entryPoints: {
10 |     'bin': path.join(projectHome, 'src/bin.ts'),
11 |     'processContent.worker': path.join(projectHome, 'src/utils/processContent/index.worker.ts'),
12 |   },
13 |   bundle: true,
14 |   outdir: path.join(projectHome, 'dist'),
15 |   platform: 'node',
16 |   target: ['node18'],
17 |   packages: 'external',
18 |   external: [path.join(projectHome, 'package.json')],
19 |   minify: true,
20 |   define: {
21 |     'process.env.ESBUILD_BUNDLE': 'true',
22 |   },
23 | })
24 | 
25 | console.log('[bundle]: success')
26 | 


--------------------------------------------------------------------------------
/src/bin.ts:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | 
 3 | import { Builtins, Cli } from 'clipanion'
 4 | import debugFactory from 'debug'
 5 | import { CheckCommand } from './commands/check.js'
 6 | import { DownloadCommand } from './commands/download.js'
 7 | import { GenCommand } from './commands/gen.js'
 8 | import { InfoCommand } from './commands/info.js'
 9 | import { LaunchCommand } from './commands/launch.js'
10 | import { OneCommand } from './commands/one.js'
11 | import { $esm } from './common/index.js'
12 | 
const { require } = $esm(import.meta)

// Turn on the default log namespaces unless the user already picked some via DEBUG.
// The detail namespace is only added outside of production.
if (!process.env.DEBUG) {
  const namespaces = [`weread-spy:*`]
  if (process.env.NODE_ENV !== 'production') {
    namespaces.push('weread-spy-detail:*')
  }
  debugFactory.enable(namespaces.join(','))
}

// @ts-ignore
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { version } = require('../package.json')

const cli = new Cli({
  binaryLabel: '微信读书下载器',
  binaryName: 'weread-spy',
  binaryVersion: version,
})

// built-in commands: help + version
cli.register(Builtins.HelpCommand)
cli.register(Builtins.VersionCommand)

// project commands
cli.register(OneCommand)
cli.register(DownloadCommand)
cli.register(GenCommand)
cli.register(LaunchCommand)
cli.register(CheckCommand)
cli.register(InfoCommand)

// run with the CLI arguments (node binary and script path stripped)
const cliArgs = process.argv.slice(2)
cli.runExit(cliArgs, { ...Cli.defaultContext })
51 | 


--------------------------------------------------------------------------------
/src/commands/check.ts:
--------------------------------------------------------------------------------
 1 | import { Command, Option } from 'clipanion'
 2 | import fg from 'fast-glob'
 3 | import epubcheck from '../utils/epubcheck.js'
 4 | 
 5 | export class CheckCommand extends Command {
 6 |   static usage = Command.Usage({
 7 |     description: `检查 epub 文件是否符合规范`,
 8 |   })
 9 | 
10 |   static paths = [['check'], ['c']]
11 | 
12 |   files: string[] = Option.Rest({ required: 1 })
13 | 
14 |   async execute() {
15 |     const files = this.files
16 | 
17 |     for (const f of files) {
18 |       const pattern = f.includes('*')
19 |       if (pattern) {
20 |         const subfiles = fg.sync(f)
21 |         subfiles.forEach((f) => epubcheck(f))
22 |         continue
23 |       }
24 |       if (f) {
25 |         epubcheck(f)
26 |       }
27 |     }
28 |   }
29 | }
30 | 


--------------------------------------------------------------------------------
/src/commands/download.ts:
--------------------------------------------------------------------------------
  1 | /* eslint-disable @typescript-eslint/no-non-null-assertion */
  2 | /* eslint-disable @typescript-eslint/no-explicit-any */
  3 | 
  4 | import { Command, Option } from 'clipanion'
  5 | import delay from 'delay'
  6 | import filenamify from 'filenamify'
  7 | import fse from 'fs-extra'
  8 | import path from 'path'
  9 | import * as pptr from 'puppeteer'
 10 | import { addBook, queryBook } from '../common/books-map.js'
 11 | import { BOOKS_DIR, baseDebug } from '../common/index.js'
 12 | import { getBrowser } from '../utils/pptr.js'
 13 | 
 14 | const debug = baseDebug.extend('download')
 15 | 
 16 | export class DownloadCommand extends Command {
 17 |   static usage = Command.Usage({
 18 |     description: `下载 epub`,
 19 |   })
 20 | 
 21 |   static paths = [['dl'], ['download']]
 22 | 
 23 |   url: string = Option.String('-u,--url', {
 24 |     description: 'book url, e.g(https://weread.qq.com/web/reader/9f232de07184869c9f2cc73)',
 25 |     required: true,
 26 |   })
 27 | 
 28 |   interval?: string = Option.String('--interval', {
 29 |     description: '数字, 切换章节间隔, 单位毫秒',
 30 |   })
 31 | 
 32 |   async execute() {
 33 |     let { url, interval } = this
 34 | 
 35 |     if (/^\w+$/.test(url) && !url.includes('/')) {
 36 |       // id
 37 |       if (/^\d+$/.test(url)) {
 38 |         url = (await queryBook({ id: url }))?.url || ''
 39 |         if (!url) {
 40 |           console.error('url not found for id = %s', url)
 41 |           process.exit(1)
 42 |         }
 43 |       } else {
 44 |         url = `https://weread.qq.com/web/bookDetail/${url}`
 45 |       }
 46 |     }
 47 | 
 48 |     main(url, { interval })
 49 |   }
 50 | }
 51 | 
 52 | export async function main(
 53 |   bookReadUrl: string,
 54 |   options: { page?: pptr.Page; browser?: pptr.Browser; interval?: number | string } = {}
 55 | ) {
 56 |   // create if not provided
 57 |   if (!options.page || !options.browser) {
 58 |     Object.assign(options, await getBrowser())
 59 |   }
 60 |   const browser = options.browser!
 61 |   const page = options.page!
 62 | 
 63 |   await page.goto(bookReadUrl)
 64 | 
 65 |   /**
 66 |    * Engine start
 67 |    */
 68 | 
 69 |   await waitReaderReady(page)
 70 |   await subscribeToVuexMutaion(page)
 71 | 
 72 |   // save map
 73 |   const startInfo = await getInfoFromPage(page)
 74 |   await addBook({ id: startInfo.bookId, title: startInfo.bookInfo.title, url: bookReadUrl })
 75 | 
 76 |   let usingInterval: number | undefined = undefined
 77 |   if (options.interval) {
 78 |     if (typeof options.interval === 'number') {
 79 |       usingInterval = options.interval
 80 |     }
 81 |     if (typeof options.interval === 'string') {
 82 |       usingInterval = Number(options.interval)
 83 |       if (isNaN(usingInterval)) {
 84 |         throw new Error('expect a number for --interval')
 85 |       }
 86 |     }
 87 |   }
 88 |   if (usingInterval) {
 89 |     debug('切换章节间隔 %s ms', usingInterval)
 90 |   }
 91 | 
 92 |   // 先切到 index = 1, 后面会切到 index = 0, 触发 mutation
 93 |   await changeChapter(page, startInfo.chapterInfos[1].chapterUid)
 94 | 
 95 |   const infos: any[] = []
 96 |   for (const [index, c] of startInfo.chapterInfos.entries()) {
 97 |     const { chapterUid } = c
 98 | 
 99 |     // delay before change chapter
100 |     if (index > 0 && usingInterval) {
101 |       await delay(usingInterval)
102 |     }
103 |     await changeChapter(page, chapterUid)
104 | 
105 |     const info = await getInfoFromPage(page)
106 |     infos.push(info)
107 |     debug('已收集章节 id=%s', chapterUid)
108 |   }
109 | 
110 |   // 书籍信息
111 |   const json = {
112 |     startInfo,
113 |     infos,
114 |   }
115 | 
116 |   const {
117 |     bookId,
118 |     bookInfo: { title },
119 |   } = startInfo
120 |   const bookJsonFile = path.join(BOOKS_DIR, filenamify(`${bookId}-${title}.json`))
121 |   await fse.outputJson(bookJsonFile, json, {
122 |     spaces: 2,
123 |   })
124 | 
125 |   debug('book id = %s url = %s', bookId, bookReadUrl)
126 |   debug('downloaded to %s', bookJsonFile)
127 | 
128 |   await browser.close()
129 | }
130 | 
131 | /**
132 |  * pptr Actions
133 |  */
134 | 
/**
 * Wait (polling every 100ms) until the page's vuex store reports
 * `reader.chapterContentState === 'DONE'`.
 */
export async function waitReaderReady(page: pptr.Page) {
  // runs inside the page, must stay self-contained
  const isContentDone = () => {
    const reader = globalThis.app?.__vue__?.$store?.state?.reader
    return reader?.chapterContentState === 'DONE'
  }
  return page.waitForFunction(isContentDone, { polling: 100 })
}
144 | 
/**
 * Subscribe to the page's vuex store: every mutation is logged in the page console,
 * and the payload of `updateReaderContentHtml` is kept in
 * `globalThis.__chapterContentHtmlArray__` for later collection.
 */
export async function subscribeToVuexMutaion(page: pptr.Page) {
  await page.evaluate(() => {
    globalThis.app.__vue__.$store.subscribe((mutation) => {
      console.log('VUEX mutation type=%s', mutation.type, mutation.payload)
      if (mutation.type !== 'updateReaderContentHtml') return
      globalThis.__chapterContentHtmlArray__ = mutation.payload
    })
  })
}
156 | 
/**
 * Ask the reader to switch to chapter `uid`, then wait until the store reports
 * that chapter as current with content state `DONE`.
 */
export async function changeChapter(page: pptr.Page, uid: number) {
  // trigger the switch through the vue instance attached to #routerView
  await page.$eval(
    '#routerView',
    (routerViewEl, targetUid) => {
      const vm = (routerViewEl as any).__vue__
      vm.changeChapter({ chapterUid: targetUid })
    },
    uid
  )

  // content loaded ...
  await waitReaderReady(page)

  // ... and loaded for the requested chapter
  await page.waitForFunction(
    (id) => {
      const reader = globalThis.app.__vue__.$store.state?.reader
      const currentChapterId = reader?.currentChapter?.chapterUid
      const currentState = reader?.chapterContentState
      console.log({ currentChapterId, currentState, id })
      return currentChapterId === id && currentState === 'DONE'
    },
    { polling: 100 },
    uid
  )
}
181 | 
/**
 * Read the vuex state and the captured chapter html array from the page,
 * and pick the reader fields needed later.
 */
export async function getInfoFromPage(page: pptr.Page) {
  const snapshot = await page.evaluate(() => ({
    state: globalThis.app.__vue__.$store.state,
    chapterContentHtmlArray: globalThis.__chapterContentHtmlArray__,
  }))

  const { reader } = snapshot.state
  return {
    bookId: reader.bookId,
    bookInfo: reader.bookInfo,
    chapterInfos: reader.chapterInfos,
    // taken from the vuex subscription, not from state.reader.chapterContentHtml
    chapterContentHtmlArray: snapshot.chapterContentHtmlArray,
    chapterContentStyles: reader.chapterContentStyles,
    currentChapterId: reader.currentChapter.chapterUid,
  }
}
199 | 


--------------------------------------------------------------------------------
/src/commands/gen.ts:
--------------------------------------------------------------------------------
 1 | import { genEpubFor } from '$utils/epub'
 2 | import epubcheck from '$utils/epubcheck'
 3 | import { Command, Option } from 'clipanion'
 4 | import { homedir } from 'os'
 5 | import path from 'path'
 6 | import { currentBooks, queryBookAny } from '../common/books-map.js'
 7 | 
 8 | export class GenCommand extends Command {
 9 |   static usage = Command.Usage({
10 |     description: `根据已下载的信息生成 epub 文件`,
11 |     details: `<book> can be id/url/title`,
12 |   })
13 | 
14 |   static paths = [['gen'], ['gen-epub']]
15 | 
16 |   // book can be
17 |   // url: 'book start url. e.g(https://weread.qq.com/web/reader/41432f705de453414ca0b4akc81322c012c81e728d9d180)',
18 |   // title: %s
19 |   // id: 812443
20 |   book = Option.String({ required: true, name: 'book' })
21 | 
22 |   clean = Option.Boolean('-c,--clean', {
23 |     description: 'clean imgs before gen',
24 |   })
25 | 
26 |   dir = Option.String('-d,--dir', {
27 |     description: 'epub 文件输出目录, 默认当前文件夹',
28 |   })
29 | 
30 |   decompress = Option.Boolean('-D,--debug,--decompress', {
31 |     description: 'decompress .ePub file for debug purpose',
32 |   })
33 | 
34 |   err(msg: string) {
35 |     console.error('Error: %s', msg)
36 |     process.exit(1)
37 |   }
38 | 
39 |   async execute() {
40 |     const { clean, dir, decompress } = this
41 | 
42 |     const book = await queryBookAny(this.book)
43 |     if (!book) return this.err('book not found')
44 | 
45 |     const id = book.id
46 |     const url = book.url
47 |     await genCommandMain({ url, id, clean: Boolean(clean), dir, decompress })
48 |   }
49 | }
50 | 
/**
 * Generate the epub of a downloaded book and run epubcheck on the result.
 *
 * @param id book id; takes precedence over `url`
 * @param url book url, used to look the id up in `currentBooks` when `id` is not given
 * @param clean passed through to `genEpubFor`
 * @param dir output directory, defaults to the current working directory
 * @param decompress passed through to `genEpubFor`
 * @returns the value returned by `genEpubFor`, or `undefined` when no book id can be resolved
 */
export async function genCommandMain({
  url,
  clean,
  id,
  dir,
  decompress = false,
}: {
  url?: string
  clean: boolean
  id?: string
  dir?: string
  decompress?: boolean
}) {
  let bookId: string | undefined
  if (id) {
    bookId = id
  }
  // url => id
  else if (url) {
    bookId = currentBooks.find((x) => x.url === url)?.id
  }

  if (!bookId) {
    console.error('can not find id !!!')
    return
  }

  // normalize
  dir = path.resolve(dir || process.cwd())
  // if run in project root, gen to `example/` subdir
  if (dir === path.join(homedir(), 'projects/weread-spy-private')) {
    dir = path.join(dir, 'example')
  }

  const file = await genEpubFor(bookId, dir, clean, decompress)
  epubcheck(file)

  // The former 100ms timer had an empty body (commented-out why-is-node-running
  // debugging); it only delayed process exit and is removed.

  return file
}
98 | 


--------------------------------------------------------------------------------
/src/commands/info.ts:
--------------------------------------------------------------------------------
 1 | import { Command, Usage } from 'clipanion'
 2 | import { BOOKS_MAP_FILE, currentBooks, loadBooks } from '../common/books-map.js'
 3 | import { BOOKS_DIR, PPTR_DATA_DIR } from '../common/index.js'
 4 | 
 5 | export class InfoCommand extends Command {
 6 |   static paths?: string[][] = [['info']]
 7 |   static usage?: Usage = {
 8 |     description: '查看相关文件夹位置, 储存的书籍信息',
 9 |   }
10 | 
11 |   async execute(): Promise<number | void> {
12 |     console.log('目录信息:')
13 |     console.log('  PPTR_DATA_DIR: %s', PPTR_DATA_DIR)
14 |     console.log('      BOOKS_DIR: %s', BOOKS_DIR)
15 |     console.log(' BOOKS_MAP_FILE: %s', BOOKS_MAP_FILE)
16 |     console.log()
17 | 
18 |     await loadBooks()
19 |     console.log('Books: \n')
20 |     for (const item of currentBooks) {
21 |       console.log('%s', item.title)
22 |       console.log('  ID: %s', item.id)
23 |       console.log('  URL: %s', item.url)
24 |       console.log('')
25 |     }
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/src/commands/launch.ts:
--------------------------------------------------------------------------------
 1 | import { Command } from 'clipanion'
 2 | import { getBrowser } from '../utils/pptr.js'
 3 | 
 4 | export class LaunchCommand extends Command {
 5 |   static usage = Command.Usage({
 6 |     description: '单纯启动内置的 puppeteer 浏览器',
 7 |   })
 8 | 
 9 |   static paths = [['launch']]
10 | 
11 |   async execute() {
12 |     const { browser } = await getBrowser()
13 |     // operate here
14 |   }
15 | }
16 | 


--------------------------------------------------------------------------------
/src/commands/one.ts:
--------------------------------------------------------------------------------
  1 | import { Command, Option } from 'clipanion'
  2 | import inquirer from 'inquirer'
  3 | import * as _ from 'lodash-es'
  4 | import * as pptr from 'puppeteer'
  5 | import URI from 'urijs'
  6 | import { baseDebug } from '../common/index.js'
  7 | import { getBrowser } from '../utils/pptr.js'
  8 | import { changeChapter, main as download, waitReaderReady } from './download.js'
  9 | import { genCommandMain as gen } from './gen.js'
 10 | 
 11 | const debug = baseDebug.extend('one')
 12 | 
/**
 * `one` command: launches the browser, watches navigation, and once the user confirms
 * on the console that the current page is the wanted book, downloads it and generates the epub.
 */
export class OneCommand extends Command {
  static usage = Command.Usage({
    description: '一站式操作, 启动浏览器, 浏览阅读网页, 回到控制台输入 y 开始生成',
  })

  static paths = [['one']]

  dir = Option.String('-d,--dir', {
    description: 'epub 文件输出目录, 默认当前文件夹',
  })

  interval?: string = Option.String('--interval', {
    description: '数字, 切换章节间隔, 单位毫秒',
  })

  async execute() {
    const { browser, page } = await getBrowser()

    // the inquirer prompt currently shown, if any
    let prompt: any

    // Called (debounced) on frame navigation: when the url path looks like a reader page,
    // ask the user whether to download it.
    const handler = async (e: pptr.Frame) => {
      const pageUrl = e.url()
      const uri = URI(pageUrl)
      const path = uri.pathname()
      // if (path.startsWith('/web/bookDetail/')) {
      if (path.startsWith('/web/reader/')) {
        // https://github.com/SBoudrias/Inquirer.js/issues/491#issuecomment-277595658
        // close the previous prompt before showing a new one
        if (prompt) {
          ;(prompt as any).ui.close()
          console.log('')
        }

        const title = await page.title()
        console.log('')
        console.log('当前浏览链接像是一本书:')
        console.log('   [url]: %s', pageUrl)
        console.log(' [title]: %s', title)

        // prompt
        prompt = inquirer.prompt([
          {
            type: 'confirm',
            name: 'confirm',
            message: `是否下载: `,
          },
        ])

        // confirm
        const { confirm } = await prompt
        if (!confirm) return

        // remove the listener, no more prompts after a confirmation
        page.off('framenavigated', handlerDebounced)

        // download confirmed
        // NOTE(review): not awaited, a failure inside decideDownload is not handled here
        decideDownload(page, browser, this.dir, this.interval)
      }
    }

    // collapse bursts of navigation events: the handler runs after 1000ms of silence
    const handlerDebounced = _.debounce(handler, 1000)
    page.on('framenavigated', handlerDebounced)

    // FIXME: only for dev-test
    // await page.goto('https://weread.qq.com/web/reader/e1932d70813ab82e7g014f5b')
    // await page.goto('https://weread.qq.com/web/reader/f1132f80813ab821eg018540')
  }
}
 81 | 
 82 | async function decideDownload(
 83 |   page: pptr.Page,
 84 |   browser: pptr.Browser,
 85 |   dir?: string,
 86 |   interval?: string
 87 | ) {
 88 |   await waitReaderReady(page)
 89 | 
 90 |   const state = await page.evaluate(() => {
 91 |     return globalThis.app.__vue__.$store.state
 92 |   })
 93 | 
 94 |   const chapterInfos = state.reader.chapterInfos
 95 |   // why? 不记得了
 96 |   // second + first
 97 |   const firstChapterUid = chapterInfos[0].chapterUid
 98 |   const secondChapterUid = chapterInfos[1].chapterUid
 99 |   // to second
100 |   await changeChapter(page, secondChapterUid)
101 |   // to first
102 |   await changeChapter(page, firstChapterUid)
103 | 
104 |   const bookCoverUrl = page.url()
105 | 
106 |   // download
107 |   await download(bookCoverUrl, { page, browser, interval })
108 |   debug('-'.repeat(20), 'download complete', '-'.repeat(20))
109 | 
110 |   // generate
111 |   const file = await gen({ url: bookCoverUrl, clean: true, dir })
112 |   console.log('')
113 |   debug('-'.repeat(20), 'generate complete', '-'.repeat(20))
114 |   debug('epub 文件: %s', file)
115 | }
116 | 


--------------------------------------------------------------------------------
/src/common/books-map.ts:
--------------------------------------------------------------------------------
 1 | import fse from 'fs-extra'
 2 | import path from 'path'
 3 | import { BOOKS_DIR } from './index.js'
 4 | 
 5 | // v1: json = { [id]: {title,id,url} }
 6 | // v2: json = [ {id, title, url} ]
 7 | export const BOOKS_MAP_FILE = path.join(BOOKS_DIR, 'map-v2.json')
 8 | 
 9 | export type BookItem = {
10 |   id: string
11 |   title: string
12 |   url: string
13 | }
14 | 
15 | export let currentBooks: BookItem[] = []
16 | 
// set once the map file has been read (or found missing)
let loaded = false

/**
 * Fill `currentBooks` from BOOKS_MAP_FILE; an absent file yields an empty list.
 * Only the first successful call does any work.
 */
export async function loadBooks() {
  if (loaded) return

  const exists = await fse.pathExists(BOOKS_MAP_FILE)
  currentBooks = exists ? await fse.readJSON(BOOKS_MAP_FILE) : []
  loaded = true
}
/** Write `currentBooks` to BOOKS_MAP_FILE as 2-space indented JSON. */
export async function saveBooks() {
  const jsonOptions = { spaces: 2 }
  return fse.outputJSON(BOOKS_MAP_FILE, currentBooks, jsonOptions)
}
30 | 
/**
 * Add (or replace) a book in the map and persist the map to disk.
 *
 * Existing entries with the same id or the same url are dropped first,
 * the new item goes to the end of the list.
 * The returned promise settles only after the map file has been written.
 */
export async function addBook(item: BookItem) {
  await loadBooks()

  const { id, url } = item
  // build a new list, `currentBooks` itself is not mutated
  const list = currentBooks.filter((x) => x.id !== id && x.url !== url)
  list.push(item)
  currentBooks = list

  // await the write: errors reach the caller, and the file is complete on return
  await saveBooks()
}
50 | 
/**
 * Find the first stored book whose fields strictly equal every field given in `query`.
 */
export async function queryBook(query: Partial<BookItem>) {
  await loadBooks()
  const conditions = Object.entries(query)
  return currentBooks.find((book) => conditions.every(([key, expected]) => book[key] === expected))
}
56 | 
/**
 * Query a book by a free-form string:
 * digits only => id, starting with http(s):// => url, anything else => title.
 */
export async function queryBookAny(query: string) {
  if (/^\d+$/.test(query)) return queryBook({ id: query })
  if (/^https?:\/\//.test(query)) return queryBook({ url: query })
  return queryBook({ title: query })
}
68 | 


--------------------------------------------------------------------------------
/src/common/index.ts:
--------------------------------------------------------------------------------
 1 | import { load as $load } from 'cheerio'
 2 | import d from 'debug'
 3 | import envPaths from 'env-paths'
 4 | import $esm from 'esm-utils'
 5 | import path from 'path'
 6 | import { pkgUpSync } from 'pkg-up'
 7 | import { SetOptional } from 'type-fest'
 8 | 
 9 | export { $esm }
10 | 
11 | const { __dirname } = $esm(import.meta)
12 | 
13 | import type exampleStartInfo from '../utils/processContent/example-start-info.json'
/**
 * Shape of one captured page snapshot, derived from the example JSON file,
 * with both html fields made optional.
 */
export type Info = SetOptional<
  typeof exampleStartInfo,
  // the old format is html: string
  // the new format is htmlArray: string[]
  'chapterContentHtml' | 'chapterContentHtmlArray'
>

/** Type of the `bookInfo` field of a snapshot. */
export type BookInfo = Info['bookInfo']
/** Type of a single entry of the `chapterInfos` field of a snapshot. */
export type ChapterInfo = Info['chapterInfos'][number]
23 | 
/**
 * Build the html of one chapter from a snapshot.
 *
 * Sources, by priority:
 * - `chapterContentHtmlArray` (2023-06-20, multiple pages)
 * - `chapterContentHtml` as string[] (seen since 2021-08-29)
 * - `chapterContentHtml` as a plain string, or '' when absent
 *
 * Each piece is reduced to the inner html of its <body> when it has one
 * (i.e. <html><head></head><body>{content}</body></html> => {content}),
 * and the pieces are joined with a newline.
 */
export function getBookHtml(info: Info) {
  let parts: string[]
  if (info.chapterContentHtmlArray) {
    parts = info.chapterContentHtmlArray
  } else if (Array.isArray(info.chapterContentHtml)) {
    parts = info.chapterContentHtml
  } else {
    parts = [info.chapterContentHtml || '']
  }

  const contents: string[] = []
  for (const fullHtml of parts) {
    const $ = $load(fullHtml, { decodeEntities: false, lowerCaseTags: true })
    const $body = $('body')
    contents.push($body.length ? $body.html() || '' : fullHtml)
  }
  return contents.join('\n')
}
48 | 
49 | export const baseDebug = d('weread-spy')
50 | export const baseDebugDetail = d('weread-spy-detail')
51 | 
52 | export interface Data {
53 |   startInfo: Info
54 |   infos: Info[]
55 | }
56 | 
// Project root = directory of the nearest package.json, searched upwards from this module.
const closestPkgJson = pkgUpSync({ cwd: __dirname })
if (!closestPkgJson) throw new Error('package.json not found')

export const PROJECT_ROOT = path.dirname(closestPkgJson)
62 | 
/**
 * Shared data directory
 * ApplicationSupport/weread-spy
 */

// `data` path reported by env-paths for the name 'weread-spy' (no suffix appended)
export const APP_SUP_DIR = envPaths('weread-spy', { suffix: '' }).data
// downloaded book data and the books map live here
export const BOOKS_DIR = path.join(APP_SUP_DIR, 'books')
// presumably the puppeteer user data dir; verify against utils/pptr
export const PPTR_DATA_DIR = path.join(APP_SUP_DIR, 'pptr-data')
71 | 


--------------------------------------------------------------------------------
/src/typings/prettier-config.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/magicdawn/weread-spy/13e70510a0b7343689fe1bbf76c5f83096396bc3/src/typings/prettier-config.js


--------------------------------------------------------------------------------
/src/utils/Book.ts:
--------------------------------------------------------------------------------
  1 | /* eslint-disable @typescript-eslint/no-non-null-assertion */
  2 | 
  3 | import { BOOKS_DIR, Data } from '$common'
  4 | import fg from 'fast-glob'
  5 | import filenamify from 'filenamify'
  6 | import fse from 'fs-extra'
  7 | import JSZip, { InputType, JSZipFileOptions } from 'jszip'
  8 | import * as _ from 'lodash-es'
  9 | import { trimEnd } from 'lodash-es'
 10 | import path from 'path'
 11 | import { FileItem, FileItemFields } from './EpubModel/index.js'
 12 | 
/**
 * One entry of the navigation tree (cover, toc page or chapter);
 * nested chapters are attached through `children`.
 */
export type NavItem = {
  id: string
  filename: string
  title: string
  // running counter assigned while the tree is built, starting at 1
  playOrder: number
  children?: NavItem[]
}
 20 | 
/**
 * Maps an input kind name to the type of the data it carries.
 * NOTE(review): appears to mirror the input types of JSZip (see the `InputType` import);
 * confirm against its usage further down in this file.
 */
interface InputByType {
  base64: string
  string: string
  text: string
  binarystring: string
  array: number[]
  uint8array: Uint8Array
  arraybuffer: ArrayBuffer
  blob: Blob
  stream: NodeJS.ReadableStream
}
 32 | 
 33 | export default class Book {
 34 |   data: Data
 35 | 
 36 |   // normal files
 37 |   manifestFiles: FileItem[] = []
 38 | 
 39 |   // 封面
 40 |   coverPageFile: FileItem // cover.xhtml
 41 | 
 42 |   // 导航
 43 |   navPageFile: FileItem
 44 | 
 45 |   // 章节
 46 |   textFiles: FileItem[] = []
 47 | 
 48 |   constructor(data: Data) {
 49 |     this.data = data
 50 |   }
 51 | 
 52 |   /**
 53 |    * getters
 54 |    */
 55 | 
  /** Book id, taken from the start snapshot. */
  get bookId() {
    return this.data.startInfo.bookId
  }
  /** Book title, taken from the start snapshot. */
  get bookTitle() {
    return this.data.startInfo.bookInfo.title
  }
  /** Directory of this book under BOOKS_DIR, named `<bookId>-<bookTitle>` (sanitized by filenamify). */
  get bookDir() {
    return path.join(BOOKS_DIR, filenamify(this.bookId + '-' + this.bookTitle))
  }
 65 | 
 66 |   get coverUrl(): string {
 67 |     let imgUrl = this.data.startInfo.bookInfo.cover
 68 | 
 69 |     // e.g
 70 |     // https://wfqqreader-1252317822.image.myqcloud.com/cover/723/26224723/s_26224723.jpg
 71 |     // https://wfqqreader-1252317822.image.myqcloud.com/cover/723/26224723/t9_26224723.jpg
 72 |     if (/(s)_\d+\.\w+$/.test(imgUrl)) {
 73 |       imgUrl = imgUrl.replace(/s_(\d+\.\w+)$/, 't9_$1')
 74 |     }
 75 | 
 76 |     return imgUrl
 77 |   }
 78 | 
 79 |   addFile = (f: FileItemFields | FileItem) => {
 80 |     if (f instanceof FileItem) {
 81 |       this.manifestFiles.push(f)
 82 |     } else {
 83 |       const fileItem = new FileItem(f)
 84 |       this.manifestFiles.push(fileItem)
 85 |     }
 86 |     return this
 87 |   }
 88 | 
 89 |   addTextFile = (options: FileItemFields) => {
 90 |     const f = new FileItem(options)
 91 |     this.textFiles.push(f)
 92 |     return this
 93 |   }
 94 | 
 95 |   getManifest() {
 96 |     return [this.coverPageFile, this.navPageFile, ...this.textFiles, ...this.manifestFiles].filter(
 97 |       Boolean
 98 |     )
 99 |   }
100 | 
101 |   getSpine() {
102 |     return [this.coverPageFile, this.navPageFile, ...this.textFiles].filter(Boolean)
103 |   }
104 | 
105 |   getNavInfo() {
106 |     const navItems: NavItem[] = []
107 |     let f: FileItem
108 |     let maxNavDepth = 1
109 |     let playOrder = 1
110 | 
111 |     // 封面
112 |     if (this.coverPageFile) {
113 |       f = this.coverPageFile
114 |       const { id, filename } = f
115 |       navItems.push({
116 |         id,
117 |         filename,
118 |         title: '封面',
119 |         playOrder: playOrder++,
120 |       })
121 |     }
122 | 
123 |     // 目录
124 |     if (this.navPageFile) {
125 |       f = this.navPageFile
126 |       const { id, filename } = f
127 |       navItems.push({
128 |         id,
129 |         filename,
130 |         title: '目录',
131 |         playOrder: playOrder++,
132 |       })
133 |     }
134 | 
135 |     // 章节
136 |     this.data.startInfo.chapterInfos.forEach((cur, index) => {
137 |       maxNavDepth = Math.max(maxNavDepth, cur.level)
138 | 
139 |       let arr = navItems
140 |       _.times(cur.level - 1, () => {
141 |         const item = _.last(navItems)
142 |         // FIXME: none null assert
143 |         if (!item!.children) item!.children = []
144 |         arr = item!.children
145 |       })
146 | 
147 |       const { id, filename } = this.textFiles[index]
148 |       arr.push({
149 |         id,
150 |         filename,
151 |         title: cur.title,
152 |         playOrder: playOrder++,
153 |       })
154 |     })
155 | 
156 |     return { navItems, maxNavDepth }
157 |   }
158 | 
159 |   /**
160 |    * zip related
161 |    */
162 | 
163 |   zip: JSZip = new JSZip()
164 |   zipFolders: [string, string][] = []
165 | 
166 |   // add file
167 |   addZipFile<T extends InputType>(
168 |     path: string,
169 |     content: InputByType[T],
170 |     options?: JSZipFileOptions
171 |   ) {
172 |     this.zip.file(path, content, options)
173 |   }
174 | 
175 |   // add folder
176 |   async addZipFolder(name: string, localFolder: string) {
177 |     const files = await fg('**/*.*', { cwd: localFolder })
178 |     const content = files.map((f) => fse.createReadStream(path.join(localFolder, f)))
179 |     files.forEach((f, index) => {
180 |       this.addZipFile(trimEnd(name, '/') + '/' + f, content[index])
181 |     })
182 |   }
183 | }
184 | 


--------------------------------------------------------------------------------
/src/utils/EpubModel/index.ts:
--------------------------------------------------------------------------------
 1 | import mime from 'mime'
 2 | 
 3 | export interface MetadataFields {
 4 |   bookId: string
 5 |   title: string
 6 | 
 7 |   /**
 8 |    * publish date
 9 |    */
10 | 
11 |   date: Date
12 |   modified: Date
13 | 
14 |   /**
15 |    * author
16 |    */
17 |   creator: string
18 | 
19 |   lang: string
20 | 
21 |   /**
22 |    * cover assets id
23 |    */
24 |   cover: string
25 | }
26 | 
/**
 * Plain fields describing one file of the epub, held by `FileItem`.
 */
export interface FileItemFields {
  filename: string

  /** in-memory content of the file */
  content?: string | Buffer

  /** local path providing the content (for content) */
  filepath?: string

  /** explicit id; `FileItem#id` falls back to one derived from `filename` */
  id?: string

  /** explicit mimetype; `FileItem#mimetype` falls back to a lookup by `filename` */
  mimetype?: string | null

  properties?: string
}
35 | 
/**
 * Contract for objects that can render themselves to a string.
 */
export interface ModelInterface {
  /** produce the string representation */
  generate(): string
}
39 | 
/**
 * Minimal data holder: keeps a shallow copy of the given fields in `data`.
 */
class Model<T> {
  data: T

  /**
   * @param options initial fields; copied shallowly, the passed object is not retained
   */
  constructor(options: T) {
    this.data = { ...options }
  }

  /** pretty-printed JSON of the current fields */
  toString() {
    return JSON.stringify(this.data, null, 2)
  }

  /**
   * Merge fields into the current data (shallow).
   *
   * @param options fields to overwrite; fields that are not listed keep their current value,
   *   so a partial object is enough
   */
  set(options: Partial<T>) {
    this.data = { ...this.data, ...options }
  }
}
52 | 
/**
 * Model holding the book-level metadata fields.
 */
export class Metadata extends Model<MetadataFields> {
  /** shallow copy of the current metadata fields */
  get val(): MetadataFields {
    const snapshot = { ...this.data }
    return snapshot
  }
}
60 | 
/**
 * Model of one epub file; `id` and `mimetype` are derived from `filename` when not given.
 */
export class FileItem extends Model<FileItemFields> {
  /** copy of the fields with `id` and `mimetype` resolved through their fallbacks */
  get val(): FileItemFields {
    const { id, mimetype } = this
    return { ...this.data, id, mimetype }
  }

  /*
   * plain accessors
   */

  get filename() {
    const { filename } = this.data
    return filename
  }

  get properties() {
    const { properties } = this.data
    return properties
  }

  get content() {
    const { content } = this.data
    return content
  }

  get filepath() {
    const { filepath } = this.data
    return filepath
  }

  /*
   * accessors with fallback
   */

  /** explicit mimetype when set, otherwise looked up from the filename */
  get mimetype() {
    const explicit = this.data.mimetype
    return explicit || mime.getType(this.data.filename)
  }

  /** explicit id when set, otherwise the filename with every `/` replaced by `__` */
  get id() {
    const explicit = this.data.id
    if (explicit) return explicit
    return this.data.filename.replace(/[/]/g, '__')
  }
}
98 | 


--------------------------------------------------------------------------------
/src/utils/epub-img.ts:
--------------------------------------------------------------------------------
  1 | import { getBookHtml } from '$common'
  2 | import { createHash } from 'crypto'
  3 | import debugFactory from 'debug'
  4 | import { dl, is404Error } from 'dl-vampire'
  5 | import fse from 'fs-extra'
  6 | import mime from 'mime'
  7 | import ms from 'ms'
  8 | import path from 'path'
  9 | import pmap from 'promise.map'
 10 | import sharp from 'sharp'
 11 | import Book from './Book.js'
 12 | import { getImgSrcs } from './processContent/index.js'
 13 | 
 14 | const debug = debugFactory('weread-spy:utils:epub-img')
 15 | const md5 = (s: string) => createHash('md5').update(s, 'utf8').digest('hex')
 16 | 
 17 | export interface ImgSrcInfo {
 18 |   [key: string]: { contentType: string; ext: string; localFile: string; properties?: string }
 19 | }
 20 | 
 21 | export default async function getImgSrcInfo(book: Book, clean: boolean) {
 22 |   let imgSrcInfo: ImgSrcInfo = {}
 23 | 
 24 |   const { data, bookDir } = book
 25 |   const cacheFile = path.join(bookDir, 'imgs.json')
 26 | 
 27 |   if (clean) {
 28 |     debug('cleaning: remove imgs.json %s', cacheFile)
 29 |     await fse.remove(cacheFile)
 30 | 
 31 |     const imgsDir = path.join(bookDir, 'imgs')
 32 |     debug('cleaning: remove imgs dir %s', imgsDir)
 33 |     await fse.remove(imgsDir)
 34 |   } else if (await fse.pathExists(cacheFile)) {
 35 |     /**
 36 |      * use cache
 37 |      */
 38 |     imgSrcInfo = await fse.readJsonSync(cacheFile)
 39 |     if (Object.keys(imgSrcInfo).length) {
 40 |       return imgSrcInfo
 41 |     }
 42 |   }
 43 | 
 44 |   const { chapterInfos } = data.startInfo
 45 | 
 46 |   // imgSrcs
 47 |   let imgSrcs: string[] = []
 48 |   for (let i = 0; i < chapterInfos.length; i++) {
 49 |     const html = getBookHtml(data.infos[i])
 50 |     const curSrcs = getImgSrcs(html)
 51 |     imgSrcs = imgSrcs.concat(curSrcs)
 52 |   }
 53 | 
 54 |   /**
 55 |    * img 去重
 56 |    */
 57 | 
 58 |   const imgSrcSet = new Set()
 59 |   const originalImgSrcs = [...imgSrcs]
 60 |   imgSrcs = []
 61 |   for (const src of originalImgSrcs) {
 62 |     if (imgSrcSet.has(src)) {
 63 |       continue
 64 |     } else {
 65 |       imgSrcSet.add(src)
 66 |       imgSrcs.push(src)
 67 |     }
 68 |   }
 69 |   debug(
 70 |     'imgSrcs collected, length = %s, unique length = %s',
 71 |     originalImgSrcs.length,
 72 |     imgSrcs.length
 73 |   )
 74 | 
 75 |   // head contentType is not correct
 76 |   // 1.下载
 77 |   // 2.识别 & 重命名
 78 | 
 79 |   imgSrcs.forEach((src) => {
 80 |     let localFile: string
 81 |     // https://res.weread.qq.com/wrepub/epub_25462428_587
 82 |     const match = /^https?:\/\/res\.weread\.qq\.com\/wrepub\/(epub_[\d\w_-]+)$/.exec(src)
 83 |     if (match) {
 84 |       const name = match[1]
 85 |       localFile = `imgs/${name}`
 86 |     } else {
 87 |       const hash = md5(src)
 88 |       localFile = `imgs/${hash}`
 89 |     }
 90 | 
 91 |     imgSrcInfo[src] = {
 92 |       contentType: '',
 93 |       ext: '',
 94 |       localFile,
 95 |     }
 96 |   })
 97 | 
 98 |   /**
 99 |    * cover
100 |    */
101 | 
102 |   const coverUrl = book.coverUrl
103 |   if (coverUrl) {
104 |     debug('add cover url = %s', coverUrl)
105 |     imgSrcs.push(coverUrl)
106 |     imgSrcInfo[coverUrl] = {
107 |       contentType: '',
108 |       ext: '',
109 |       localFile: 'imgs/cover', // ext will be add later
110 |       properties: 'cover-image',
111 |     }
112 |   }
113 | 
114 |   await pmap(
115 |     imgSrcs,
116 |     async (src) => {
117 |       const { localFile } = imgSrcInfo[src]
118 |       const file = path.join(bookDir, localFile)
119 | 
120 |       // download
121 |       try {
122 |         await dl({
123 |           url: src,
124 |           file,
125 |           // 重试3次, 每次超时 40s
126 |           retry: {
127 |             timeout: ms('40s'),
128 |             times: 3,
129 |           },
130 |         })
131 |       } catch (e) {
132 |         // @example
133 |         // https://res.weread.qq.com/wrepub/web/855825/copyright.jpg
134 |         // https://res.weread.qq.com/wrepub/CB_3300070708_83%28The_Earth_Through_Time%29.png
135 |         if (is404Error(e)) {
136 |           delete imgSrcInfo[src] // 剔除了, 当他不存在
137 |           return
138 |         }
139 | 
140 |         throw e
141 |       }
142 | 
143 |       // 识别
144 |       const buf = await fse.readFile(file)
145 |       const meta = await sharp(buf).metadata()
146 |       const ext = meta.format
147 |       // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
148 |       const contentType = mime.getType(ext!)
149 | 
150 |       // attach
151 |       const localFileNew = localFile + '.' + ext
152 |       Object.assign(imgSrcInfo[src], {
153 |         ext,
154 |         contentType,
155 |         localFile: localFileNew,
156 |       })
157 | 
158 |       // rename
159 |       await fse.rename(path.join(bookDir, localFile), path.join(bookDir, localFileNew))
160 |     },
161 |     10
162 |   )
163 |   debug('download img complete')
164 | 
165 |   // save cache
166 |   await fse.outputJson(cacheFile, imgSrcInfo, { spaces: 2 })
167 | 
168 |   return imgSrcInfo
169 | }
170 | 


--------------------------------------------------------------------------------
/src/utils/epub.ts:
--------------------------------------------------------------------------------
  1 | /*
  2 | 	Produce an EPUB file
  3 | 	--------------------
  4 | 	Reference:
  5 | 		https://www.ibm.com/developerworks/xml/tutorials/x-epubtut/index.html
  6 |     https://github.com/danburzo/percollate/blob/master/index.js#L516
  7 |  */
  8 | 
  9 | import { baseDebug, BOOKS_DIR, Data, PROJECT_ROOT } from '$common'
 10 | import AdmZip from 'adm-zip'
 11 | import filenamify from 'filenamify'
 12 | import fse from 'fs-extra'
 13 | import nunjucks from 'nunjucks'
 14 | import path from 'path'
 15 | import { performance } from 'perf_hooks'
 16 | import pmap, { pmapWorker } from 'promise.map'
 17 | import { pipeline } from 'stream/promises'
 18 | import { queryBook } from '../common/books-map.js'
 19 | import Book from './Book.js'
 20 | import getImgSrcInfo from './epub-img.js'
 21 | import { FileItem } from './EpubModel/index.js'
 22 | 
 23 | // worker
 24 | import { createWorkers } from './processContent/worker/index.main.js'
 25 | 
 26 | // this thread for debugger
 27 | import processContent from './processContent/index.js'
 28 | 
 29 | const debug = baseDebug.extend('utils:epub')
 30 | 
 31 | export async function gen({
 32 |   epubFile,
 33 |   data,
 34 |   clean,
 35 | }: {
 36 |   epubFile: string
 37 |   data: Data
 38 |   clean: boolean
 39 | }): Promise<void> {
 40 |   debug('epubgen %s -> %s', data.startInfo.bookId, epubFile)
 41 |   const templateBase = path.join(PROJECT_ROOT, 'assets/templates/epub/')
 42 | 
 43 |   const book = new Book(data)
 44 |   const { bookDir, addFile, addTextFile } = book
 45 | 
 46 |   // mimetype file must be first
 47 |   book.addZipFile('mimetype', 'application/epub+zip', { compression: 'STORE' })
 48 | 
 49 |   // static files from META-INF
 50 |   await book.addZipFolder('META-INF', path.join(templateBase, 'META-INF'))
 51 | 
 52 |   const [navTemplate, tocTemplate, opfTemplate, coverTemplate] = await Promise.all([
 53 |     fse.readFile(path.join(templateBase, 'OEBPS/nav.xhtml'), 'utf8'),
 54 |     fse.readFile(path.join(templateBase, 'OEBPS/toc.ncx'), 'utf8'),
 55 |     fse.readFile(path.join(templateBase, 'OEBPS/content.opf'), 'utf8'),
 56 |     fse.readFile(path.join(templateBase, 'OEBPS/cover.xhtml'), 'utf8'),
 57 |   ])
 58 | 
 59 |   // 章节 html
 60 |   const { chapterInfos, bookInfo, bookId } = data.startInfo
 61 | 
 62 |   // 图片
 63 |   const imgSrcInfo = await getImgSrcInfo(book, clean)
 64 | 
 65 |   /**
 66 |    * cover
 67 |    */
 68 | 
 69 |   const coverUrl = book.coverUrl
 70 |   let coverFileItem: FileItem | undefined // save for manifest.meta.cover
 71 |   let coverPageFileItem: FileItem | undefined
 72 | 
 73 |   if (book.coverUrl) {
 74 |     const { localFile } = imgSrcInfo[coverUrl]
 75 |     delete imgSrcInfo[coverUrl]
 76 | 
 77 |     // cover img
 78 |     coverFileItem = new FileItem({ filename: localFile }) // 内容随 imgs 打包
 79 |     addFile(coverFileItem)
 80 | 
 81 |     // cover xhtml
 82 |     coverPageFileItem = new FileItem({
 83 |       filename: 'cover.xhtml',
 84 |       content: nunjucks.renderString(coverTemplate, { cover: coverFileItem }),
 85 |     })
 86 |     book.coverPageFile = coverPageFileItem
 87 |   }
 88 | 
 89 |   // extra css
 90 |   const extraCss: string[] = []
 91 |   const customCssFile = path.join(bookDir, 'custom.css')
 92 |   if (await fse.pathExists(customCssFile)) {
 93 |     extraCss.push('custom.css')
 94 |     addFile({ filename: 'custom.css', filepath: customCssFile })
 95 |   }
 96 | 
 97 |   const DEBUG_PROCESS_CONTENT = !!process.env.DEBUG_PROCESS_CONTENT
 98 |   const processContentStart = performance.now()
 99 |   let processResults: Awaited<ReturnType<typeof processContent>>[] = []
100 | 
101 |   //
102 |   // processContent in this thread
103 |   //
104 |   if (DEBUG_PROCESS_CONTENT) {
105 |     processResults = await pmap(
106 |       chapterInfos,
107 |       async (chapterInfo, i, arr) => {
108 |         const c = chapterInfos[i]
109 |         const { chapterUid } = c
110 |         const cssFilenames = [`css/chapter-${chapterUid}.css`, ...extraCss]
111 |         return processContent(data.infos[i], {
112 |           cssFilenames,
113 |           imgSrcInfo,
114 |         })
115 |       },
116 |       5
117 |     )
118 |   }
119 |   //
120 |   // processContent in multiple threads, via workers
121 |   //
122 |   else {
123 |     const workers = createWorkers()
124 |     processResults = await pmapWorker(
125 |       chapterInfos,
126 |       async (chapterInfo, i, arr, worker) => {
127 |         const c = chapterInfos[i]
128 |         const { chapterUid } = c
129 |         const cssFilenames = [`css/chapter-${chapterUid}.css`, ...extraCss]
130 |         return await worker.api.processContent(data.infos[i], {
131 |           cssFilenames,
132 |           imgSrcInfo,
133 |         })
134 |       },
135 |       workers
136 |     )
137 |     workers.forEach((w) => w.nodeWorker.unref())
138 |     await new Promise((resolve) => setTimeout(resolve))
139 |   }
140 |   debug('processContent cost %s ms', (performance.now() - processContentStart).toFixed())
141 | 
142 |   for (let i = 0; i < chapterInfos.length; i++) {
143 |     const c = chapterInfos[i]
144 |     const { chapterUid } = c
145 |     const { xhtml, style } = processResults[i]
146 | 
147 |     // xhtml
148 |     {
149 |       const filename = `chapter-${chapterUid}.xhtml`
150 |       addTextFile({ filename, content: xhtml })
151 |     }
152 | 
153 |     // css
154 |     {
155 |       const filename = `css/chapter-${chapterUid}.css`
156 |       addFile({ filename, content: style })
157 |     }
158 |   }
159 | 
160 |   /**
161 |    * img assets (cover removed)
162 |    */
163 | 
164 |   for (const src of Object.keys(imgSrcInfo)) {
165 |     const { contentType, localFile, properties } = imgSrcInfo[src]
166 |     addFile({ filename: localFile, properties }) // content will be imgs dir
167 |   }
168 | 
169 |   const baseRenderData = {
170 |     bookId,
171 |     e: '',
172 |     title: bookInfo.title,
173 |     date: new Date(bookInfo.updateTime * 1000).toISOString().replace(/\.\d+Z$/, 'Z'),
174 |     lang: 'zh-CN',
175 |     creator: bookInfo.author,
176 |     publisher: bookInfo.publisher,
177 |     description: bookInfo.intro,
178 |     category: bookInfo.category,
179 | 
180 |     // cover
181 |     cover: coverFileItem,
182 |     coverPage: coverPageFileItem,
183 |   }
184 | 
185 |   /**
186 |    * nav
187 |    */
188 | 
189 |   // add nav.xhtml first
190 |   book.navPageFile = new FileItem({ filename: 'nav.xhtml', properties: 'nav' }) // 内容手动写入
191 |   const { navItems, maxNavDepth } = book.getNavInfo()
192 | 
193 |   {
194 |     const renderData = { ...baseRenderData, navItems, maxNavDepth }
195 | 
196 |     const nav = nunjucks.renderString(navTemplate, renderData)
197 |     book.addZipFile('OEBPS/nav.xhtml', nav)
198 | 
199 |     const toc = nunjucks.renderString(tocTemplate, renderData)
200 |     addFile({ filename: 'toc.ncx', content: toc, id: 'ncx' })
201 |   }
202 | 
203 |   const manifest = book.getManifest()
204 |   const spine = book.getSpine()
205 |   {
206 |     // content.opf
207 |     const renderData = { ...baseRenderData, manifest, spine }
208 |     const opf = nunjucks.renderString(opfTemplate, renderData)
209 |     book.addZipFile('OEBPS/content.opf', opf)
210 |   }
211 | 
212 |   // 添加文件
213 |   for (const f of manifest) {
214 |     let content: string | Buffer
215 | 
216 |     // f.content = '' 也需要写入
217 |     if (typeof f.content !== 'undefined' && f.content !== null) {
218 |       content = f.content
219 |     } else if (f.filepath) {
220 |       content = fse.readFileSync(f.filepath)
221 |     } else {
222 |       continue
223 |     }
224 | 
225 |     book.addZipFile(`OEBPS/${f.filename}`, content)
226 |   }
227 | 
228 |   // 添加图片
229 |   await book.addZipFolder('OEBPS/imgs', path.join(bookDir, 'imgs'))
230 | 
231 |   // write .epub file
232 |   const stream = book.zip.generateNodeStream({
233 |     streamFiles: true,
234 |     compression: 'DEFLATE',
235 |     compressionOptions: { level: 9 },
236 |   })
237 |   const output = fse.createWriteStream(epubFile)
238 |   await pipeline(stream, output)
239 | }
240 | 
/**
 * Resolve what is needed to generate the epub of a book.
 *
 * @param id book id, looked up via `queryBook` to get the title
 * @param dir directory the .epub file goes to
 * @returns the book data read from `<BOOKS_DIR>/<id>-<title>.json`, the target .epub path
 *   and the filename-safe title
 */
async function getInfo(id: string, dir: string) {
  const found = await queryBook({ id })
  const { title = '' } = found || {}
  const titleAsFilename = filenamify(title)

  const jsonFile = path.join(BOOKS_DIR, `${id}-${titleAsFilename}.json`)
  const data = fse.readJsonSync(jsonFile)

  // full-width parentheses become ascii ones, e,g 红楼梦（全集）
  const filename = `${titleAsFilename}.epub`.replace(/（/g, '(').replace(/）/g, ')')
  const file = path.join(dir, filename)

  return { data, file, titleAsFilename }
}
253 | 
/**
 * Generate the epub of the book with the given id.
 *
 * @param id book id
 * @param dir output directory (created when missing)
 * @param clean passed through to `gen`
 * @param decompress also extract the generated epub into `<dir>/<title>.epub.d`
 * @returns path of the generated .epub file
 */
export async function genEpubFor(id: string, dir: string, clean: boolean, decompress = false) {
  const info = await getInfo(id, dir)
  const epubFile = info.file

  await fse.ensureDir(dir)
  await gen({ epubFile, data: info.data, clean })

  if (decompress) {
    const epubUnzipDir = path.join(dir, info.titleAsFilename + '.epub.d')
    debug('decompress: to %s', epubUnzipDir)
    await fse.ensureDir(epubUnzipDir)
    new AdmZip(epubFile).extractAllTo(epubUnzipDir, true)
  }

  debug('epub created: %s', epubFile)
  return epubFile
}
275 | 


--------------------------------------------------------------------------------
/src/utils/epubcheck.ts:
--------------------------------------------------------------------------------
import { baseDebug } from '$common/index'
import { execFileSync, execSync } from 'child_process'
import epubcheckJarPath from 'epubcheck-assets'
 4 | 
 5 | const debug = baseDebug.extend('utils:epubcheck')
 6 | 
 7 | // epubchecker 安装时从 github release 下载, 且没有使用 http_proxy
 8 | // function getJarPath() {
 9 | //   const dir = path.dirname(require.resolve('epubchecker/package.json'))
10 | //   const jar = fg.sync('**/epubcheck.jar', { cwd: dir, absolute: true })[0]
11 | //   return jar
12 | // }
13 | 
/**
 * Path of epubcheck.jar, as exported by the `epubcheck-assets` package.
 */
function getJarPath() {
  const jarPath = epubcheckJarPath
  return jarPath
}
18 | 
/**
 * Run epubcheck (`java -jar epubcheck.jar <file>`) on an epub file.
 *
 * The output of the check goes straight to this process' stdio. A failing check is logged,
 * not thrown. The process exits with code 1 when the jar can not be located.
 *
 * @param file path of the .epub file to check
 */
export default function epubcheck(file: string) {
  const epubcheckJar = getJarPath()
  if (!epubcheckJar) {
    console.error('can not find epubcheck.jar')
    process.exit(1)
  }

  // Arguments are passed as an array, without a shell: paths containing quotes, spaces or
  // other shell metacharacters are handed to java untouched.
  const args = ['-jar', epubcheckJar, file]
  debug('[exec]: %s', ['java', ...args].join(' '))
  try {
    execFileSync('java', args, { stdio: 'inherit' })
    debug('success')
  } catch (e) {
    console.error(e instanceof Error ? e.stack || e : e)
  }
}
35 | 


--------------------------------------------------------------------------------
/src/utils/pptr-anti-spider/bak.txt:
--------------------------------------------------------------------------------
 1 | type InterceptionEvent = Parameters<NonNullable<Interception['modifyResponse']>>[0]['event']
 2 | 
 3 | function processJsFile(js: string, event: InterceptionEvent) {
 4 |   const url = event.request.url
 5 |   const pathname = new URL(url).pathname
 6 |   const filename = path.basename(pathname)
 7 | 
 8 |   // app.*.js
 9 |   if (/^app\.\w+\.js$/.test(filename)) {
10 |     return processAppJs(js)
11 |   }
12 | 
13 |   // 期望
14 |   if (!/^\d+\.\w+\.js$/i.test(filename)) return js
15 | 
16 |   // vuex
17 |   // this['commit'] = function(_0xe59d72, _0x31227c, _0x3aa954) {
18 |   //     return _0x5068e8['call'](_0x43325a, _0xe59d72, _0x31227c, _0x3aa954);
19 |   // }
20 | 
21 |   const matches = Array.from(js.matchAll(/this\['commit'\] *?= *?function\(/g))
22 |   console.log('file: %s, matches', filename, matches)
23 | 
24 |   if (matches.length) {
25 |     debug('current patch vuex store.commit')
26 |     debugger
27 |   }
28 | 
29 |   return js
30 | }
31 | 
32 | 
33 | import { Page } from 'puppeteer'
34 | import { baseDebug } from '../../common/index'
35 | 
36 | const debug = baseDebug.extend('utils:anti-spider')
37 | 
38 | export const HTML_CONTENT_STORAGE_KEY = '__chapterContentHtml__'
39 | 
/**
 * Wrap `$store.commit` of the vue app mounted on `#app`, inside the page.
 *
 * Every commit is logged; on `updateReaderContentHtml` the first payload item is stored on
 * `globalThis` under `HTML_CONTENT_STORAGE_KEY`. The original commit is then called with the
 * same arguments.
 *
 * @param page page hosting the app
 */
export async function hookVuexCommit(page: Page) {
  debug('hookVuexCommit')

  // inject into $store.commit
  await page.$eval(
    '#app',
    (appEl: any, storageKey: string) => {
      const store = appEl.__vue__.$store
      const originalCommit = store.commit
      store.commit = function (...commitArgs: any[]) {
        // action, payload; the meaning of a third argument is unknown
        const [action, payload] = commitArgs
        console.log('injected vuex.commit: %s %s', action, payload)

        if (action === 'updateReaderContentHtml') {
          globalThis[storageKey] = payload[0]
        }
        return originalCommit(...commitArgs)
      }
    },
    HTML_CONTENT_STORAGE_KEY
  )
}
62 | 


--------------------------------------------------------------------------------
/src/utils/pptr-anti-spider/index.ts:
--------------------------------------------------------------------------------
  1 | import { baseDebugDetail } from '$common/index'
  2 | 
  3 | const debug = baseDebugDetail.extend('pptr:anti-spider')
  4 | 
/**
 * Patch the source of an intercepted script before it reaches the page.
 *
 * Three text-level rewrites are applied, each a no-op when its pattern is not found:
 * 1. `removeDebuggerLimit` output is used when it returns a value,
 * 2. the `VUE_DISMISS_DEVTOOLS` env value is set to 'yes' and the `'yes'===…env…` checks are
 *    inverted (intended to keep `__vue__` exposed, see the link below),
 * 3. the vuex `this['commit'] = function(...)` assignment is wrapped so the store and the
 *    `updateReaderContentHtml` payload are recorded on `window`.
 *
 * @param js source of the script; `undefined` is treated as an empty string
 * @param fileBasename name of the script, only used for logging
 * @returns the patched source
 */
export function processAppJs(js: string | undefined, fileBasename: string) {
  debug('modifying %s', fileBasename)
  js ||= ''

  // debugger
  js = removeDebuggerLimit(js) || js

  // expose `__vue__`
  // https://github.com/vuejs/vue/blob/49b6bd4264c25ea41408f066a1835f38bf6fe9f1/src/core/instance/lifecycle.ts#L78
  // implemented in Vue.prototype._update
  {
    // set the env variable
    // _0x44ebd7['env'] = {},
    // _0x44ebd7['argv'] = [],
    js = js.replace(/(_0x\w+)\['env'\]=\{\},(_0x\w+)\['argv'\]=\[\],/, (match, var1, var2) => {
      return `${var1}.env = { VUE_DISMISS_DEVTOOLS: 'yes' }, ${var2}.argv = [],`
    })

    // 'yes'===_0x16452a['env']['VUE_DISMISS_DEVTOOLS'] && _0x1be68e && (_0x1be68e['__vue__'] = null),
    // 'yes'===_0x16452a['env'][_0x3744('0x22b')] && _0x5ad1f7['$el'] && (console['log']('__vue__'),
    js = js.replace(/'yes'===([_\w]+\['env'\])/g, `'yes' !== $1`)

    // 'yes'===_0x1372e5[_0x3db9('0x5ba')]['VUE_DISMISS_DEVTOOLS'] && _0x243be5 && (_0x243be5[_0x3db9('0xcce')] = null),
    // 'yes'===_0x1372e5[_0x3db9('0x5ba')][_0x3db9('0xf2')] && _0x45b52d['$el'] && (console['log'](_0x3db9('0xcce')),
    js = js.replace(/'yes'===([_\w]+\[_0x\w+\('0x\w+'\)\]\[)/g, `'yes' !== $1`)

    // _0x2a82('0x207')===_0x5d11b9['env']['VUE_DISMISS_DEVTOOLS']&&_0x1051e8[_0x2a82('0xad7')] && (console['log']('__vue__'),
    // _0x1051e8['$el'][_0x2a82('0x2b2')] = _0x1051e8),
    // too many variants of this form, not handled
  }

  {
    // vuex
    // this['commit'] = function(_0xe59d72, _0x31227c, _0x3aa954) {
    //     return _0x5068e8['call'](_0x43325a, _0xe59d72, _0x31227c, _0x3aa954);
    // }
    // The replacement keeps the original argument names and function body, and prepends the
    // hook code; only the first occurrence is rewritten (the regex has no `g` flag).
    js = js.replace(
      /this\['commit'\]=function\((_0x\w+),(_0x\w+),(_0x[\w]+)\)\{([ \S]+?)\}/,
      (match, arg1, arg2, arg3, functionBody) => {
        return `
        // access store
        window.__stores__ ||= new WeakSet(),
        window.__stores__.add(this),

        this['commit'] = function(${arg1}, ${arg2}, ${arg3}) {
          // hook
          const [mutation, payload] = [${arg1}, ${arg2}]
          console.log('injected vuex.commit: ', mutation, payload)

          // access store
          if (this) {
            window.__stores__.add(this)
            if (Object.keys(this._actions).length > 10) {
              window.__store__ = this
            }
          }

          if (mutation === 'updateReaderContentHtml') {
            window.__store__ = this
            window.__chapterContentHtmlArray__ = payload
          }

          ${functionBody}
        }`
      }
    )
  }

  return js
}
 75 | 
 76 | export function findMatchingIndex(input: string, fi: number) {
 77 |   const pairs = {
 78 |     '(': ')',
 79 |     '{': '}',
 80 |     '[': ']',
 81 |   }
 82 | 
 83 |   const left = input[fi]
 84 |   const right = pairs[left]
 85 |   if (!right) {
 86 |     return -1
 87 |   }
 88 | 
 89 |   let count = 1 // input[fi] = left
 90 | 
 91 |   for (let i = fi + 1, len = input.length; i < len; i++) {
 92 |     const cur = input[i]
 93 |     if (cur === right) {
 94 |       count--
 95 |       if (count === 0) {
 96 |         return i
 97 |       }
 98 |     } else if (cur === left) {
 99 |       count++
100 |     }
101 |   }
102 | 
103 |   return -1 // not found
104 | }
105 | 
106 | /**
107 | (function() {
108 |     _0x24fa27(this, function() {
109 |         var _0x2fc99b = new RegExp('function\x20*\x5c(\x20*\x5c)');
110 |         var _0x2ca847 = new RegExp('\x5c+\x5c+\x20*(?:[a-zA-Z_$][0-9a-zA-Z_$]*)','i');
111 |         var _0x1fee8d = _0x4dab42('init');
112 |         if (!_0x2fc99b[_0x1d23('0x403')](_0x1fee8d + 'chain') || !_0x2ca847['test'](_0x1fee8d + 'input')) {
113 |             _0x1fee8d('0');
114 |         } else {
115 |             _0x4dab42();
116 |         }
117 |     })();
118 | }());
119 | 
120 | const _0x2fdb16 = new RegExp(_0x7031('0x945'));
121 |  */
122 | 
123 | // 这种变种太多
124 | // ['constructor']('while\x20(true)\x20{}')['apply']('counter')
125 | // ['constructor']('debu'+_0x4584('0xe0'))['call']('action'))
126 | // ['constructor'](_0x4584('0x3a5')+_0x4584('0xe0'))['apply']('stateObject'))
127 | 
/**
 * Empty the function body containing the anti-debugger check.
 *
 * The check is located through one of three known markers (two literal `new RegExp(...)`
 * forms and one obfuscated `const ...=new RegExp(...)` pair). The `{ ... }` block enclosing
 * the marker is then replaced by `{}`.
 *
 * @param js source to patch
 * @returns the patched source, or `undefined` when no marker is found or the enclosing block
 *   can not be delimited
 */
function removeDebuggerLimit(js: string): string | undefined {
  let index = js.indexOf(String.raw`=new RegExp('function\x20*\x5c(\x20*\x5c)')`)
  if (index === -1) {
    index = js.indexOf(String.raw`=new RegExp('\x5c+\x5c+\x20*(?:[a-zA-Z_$][0-9a-zA-Z_$]*)','i')`)
  }

  /**
    const _0x2fdb16 = new RegExp(_0x7031('0x945'));
    const _0x48182a = new RegExp(_0x7031('0x403'),'i');
   */
  if (index === -1) {
    const match =
      /const _0x\w+=new RegExp\(_0x\w+\('0x\w+'\)\);const _0x\w+=new RegExp\(_0x\w+\('0x\w+'\),'i'\);/g.exec(
        js
      )
    if (match) {
      index = match.index
    }
  }

  if (index === -1) {
    return
  }

  // nearest `{` at or before the marker; bail out instead of scanning past the string start
  const prevBraceIndex = js.lastIndexOf('{', index)
  if (prevBraceIndex === -1) {
    return
  }

  // without a matching `}` there is no block to cut: leave the source alone
  const endBraceIndex = findMatchingIndex(js, prevBraceIndex)
  if (endBraceIndex === -1) {
    return
  }

  // turn it into an empty function
  return js.slice(0, prevBraceIndex) + `{}` + js.slice(endBraceIndex + 1)
}
163 | 


--------------------------------------------------------------------------------
/src/utils/pptr.ts:
--------------------------------------------------------------------------------
 1 | import { PPTR_DATA_DIR, baseDebug } from '$common'
 2 | import path from 'path'
 3 | import pptr from 'puppeteer'
 4 | import { RequestInterceptionManager } from 'puppeteer-intercept-and-modify-requests'
 5 | import { processAppJs } from './pptr-anti-spider/index.js'
 6 | 
 7 | const debug = baseDebug.extend('pptr')
 8 | 
 9 | export async function getBrowser() {
10 |   const browser = await pptr.launch({
11 |     headless: false,
12 |     devtools: false,
13 |     userDataDir: PPTR_DATA_DIR,
14 |     defaultViewport: null,
15 |     ignoreDefaultArgs: ['--enable-automation'],
16 |   })
17 | 
18 |   // close existing page
19 |   {
20 |     const pages = await browser.pages()
21 |     process.nextTick(() => {
22 |       pages.forEach((p) => p.close())
23 |     })
24 |   }
25 | 
26 |   const page = await browser.newPage()
27 | 
28 |   // disable cache
29 |   await page.setCacheEnabled(false)
30 | 
31 |   // intercept
32 |   const client = await page.target().createCDPSession()
33 |   // @ts-ignore
34 |   const interceptManager = new RequestInterceptionManager(client)
35 |   await interceptManager.intercept({
36 |     urlPattern: `*/*.*.js`,
37 |     // urlPattern: `*/app.*.js`,
38 |     resourceType: 'Script',
39 |     modifyResponse({ body, event }) {
40 |       const url = event.request.url
41 |       const basename = path.basename(url)
42 | 
43 |       // 1.xxx.js
44 |       // app.xxx.js
45 |       // utils.xxx.js
46 |       if (!/^\w+\.\w+\.js$/.test(basename)) {
47 |         return
48 |       }
49 | 
50 |       body = processAppJs(body, basename)
51 |       return { body }
52 |     },
53 |   })
54 | 
55 |   await page.goto('https://weread.qq.com/')
56 | 
57 |   const loginBtn = '.navBar_link_Login'
58 |   const logined = await page.$$eval(loginBtn, (els) => els.length === 0)
59 |   if (!logined) {
60 |     // 点击登录
61 |     await page.click(loginBtn)
62 | 
63 |     // 扫码
64 | 
65 |     // 等待登录成功
66 |     await page.waitForSelector('.wr_avatar.navBar_avatar', {
67 |       timeout: 0,
68 |     })
69 |     console.log('登录完成')
70 |   }
71 | 
72 |   const ua = await browser.userAgent()
73 |   console.log('ua = %s', ua)
74 | 
75 |   return { browser, page }
76 | }
77 | 


--------------------------------------------------------------------------------
/src/utils/processContent/example-start-info.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "bookId": "25462428",
 3 |   "bookInfo": {
 4 |     "bookId": "25462428",
 5 |     "title": "深入浅出WebAssembly",
 6 |     "author": "于航",
 7 |     "cover": "https://wfqqreader-1252317822.image.myqcloud.com/cover/428/25462428/s_25462428.jpg",
 8 |     "version": 1619444478,
 9 |     "format": "epub",
10 |     "type": 0,
11 |     "price": 89.6,
12 |     "originalPrice": 0,
13 |     "soldout": 0,
14 |     "bookStatus": 1,
15 |     "payType": 4097,
16 |     "finished": 1,
17 |     "maxFreeChapter": 9,
18 |     "free": 0,
19 |     "mcardDiscount": 0,
20 |     "ispub": 1,
21 |     "cpid": 2000000440,
22 |     "centPrice": 8960,
23 |     "category": "科学科技榜-计算机",
24 |     "source": "所有书籍均已获得正版授权",
25 |     "hasLecture": 0,
26 |     "intro": "blabla",
27 |     "lastChapterIdx": 40,
28 |     "paperBook": {
29 |       "skuId": "12474852"
30 |     },
31 |     "chapterSize": 40,
32 |     "updateTime": 1598794618,
33 |     "onTime": 1562642264,
34 |     "unitPrice": 0.05,
35 |     "marketType": 0,
36 |     "isbn": "9787121352171",
37 |     "publisher": "电子工业出版社",
38 |     "publishTime": "2018-11-01 00:00:00",
39 |     "totalWords": 0,
40 |     "publishPrice": 128,
41 |     "bookSize": 1207117,
42 |     "recommended": 0,
43 |     "lectureRecommended": 0,
44 |     "follow": 0,
45 |     "secret": 1,
46 |     "offline": 0,
47 |     "lectureOffline": 0,
48 |     "finishReading": 0,
49 |     "isAutoPay": 0,
50 |     "availables": 0,
51 |     "paid": 0,
52 |     "isChapterPaid": 0,
53 |     "showLectureButton": 1,
54 |     "wxtts": 1,
55 |     "star": 77,
56 |     "ratingCount": 24,
57 |     "ratingDetail": {
58 |       "one": 3,
59 |       "two": 0,
60 |       "three": 2,
61 |       "four": 1,
62 |       "five": 18,
63 |       "recent": 3
64 |     },
65 |     "copyrightInfo": {
66 |       "id": 2000000440,
67 |       "name": "电子工业出版社",
68 |       "userVid": 0
69 |     }
70 |   },
71 |   "chapterInfos": [
72 |     {
73 |       "chapterUid": 2,
74 |       "chapterIdx": 2,
75 |       "title": "版权信息",
76 |       "paid": 0,
77 |       "price": 0,
78 |       "level": 1,
79 |       "updateTime": 0,
80 |       "wordCount": 129,
81 |       "anchors": []
82 |     }
83 |   ],
84 |   "chapterContentHtml": "<div>html here</div>",
85 |   "chapterContentHtmlArray": ["<div>html here</div>"],
86 |   "chapterContentStyles": ".readerChapterContent{ color: red; }",
87 |   "currentChapterId": 13
88 | }
89 | 


--------------------------------------------------------------------------------
/src/utils/processContent/index.ts:
--------------------------------------------------------------------------------
  1 | /* eslint-disable @typescript-eslint/no-var-requires */
  2 | 
  3 | import { $esm, Info, getBookHtml } from '$common'
  4 | import type { AnyNode as $AnyNode, Element as $Element, Cheerio, CheerioAPI } from 'cheerio'
  5 | import { load as $load } from 'cheerio'
  6 | import debugFactory from 'debug'
  7 | import * as _ from 'lodash-es'
  8 | import njk from 'nunjucks'
  9 | import prettier from 'prettier'
 10 | import { ImgSrcInfo } from '../epub-img.js'
 11 | 
 12 | const debug = debugFactory('weread-spy:utils:processContent')
 13 | const { require } = $esm(import.meta)
 14 | const prettierConfig = require('@magicdawn/prettier-config') as prettier.Options
 15 | 
 16 | type TransformImgSrc = (src: string) => string
 17 | interface ProcessContentOptions {
 18 |   cssFilenames: string[]
 19 |   imgSrcInfo: ImgSrcInfo
 20 | }
 21 | 
 22 | const DATA_ATTR_WHITELIST = ['data-src', 'data-bg-img']
 23 | 
 24 | export default async function processContent(info: Info, options: ProcessContentOptions) {
 25 |   const { chapterContentHtml, chapterContentStyles, currentChapterId } = info
 26 |   const { cssFilenames, imgSrcInfo } = options
 27 |   debug('processContent for title=%s chapterUid=%s', info.bookInfo.title, currentChapterId)
 28 | 
 29 |   let html = getBookHtml(info)
 30 | 
 31 |   // apply templates
 32 |   html = applyTemplate({ style: chapterContentStyles, content: html, cssFilenames })
 33 | 
 34 |   // new $
 35 |   const $ = $load(html, {
 36 |     // @ts-ignore
 37 |     _useHtmlParser2: true,
 38 |     decodeEntities: false,
 39 |     lowerCaseTags: true,
 40 |   })
 41 |   // debug('cheerio loaded')
 42 | 
 43 |   // remove all data-xxx
 44 |   traverse($.root()[0], $, removeDataAttr)
 45 |   // debug('removeDataAttr complete')
 46 |   // debug($.xml().trim())
 47 | 
 48 |   // combine span
 49 |   traverse($.root()[0], $, combineTextSpan)
 50 |   // debug('removeUnusedSpan complete')
 51 |   // debug($.xml().trim())
 52 | 
 53 |   /**
 54 |    * special cases
 55 |    */
 56 | 
 57 |   // <p class="fDropContent"><span class="ftext"><span>在</span></span><span>我的第一本书《练习的心态》中
 58 |   // 显示效果差
 59 |   $('.fDropContent > .ftext').removeClass('ftext').data('removed-class', 'ftext')
 60 | 
 61 |   // 图片
 62 |   const transformImgSrc = (src: string) => imgSrcInfo[src]?.localFile
 63 |   const ctx: { transformImgSrc: TransformImgSrc; imgs: Array<{ src: string; newSrc: string }> } = {
 64 |     transformImgSrc,
 65 |     imgs: [],
 66 |   }
 67 |   traverse($.root()[0], $, fixImgSrc, ctx)
 68 |   // debug('fixImgSrc complete')
 69 | 
 70 |   // get xhtml
 71 |   html = $.xml().trim()
 72 | 
 73 |   // format
 74 |   try {
 75 |     // html = prettier.format(html, {...prettierConfig, parser: 'html'})
 76 |   } catch (e) {
 77 |     console.warn('[prettier] format met error: currentChapterId = %s', currentChapterId)
 78 |     console.warn(e.stack || e)
 79 |   }
 80 | 
 81 |   // replace
 82 |   html = html.replace(/&nbsp;/g, ' ')
 83 | 
 84 |   let style = chapterContentStyles
 85 |   try {
 86 |     style = await prettier.format(style, { ...prettierConfig, parser: 'css' })
 87 |   } catch (e) {
 88 |     console.warn('[prettier] format met error: currentChapterId = %s', currentChapterId)
 89 |     console.error(e.stack || e)
 90 |   }
 91 | 
 92 |   return {
 93 |     xhtml: html,
 94 |     style,
 95 |     imgs: ctx.imgs,
 96 |   }
 97 | }
 98 | 
 99 | /**
100 |  * get all img srcs
101 |  */
102 | 
/**
 * Parse `html` in xml mode and return every image source found by
 * `collectImgSrc`, in document order.
 */
export function getImgSrcs(html: string) {
  const collected: string[] = []

  const $ = $load(html, { decodeEntities: false, xmlMode: true, lowerCaseTags: true })
  const rootNode = $.root()[0]
  traverse(rootNode, $, collectImgSrc, collected)

  return collected
}
113 | 
114 | // <style>
115 | //   {{ style | safe }}
116 | // </style>
/**
 * Wrap chapter `content` in the xhtml document skeleton, linking every entry
 * of `cssFilenames` as a stylesheet. The rendered string is trimmed.
 *
 * `style` is passed to the render context but the template does not use it.
 */
function applyTemplate({
  style,
  content,
  cssFilenames,
}: {
  style: string
  content: string
  cssFilenames: string[]
}) {
  const tpl = `
    <?xml version="1.0" encoding="UTF-8"?>
    <html xmlns="http://www.w3.org/1999/xhtml">
		  <head>
		    <meta charset="UTF-8" />
		    <title>Document</title>
        {%- for css in cssFilenames -%}
        <link rel="stylesheet" href="{{css}}" />
        {%- endfor %}
		  </head>
		  <body>
		    <div class="readerChapterContent">
		      {{ content | safe }}
		    </div>
		  </body>
		</html>
	`

  const renderContext = { style, content, cssFilenames }
  const rendered = njk.renderString(tpl, renderContext)
  return rendered.trim()
}
154 | 
/** What a visitor may return; `traverseChildren: false` stops descent below the node. */
type OnNodeResult = { traverseChildren?: boolean } | undefined | void
/** Visitor invoked once per visited node. */
type OnNode = (el: $AnyNode, $: CheerioAPI, extraData?: any) => OnNodeResult

/**
 * Pre-order walk: call `onNode` on `el`, then recurse into its non-text
 * children unless the visitor returned `{ traverseChildren: false }`.
 * Only `tag` and `root` nodes are descended into.
 */
function traverse(el: $AnyNode, $: CheerioAPI, onNode: OnNode, extraData?: any) {
  // visit the node itself first
  const verdict = onNode(el, $, extraData) || {}
  const descend = verdict.traverseChildren === undefined ? true : verdict.traverseChildren
  if (!descend) return

  // nodes other than tag/root have no children to walk
  if (el.type !== 'tag' && el.type !== 'root') return

  el.childNodes.forEach((child) => {
    if (child.type !== 'text') {
      traverse(child, $, onNode, extraData)
    }
  })
}
170 | 
/**
 * Visitor: strip every `data-*` attribute from a tag node, except the ones
 * listed in `DATA_ATTR_WHITELIST`. Non-tag nodes are left alone.
 */
function removeDataAttr(el: $Element, $: CheerioAPI): OnNodeResult {
  if (el.type !== 'tag') return

  const $node = $(el)
  // Object.keys gives a snapshot, so removing while looping is safe
  for (const name of Object.keys(el.attribs || {})) {
    if (!name.startsWith('data-')) continue
    if (DATA_ATTR_WHITELIST.includes(name)) continue
    $node.removeAttr(name)
  }
}
183 | 
/**
 * Visitor: collapse attribute-less `<span>` children of a tag.
 *
 * - every child is a bare span: replace them all with a single span holding
 *   the element's text, and do not descend;
 * - fewer than 1/10 of the children are something else: merge each run of
 *   consecutive bare spans into one span, keep the other children as they
 *   are, and keep descending;
 * - otherwise leave the element untouched and keep descending.
 *
 * An element that is itself a bare span is never descended into.
 */
function combineTextSpan(el: $Element, $: CheerioAPI): OnNodeResult {
  if (el.type !== 'tag' || !el.childNodes?.length) return

  // a <span> tag with no attributes at all
  const isBareSpan = (node: $AnyNode) => {
    if (node.type !== 'tag') return false
    if (node.tagName?.toLowerCase() !== 'span') return false
    return Object.keys((node as $Element).attribs || {}).length === 0
  }

  if (isBareSpan(el)) {
    return { traverseChildren: false }
  }

  const $el = $(el)
  const kids = el.childNodes
  const otherCount = kids.filter((node) => !isBareSpan(node)).length

  // nothing but bare spans: one span with the whole text
  if (otherCount === 0) {
    const wholeText = $el.text()
    $el.empty()
    $el.append(`<span>${wholeText}</span>`)
    return { traverseChildren: false }
  }

  if (otherCount / kids.length < 1 / 10) {
    // group consecutive bare spans; every other child is a group of its own
    const groups: Cheerio<$AnyNode>[] = []
    let prevWasBare = true

    for (const node of kids) {
      const bare = isBareSpan(node)
      const tail = groups[groups.length - 1]
      if (bare && tail && prevWasBare) {
        groups[groups.length - 1] = tail.add(node)
      } else {
        groups.push($(node))
      }
      prevWasBare = bare
    }

    // rebuild the children from the groups
    $el.empty()
    groups.forEach((group) => {
      if (group.toArray().every(isBareSpan)) {
        $el.append(`<span>${group.text()}</span>`)
      } else {
        $el.append(group)
      }
    })
  }

  return { traverseChildren: true }
}
242 | 
243 | /**
244 |  * 收集 img src
245 |  */
246 | 
247 | const CLASS_FOOTNOTE = 'qqreader-footnote'
248 | 
/**
 * Visitor: append the image URLs referenced by `el` to `ctx`.
 *
 * Handles `<img>` (prefers `data-src`, falls back to `src`) and inline
 * `background-image:url(...)` styles.
 */
function collectImgSrc(el: $Element, $: CheerioAPI, ctx: string[]): OnNodeResult {
  const isTag = el.type === 'tag'

  if (isTag && el.tagName?.toLowerCase?.() === 'img') {
    const $img = $(el)

    // Footnote markers are skipped, e.g.
    // <img
    //  data-wr-co="5445"
    //  alt="...footnote text..."
    //  class="qqreader-footnote"
    //  src="data:image/gif;base64,..."
    //  data-src="../Images/note.png"
    // />
    if ($img.hasClass(CLASS_FOOTNOTE)) {
      return
    }

    const imgSrc = ($img.data('src') as string | undefined) || $img.attr('src')
    if (imgSrc) {
      ctx.push(imgSrc)
    }
  }

  // style="background-image:url(https://res.weread.qq.com/wrepub/web/910419/copyright.jpg);"
  const inlineStyle = isTag ? el.attribs?.style : ''
  if (!inlineStyle?.includes('background-image:')) return

  const bgUrl = /(?:^|; *?)background-image *?: *?url\(([\S]+?)\)/.exec(inlineStyle)?.[1]
  if (!bgUrl) return

  // mark only; has no effect, the html parsed here is abandoned afterwards
  $(el).attr('data-bg-img', bgUrl)

  // TODO: relative urls cannot be handled, e.g.
  // <div data-wr-bd="1" data-wr-inset="1" data-wr-co="344" class="bgimg" style="background-image:url(../Images/copyright.jpg)
  const isRelative = bgUrl.startsWith('../') || bgUrl.startsWith('./')
  if (!isRelative) {
    ctx.push(bgUrl)
  }
}
287 | 
/**
 * Visitor: rewrite image references on `el` to their local files.
 *
 * - `<img>` (except footnote markers): drop `alt`, point `src` at
 *   `ctx.transformImgSrc(original)` and move non-numeric `width` / `height`
 *   attributes into the inline style.
 * - inline `background-image:url(...)`: replace the url, or drop the
 *   declaration when no local file is known.
 *
 * Every handled `<img>`, and every background image with a known local file,
 * is recorded in `ctx.imgs` as `{ src, newSrc }`.
 *
 * @param ctx expects `transformImgSrc(src)` and an `imgs` array
 */
function fixImgSrc(el: $Element, $: CheerioAPI, ctx: any): OnNodeResult {
  if (el.type !== 'tag') return

  if (el.tagName?.toLowerCase?.() === 'img') {
    const $el = $(el)

    if ($el.hasClass(CLASS_FOOTNOTE)) {
      return
    }

    // remove alt
    $el.removeAttr('alt')

    // transform & change src
    // Use the same lookup as collectImgSrc (data-src first, then src), so an
    // <img> that only has `src` is rewritten to its local file as well.
    const src = ($el.data('src') as string | undefined) || $el.attr('src')
    const newSrc = ctx.transformImgSrc(src)
    ctx.imgs.push({
      src,
      newSrc,
    })
    // when newSrc is undefined cheerio treats this as a no-op and src is kept
    $el.attr('src', newSrc)

    // fix width & style
    // <img src="imgs/epub_40870013_2.png" data-src="https://res.weread.qq.com/wrepub/epub_40870013_2" style="width: 15%" width="15%"/>
    // ERROR(RSC-005): ./CSS新世界.epub/OEBPS/chapter-6.xhtml(20,181): Error while parsing file: value of attribute "width" is invalid; must be an integer
    const width = $el.attr('width')
    const height = $el.attr('height')
    if (width && isNaN(Number(width))) {
      $el.css('width', width).removeAttr('width')
    }
    if (height && isNaN(Number(height))) {
      $el.css('height', height).removeAttr('height')
    }

    return
  }

  // style="background-image:url(https://res.weread.qq.com/wrepub/web/910419/copyright.jpg);"
  const style: string = el.attribs?.style
  if (style?.includes('background-image:')) {
    const m = /(?:^|; *?)background-image *?: *?url\(([\S]+?)\)/.exec(style)
    if (m?.[1]) {
      const $el = $(el)
      const src = m[1]

      // transform
      const newSrc = ctx.transformImgSrc(src)

      if (newSrc) {
        ctx.imgs.push({
          src,
          newSrc,
        })

        // replace style
        const newStyle = style.replace(src, newSrc)
        $el.attr('style', newStyle)
      }

      // no local file (e.g. the src was a 404): drop the background-image declaration
      else {
        debug('fixImgSrc: transformImgSrc return empty for %s', src)

        const newStyle = style
          .split(';')
          .map((s) => s.trim())
          .filter(Boolean)
          .filter((oneStyle) => !oneStyle.startsWith('background-image:'))
          .join(';')

        if (newStyle) {
          $el.attr('style', newStyle)
          debug('fixImgSrc: style=%s -> style=%s', style, newStyle)
        } else {
          $el.removeAttr('style')
          debug('fixImgSrc: removeAttr style=%s', style)
        }
      }
    }
  }
}
369 | 


--------------------------------------------------------------------------------
/src/utils/processContent/worker/index.main.ts:
--------------------------------------------------------------------------------
 1 | import { $esm } from '$common/index'
 2 | import * as Comlink from 'comlink/dist/esm/comlink.mjs'
 3 | import nodeEndpoint from 'comlink/dist/esm/node-adapter.mjs' // NOTE: node-adpater 没有 .js 版本
 4 | import os from 'os'
 5 | import { Worker } from 'worker_threads'
 6 | 
 7 | import type processContent from '../index.js'
 8 | type ProcessContent = typeof processContent
 9 | 
10 | const { __dirname } = $esm(import.meta)
11 | 
/**
 * Start one `processContent.worker.js` thread (resolved next to this module)
 * and wrap it with Comlink.
 *
 * @returns `api` (the Comlink remote) and `nodeWorker` (the underlying thread)
 */
export function createWorker() {
  const nodeWorker = new Worker(`${__dirname}/processContent.worker.js`)
  // @ts-ignore
  const api = Comlink.wrap(nodeEndpoint(nodeWorker)) as Comlink.Remote<{
    processContent: ProcessContent
  }>
  return { api, nodeWorker }
}
21 | 
/**
 * Start a pool of workers: one per logical CPU, minus one that is left free
 * for the rest of the system, but never fewer than one.
 *
 * `os.cpus()` can report a single core (or, on some platforms, nothing at
 * all); without the lower bound that produced an empty pool or an invalid
 * array length.
 */
export function createWorkers() {
  const workerCount = Math.max(1, os.cpus().length - 1)
  return Array.from({ length: workerCount }, () => createWorker())
}
28 | 


--------------------------------------------------------------------------------
/src/utils/processContent/worker/index.worker.ts:
--------------------------------------------------------------------------------
 1 | import * as Comlink from 'comlink/dist/esm/comlink.mjs'
 2 | import nodeEndpoint from 'comlink/dist/esm/node-adapter.mjs'
 3 | import { parentPort } from 'worker_threads'
 4 | import processContent, { getImgSrcs } from '../index.js'
 5 | 
 6 | const api = {
 7 |   processContent,
 8 |   getImgSrcs,
 9 | }
10 | 
11 | // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
12 | // @ts-ignore
13 | Comlink.expose(api, nodeEndpoint(parentPort!))
14 | 


--------------------------------------------------------------------------------
/test/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/magicdawn/weread-spy/13e70510a0b7343689fe1bbf76c5f83096396bc3/test/.gitkeep


--------------------------------------------------------------------------------
/test/mocha.opts:
--------------------------------------------------------------------------------
1 | --require ts-node/register
2 | --watch-extensions ts
3 | --recursive
4 | --reporter spec
5 | --timeout 5000
6 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "include": ["src/**/*"],
 3 |   "compilerOptions": {
 4 |     "rootDir": "src",
 5 |     "outDir": "lib",
 6 |     "baseUrl": "./",
 7 |     "paths": {
 8 |       "$*": ["src/*.js", "src/*/index.js"]
 9 |     },
10 |     "allowSyntheticDefaultImports": true,
11 |     "esModuleInterop": true,
12 |     "resolveJsonModule": true,
13 |     "target": "ES2020",
14 |     "module": "Node16",
15 |     "strict": false,
16 |     "strictNullChecks": true,
17 |     "skipLibCheck": true,
18 |     "experimentalDecorators": true
19 |   },
20 |   "ts-node": {
21 |     "transpileOnly": true,
22 |     "swc": true,
23 |     "esm": true
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/tsup.config.ts:
--------------------------------------------------------------------------------
 1 | import $esm from 'esm-utils'
 2 | import { defineConfig } from 'tsup'
 3 | 
 4 | const { __dirname } = $esm(import.meta)
 5 | 
 6 | process.env.NODE_ENV ||= 'development'
 7 | const prod = process.env.NODE_ENV === 'production'
 8 | 
 9 | export default defineConfig({
10 |   entry: {
11 |     'bin': 'src/bin.ts',
12 |     'processContent.worker': 'src/utils/processContent/worker/index.worker.ts',
13 |   },
14 |   format: 'esm',
15 |   platform: 'node',
16 |   target: 'node16',
17 |   clean: true,
18 |   minify: prod,
19 |   env: {
20 |     NODE_ENV: process.env.NODE_ENV,
21 |   },
22 | 
23 |   // NOTE: puppeteer-intercept-and-modify-requests 这个包 esm build 有问题
24 |   noExternal: ['puppeteer-intercept-and-modify-requests'],
25 |   external: ['why-is-node-running'],
26 | 
27 |   esbuildOptions(options, context) {
28 |     // init
29 |     options.external ||= []
30 | 
31 |     options.external.push(__dirname + '/package.json')
32 | 
33 |     // use ascii in prod
34 |     options.charset = prod ? undefined : 'utf8'
35 |   },
36 | })
37 | 


--------------------------------------------------------------------------------