├── .github
    └── workflows
    │   └── build.yml
├── .gitignore
├── Epub_Tool_Console.py
├── Epub_Tool_TKUI.py
├── LICENSE
├── README.md
├── build_tool
    ├── pyinstallerForMac.sh
    └── pyinstallerForWindows.txt
├── img
    ├── how_to_use.gif
    ├── icon.icns
    ├── icon.ico
    ├── icon.jpeg
    ├── icon.png
    └── icon_backup.png
├── requirements.txt
└── utils
    ├── decrypt_epub.py
    ├── encrypt_epub.py
    ├── encrypt_font.py
    ├── log.py
    └── reformat_epub.py


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
  1 | name: Build And Release
  2 | 
  3 | on:
  4 |   # push:
  5 |   #   branches:
  6 |   #     - main
  7 |   #   paths:
  8 |   #     - '**.yml'
  9 |   workflow_dispatch:
 10 |     inputs:
 11 |       release:
 12 |         description: 'Create a new release'
 13 |         required: true
 14 |         default: true
 15 |         type: boolean
 16 |       version:
 17 |         description: 'Release version'
 18 |         required: true
 19 |         default: '2025.03.01.v1'
 20 |       body:
 21 |         description: 'Release body text'
 22 |         required: true
 23 |         default: '详情请查看更新日志；Epub_Tool_TKUI为可视化程序，不再支持命令行程序。有需要请直接下载python执行源码。'
 24 | 
 25 | jobs:
 26 |   build:
 27 |     runs-on: ${{ matrix.os }}
 28 |     strategy:
 29 |       matrix: # os: [ubuntu-latest, macos-latest, windows-latest] 暂时去除Ubuntu系统
 30 |         os: [macos-latest, windows-latest]
 31 |     steps:
 32 |     - uses: actions/checkout@v4
 33 |     
 34 |     - name: Set release outputs
 35 |       id: set_release_outputs
 36 |       run: |
 37 |         if [ "${{ github.event.inputs.release }}" == "true" ]; then
 38 |           echo "TAG=${{ github.event.inputs.version }}" >> $GITHUB_ENV
 39 |           echo "BODY=${{ github.event.inputs.body }}" >> $GITHUB_ENV
 40 |         else
 41 |           echo "TAG=$(date +'%Y.%m.%d.v2')" >> $GITHUB_ENV
 42 |           echo "BODY=详情请查看更新日志；Epub_Tool_TKUI为可视化程序，Epub_Tool_Console为命令行程序。" >> $GITHUB_ENV
 43 |         fi
 44 |       shell: bash
 45 | 
 46 |     - name: Set up Python
 47 |       uses: actions/setup-python@v5
 48 |       with:
 49 |         python-version: 3.13.1
 50 | 
 51 |     - name: Install Pyinstaller
 52 |       run: |
 53 |         python -m pip install pyinstaller
 54 |         python -m pip install -r requirements.txt
 55 | 
 56 |     - name: Build the executables
 57 |       run: |
 58 |         if [ "${{ matrix.os }}" == "macos-latest" ]; then
 59 |           pyinstaller -w -i ./img/icon.icns Epub_Tool_TKUI.py -n Epub_Tool_TKUI;
 60 |         else
 61 |           pyinstaller -w -F -i ./img/icon.ico  Epub_Tool_TKUI.py -n Epub_Tool_TKUI;
 62 |         fi
 63 |       shell: bash
 64 | 
 65 |     - name: Rename artifacts with OS prefix (macOS) # Epub_Tool_TKUI.app为文件夹，zip压缩
 66 |       if: matrix.os == 'macos-latest'
 67 |       run: |
 68 |         brew install create-dmg
 69 |         cd ./dist
 70 |         create-dmg \
 71 |           --volname "Epub_Tool_TKUI" \
 72 |           --window-pos 400 200 \
 73 |           --window-size 660 400 \
 74 |           --icon-size 100 \
 75 |           --icon "Epub_Tool_TKUI.app" 160 185 \
 76 |           --hide-extension "Epub_Tool_TKUI.app"\
 77 |           --app-drop-link 500 185 \
 78 |           --no-internet-enable \
 79 |           "./Epub_Tool_TKUI.dmg" \
 80 |           "./Epub_Tool_TKUI.app";
 81 |         rm -rf ./Epub_Tool_TKUI.app;
 82 |         rm -rf ./Epub_Tool_TKUI;
 83 |         cd ..
 84 |         for file in dist/*; do
 85 |           mv "$file" "dist/${{ runner.os }}_$(basename "$file")"
 86 |         done
 87 |       shell: bash
 88 | 
 89 |     # 无Ubuntu系统，暂时不知道Ubuntu版本下打包的可执行文件是什么格式、是否可用
 90 |     # - name: Rename artifacts with OS prefix (Ubuntu)
 91 |     #   if: matrix.os == 'ubuntu-latest'
 92 |     #   run: |
 93 |     #     for file in dist/*; do
 94 |     #       mv "$file" "dist/${{ runner.os }}_$(basename "$file")"
 95 |     #     done
 96 |     #   shell: bash
 97 | 
 98 |     - name: Rename artifacts with OS prefix (Windows)
 99 |       if: matrix.os == 'windows-latest'
100 |       run: |
101 |         Get-ChildItem -Path dist | Rename-Item -NewName { '${{ runner.os }}_' + $_.Name }
102 |       shell: pwsh
103 | 
104 |     # - name: Create zip archive (Windows)
105 |     #   if: matrix.os == 'windows-latest'
106 |     #   run: |
107 |     #     Compress-Archive -Path dist\* -DestinationPath "dist\${{ runner.os }}_all.zip"
108 |     #   shell: pwsh
109 | 
110 |     # - name: Create tar.gz archive (Linux and macOS)
111 |     #   if: matrix.os != 'windows-latest'
112 |     #   run: |
113 |     #     tar -czvf "${{ runner.os }}_all.tar.gz" -C dist $(ls dist)
114 |     #     mv "${{ runner.os }}_all.tar.gz" dist/
115 |     #   shell: bash
116 | 
117 |     - name: Create Release
118 |       uses: ncipollo/release-action@v1
119 |       with:
120 |         tag: ${{ env.TAG }}
121 |         name: ${{ env.TAG }}
122 |         body: ${{ env.BODY }}
123 |         artifacts: 'dist/*'
124 |         allowUpdates: true
125 |         makeLatest: true


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | build
 2 | dist
 3 | test
 4 | epub_tool.spec
 5 | log.txt
 6 | result.txt
 7 | utils/__pycache__/
 8 | log_file
 9 | epub_tool_TKUI.spec
10 | .DS_Store
11 | *.epub
12 | 


--------------------------------------------------------------------------------
/Epub_Tool_Console.py:
--------------------------------------------------------------------------------
  1 | from utils.encrypt_epub import run as encrypt_run
  2 | from utils.decrypt_epub import run as decrypt_run
  3 | from utils.reformat_epub import run as reformat_run
  4 | import sys
  5 | import os
  6 | import argparse
  7 | from tqdm import tqdm
  8 | 
  9 | 
 10 | def prepare_args():
 11 |     parser = argparse.ArgumentParser(description="Epub Tool")
 12 |     parser.add_argument("-i", help="input file/folder")
 13 |     parser.add_argument("-e", action="store_true", help="encrypt epub file")
 14 |     parser.add_argument("-d", action="store_true", help="decrypt epub file")
 15 |     parser.add_argument("-r", action="store_true", help="reformat epub file")
 16 |     parser.add_argument(
 17 |         "-m",
 18 |         help="mode: e: encrypt, d: decrypt, r: reformat, c: choose files, a: all files",
 19 |     )
 20 |     return parser.parse_args()
 21 | 
 22 | 
 23 | def clean_input_path(input_path):
 24 |     return input_path.strip("'").strip('"').strip()
 25 | 
 26 | 
 27 | def check_args(args):
 28 |     while True:
 29 |         if not args.i:
 30 |             args.i = input("请输入epub文件路径或文件夹路径：")
 31 |         args.i = clean_input_path(args.i)
 32 | 
 33 |         # 判断输入文件是否为文件夹
 34 |         if os.path.isdir(args.i):
 35 |             file_list = [
 36 |                 os.path.join(root, file)
 37 |                 for root, _, files in os.walk(args.i)
 38 |                 for file in files
 39 |                 if file.endswith(".epub")
 40 |             ]
 41 | 
 42 |             if file_list:
 43 |                 while True:
 44 |                     if not args.m:
 45 |                         args.m = (
 46 |                             input("请输入操作（c：手动选择，a：全部文件）：")
 47 |                             .strip()
 48 |                             .lower()
 49 |                         )
 50 | 
 51 |                     if args.m == "c":
 52 |                         mode = "手动选择"
 53 |                     elif args.m == "a":
 54 |                         mode = "全部文件"
 55 |                     else:
 56 |                         print("输入错误，请输入 'c' 或 'a'")
 57 |                         args.m = None
 58 |                         continue
 59 | 
 60 |                     print(f"处理模式：{mode}")
 61 | 
 62 |                     if mode == "手动选择":
 63 |                         print("以下是文件夹中的epub文件：")
 64 |                         for idx, file in enumerate(file_list):
 65 |                             print(f"{idx + 1}: {file}")
 66 | 
 67 |                         while True:
 68 |                             selected_files = input(
 69 |                                 "请输入你想要处理的文件序号（多个序号请用空格分开）："
 70 |                             )
 71 |                             selected_indices = selected_files.split()
 72 | 
 73 |                             try:
 74 |                                 args.i = [
 75 |                                     file_list[int(index) - 1]
 76 |                                     for index in selected_indices
 77 |                                 ]
 78 |                                 break
 79 |                             except (ValueError, IndexError):
 80 |                                 print("输入错误，请确保输入的是有效的文件序号")
 81 | 
 82 |                     elif mode == "全部文件":
 83 |                         args.i = file_list
 84 |                     break
 85 |             else:
 86 |                 print("文件夹中没有找到任何epub文件，请重新输入")
 87 |                 args.i = None
 88 |                 continue
 89 |         else:
 90 |             if os.path.exists(args.i):
 91 |                 args.i = [args.i]
 92 |             else:
 93 |                 print(f"输入路径不存在：{args.i}，请重新输入")
 94 |                 args.i = None
 95 |                 continue
 96 | 
 97 |         return args
 98 | 
 99 | 
def check_mode(args):
    """Pick the operation to run, asking the user when none was given.

    The flags ``-e``/``-d``/``-r`` (or the same letter in ``args.m``) are
    checked in that order. If none is set, the user is prompted until a valid
    letter is entered; the answer is stored in ``args.m``.

    Returns:
        A ``(label, function)`` pair: the display name of the operation and
        the callable that performs it.
    """
    prompt = "请输入操作（e：加密，d：解密，r：重排）："
    while True:
        if args.e or args.m == "e":
            return "加密", encrypt_run
        if args.d or args.m == "d":
            return "解密", decrypt_run
        if args.r or args.m == "r":
            return "重排", reformat_run

        # Nothing selected yet: keep asking, then re-run the checks above.
        while True:
            args.m = input(prompt).strip().lower()
            if args.m in ["e", "d", "r"]:
                break
            print("输入错误，请输入 'e'、'd' 或 'r'")
124 | 
125 | 
def main():
    """Console entry point: gather input, run the operation, report results.

    Each file is processed in turn under a progress bar; the per-file result
    lines are printed and also written to ``result.txt`` in the current
    working directory.
    """
    print("-欢迎使用Epub Tool-")
    print("-此程序由cnwxi提供-")
    args = check_args(prepare_args())
    process, func = check_mode(args)
    print(f"处理模式：{process}")

    def describe(path):
        # Turn one file's outcome (return code or exception) into a report line.
        try:
            ret = func(path)
            if ret == 0:
                return f"^_^ {path} 成功"
            if ret == "skip":
                return f"O_O {path} 跳过：已{process}"
            return f"T_T {path} 失败：{ret}"
        except Exception as e:
            return f"X_X {path} 处理时发生错误：{e}"

    tmp_run_result = []
    with tqdm(total=len(args.i), ncols=100, desc=f"{process}文件") as pbar:
        for file in args.i:
            tmp_run_result.append(describe(file))
            pbar.update(1)

    print(f"{process}结果：")
    for line in tmp_run_result:
        print(line)

    with open("result.txt", "w", encoding="utf-8") as f:
        f.writelines(line + "\n" for line in tmp_run_result)

    input("按下回车退出...")
159 | 
160 | 
161 | if __name__ == "__main__":
162 |     main()
163 | 


--------------------------------------------------------------------------------
/Epub_Tool_TKUI.py:
--------------------------------------------------------------------------------
  1 | import tkinter as tk
  2 | 
  3 | # from tkinter.filedialog import askopenfiles
  4 | from tkinter.font import Font
  5 | from tkinter import filedialog, ttk, messagebox
  6 | import os
  7 | from utils.encrypt_epub import run as encrypt_run
  8 | from utils.decrypt_epub import run as decrypt_run
  9 | from utils.reformat_epub import run as reformat_run
 10 | from utils.encrypt_font import run_epub_font_encrypt
 11 | import sys
 12 | import threading
 13 | import subprocess
 14 | import webbrowser
 15 | 
# Main application window and the ttk style registry used by the widgets below.
root = tk.Tk()
style = ttk.Style()

root.title("Epub Tool")
# Initial window size in pixels; also used as the minimum size.
min_width = 500
min_height = 780
root.geometry(f"{min_width}x{min_height}")
root.minsize(min_width, min_height)
# NOTE(review): the maximum *width* is set to min_height (780) here — confirm
# this is intentional rather than a typo for a dedicated max-width value.
root.maxsize(min_height, min_height)
# Allow horizontal resizing only.
root.resizable(True, False)
# Added file paths, stored as dict keys so each path appears only once.
tmp_files_dic = {}
# Output folder chosen by the user; None until one is selected (and after reset).
defalut_output_dir = None

# if sys.platform.startswith("darwin"):  # macOS
#     default_font = "PingFang SC"
# elif os.name == "nt":  # Windows
#     default_font = "SimSun"
# elif os.name == "posix":  # Linux
#     default_font = "WenQuanYi Zen Hei"
# else:
#     default_font = "Arial"  # use Arial on other systems

# Frame that holds the introduction text and the project link
intro_frame = ttk.Frame(root)
intro_frame.pack(padx=10, pady=10)
# Introduction label shown at the top of the window

# NOTE(review): "fg" is presumably meant as the text colour; ttk styles
# normally use "foreground" — confirm this option has the intended effect.
style.configure(
    "Intro.TLabel",
    font=("TkDefaultFont", 14, "bold"),
    fg="#333",
    padding=10,
)
intro_label = ttk.Label(
    intro_frame,
    text="欢迎使用 Epub Tool\n此工具可帮助您处理电子书文件",
    style="Intro.TLabel",
    justify="center",
)
intro_label.pack(side=tk.TOP)
 56 | 
 57 | 
 58 | def open_link(event):
 59 |     webbrowser.open_new("https://github.com/cnwxi/epub_tool")
 60 | 
 61 | 
# Underlined blue style so the label below reads as a hyperlink.
style.configure(
    "Link.TLabel",
    foreground="royalblue",
    # font=(default_font, 10, "underline"),
    font=("TkDefaultFont", 10, "underline"),
)
link_label = ttk.Label(
    intro_frame,
    text="访问本项目GITHUB仓库",
    style="Link.TLabel",
    cursor="hand2",
)
link_label.pack(side=tk.TOP)
# Left click opens the project page (see open_link).
link_label.bind("<Button-1>", open_link)

# Horizontal divider below the introduction
separator = ttk.Separator(root, orient="horizontal")  # horizontal separator line
separator.pack(fill="x", padx=5, pady=5)

# Row of buttons for adding and removing input files
add_frame = ttk.Frame(root)
add_frame.pack(padx=10, pady=5)
 83 | 
 84 | 
 85 | # 刷新文件列表显示
 86 | def display_added_file(files):
 87 |     # 删除 Treeview 中的所有现有项目
 88 |     for item in file_list.get_children():
 89 |         file_list.delete(item)
 90 |     # 插入新的文件列表
 91 |     for i, file_path in enumerate(files):
 92 |         file_name = os.path.basename(file_path)
 93 |         file_name = file_name.rsplit(".", 1)[0]
 94 |         file_list.insert(
 95 |             "",
 96 |             "end",
 97 |             values=(
 98 |                 f" {i+1} ",
 99 |                 f" {file_name} ",
100 |                 file_path,
101 |             ),
102 |         )
103 | 
104 | 
# Files are kept in a dict so the same path is never added twice
def store_file(files):
    """Record each path in ``tmp_files_dic``; paths already present are kept as is."""
    for path in files:
        tmp_files_dic.setdefault(path, 1)
110 | 
111 | 
# Add files (multiple selection allowed)
def add_file():
    """Ask for EPUB files, remember the new ones and refresh the list."""
    chosen = filedialog.askopenfilenames(
        title="选择文件", filetypes=[("EPUB files", "*.epub *.EPUB")]
    )
    # Keep only .epub files (any letter case) and normalize their paths.
    epub_paths = [
        os.path.normpath(path) for path in chosen if path.lower().endswith(".epub")
    ]
    store_file(epub_paths)
    display_added_file(tmp_files_dic.keys())
123 | 
124 | 
# Add a folder (single folder selection)
def add_dir():
    """Ask for a folder, collect every ``.epub`` beneath it and refresh the list."""
    folder = filedialog.askdirectory(title="选择文件夹")
    # Walk the whole tree below the chosen folder
    epub_paths = [
        os.path.normpath(os.path.join(parent, name))
        for parent, _subdirs, names in os.walk(folder)
        for name in names
        if name.lower().endswith(".epub")
    ]
    store_file(epub_paths)
    display_added_file(tmp_files_dic.keys())
136 | 
137 | 
# Remove the selected files
def delete_selected():
    """Remove the selected rows from the list and from ``tmp_files_dic``.

    Shows a warning when nothing is selected. The list is redrawn afterwards
    so the remaining rows are renumbered.
    """
    # selection() returns a tuple of row ids
    chosen_rows = file_list.selection()
    if not chosen_rows:
        messagebox.showwarning("Warning", "未选中任何文件")
        return
    # Work from the last selected row back to the first
    for row_id in chosen_rows[::-1]:
        path = file_list.item(row_id, "values")[2]
        # Forget the path (if still known) and drop its row
        tmp_files_dic.pop(path, None)
        file_list.delete(row_id)
    # Show what is left
    display_added_file(tmp_files_dic.keys())
156 | 
157 | 
# Clear the list of input files
def delete_all():
    """Remove every row from the file list and forget all stored paths."""
    # One call removes all rows currently in the Treeview
    file_list.delete(*file_list.get_children())
    # Empty the backing dict as well
    tmp_files_dic.clear()
165 | 
166 | 
# Buttons for managing the input list, laid out left to right in add_frame.
add_files_btn = ttk.Button(
    add_frame,
    text="添加文件",
    command=add_file,
)

add_files_btn.pack(side=tk.LEFT, padx=5)

select_dir_btn = ttk.Button(
    add_frame,
    text="添加文件夹",
    command=add_dir,
)
select_dir_btn.pack(side=tk.LEFT, padx=5)

delete_button = ttk.Button(
    add_frame,
    text="删除所选",
    command=delete_selected,
)

delete_button.pack(side=tk.LEFT, padx=5)

delete_all_button = ttk.Button(
    add_frame,
    text="删除全部",
    command=delete_all,
)
delete_all_button.pack(side=tk.LEFT, padx=5)

# Frame that holds the file list and its scrollbar
listbox_frame = ttk.Frame(root)
listbox_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

# Table of added files. Each row stores three values (index, file_name, path).
file_list = ttk.Treeview(
    listbox_frame,
    selectmode="extended",
    columns=(
        "index",
        "file_name",
        "path",
    ),
    show="headings",
)
# file_list.config(borderwidth=2, relief="solid")
file_list.heading("index", text="序号", anchor="center")
file_list.column("index", width=int(min_width * 0.1), anchor="center", stretch=False)
file_list.heading("file_name", text="书名", anchor="center")
file_list.column("file_name", anchor="w", stretch=True)
file_list.heading("path", text="文件路径")
file_list.column("path", width=1, stretch=False)
# Only the first two columns are displayed; "path" stays in the row values so
# other code can read it back with file_list.item(row, "values")[2].
file_list["displaycolumns"] = ("index", "file_name")
file_list.grid(row=1, column=0, sticky=tk.NSEW)
220 | 
221 | 
def show_context_menu(event):
    """Select the row under the pointer and pop up the context menu there.

    Nothing happens when the pointer is not over a row.
    """
    row_under_pointer = file_list.identify_row(event.y)
    if not row_under_pointer:
        return
    file_list.selection_set(row_under_pointer)
    context_menu.post(event.x_root, event.y_root)
227 | 
228 | 
def open_selected_file_dir():
    """Open the containing folder of every selected file in the file manager.

    Warns when nothing is selected or a folder no longer exists; any failure
    while launching the file manager is reported in an error dialog.
    """
    chosen_rows = file_list.selection()
    if not chosen_rows:
        messagebox.showwarning("Warning", "未选中任何文件")
        return
    for row_id in chosen_rows:
        folder = os.path.dirname(file_list.item(row_id, "values")[2])
        if not os.path.exists(folder):
            messagebox.showwarning("Warning", f"文件不存在: {folder}")
            continue
        try:
            if sys.platform.startswith("darwin"):  # macOS
                subprocess.run(["open", folder])
            elif os.name == "nt":  # Windows
                os.startfile(folder)
            elif os.name == "posix":  # Linux
                subprocess.run(["xdg-open", folder])
            else:
                messagebox.showerror("Warning", "不支持的操作系统")
        except Exception as e:
            messagebox.showerror("Warning", f"无法打开路径: {e}")
252 | 
253 | 
# Right-click menu for the file list.
context_menu = tk.Menu(file_list, tearoff=0)
context_menu.add_command(label="打开所在文件夹", command=open_selected_file_dir)
context_menu.add_command(label="删除选中文件", command=delete_selected)
# The original binding uses <Button-2> as the secondary button on macOS and
# <Button-3> on Windows. Every other platform (e.g. Linux/X11) previously got
# no binding at all, so it now falls back to <Button-3> as well.
if sys.platform.startswith("darwin"):
    file_list.bind("<Button-2>", show_context_menu)
else:
    file_list.bind("<Button-3>", show_context_menu)
261 | 
262 | 
class Tooltip:
    """Hover tooltip for a Treeview: shows a cell value in a small popup.

    The popup is rebuilt on every mouse movement over the widget and removed
    when the pointer leaves it.
    """

    def __init__(self, widget):
        self.widget = widget
        # Currently displayed popup window, or None when nothing is shown
        self.tooltip_window = None
        self.widget.bind("<Motion>", self.show_tooltip)
        self.widget.bind("<Leave>", self.hide_tooltip)

        # Default Tk font; only referenced by the disabled width check below
        self.font = Font(font="TkDefaultFont")

    def show_tooltip(self, event=None):
        """Show a popup under the hovered cell (never for display column #1)."""
        # Remove any popup left over from the previous position
        self.hide_tooltip()

        # Row and display column under the pointer
        row_id = self.widget.identify_row(event.y)
        column = self.widget.identify_column(event.x)
        # print(f"row_id: {row_id}, column: {column}")

        if not row_id or not column:  # pointer is not over a cell
            return

        try:
            # Display column "#n" is mapped to entry (n - 1) * 2 of the row's
            # values; for the file list this makes column #2 show the path.
            value_index = (int(column[1:]) - 1) * 2
            cell_value = self.widget.item(row_id, "values")[value_index]
            # Disabled alternative: show the popup only when the text is wider
            # than the column.
            # col_width = self.widget.column(column, "width")
            # text_width = self.font.measure(cell_value)
            # if text_width > col_width:
            if column == "#1" or row_id == "" or cell_value == "":
                return

            box = self.widget.bbox(row_id, column)  # cell position in the widget
            if box is None:
                return
            x, y, w, h = box[0], box[1], box[2], box[3]
            # Skip cells whose centre lies at negative coordinates
            if x + (w / 2) < 0 or y + (h / 2) < 0:
                return

            # Convert to screen coordinates, just below the cell
            x += self.widget.winfo_rootx()
            y += self.widget.winfo_rooty() + h

            # Borderless popup window positioned at (x, y)
            tw = tk.Toplevel(self.widget)
            self.tooltip_window = tw
            tw.wm_overrideredirect(True)
            tw.wm_geometry(f"+{x}+{y}")

            label = tk.Label(
                tw,
                text=cell_value,
                background="lightyellow",
                relief="solid",
                borderwidth=1,
            )
            label.pack()
        except IndexError:
            # Raised when the row has no such value or the bbox cannot be indexed
            return

    def hide_tooltip(self, event=None):
        """Destroy the popup window if one is currently shown."""
        if not self.tooltip_window:
            return
        self.tooltip_window.destroy()
        self.tooltip_window = None
331 | 
332 | 
# Attach the hover tooltip to the file list.
Tooltip(file_list)


# file_list.bind("<Motion>", on_treeview_motion)


# Vertical scrollbar for the file list
v_scrollbar = ttk.Scrollbar(
    listbox_frame,
    orient=tk.VERTICAL,
    command=file_list.yview,
    #    width=15
)
v_scrollbar.grid(row=1, column=1, sticky=tk.NS)


# Horizontal scrollbar (disabled)
# h_scrollbar = ttk.Scrollbar(listbox_frame,
#                            orient=tk.HORIZONTAL,
#                            command=file_list.xview,
#                         #    width=15
#                            )
# h_scrollbar.grid(row=2, column=0, sticky=tk.EW)

# Let the file list report its scroll position to the scrollbar
file_list.configure(
    yscrollcommand=v_scrollbar.set,
    #  xscrollcommand=h_scrollbar.set
)

# Grid weights: only the list column stretches with the frame
# listbox_frame.grid_rowconfigure(1, weight=1)
listbox_frame.grid_columnconfigure(0, weight=1)

# Horizontal divider below the file list
separator = ttk.Separator(root, orient="horizontal")  # horizontal separator line
separator.pack(fill="x", padx=5, pady=5)
370 | 
371 | 
def select_output_dir():
    """Ask for an output folder and, if one is chosen, make it the default.

    On success the global ``defalut_output_dir`` is updated, the path label is
    restyled as a clickable link (long paths are shortened to their first and
    last 15 characters), and a confirmation row is appended to the result list.
    Cancelling the dialog changes nothing.
    """
    global defalut_output_dir
    chosen = filedialog.askdirectory(title="选择输出文件夹")
    # A cancelled dialog yields an empty value. Return before normpath(), which
    # would raise TypeError if that value is not a string (e.g. an empty tuple).
    if not chosen:
        return
    output_dir = os.path.normpath(chosen)
    if output_dir != "." and os.path.exists(output_dir):
        defalut_output_dir = output_dir
        if len(output_dir) > 30:
            length = len(output_dir) - 15
            output_dir = output_dir[:15] + "..." + output_dir[length:]
        output_dir_label.config(text=f"输出路径: {output_dir}")
        style.configure(
            "FileLink.TLabel",
            font=("TkDefaultFont", 10, "underline"),
            foreground="royalblue",
        )
        output_dir_label.config(style="FileLink.TLabel", cursor="hand2")
        output_dir_label.update()
        result_list.insert(
            "",
            "end",
            values=(
                "^_^",
                "",
                "",
                "已设置路径",
                f"成功设置输出路径为 {defalut_output_dir}",
            ),
        )
        root.update_idletasks()
400 | 
401 | 
def open_output_dir(event):
    """Click handler: open the chosen output folder in the file manager.

    Does nothing when no output folder is set or the folder no longer exists.
    """
    path = defalut_output_dir
    if path is None or not os.path.exists(path):
        # messagebox.showwarning("Warning", "未指定输出路径")
        return
    try:
        if sys.platform.startswith("darwin"):  # macOS
            subprocess.run(["open", path])
        elif os.name == "nt":  # Windows
            os.startfile(path)
        elif os.name == "posix":  # Linux
            subprocess.run(["xdg-open", path])
        else:
            messagebox.showerror("Warning", "不支持的操作系统")
    except Exception as e:
        messagebox.showerror("Warning", f"无法打开路径: {e}")
419 | 
420 | 
def reset_output_dir():
    """Clear the chosen output folder and restore the default label.

    The label returns to its grey, non-clickable look and a confirmation row
    is appended to the result list.
    """
    global defalut_output_dir
    defalut_output_dir = None

    output_dir_label.config(text="输出路径: 默认文件所在路径")
    style.configure(
        "FileLink.TLabel",
        font=("TkDefaultFont", 10, "underline"),
        foreground="DimGray",
    )
    output_dir_label.config(style="FileLink.TLabel", cursor="")
    output_dir_label.update()

    notice = ("^_^", "", "", "已重置路径", "重置输出路径为原文件所在目录")
    result_list.insert("", "end", values=notice)
    root.update_idletasks()
442 | 
443 | 
# Row with the two buttons that choose or reset the output folder
outdir_frame = tk.Frame(root)
outdir_frame.pack(padx=10, pady=5)
# Button that opens the output-folder chooser
show_btn = ttk.Button(
    outdir_frame,
    text="选择输出路径",
    command=select_output_dir,
)
show_btn.pack(side=tk.LEFT, padx=5)

reset_btn = ttk.Button(
    outdir_frame,
    text="重置输出路径",
    command=reset_output_dir,
)
reset_btn.pack(side=tk.LEFT, padx=5)
# Separate row for the label that shows the current output folder
frame4 = tk.Frame(root)
frame4.pack(pady=5)

# Initial (grey) look of the label; select_output_dir/reset_output_dir
# reconfigure this same style later.
style.configure(
    "FileLink.TLabel", font=("TkDefaultFont", 10, "underline"), foreground="DimGray"
)
output_dir_label = ttk.Label(
    frame4,
    text="输出路径: 默认文件所在路径",
    style="FileLink.TLabel",
)
output_dir_label.pack(side=tk.LEFT, padx=5)
# Left click opens the chosen folder (no-op while none is set)
output_dir_label.bind("<Button-1>", open_output_dir)

# Horizontal divider below the output-folder section
separator = ttk.Separator(root, orient="horizontal")  # horizontal separator line
separator.pack(fill="x", pady=5, padx=5)
477 | 
478 | 
def start_progress(func, func_name, output_dir, *args):
    """Run ``run_in_thread`` with the given arguments on a new thread.

    The thread is started immediately and this function returns without
    waiting for it.
    """
    worker = threading.Thread(
        target=run_in_thread,
        args=(func, func_name, output_dir, *args),
    )
    worker.start()
485 | 
486 | 
def run_in_thread(func, func_name, output_dir, *args):
    """Apply ``func`` to every file in the list and log one result row per file.

    Each file is removed from the input list before it is processed. ``func``
    is called as ``func(file_path, output_dir, *args)``; a return value of
    ``0`` counts as success, ``"skip"`` as skipped, and anything else is shown
    as the failure message. Exceptions are caught and logged as errors.

    Args:
        func: Operation to run on each file.
        func_name: Display name of the operation, used in the messages.
        output_dir: Output folder passed to ``func``; ``None`` means the
            file's own folder is reported as the output location.
        *args: Extra positional arguments forwarded to ``func``.

    NOTE(review): start_progress runs this on a worker thread while it updates
    Tk widgets directly — confirm this is safe for the Tk build in use.
    """
    children = file_list.get_children()
    file_count = len(children)
    if file_count == 0:
        messagebox.showwarning("Warning", "未添加任何文件")
        return
    progress["value"] = 0
    progress["maximum"] = file_count
    root.update_idletasks()

    for item in children:
        # Read the path, then take the file off the pending list
        file_path = file_list.item(item, "values")[2]
        file_list.delete(item)
        # Tolerate a path that is already gone instead of raising KeyError
        tmp_files_dic.pop(file_path, None)
        file_name = os.path.basename(file_path).rsplit(".", 1)[0]
        # Resolve the folder to report before calling func, so the error row
        # also shows a real folder, matching the output_dir actually used.
        if output_dir is None:
            tmp_output_dir = os.path.dirname(file_path)
        else:
            tmp_output_dir = output_dir
        # Run the operation
        try:
            ret = func(file_path, output_dir, *args)
            if ret == 0:
                emoji = "^_^"
                result = " 成功 "
                info = f"{func_name}成功，输出路径：{tmp_output_dir}"
            elif ret == "skip":
                emoji = "O_o"
                result = " 跳过 "
                info = f"文件已被{func_name}处理，跳过{func_name}操作"
            else:
                emoji = "T_T"
                result = " 失败"
                info = f"{func_name}失败，错误信息：{ret}"
        except Exception as e:
            emoji = "@_@"
            result = " 错误 "
            info = f"{func_name}错误，错误信息：{e}"

        # Log the outcome
        result_list.insert(
            "",
            "end",
            values=(
                emoji,
                file_name,
                tmp_output_dir,
                result,
                info,
            ),
        )
        progress["value"] += 1
        root.update_idletasks()
541 | 
542 | 
# Row of operation buttons. Each lambda reads defalut_output_dir when the
# button is clicked, so the folder chosen at that moment is the one passed on.
op_frame = ttk.Frame(root)
op_frame.pack(padx=10, pady=5)
reformat_btn = ttk.Button(
    op_frame,
    text="格式化",
    command=lambda: start_progress(reformat_run, "格式化", defalut_output_dir),
)
reformat_btn.pack(side=tk.LEFT, padx=5)

decrypt_btn = ttk.Button(
    op_frame,
    text="文件名解密",
    command=lambda: start_progress(decrypt_run, "解密", defalut_output_dir),
)
decrypt_btn.pack(side=tk.LEFT, padx=5)

encrypt_btn = ttk.Button(
    op_frame,
    text="文件名加密",
    command=lambda: start_progress(encrypt_run, "加密", defalut_output_dir),
)
encrypt_btn.pack(side=tk.LEFT, padx=5)
565 | 
566 | 
def run_font_encrypt():
    """Run font encryption on every listed file and log one result row each.

    Each file is removed from the input list before it is processed.
    ``run_epub_font_encrypt`` is called with the file path and the current
    ``defalut_output_dir``; ``0`` counts as success, ``"skip"`` as skipped,
    and any other return value is shown as the failure message. Exceptions
    are caught and logged as failures.
    """
    children = file_list.get_children()
    file_count = len(children)
    if file_count == 0:
        messagebox.showwarning("Warning", "未添加任何文件")
        return
    progress["value"] = 0
    progress["maximum"] = file_count
    root.update_idletasks()
    for item in children:
        # Read the path, then take the file off the pending list
        file_path = file_list.item(item, "values")[2]
        file_list.delete(item)
        # Tolerate a path that is already gone instead of raising KeyError
        tmp_files_dic.pop(file_path, None)
        file_name = os.path.basename(file_path).rsplit(".", 1)[0]
        # Resolve the folder before the call: the except branch below needs it
        # too, and it used to be unbound (or stale from the previous file).
        if defalut_output_dir is None:
            outdir = os.path.dirname(file_path)
        else:
            outdir = defalut_output_dir
        try:
            ret = run_epub_font_encrypt(file_path, defalut_output_dir)
            if ret == 0:
                row = (
                    "^_^",
                    file_name,
                    outdir,
                    "成功",
                    f"字体加密成功，输出路径：{outdir}",
                )
            elif ret == "skip":
                row = (
                    "O_o",
                    file_name,
                    outdir,
                    "跳过",
                    "无字体文件，跳过字体加密操作",
                )
            else:
                row = (
                    "T_T",
                    file_name,
                    outdir,
                    "失败",
                    f"{ret}",
                )
        except Exception as e:
            row = (
                "@_@",
                file_name,
                outdir,
                "失败",
                f"字体加密失败，错误信息：{e}",
            )
        result_list.insert("", "end", values=row)

        progress["value"] += 1
        root.update_idletasks()
639 | 
# "Font encrypt" button; unlike the other operations it calls
# run_font_encrypt directly.
font_encrypt_btn = ttk.Button(
    op_frame,
    text="字体加密",
    command=run_font_encrypt,
)
font_encrypt_btn.pack(side=tk.LEFT, padx=5)

# Frame that hosts the progress bar.
progress_frame = ttk.Frame(root)
progress_frame.pack(fill=tk.X, padx=10, pady=0)

# Determinate progress bar; run_font_encrypt sets "maximum" to the number of
# queued files and bumps "value" once per processed file.
progress = ttk.Progressbar(
    progress_frame,
    orient=tk.HORIZONTAL,
    length=400,
    mode="determinate",
)
progress.pack(fill=tk.X, padx=5, pady=0)

# Frame that hosts the result Treeview and its scrollbar.
result_box_frame = ttk.Frame(root)
result_box_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

# One row per processed file: (emoji, file_name, file_path, result, info).
result_list = ttk.Treeview(
    result_box_frame,
    columns=(
        "emoji",
        "file_name",
        "file_path",
        "result",
        "info",
    ),
    show="headings",
    #   height=10,
)
result_list.heading("emoji", text="状态", anchor="center")
result_list.column("emoji", width=int(min_width * 0.1), anchor="center", stretch=False)
result_list.heading("file_name", text="书名", anchor="center")
result_list.column("file_name", anchor="w", stretch=True)
# "file_path" is kept as row data only (read by open_selected_file_output_dir).
result_list.column("file_path", width=0, stretch=False)
result_list.heading("result", text="执行结果", anchor="center")
result_list.column(
    "result", width=int(min_width * 0.15), anchor="center", stretch=False
)
# "info" is kept as row data only — presumably consumed by the Tooltip helper;
# verify against its definition.
result_list.column("info", width=0, stretch=False)
# Only these three columns are rendered; the other two stay hidden.
result_list["displaycolumns"] = ("emoji", "file_name", "result")
result_list.grid(row=1, column=0, sticky=tk.NSEW)
688 | 
689 | 
def show_context_menu_result(event):
    """Select the result row under the pointer and pop up its context menu.

    Does nothing when the click did not land on a row.
    """
    row_id = result_list.identify_row(event.y)
    if not row_id:
        return
    result_list.selection_set(row_id)
    context_menu_result.post(event.x_root, event.y_root)
695 | 
def _open_with_default_app(target_path):
    """Open ``target_path`` with the platform's default handler.

    Shows a warning dialog when the path does not exist, and an error dialog
    when the platform is unsupported or the launcher raises.
    """
    if not os.path.exists(target_path):
        messagebox.showwarning("Warning", f"文件不存在: {target_path}")
        return
    try:
        if sys.platform.startswith("darwin"):  # macOS
            subprocess.run(["open", target_path])
        elif os.name == "nt":  # Windows
            os.startfile(target_path)
        elif os.name == "posix":  # Linux
            subprocess.run(["xdg-open", target_path])
        else:
            messagebox.showerror("Warning", "不支持的操作系统")
    except Exception as e:
        messagebox.showerror("Warning", f"无法打开路径: {e}")


def open_log_file():
    """Open ``log.txt`` located next to the running script/executable."""
    log_path = os.path.join(
        os.path.dirname(os.path.abspath(sys.argv[0])), "log.txt"
    )
    _open_with_default_app(log_path)


def open_selected_file_output_dir():
    """Open the output location of every row selected in ``result_list``.

    Shows a warning dialog when nothing is selected.
    """
    selected_items = result_list.selection()
    if not selected_items:
        messagebox.showwarning("Warning", "未选中任何文件")
        return
    for item in selected_items:
        # Column index 2 of a result row holds the output directory.
        _open_with_default_app(result_list.item(item, "values")[2])
738 | 
739 | 
# Context menu for rows of the result list.
context_menu_result = tk.Menu(result_list, tearoff=0)
context_menu_result.add_command(
    label="打开输出文件夹", command=open_selected_file_output_dir
)
context_menu_result.add_command(
    label="打开日志文件", command=open_log_file
)

# The secondary mouse button is reported as <Button-2> on macOS and as
# <Button-3> elsewhere.  Previously only Windows and macOS were bound, so the
# menu was unreachable on Linux even though the handlers support it.
if sys.platform.startswith('darwin'):
    result_list.bind("<Button-2>", show_context_menu_result)
else:
    result_list.bind("<Button-3>", show_context_menu_result)
752 | 
753 | 
# Vertical scrollbar that drives the result list's y-view.
v_scrollbar_result = ttk.Scrollbar(
    result_box_frame,
    orient=tk.VERTICAL,
    command=result_list.yview,
    #   width=10
)
v_scrollbar_result.grid(row=1, column=1, sticky=tk.NS)

# Horizontal scrollbar (currently disabled).
# h_scrollbar_result = ttk.Scrollbar(result_box_frame,
#                                   orient=tk.HORIZONTAL,
#                                   command=result_list.xview,

#                                 #   width=15
#                                   )
# h_scrollbar_result.grid(row=2, column=0, sticky=tk.EW)

# Let the result list report its scroll position back to the scrollbar.
result_list.config(yscrollcommand=v_scrollbar_result.set)

# Grid weights: only column 0 (the result list) absorbs extra width.
# result_box_frame.grid_rowconfigure(1, weight=1)
result_box_frame.grid_columnconfigure(0, weight=1)
778 | 
779 | 
def adjust_column_width(event):
    """Re-apply the fixed widths of the non-stretching list columns.

    Bound to ``<Configure>`` on the root window.  The widths are derived from
    ``min_width`` (not from the current window size); the stretchable
    ``file_name`` columns are left alone so they absorb the remaining space.

    Args:
        event: The Tk ``<Configure>`` event (unused).
    """
    file_list.column(
        "index", width=int(min_width * 0.1), anchor="center", stretch=False
    )
    result_list.column(
        "emoji", width=int(min_width * 0.1), anchor="center", stretch=False
    )
    result_list.column(
        "result", width=int(min_width * 0.15), anchor="center", stretch=False
    )
800 | 
801 | 
# Re-apply the fixed column widths whenever a <Configure> event reaches root.
root.bind("<Configure>", adjust_column_width)
# Attach the Tooltip helper to the result list (presumably hover details;
# see the Tooltip class defined earlier in this file).
Tooltip(result_list)
# Enter the Tk event loop; blocks until the window is closed.
root.mainloop()
806 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 XiangyuWang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ## Ⅰ epub_tool介绍<br>
  4 | 
  5 | 
  6 | <div>
  7 | <img src="./img/icon.ico" alt="icon" style="width:10em">
  8 | </div>
  9 | 
 10 | 
 11 | [![GitHub Releases](https://img.shields.io/github/v/release/cnwxi/epub_tool)](https://github.com/cnwxi/epub_tool/releases/latest)  
 12 |  [![GitHub stars](https://img.shields.io/github/stars/cnwxi/epub_tool)](https://github.com/cnwxi/epub_tool/stargazers)
 13 |   [![GitHub forks](https://img.shields.io/github/forks/cnwxi/epub_tool)](https://github.com/cnwxi/epub_tool/network/members)
 14 | 
 15 | Epub Tool->ET->E-Book Thor->📖🔨
 16 | 
 17 | <details>
 18 |   <summary>包含一些可用的epub工具，用于epub文件的重构、解密、加密、字体混淆。</summary>
 19 |   <p>
 20 | 
 21 | 
 22 | 1. `重构epub为规范格式_v2.8.3.py`->`utils\reformat_epub.py`<br>
 23 | 作用：见原文件名。<br>
 24 | 原始的百度贴吧帖子链接：[遥遥心航的帖子](https://jump2.bdimg.com/p/8090221625)。<br>
 25 | 遥遥心航提供的原始文件：[蓝奏云网盘链接](https://wwb.lanzoub.com/b01k016hg) 密码：`i89p`。<br>
 26 | 2. `重构epub并反文件名混淆.py`->`utils\decrypt_epub.py`<br>
 27 | 作用：见原文件名。<br>
 28 | 3. `重构epub并加入文件名混淆.py`->`utils\encrypt_epub.py`<br>
 29 | 作用：见原文件名。<br>
 30 | 4. `Epub_Tool_Console.py`<br>
 31 | 作用：对上述工具（不包括字体混淆）的整合的命令行程序。<br>
 32 | 5. `utils\encrypt_font.py`<br>
 33 | 作用：对epub文件中指定内嵌字体的文字进行字体混淆。[https://github.com/cnwxi/epub_tool/issues/21]<br>
 34 | 6. `Epub_Tool_TKUI.py`<br>
 35 | 作用：对上述工具的整合的带操作界面的程序。<br>
 36 | 
 37 | 注：重构会严格保证文件夹分类和文件名后缀。[https://github.com/cnwxi/epub_tool/issues/13]
 38 |   </p>
 39 | </details>
 40 | 
 41 | ## Ⅱ 怎么使用？（仅针对最新版本）<br>
 42 | 
 43 | <details>
 44 |   <summary>python源码执行</summary>
 45 |   <p>
 46 | 
 47 | 1. 下载python（推荐3.8或更高版本）；<br> 
 48 | 2. 使用`git clone https://github.com/cnwxi/epub_tool.git`克隆本仓库；或直接在网页下载源码压缩包，解压后得到py文件；<br>
3. 准备依赖库，在终端输入`python -m pip install -r requirements.txt`；<br>
4. 终端切换工作路径为解压后文件夹所在路径；<br>
 51 | 5. 执行py文件`python ./***.py`、`python ./utils/***.py`。<br> 
 52 |     <!-- - 单个工具执行：<br> 
 53 |     1. 使用命令行执行 `python 解压目标文件夹/epub_tool/utils/**.py` 。<br>
 54 |     - 整合工具执行：<br> 
 55 |     1. 使用命令行执行 `python 解压目标文件夹/epub_tool/epub_tool.py -i 需要处理的epub文件或者所在文件夹 -e/d/r` 其中e、d、r为不同的处理模式，分别是混淆`-e`、反混淆`-d`、重新格式化`-r`。<br> 
 56 |     2. 也可使用命令行执行 `python 解压目标文件夹/epub_tool/epub_tool.py -i 需要处理的epub文件或者所在文件夹 -m 处理模式`，处理模式为e、d、r。<br>  -->
 57 | 
 58 |   </p>
 59 | 
  >（注：会在对应工作路径生成日志文件`log.txt`，每次执行py文件会覆盖写入该文件，无需担心此文件过分占用存储空间）<br>
 61 | 
 62 | </details>
 63 | 
 64 | <!-- 
 65 | 
 66 | <details> 
 67 |   <summary>命令行程序</summary>
 68 |   <p>
 69 | 
 70 | 1. 从[releases](https://github.com/cnwxi/epub_tool/releases)下载对应的可执行文件；<br>
 71 | 2. Windows可以直接双击可执行文件；<br>
 72 | ![image](https://github.com/user-attachments/assets/53ed7c69-3f59-44fd-9c59-b754ada6c5a8)
 73 | 3. 或使用命令行工具`CMD、Power Shell、Terminal`执行；<br>
 74 | 4. 如提示无权限运行，可在终端输入 `chmod +x /可执行文件所在路径` （macOS：还需进入“设置-安全性与隐私-通用-允许从以下位置下载的APP”点击“仍要打开”）<br>
 75 | 参考如图：<br>
 76 | ![image](https://github.com/user-attachments/assets/18dd97fb-cc39-47d4-b5eb-fb48b01a28cd)
 77 | ![image](https://github.com/user-attachments/assets/e0f7e997-6912-4792-a72d-f415e0525e34)
 78 | 5. 参数列表参考如下：<br>
 79 | \-i  后面接需要处理的epub文件或所在文件夹；<br>
 80 | \-e  无需后接任何参数，指定程序对epub进行混淆处理；<br>
 81 | \-d  无需后接任何参数，指定程序对epub进行反混淆处理；<br>
 82 | \-r  无需后接任何参数，指定程序对epub进行格式化处理。<br>
 83 | \-m  后接指定的处理模式，e、d、r。（可选，效果同上-e、-d、-r）
 84 | 6. 现在输入为文件夹路径时会提醒选择文件执行edr操作或所有文件执行edr操作。<br> 
 85 |  ![image](https://github.com/user-attachments/assets/4c5d6a6e-2e6e-427d-9251-8d9e4c2a3a68) 
 86 | 
 87 | - 举例：<br>
 88 | 在可执行文件所在文件夹打开命令行工具（或打开命令行工具后切换到可执行文件所在文件夹）。<br>
 89 | 可使用的命令行工具如cmd/powershell/terminal等。<br>
 90 | 输入`Windows_epub_tool.exe -i epub文件路径或所在文件夹路径 -d`或`Windows_epub_tool.exe -i epub文件路径或所在文件夹路径 -m d`
 91 | 并回车（注意不同平台可执行文件名不一致）。<br>
 92 | 此命令行指定程序读取指定目录下所有epub文件，并对这些文件进行反混淆。<br>
 93 | 
 94 |   </p>
 95 | </details>
 96 | 
 97 | <details>
 98 |   <summary>Windows系统CMD命令行操作演示</summary>
 99 |   <p>
100 |     
101 | 1. 可执行文件已下载至C:\Users\Administrator\Downloads\Programs位置，打开文件管理器，进入对应目录。如图：<br>
102 | <img src="https://github.com/user-attachments/assets/0cd71e92-714b-4f44-8060-ad5d353ebb7a" width="600"><br>
103 | 2. 在最上方地址输入框输入cmd并回车，则可以直接在此目录下打开cmd。如图：<br>
104 | <img src="https://github.com/user-attachments/assets/2f23826d-480a-4526-9dbe-f3fb06f5fa35" width="600"><br>
105 | <img src="https://github.com/user-attachments/assets/8def1166-f7f6-4738-bed8-0b3057e1d81b" width="600"><br>
106 | 3. 输入 Windows_epub_tool.exe -i epub文件路径或所在文件夹路径 -d （注：此为演示命令行，具体的输入文件/文件夹和执行模式需要你自行指定）<br>
107 | 或 Windows_epub_tool.exe -i epub文件路径或所在文件夹路径 -m d 。如图：<br>
108 | <img src="https://github.com/user-attachments/assets/0e1c703f-1c78-4242-9dce-480219805005" width="600"><br>
109 |   
110 |   </p>
111 | </details> 
112 | 
113 | -->
114 | 
115 | <details>
116 |   <summary>可视化界面程序（推荐）</summary>
117 |   <p>
118 | 
119 |     
120 | >（注：同样会在可执行程序所在路径生成日志文件`log.txt`，每次启动程序会覆盖写入该文件，无需担心此文件过分占用存储空间，mac文件写入位置为`/Applications/Epub_Tool_TKUI.app/Contents/MacOS/log.txt`，win文件写入位置为`Epub_Tool_TKUI.exe所在目录`）<br>
121 | 
122 | 
123 | > （Mac安装后运行若提示无法验证安全性，请参考[Apple 无法检查 App 是否包含恶意软件（来自Apple官网Mac使用手册）](https://support.apple.com/zh-cn/guide/mac-help/mchleab3a043/mac)，进入系统设置-隐私与安全性-安全性-点击“仍要打开”；Windows若报告病毒文件请忽略警告，允许文件保留本地。）
124 | 
125 | ![操作演示](./img/how_to_use.gif)
126 | 
127 | <!-- - UI预览，具体界面可能随后续更新改动<br>
128 | 
129 |   - mac<br> 
130 | <img width="300" alt="mac" src="https://github.com/user-attachments/assets/dd3ba06c-5fb7-4439-88d6-4ff67ed1f0db" /><br> 
131 | 
132 |   - windows<br> 
133 | <img width="260" alt="windows" src="https://github.com/user-attachments/assets/99acedf7-2f41-44bb-9059-6de9d36dd1d0" /><br>  -->
134 | 
135 |   </p>
136 | </details>
137 | 
138 | ## Ⅲ 执行遇到错误？
139 | 
140 | <details>
141 |   <summary>epub无法正常规范/混淆/反混淆</summary><br>
142 |   <p>
143 |     1、优先解压文件，查看其中content.opf文件，检查是否存在问题。若无法解决，在Issues区提交issue并附带原文件。[https://github.com/cnwxi/epub_tool/issues/8 https://github.com/cnwxi/epub_tool/issues/10]
144 |   </p>
145 |   <p>
146 |     2、若下载文件名带“精品”二字，且解压后文件夹内包含“/META-INF/encryption.xml”，检查此文件内是否有“ZhangYue.Inc”字样。若满足则此文件为掌阅加密书籍，为规避版权问题，此处不提供解密程序，请使用「掌阅」打开阅读。[https://github.com/cnwxi/epub_tool/issues/19]
147 |   </p>
148 | </details>
149 | 
150 | <details>
151 |   <summary>epub字体混淆出现异常</summary><br>
152 |   <p>
153 |     1、字体混淆根据标签名称的字典逆序进行处理，如存在如下标签时：&lt;h2&gt;、&lt;p&gt;、&lt;p class=&quot;p1&quot;&gt;、&lt;span&gt;、&lt;span class=&quot;s1&quot;&gt;，会按照span.s1、span、p.p1、p、h2的顺序进行字体混淆，并以此类推，规划样式标签命名，来保证嵌套标签中的文字能够正常混淆，当然最好避免过分复杂的标签嵌套。<br>
154 |   </p>
155 | </details>
156 | 
157 | ## Ⅳ 更新日志<br>
158 | <details>
159 |   <summary>点击以展开</summary>
160 |   <p>
161 | 
162 | ### 2025.04.27<br>
163 | 界面更新，使用ttk控件替换tk以实现跨平台统一；功能更新，增加右键点击快速打开所在/输出文件夹、删除已添加文件、查看日志文件等功能，添加鼠标悬停显示更多信息功能，添加字体加密功能。<br>
164 | 使用 `20230418《ePub指南——从入门到放弃》编著：赤霓（第2版）`进行字体加密测试，执行过程无报错，具体查看内容有部分字符因复杂样式导致被意外混淆。 [下载链接](https://wwxq.lanzov.com/b0nz4q13i) 密码:8vfp<br>
165 | ### 2025.04.23<br>
166 | 移除命令行程序编译；移除mac编译-F参数；添加icon.icns图标适配macOS，优化显示效果；移除Ubuntu（Linux系统）编译。<br>
167 | ### 2025.03.20<br>
168 | 修复失效的自定义输出路径。<br>
169 | ### 2025.03.01<br>
170 | 图标打包进可执行文件。<br>
171 | ### 2025.02.20<br>
172 | 更新图标。<br>
173 | ### 2024.12.25<br>
174 | 修复在mac上的日志文件写入位置，更改日志写入方式，清理原始脚本中重复的无效循环。<br>
175 | ### 2024.12.24<br>
176 | Update build.yml。https://github.com/cnwxi/epub_tool/pull/17<br>
177 | ### 2024.12.23<br>
178 | 调整UI、取消push自动构建。<br>
179 | ### 2024.12.17<br>
修复UI显示问题，分支整合。<br>
181 | ### 2024.12.16<br>
182 | 创建新分支TKUI，实现基本UI DEMO，功能已整合。<br>
183 | ### 2024.11.17<br>
184 | 添加文件夹手动选择需要处理文件，输入文件序号进行选择，不再是默认处理文件夹内全部epub文件，添加了输入检测提示，错误后会返回重新输入。https://github.com/cnwxi/epub_tool/pull/15<br>
185 | ### 2024.10.24<br>
186 | 修复未处理输入时拖入文件带引号导致的文件路径检查错误。<br>
187 | ### 2024.09.09<br>
188 | 因额外依赖库未打包到可执行文件，重新打包可执行文件。<br>
189 | 更新相关使用教程。<br>
190 | ### 2024.09.08<br>
191 | 为避免有人不会使用命令行工具，更新Windows系统下相关操作的基础流程。<br>
192 | 程序允许直接双击执行，后续再输入参数。<br>
193 | 对应操作忽略固定后缀跳过文件处理。_encrypt、_decrypt、_reformat<br>
194 | ### 2024.08.29<br>
195 | 修复混淆ID导致的反混淆不完全。<br>
196 | 修复存在异常opf时程序闪退问题。<br>
197 | 更新日志记录。<br>
198 | ### 2024.08.28<br>
199 | 整合代码，使用命令行批量处理epub文件。<br>
200 | 支持输入单个epub文件或epub文件所在文件夹，支持子目录遍历。<br>
201 | 修改输出路径，现为原epub文件同级路径，通过添加不同后缀`encrypt\decrypt\reformat`区分原文件和处理后文件。<br>
202 | ### 2024.06.19<br>
203 | 代码更新，使用相似度计算覆盖opf文件中未混淆的其他文件名情况。<br>
204 | ### 2024.06.13<br>
205 | 更新yml文件。https://github.com/cnwxi/epub_tool/pull/9<br>
206 | ### 2024.06.12<br>
207 | 针对cover页面未混淆的情况做更改。<br>
208 | 修改自动发布逻辑，修改py文件不触发CI，仅修改yml后触发。修改yml，无需手动执行才执行发布。<br>
209 | ### 2024.06.08<br>
210 | CI配置文件更新。https://github.com/cnwxi/epub_tool/pull/6 https://github.com/cnwxi/epub_tool/pull/7<br>
211 | ### 2024.06.07<br>
212 | 修改主函数逻辑，防止epub文件不存在导致的程序崩溃。https://github.com/cnwxi/epub_tool/pull/4<br>
213 | 加入CI自动构建。https://github.com/cnwxi/epub_tool/pull/5<br>
214 | 加入CI自动发布。<br>
215 | ### 2024.05.28<br>
216 | 修正`重构epub为规范格式_v2.8.3.py`中生成的content.opf文件内容格式。https://github.com/cnwxi/epub_tool/pull/3<br>
217 | ### 2024.05.16<br>
218 | 更改文件输出路径。https://github.com/cnwxi/epub_tool/pull/2<br>
219 | ### 2024.05.09<br>
220 | 针对多看~slim文件进行修改，处理html中使用`../`、`./`、`/`开头的链接。<br>
221 | ### 2024.04.23<br>
222 | 初始化仓库。<br>
223 | 
224 |   </p>
225 | </details>
226 | 
227 | ## Ⅴ 鸣谢<br>
228 | 感谢以下用户对此项目的贡献：
229 | - [遥遥心航](https://tieba.baidu.com/home/main?id=tb.1.7f262ae1.5_dXQ2Jp0F0MH9YJtgM2Ew)
230 | - [lgernier](https://github.com/lgernierO)<br>
231 | 


--------------------------------------------------------------------------------
/build_tool/pyinstallerForMac.sh:
--------------------------------------------------------------------------------
 1 | # for mac
 2 | pyinstaller --noconfirm -w -i ./img/icon.icns Epub_Tool_TKUI.py -n Epub_Tool_TKUI;
 3 | cd ./dist
 4 | 
 5 | # hdiutil create -volname Epub_Tool_TKUI -srcfolder ./Epub_Tool_TKUI.app -ov -format UDZO ./Epub_Tool_TKUI.dmg;
 6 | 
 7 | create-dmg \
 8 |     --volname "Epub_Tool_TKUI" \
 9 |     --window-pos 400 200 \
10 |     --window-size 660 400 \
11 |     --icon-size 100 \
12 |     --icon "Epub_Tool_TKUI.app" 160 185 \
13 |     --hide-extension "Epub_Tool_TKUI.app"\
14 |     --app-drop-link 500 185 \
15 |     --no-internet-enable \
16 |     "./Epub_Tool_TKUI.dmg" \
17 |     "./Epub_Tool_TKUI.app";
18 | 
19 | # zip -r Epub_Tool_TKUI.zip ./Epub_Tool_TKUI.app;
20 | rm -rf ./Epub_Tool_TKUI.app;
21 | rm -rf ./Epub_Tool_TKUI;
22 | echo "------finished------";
23 | cd ..
24 | rm -rf ./build;
25 | rm ./Epub_Tool_TKUI.spec;


--------------------------------------------------------------------------------
/build_tool/pyinstallerForWindows.txt:
--------------------------------------------------------------------------------
# for windows
# Build a single-file, windowed executable, then clean up build artefacts.
pyinstaller -F -w -i ./img/icon.ico Epub_Tool_TKUI.py -n Epub_Tool_TKUI;
# ./build is a directory, so it must be removed recursively
# (-r is accepted both by PowerShell's Remove-Item alias and by POSIX rm).
rm -r ./build;
rm ./Epub_Tool_TKUI.spec;


--------------------------------------------------------------------------------
/img/how_to_use.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/how_to_use.gif


--------------------------------------------------------------------------------
/img/icon.icns:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/icon.icns


--------------------------------------------------------------------------------
/img/icon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/icon.ico


--------------------------------------------------------------------------------
/img/icon.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/icon.jpeg


--------------------------------------------------------------------------------
/img/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/icon.png


--------------------------------------------------------------------------------
/img/icon_backup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/icon_backup.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4
2 | emoji
3 | fonttools
4 | tinycss2
5 | tqdm
6 | pillow


--------------------------------------------------------------------------------
/utils/decrypt_epub.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # !/usr/bin/env python
  3 | # 源码: sigil吧ID: 遥遥心航
  4 | # 二改: cnwxi
  5 | # 额外感谢: 故里
  6 | 
  7 | import zipfile
  8 | import re, sys
  9 | from os import path, mkdir, getcwd
 10 | from urllib.parse import unquote
 11 | from xml.etree import ElementTree
 12 | import copy
 13 | import os
 14 | import difflib
 15 | import hashlib
 16 | try:
 17 |     from utils.log import logwriter
 18 | except:
 19 |     from log import logwriter
 20 | 
 21 | logger = logwriter()
 22 | 
 23 | 
 24 | class EpubTool:
 25 | 
    def __init__(self, epub_src):
        """Open the EPUB at ``epub_src`` and parse its OPF package document.

        Args:
            epub_src: Path of the EPUB file; passed to ``zipfile.ZipFile``.

        Raises:
            RuntimeError: If no OPF file can be found, or the package
                ``version`` is neither "2.0" nor "3.0".
        """
        self.encrypted = False  # set by _parse_opf when an href contains illegal characters
        self.epub = zipfile.ZipFile(epub_src)
        self.epub_src = epub_src
        self.epub_name = path.basename(epub_src)
        self.ebook_root = path.dirname(epub_src)
        # Output defaults to the source file's directory; see set_output_path.
        self.output_path = self.ebook_root
        self.epub_type = ""
        self.temp_dir = ""
        # Order matters: _init_opf reads self.namelist for its fallback search.
        self._init_namelist()
        self._init_mime_map()
        self._init_opf()
        # The containers below must exist before _parse_opf(), which fills them.
        self.manifest_list = []  # (id,opf_href,mime,properties)
        self.toc_rn = {}  # { old xhtml href : new href }
        self.id_to_href = {}  # { id : href.lower, ... }
        self.href_to_id = {}  # { href.lower : id, ...}
        self.text_list = []  # (id,opf_href,properties,new_href)
        self.css_list = []  # (id,opf_href,properties,new_href)
        self.image_list = []  # (id,opf_href,properties,new_href)
        self.font_list = []  # (id,opf_href,properties,new_href)
        self.audio_list = []  # (id,opf_href,properties,new_href)
        self.video_list = []  # (id,opf_href,properties,new_href)
        self.spine_list = []  # (sid, linear, properties)
        self.other_list = []  # (id,opf_href,mime,properties,new_href)
        self.errorOPF_log = []  # (error_type,error_value)
        self.errorLink_log = {}  # {filepath:[(error_link,correct_link || None),...]}
        self._parse_opf()
 53 | 
 54 |     def set_output_path(self, output_path):
 55 |         if output_path is not None and os.path.isdir(output_path):
 56 |             self.output_path = output_path
 57 | 
 58 |     def _init_namelist(self):
 59 |         self.namelist = self.epub.namelist()
 60 | 
 61 |     def _init_mime_map(self):
 62 |         self.mime_map = {
 63 |             ".html": "application/xhtml+xml",
 64 |             ".xhtml": "application/xhtml+xml",
 65 |             ".css": "text/css",
 66 |             ".js": "application/javascript",
 67 |             ".jpg": "image/jpeg",
 68 |             ".jpeg": "image/jpeg",
 69 |             ".bmp": "image/bmp",
 70 |             ".png": "image/png",
 71 |             ".gif": "image/gif",
 72 |             ".webp": "image/webp",
 73 |             ".ttf": "font/ttf",
 74 |             ".otf": "font/otf",
 75 |             ".woff": "font/woff",
 76 |             ".ncx": "application/x-dtbncx+xml",
 77 |             ".mp3": "audio/mpeg",
 78 |             ".mp4": "video/mp4",
 79 |             ".smil": "application/smil+xml",
 80 |             ".pls": "application/pls+xml",
 81 |         }
 82 | 
 83 |     def _init_opf(self):
 84 |         # 通过 container.xml 读取 opf 文件
 85 |         container_xml = self.epub.read("META-INF/container.xml").decode("utf-8")
 86 |         rf = re.match(r'<rootfile[^>]*full-path="(?i:(.*?\.opf))"', container_xml)
 87 |         if rf is not None:
 88 |             self.opfpath = rf.group(1)
 89 |             self.opf = self.epub.read(self.opfpath).decode("utf-8")
 90 |             return
 91 |         # 通过路径首个 opf 读取 opf 文件
 92 |         for bkpath in self.namelist:
 93 |             if bkpath.lower().endswith(".opf"):
 94 |                 self.opfpath = bkpath
 95 |                 self.opf = self.epub.read(self.opfpath).decode("utf-8")
 96 |                 return
 97 |         raise RuntimeError("无法发现opf文件")
 98 | 
 99 |     def _parse_opf(self):
100 |         self.etree_opf = {"package": ElementTree.fromstring(self.opf)}
101 | 
102 |         for child in self.etree_opf["package"]:
103 |             tag = re.sub(r"\{.*?\}", r"", child.tag)
104 |             self.etree_opf[tag] = child
105 |         self._parse_metadata()
106 |         self._parse_manifest()
107 |         self._parse_spine()
108 |         self._clear_duplicate_id_href()
109 |         self._parse_hrefs_not_in_epub()
110 |         self._add_files_not_in_opf()
111 | 
112 |         self.manifest_list = []  # (id,opf_href,mime,properties)
113 |         for id in self.id_to_h_m_p:
114 |             href, mime, properties = self.id_to_h_m_p[id]
115 |             self.manifest_list.append((id, href, mime, properties))
116 | 
117 |         epub_type = self.etree_opf["package"].get("version")
118 | 
119 |         if epub_type is not None and epub_type in ["2.0", "3.0"]:
120 |             self.epub_type = epub_type
121 |         else:
122 |             raise RuntimeError("此脚本不支持该EPUB类型")
123 | 
124 |         # 寻找epub2 toc 文件的id。epub3的nav文件直接当做xhtml处理。
125 |         self.tocpath = ""
126 |         self.tocid = ""
127 |         tocid = self.etree_opf["spine"].get("toc")
128 |         self.tocid = tocid if tocid is not None else ""
129 | 
130 |         # opf item分类
131 |         opf_dir = path.dirname(self.opfpath)
132 | 
133 |         # 生成新的href
134 |         ############################################################
135 |         def creatNewHerf(_id, _href):
136 |             file_parts = _href.rsplit(".", 1)
137 |             if len(_id.split(".")) == 1:
138 |                 _id_name = copy.deepcopy(_id)
139 |                 if _id.rsplit(".", 1)[-1].lower().endswith("slim"):
140 |                     image_silm = "~slim"
141 |                     # 如果_id_name中有slim，去掉
142 |                     _id_name = (
143 |                         _id_name.lower()
144 |                         .rstrip("~slim")
145 |                         .rstrip("-slim")
146 |                         .rstrip("_slim")
147 |                         .rstrip("slim")
148 |                     )
149 |                 else:
150 |                     image_silm = ""
151 |                 new_href = f"{_id_name}{image_silm}.{file_parts[-1].lower()}"
152 |             else:
153 |                 _id_name, _id_extension = _id.rsplit(".", 1)
154 |                 if _id_extension.lower() != file_parts[-1].lower():
155 |                     _id_extension = file_parts[-1]
156 |                 # 如果id或者href中有slim，则为多看处理~slim
157 |                 if _href.rsplit(".", 1)[-1].lower().endswith("slim") or _id_name.rsplit(
158 |                     ".", 1
159 |                 )[-1].lower().endswith("slim"):
160 |                     image_silm = "~slim"
161 |                     # 如果id中有slim，去掉
162 |                     _id_name = (
163 |                         _id_name.lower()
164 |                         .rstrip("~slim")
165 |                         .rstrip("-slim")
166 |                         .rstrip("_slim")
167 |                         .rstrip("slim")
168 |                     )
169 |                 else:
170 |                     image_silm = ""
171 |                 # 判断_id_name是否合法
172 |                 if re.search(r'[\\/:*?"<>|]', _id_name):
173 |                     logger.write(f"ID: {_id} 中包含非法字符")
174 |                     _id_name = hashlib.md5(_id_name.encode()).hexdigest()
175 |                     logger.write(f"ID: {_id} 替换为 {_id_name}")
176 |                 new_href = f"{_id_name}{image_silm}.{_id_extension.lower()}"
177 |             logger.write(f"decrypt href: {_id}:{_href} -> {new_href}")
178 |             return new_href
179 | 
180 |         ############################################################
181 |         for id, href, mime, properties in self.manifest_list:
182 |             bkpath = opf_dir + "/" + href if opf_dir else href
183 |             # 判断herf是否包含特殊字符
184 |             if re.search(r'[\\/:*?"<>|]', href.rsplit("/")[-1]):
185 |                 self.encrypted = True
186 |             if mime == "application/xhtml+xml":
187 |                 new_href = creatNewHerf(id, href)
188 |                 self.text_list.append((id, href, properties, new_href))
189 |                 self.toc_rn[href] = new_href
190 |             elif mime == "text/css":
191 |                 self.css_list.append((id, href, properties, creatNewHerf(id, href)))
192 |             elif "image/" in mime:
193 |                 self.image_list.append((id, href, properties, creatNewHerf(id, href)))
194 |             elif "font/" in mime or href.lower().endswith((".ttf", ".otf", ".woff")):
195 |                 self.font_list.append((id, href, properties, creatNewHerf(id, href)))
196 |             elif "audio/" in mime:
197 |                 self.audio_list.append((id, href, properties, creatNewHerf(id, href)))
198 |             elif "video/" in mime:
199 |                 self.video_list.append((id, href, properties, creatNewHerf(id, href)))
200 |             elif self.tocid != "" and id == self.tocid:
201 |                 opf_dir = path.dirname(self.opfpath)
202 |                 self.tocpath = opf_dir + "/" + href if opf_dir else href
203 |             else:
204 |                 self.other_list.append(
205 |                     (id, href, mime, properties, creatNewHerf(id, href))
206 |                 )
207 | 
208 |         self._check_manifest_and_spine()
209 | 
210 |     def _parse_metadata(self):
211 |         self.metadata = {}
212 |         for key in [
213 |             "title",
214 |             "creator",
215 |             "language",
216 |             "subject",
217 |             "source",
218 |             "identifier",
219 |             "cover",
220 |         ]:
221 |             self.metadata[key] = ""
222 |         for meta in self.etree_opf["metadata"]:
223 |             tag = re.sub(r"\{.*?\}", r"", meta.tag)
224 |             if tag in [
225 |                 "title",
226 |                 "creator",
227 |                 "language",
228 |                 "subject",
229 |                 "source",
230 |                 "identifier",
231 |             ]:
232 |                 self.metadata[tag] = meta.text
233 |             elif tag == "meta":
234 |                 if meta.get("name") and meta.get("content"):
235 |                     self.metadata["cover"] = meta.get("content")
236 | 
237 |     def _parse_manifest(self):
238 |         self.id_to_h_m_p = {}  # { id : (href,mime,properties) , ... }
239 |         self.id_to_href = {}  # { id : href.lower, ... }
240 |         self.href_to_id = {}  # { href.lower : id, ...}
241 |         if_error = False
242 |         for item in self.etree_opf["manifest"]:
243 |             # 检查opf文件中是否存在错误
244 |             try:
245 |                 id = item.get("id")
246 |                 href = unquote(item.get("href"))
247 |             except Exception as e:
248 |                 str_item = (
249 |                     ElementTree.tostring(item, encoding="unicode")
250 |                     .replace("\n", "")
251 |                     .replace("\r", "")
252 |                     .replace("\t", "")
253 |                 )
254 |                 logger.write(f"item: {str_item} error: {e}")
255 |                 if_error = True
256 |                 continue
257 |             mime = item.get("media-type")
258 |             properties = item.get("properties") if item.get("properties") else ""
259 | 
260 |             self.id_to_h_m_p[id] = (href, mime, properties)
261 |             self.id_to_href[id] = href.lower()
262 |             self.href_to_id[href.lower()] = id
263 |         if if_error:
264 |             logger.write("opf文件中存在错误，请检查！")
265 | 
266 |     def _parse_spine(self):
267 |         self.spine_list = []  # [ (sid, linear, properties) , ... ]
268 |         for itemref in self.etree_opf["spine"]:
269 |             sid = itemref.get("idref")
270 |             linear = itemref.get("linear") if itemref.get("linear") else ""
271 |             properties = itemref.get("properties") if itemref.get("properties") else ""
272 |             self.spine_list.append((sid, linear, properties))
273 | 
274 |     def _clear_duplicate_id_href(self):
275 | 
276 |         # id_used = [ id_in_spine + cover_id ]
277 |         id_used = [x[0] for x in self.spine_list]
278 |         if self.metadata["cover"]:
279 |             id_used.append(self.metadata["cover"])
280 | 
281 |         del_id = []
282 |         for id, href in self.id_to_href.items():
283 |             if self.href_to_id[href] != id:  # 该href拥有多个id,此id已被覆盖。
284 |                 if id in id_used and self.href_to_id[href] not in id_used:
285 |                     if id not in del_id:
286 |                         del_id.append(self.href_to_id[href])
287 |                     self.href_to_id[href] = id
288 |                 elif id in id_used and self.href_to_id[href] in id_used:
289 |                     continue
290 |                 else:
291 |                     if id not in del_id:
292 |                         del_id.append(id)
293 | 
294 |         for id in del_id:
295 |             self.errorOPF_log.append(("duplicate_id", id))
296 |             del self.id_to_href[id]
297 |             del self.id_to_h_m_p[id]
298 | 
299 |     def _add_files_not_in_opf(self):
300 | 
301 |         hrefs_not_in_opf = []
302 |         for archive_path in self.namelist:
303 |             if archive_path.lower().endswith(
304 |                 (
305 |                     ".html",
306 |                     ".xhtml",
307 |                     ".css",
308 |                     ".jpg",
309 |                     ".jpeg",
310 |                     ".bmp",
311 |                     ".gif",
312 |                     ".png",
313 |                     ".webp",
314 |                     ".svg",
315 |                     ".ttf",
316 |                     ".otf",
317 |                     ".js",
318 |                     ".mp3",
319 |                     ".mp4",
320 |                     ".smil",
321 |                 )
322 |             ):
323 |                 opf_href = get_relpath(self.opfpath, archive_path)
324 |                 if opf_href.lower() not in self.href_to_id.keys():
325 |                     hrefs_not_in_opf.append(opf_href)
326 | 
327 |         def allocate_id(href):  # 自动分配不重复id
328 |             basename = path.basename(href)
329 |             if "A" <= basename[0] <= "Z" or "a" <= basename[0] <= "z":
330 |                 new_id = basename
331 |             else:
332 |                 new_id = "x" + basename
333 |             pre, suf = path.splitext(new_id)
334 |             pre_ = pre
335 |             i = 0
336 |             while pre_ + suf in self.id_to_href.keys():
337 |                 i += 1
338 |                 pre_ = pre + "_" + str(i)
339 |             new_id = pre_ + suf
340 |             return new_id
341 | 
342 |         for href in hrefs_not_in_opf:
343 |             new_id = allocate_id("newsrc")
344 |             self.id_to_href[new_id] = href.lower()
345 |             self.href_to_id[href.lower()] = new_id
346 |             ext = path.splitext(href)[1]
347 |             ext = ext.lower()
348 |             try:
349 |                 mime = self.mime_map[ext]
350 |             except KeyError:
351 |                 mime = "text/plain"
352 |             self.id_to_h_m_p[new_id] = (href, mime, "")
353 | 
354 |     def _check_manifest_and_spine(self):
355 |         spine_idrefs = [i for i, j, k in self.spine_list]
356 | 
357 |         for idref in spine_idrefs:
358 |             if not self.id_to_h_m_p.get(idref):  # spine 引用无效ID
359 |                 self.errorOPF_log.append(("invalid_idref", idref))
360 | 
361 |         for mid, opf_href, mime, properties in self.manifest_list:
362 |             if mime == "application/xhtml+xml":
363 |                 if mid not in spine_idrefs:
364 |                     self.errorOPF_log.append(("xhtml_not_in_spine", mid))
365 | 
366 |     def _parse_hrefs_not_in_epub(self):
367 |         del_id = []
368 |         namelist = [x.lower() for x in self.epub.namelist()]
369 |         for id, href in self.id_to_href.items():
370 |             bkpath = get_bookpath(href, self.opfpath)
371 |             if bkpath.lower() not in namelist:
372 |                 del_id.append(id)
373 |                 del self.href_to_id[href]
374 |         for id in del_id:
375 |             del self.id_to_href[id]
376 |             del self.id_to_h_m_p[id]
377 | 
378 |     def create_tgt_epub(self):
379 |         output_path = self.output_path
380 |         logger.write(f"输出路径: {output_path}")
381 |         return zipfile.ZipFile(
382 |             path.join(output_path, self.epub_name.replace(".epub", "_decrypt.epub")),
383 |             "w",
384 |             zipfile.ZIP_STORED,
385 |         )
386 | 
387 |     # 重构
388 |     def restructure(self):
389 |         self.tgt_epub = self.create_tgt_epub()
390 |         # mimetype
391 |         mimetype = self.epub.read("mimetype")
392 |         self.tgt_epub.writestr("mimetype", mimetype, zipfile.ZIP_DEFLATED)
393 |         # META-INF
394 |         metainf_data = self.epub.read("META-INF/container.xml").decode("utf-8")
395 |         metainf_data = re.sub(
396 |             r'<rootfile[^>]*media-type="application/oebps-[^>]*/>',
397 |             r'<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>',
398 |             metainf_data,
399 |         )
400 |         self.tgt_epub.writestr(
401 |             "META-INF/container.xml",
402 |             bytes(metainf_data, encoding="utf-8"),
403 |             zipfile.ZIP_DEFLATED,
404 |         )
405 |         # OEBPS
406 |         re_path_map = {
407 |             "text": {},
408 |             "css": {},
409 |             "image": {},
410 |             "font": {},
411 |             "audio": {},
412 |             "video": {},
413 |             "other": {},
414 |         }  # { ori_bkpath : re_basename }
415 |         basename_log = {
416 |             "text": [],
417 |             "css": [],
418 |             "image": [],
419 |             "font": [],
420 |             "audio": [],
421 |             "video": [],
422 |             "other": [],
423 |         }
424 |         lowerPath_to_originPath = (
425 |             {}
426 |         )  # 如果路径大小写不一致，则登记为 { 小写路径 : 原始路径 }
427 | 
428 |         def auto_rename(id, href, ftype):
429 |             filename, ext = path.splitext(path.basename(href))
430 |             filename_ = filename
431 |             num = 0
432 |             while filename_ + ext in basename_log[ftype]:
433 |                 num += 1
434 |                 filename_ = filename + "_" + str(num)
435 |             basename = filename_ + ext
436 |             basename_log[ftype].append(basename)
437 |             return basename
438 | 
439 |         def check_link(filename, bkpath, href, self, target_id=""):
440 |             if href == "" or href.startswith(
441 |                 ("http://", "https://", "res:/", "file:/", "data:")
442 |             ):
443 |                 return None
444 |             if bkpath.lower() in lowerPath_to_originPath.keys():
445 |                 if bkpath != lowerPath_to_originPath[bkpath.lower()]:  # 大小写不一致
446 |                     correct_path = lowerPath_to_originPath[bkpath.lower()]
447 |                     self.errorLink_log.setdefault(filename, [])
448 |                     self.errorLink_log[filename].append(
449 |                         (href + target_id, correct_path)
450 |                     )
451 |                     bkpath = correct_path
452 |             else:  # 链接路径找不到对应文件
453 |                 self.errorLink_log.setdefault(filename, [])
454 |                 self.errorLink_log[filename].append((href + target_id, None))
455 |                 return None
456 |             return bkpath
457 | 
458 |         # xhtml文件，关联 toc文件，一切 xhtml中的<a>元素
459 |         for id, href, properties, newhref in self.text_list:
460 |             bkpath = get_bookpath(href, self.opfpath)
461 |             basename = auto_rename(id, newhref, "text")
462 |             re_path_map["text"][bkpath] = basename
463 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
464 | 
465 |         # css 文件，关联 xhtml文件的link，css文件中的@import
466 |         for id, href, properties, newhref in self.css_list:
467 |             bkpath = get_bookpath(href, self.opfpath)
468 |             basename = auto_rename(id, newhref, "css")
469 |             re_path_map["css"][bkpath] = basename
470 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
471 | 
472 |         # 图片，关联css中的url，xhtml文件中的href
473 |         for id, href, properties, newhref in self.image_list:
474 |             bkpath = get_bookpath(href, self.opfpath)
475 |             basename = auto_rename(id, newhref, "image")
476 |             re_path_map["image"][bkpath] = basename
477 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
478 |         # 字体，关联css中的url
479 |         for id, href, properties, newhref in self.font_list:
480 |             bkpath = get_bookpath(href, self.opfpath)
481 |             basename = auto_rename(id, newhref, "font")
482 |             re_path_map["font"][bkpath] = basename
483 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
484 | 
485 |         # 音频
486 |         for id, href, properties, newhref in self.audio_list:
487 |             bkpath = get_bookpath(href, self.opfpath)
488 |             basename = auto_rename(id, newhref, "audio")
489 |             re_path_map["audio"][bkpath] = basename
490 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
491 | 
492 |         # 视频
493 |         for id, href, properties, newhref in self.video_list:
494 |             bkpath = get_bookpath(href, self.opfpath)
495 |             basename = auto_rename(id, newhref, "video")
496 |             re_path_map["video"][bkpath] = basename
497 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
498 | 
499 |         # 其他文件
500 |         for id, href, mime, properties, newhref in self.other_list:
501 |             bkpath = get_bookpath(href, self.opfpath)
502 |             basename = auto_rename(id, newhref, "other")
503 |             re_path_map["other"][bkpath] = basename
504 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
505 | 
506 |         # xhtml文件
507 |         for xhtml_bkpath, new_name in re_path_map["text"].items():
508 |             text = self.epub.read(xhtml_bkpath).decode("utf-8")
509 |             if not text.startswith("<?xml"):
510 |                 text = '<?xml version="1.0" encoding="utf-8"?>\n' + text
511 |             if not re.match(r"(?s).*<!DOCTYPE html", text):
512 |                 text = re.sub(
513 |                     r"(<\?xml.*?>)\n*",
514 |                     r'\1\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"\n  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
515 |                     text,
516 |                     1,
517 |                 )
518 | 
519 |             # 修改a[href]
520 | 
521 |             def re_href(match):
522 |                 href = match.group(3)
523 |                 href = unquote(href).strip()
524 |                 if "#" in href:
525 |                     href, target_id = href.split("#")
526 |                     target_id = "#" + target_id
527 |                 else:
528 |                     target_id = ""
529 | 
530 |                 bkpath = get_bookpath(href, xhtml_bkpath)
531 |                 bkpath = check_link(xhtml_bkpath, bkpath, href, self, target_id)
532 |                 if not bkpath:
533 |                     return match.group()
534 | 
535 |                 if href.lower().endswith(
536 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp")
537 |                 ):
538 |                     filename = re_path_map["image"][bkpath]
539 |                     return match.group(1) + "../Images/" + filename + match.group(4)
540 |                 elif href.lower().endswith(".css"):
541 |                     filename = re_path_map["css"][bkpath]
542 |                     return (
543 |                         '<link href="../Styles/'
544 |                         + filename
545 |                         + '" type="text/css" rel="stylesheet"/>'
546 |                     )
547 |                 elif href.lower().endswith((".xhtml", ".html")):
548 |                     filename = re_path_map["text"][bkpath]
549 |                     return match.group(1) + filename + target_id + match.group(4)
550 |                 else:
551 |                     return match.group()
552 | 
553 |             text = re.sub(r"(<[^>]*href=([\'\"]))(.*?)(\2[^>]*>)", re_href, text)
554 | 
555 |             # 修改src
556 |             def re_src(match):
557 |                 href = match.group(3)
558 |                 href = unquote(href).strip()
559 |                 bkpath = get_bookpath(href, xhtml_bkpath)
560 |                 bkpath = check_link(xhtml_bkpath, bkpath, href, self)
561 |                 if not bkpath:
562 |                     return match.group()
563 | 
564 |                 if href.lower().endswith(
565 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg")
566 |                 ):
567 |                     filename = re_path_map["image"][bkpath]
568 |                     return match.group(1) + "../Images/" + filename + match.group(4)
569 |                 elif href.lower().endswith(".mp3"):
570 |                     filename = re_path_map["audio"][bkpath]
571 |                     return match.group(1) + "../Audio/" + filename + match.group(4)
572 |                 elif href.lower().endswith(".mp4"):
573 |                     filename = re_path_map["video"][bkpath]
574 |                     return match.group(1) + "../Video/" + filename + match.group(4)
575 |                 elif href.lower().endswith(".js"):
576 |                     filename = re_path_map["other"][bkpath]
577 |                     return match.group(1) + "../Misc/" + filename + match.group(4)
578 |                 else:
579 |                     return match.group()
580 | 
581 |             def re_poster(match):
582 |                 href = match.group(3)
583 |                 href = unquote(href).strip()
584 |                 bkpath = get_bookpath(href, xhtml_bkpath)
585 |                 bkpath = check_link(xhtml_bkpath, bkpath, href, self)
586 |                 if not bkpath:
587 |                     return match.group()
588 |                 if href.lower().endswith(
589 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg")
590 |                 ):
591 |                     filename = re_path_map["image"][bkpath]
592 |                     return match.group(1) + "../Images/" + filename + match.group(4)
593 |                 else:
594 |                     return match.group()
595 | 
596 |             text = re.sub(r"(<[^>]* src=([\'\"]))(.*?)(\2[^>]*>)", re_src, text)
597 |             text = re.sub(r"(<[^>]* poster=([\'\"]))(.*?)(\2[^>]*>)", re_poster, text)
598 | 
599 |             # 修改 url
600 |             def re_url(match):
601 |                 url = match.group(2)
602 |                 url = unquote(url).strip()
603 |                 bkpath = get_bookpath(url, xhtml_bkpath)
604 |                 bkpath = check_link(xhtml_bkpath, bkpath, url, self)
605 |                 if not bkpath:
606 |                     return match.group()
607 | 
608 |                 if url.lower().endswith((".ttf", ".otf")):
609 |                     filename = re_path_map["font"][bkpath]
610 |                     return match.group(1) + "../Fonts/" + filename + match.group(3)
611 |                 elif url.lower().endswith(
612 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg")
613 |                 ):
614 |                     filename = re_path_map["image"][bkpath]
615 |                     return match.group(1) + "../Images/" + filename + match.group(3)
616 |                 else:
617 |                     return match.group()
618 | 
619 |             text = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_url, text)
620 |             self.tgt_epub.writestr(
621 |                 "OEBPS/Text/" + new_name,
622 |                 bytes(text, encoding="utf-8"),
623 |                 zipfile.ZIP_DEFLATED,
624 |             )
625 |         # css文件
626 |         for css_bkpath, new_name in re_path_map["css"].items():
627 |             try:
628 |                 css = self.epub.read(css_bkpath).decode("utf-8")
629 |             except:
630 |                 continue
631 | 
632 |             # 修改 @import
633 |             def re_import(match):
634 |                 href = match.group(2) if match.group(2) else match.group(3)
635 |                 href = unquote(href).strip()
636 |                 if not href.lower().endswith(".css"):
637 |                     return match.group()
638 |                 bkpath = get_bookpath(href, css_bkpath)
639 |                 bkpath = check_link(css_bkpath, bkpath, href, self)
640 |                 if not bkpath:
641 |                     return match.group()
642 |                 filename = re_path_map.get("css", {}).get(bkpath, path.basename(href))
643 |                 if match.group(2):
644 |                     return '@import "{}"'.format(filename)
645 |                 else:
646 |                     return '@import url("{}")'.format(filename)
647 | 
648 |             css = re.sub(
649 |                 r"@import +([\'\"])(.*?)\1|@import +url\([\'\"]?(.*?)[\'\"]?\)",
650 |                 re_import,
651 |                 css,
652 |             )
653 | 
654 |             # 修改 css的url
655 |             def re_css_url(match):
656 |                 url = match.group(2)
657 |                 url = unquote(url).strip()
658 |                 bkpath = get_bookpath(url, css_bkpath)
659 |                 bkpath = check_link(css_bkpath, bkpath, url, self)
660 |                 if not bkpath:
661 |                     return match.group()
662 |                 if url.lower().endswith((".ttf", ".otf")):
663 |                     filename = re_path_map["font"][bkpath]
664 |                     return match.group(1) + "../Fonts/" + filename + match.group(3)
665 |                 elif url.lower().endswith(
666 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg")
667 |                 ):
668 |                     filename = re_path_map["image"][bkpath]
669 |                     return match.group(1) + "../Images/" + filename + match.group(3)
670 |                 else:
671 |                     return match.group()
672 | 
673 |             css = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_css_url, css)
674 |             self.tgt_epub.writestr(
675 |                 "OEBPS/Styles/" + new_name,
676 |                 bytes(css, encoding="utf-8"),
677 |                 zipfile.ZIP_DEFLATED,
678 |             )
679 |         # 图片
680 |         for img_bkpath, new_name in re_path_map["image"].items():
681 |             try:
682 |                 img = self.epub.read(img_bkpath)
683 |             except:
684 |                 continue
685 |             self.tgt_epub.writestr(
686 |                 "OEBPS/Images/" + new_name, img, zipfile.ZIP_DEFLATED
687 |             )
688 |         # 字体
689 |         for font_bkpath, new_name in re_path_map["font"].items():
690 |             try:
691 |                 font = self.epub.read(font_bkpath)
692 |             except:
693 |                 continue
694 |             self.tgt_epub.writestr(
695 |                 "OEBPS/Fonts/" + new_name, font, zipfile.ZIP_DEFLATED
696 |             )
697 |         # 音频
698 |         for audio_bkpath, new_name in re_path_map["audio"].items():
699 |             try:
700 |                 audio = self.epub.read(audio_bkpath)
701 |             except:
702 |                 continue
703 |             self.tgt_epub.writestr(
704 |                 "OEBPS/Audio/" + new_name, audio, zipfile.ZIP_DEFLATED
705 |             )
706 |         # 视频
707 |         for video_bkpath, new_name in re_path_map["video"].items():
708 |             try:
709 |                 video = self.epub.read(video_bkpath)
710 |             except:
711 |                 continue
712 |             self.tgt_epub.writestr(
713 |                 "OEBPS/Video/" + new_name, video, zipfile.ZIP_DEFLATED
714 |             )
715 |         # 其他
716 |         for font_bkpath, new_name in re_path_map["other"].items():
717 |             try:
718 |                 other = self.epub.read(font_bkpath)
719 |             except:
720 |                 continue
721 |             self.tgt_epub.writestr(
722 |                 "OEBPS/Misc/" + new_name, other, zipfile.ZIP_DEFLATED
723 |             )
724 | 
725 |         # 读取文件并修改关联
726 |         # toc文件
727 |         if self.tocpath:
728 |             toc = self.epub.read(self.tocpath).decode("utf-8")
729 |             toc_dir = path.dirname(self.tocpath)
730 | 
731 |             def re_toc_href(match):
732 |                 href = match.group(2)
733 |                 href = unquote(href).strip()
734 |                 parts = href.split("#", 1)
735 |                 href_base = parts[0]
736 |                 target_id = "#" + parts[1] if len(parts) > 1 else ""
737 |                 href_base = (
738 |                     self.toc_rn[href_base] if href_base in self.toc_rn else href_base
739 |                 )
740 |                 bkpath = get_bookpath(href_base, self.tocpath)
741 | 
742 |                 if not bkpath:
743 |                     return match.group()
744 |                 filename = path.basename(bkpath)
745 |                 return 'src="Text/' + filename + target_id + '"'
746 | 
747 |             toc = re.sub(r"src=([\'\"])(.*?)\1", re_toc_href, toc)
748 |             self.tgt_epub.writestr(
749 |                 "OEBPS/toc.ncx", bytes(toc, encoding="utf-8"), zipfile.ZIP_DEFLATED
750 |             )
751 | 
752 |         # OPF
753 |         manifest_text = "<manifest>"
754 | 
755 |         for id, href, mime, prop in self.manifest_list:
756 |             bkpath = get_bookpath(href, self.opfpath)
757 |             prop_ = ' properties="' + prop + '"' if prop else ""
758 |             if mime == "application/xhtml+xml":
759 |                 filename = re_path_map["text"][bkpath]
760 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
761 |                     id=id, href="Text/" + filename, mime=mime, prop=prop_
762 |                 )
763 |             elif mime == "text/css":
764 |                 filename = re_path_map["css"][bkpath]
765 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
766 |                     id=id, href="Styles/" + filename, mime=mime, prop=prop_
767 |                 )
768 |             elif "image/" in mime:
769 |                 filename = re_path_map["image"][bkpath]
770 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
771 |                     id=id, href="Images/" + filename, mime=mime, prop=prop_
772 |                 )
773 |             elif "font/" in mime or href.lower().endswith((".ttf", ".otf", ".woff")):
774 |                 filename = re_path_map["font"][bkpath]
775 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
776 |                     id=id, href="Fonts/" + filename, mime=mime, prop=prop_
777 |                 )
778 |             elif "audio/" in mime:
779 |                 filename = re_path_map["audio"][bkpath]
780 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
781 |                     id=id, href="Audio/" + filename, mime=mime, prop=prop_
782 |                 )
783 |             elif "video/" in mime:
784 |                 filename = re_path_map["video"][bkpath]
785 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
786 |                     id=id, href="Video/" + filename, mime=mime, prop=prop_
787 |                 )
788 |             elif id == self.tocid:
789 |                 manifest_text += '\n    <item id="{id}" href="toc.ncx" media-type="application/x-dtbncx+xml"/>'.format(
790 |                     id=id
791 |                 )
792 |             else:
793 |                 filename = re_path_map["other"][bkpath]
794 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
795 |                     id=id, href="Misc/" + filename, mime=mime, prop=prop_
796 |                 )
797 | 
798 |         manifest_text += "\n  </manifest>"
799 |         opf = re.sub(r"(?s)<manifest.*?>.*?</manifest>", manifest_text, self.opf, 1)
800 | 
801 |         def re_refer(match):
802 |             href = match.group(3)
803 |             href = unquote(href).strip()
804 |             basename = path.basename(href)
805 |             filename = unquote(basename)
806 |             if not basename.endswith(".ncx"):
807 |                 try:
808 |                     return match.group(1) + "Text/" + self.toc_rn[href] + match.group(4)
809 |                 except:
810 |                     logger.write(f"写入content.opf时，文件链接出错: {href}")
811 |                     similar_list = []
812 |                     for i in self.text_list:
813 |                         similar = difflib.SequenceMatcher(
814 |                             None,
815 |                             i[0].rsplit("/", 1)[-1].split(".")[0],
816 |                             href.rsplit("/", 1)[-1].split(".")[0],
817 |                         ).quick_ratio()
818 |                         similar_list.append(similar)
819 |                     sorted_id = sorted(
820 |                         range(len(similar_list)),
821 |                         key=lambda k: similar_list[k],
822 |                         reverse=True,
823 |                     )
824 |                     tmp = href
825 |                     href = self.text_list[sorted_id[0]][1]
826 |                     logger.write(
827 |                         f"已自动替换为相似度最高文件: {tmp} <-> {self.text_list[sorted_id[0]]}")
828 |                     return match.group(1) + "Text/" + self.toc_rn[href] + match.group(4)
829 |             else:
830 |                 return match.group()
831 | 
832 |         opf = re.sub(r"(<reference[^>]*href=([\'\"]))(.*?)(\2[^>]*/>)", re_refer, opf)
833 |         self.tgt_epub.writestr(
834 |             "OEBPS/content.opf", bytes(opf, encoding="utf-8"), zipfile.ZIP_DEFLATED
835 |         )
836 |         self.tgt_epub.close()
837 |         self.epub.close()
838 | 
839 | 
# Relative-path helper
def get_relpath(from_path, to_path):
    """Return ``to_path`` expressed relative to the file ``from_path``.

    Both arguments must be paths from the same root (book paths); either
    ``/`` or ``\\`` is accepted as separator.  The result uses ``/`` and one
    ``../`` for every directory that has to be left.  Identical paths give
    an empty string.
    """
    from_parts = re.split(r"[\\/]", from_path)
    to_parts = re.split(r"[\\/]", to_path)
    # Skip the shared leading components, stopping as soon as either path is
    # exhausted (identical paths or one path being a prefix of the other).
    common = 0
    shortest = min(len(from_parts), len(to_parts))
    while common < shortest and from_parts[common] == to_parts[common]:
        common += 1
    from_parts = from_parts[common:]
    to_parts = to_parts[common:]
    # The last remaining component of from_path is the file itself, so it
    # does not count as a directory level.
    return "../" * (len(from_parts) - 1) + "/".join(to_parts)
849 | 
850 | 
# Book-path computation
def get_bookpath(relative_path, refer_bkpath):
    """Resolve ``relative_path`` against the file ``refer_bkpath``.

    ``relative_path`` is a relative path, usually an href; ``refer_bkpath``
    is the book path of the file containing it.  Only leading ``..``
    segments are interpreted.  If there are more of them than the referring
    file has parent directories, the rest of the path is returned as if it
    started at the archive root.  The result uses ``/`` as separator.
    """
    relative_ = re.split(r"[\\/]", relative_path)
    refer_ = re.split(r"[\\/]", refer_bkpath)

    # Count and strip the leading ".." segments; the emptiness check keeps a
    # path made only of ".." segments from raising IndexError.
    back_step = 0
    while relative_ and relative_[0] == "..":
        back_step += 1
        relative_.pop(0)

    if len(refer_) <= 1:
        # The referring file sits at the archive root.
        return "/".join(relative_)
    refer_.pop(-1)  # drop the file name, keep its directories

    if back_step < 1:
        return "/".join(refer_ + relative_)
    if back_step > len(refer_):
        return "/".join(relative_)

    # Here back_step <= len(refer_): walk up one directory per "..".
    while back_step > 0 and len(refer_) > 0:
        refer_.pop(-1)
        back_step -= 1

    return "/".join(refer_ + relative_)
880 | 
881 | 
def epub_sources():
    """Collect the EPUB files named on the command line.

    With no arguments, ``sys.argv`` itself is returned.  Otherwise the
    result starts with the directory of the running script, followed by
    every argument that has an ``.epub`` extension (any case) and exists on
    disk.
    """
    argv = sys.argv
    if len(argv) <= 1:
        return argv

    def is_existing_epub(candidate):
        ext = path.splitext(path.basename(candidate))[1]
        return ext.lower() == ".epub" and path.exists(candidate)

    found = [path.dirname(argv[0])]
    found.extend(src for src in argv[1:] if is_existing_epub(src))
    return found
895 | 
896 | 
def run(epub_src, output_path=None):
    """Restructure one EPUB file and log every problem that was found.

    Returns ``"skip"`` when the file name already ends in ``_decrypt.epub``
    or the book is not encrypted, the caught exception when processing
    fails, and ``0`` on success.
    """

    def report_opf_issue(error_type, error_value):
        # Log the description and the action taken for one OPF problem.
        if error_type == "duplicate_id":
            logger.write(
                f"问题: 发现manifest节点内部存在重复ID {error_value} !!!"
            )
            logger.write("措施: 已自动清除重复ID对应的manifest项。")
        elif error_type == "invalid_idref":
            logger.write(
                f"问题: 发现spine节点内部存在无效引用ID {error_value} !!!"
            )
            logger.write(
                "措施: 请自行检查spine内的itemref节点并手动修改，确保引用的ID存在于manifest的item项。\n（大小写不一致也会导致引用无效。）"
            )
        elif error_type == "xhtml_not_in_spine":
            logger.write(
                f"问题: 发现ID为 {error_value} 的文件manifest中登记为application/xhtml+xml类型，但不被spine节点的项所引用"
            )
            logger.write(
                "措施: 自行检查该文件是否需要被spine引用。部分阅读器中，如果存在xhtml文件不被spine引用，可能导致epub无法打开。"
            )

    try:
        logger.write(f"\n正在尝试解密EPUB: {epub_src}")
        if epub_src.lower().endswith("_decrypt.epub"):
            logger.write("警告: 该文件已解密，无需再次处理！")
            return "skip"
        epub = EpubTool(epub_src)
        epub.set_output_path(output_path)
        if not epub.encrypted:
            logger.write("警告: 该文件未加密，无需处理！")
            return "skip"
        epub.restructure()  # rebuild the book

        # Link problems to report.  For css files, links that match no file
        # are left out; a css file with nothing else to report is dropped.
        el = {}
        for file_path, log in epub.errorLink_log.items():
            if file_path.lower().endswith(".css"):
                log = [entry for entry in log if entry[1] is not None]
                if log == []:
                    continue
            el[file_path] = log

        if epub.errorOPF_log:
            logger.write("-------在 OPF文件 发现问题------:")
            for error_type, error_value in epub.errorOPF_log:
                report_opf_issue(error_type, error_value)

        for file_path, log in el.items():
            basename = path.basename(file_path)
            logger.write(f"-----在 {basename} 发现问题链接-----:")
            for href, correct_path in log:
                if correct_path is None:
                    logger.write(f"链接: {href}\n问题: 未能找到对应文件！！！")
                else:
                    logger.write(
                        f"链接: {href}\n问题: 与实际文件名大小写不一致！\n措施: 程序已自动纠正链接为: {correct_path}。"
                    )
    except Exception as e:
        logger.write(f"{epub_src} 重构EPUB失败: {e}")
        return e
    else:
        logger.write(f"{epub_src} 重构EPUB成功")
    return 0
959 | 
960 | 
def main():
    """Prompt for one EPUB path, process it with ``run`` and print the outcome.

    Entering ``e`` (any case) terminates the process through ``sys.exit``.
    A path that is not an existing file prints an error and returns so the
    caller can prompt again.
    """
    epub_src = input("【使用说明】请把EPUB文件拖曳到本窗口上（输入'e'退出）: ")
    # Trim whitespace before removing quotes: a dragged path such as
    # "'/a/b.epub' " would otherwise keep its closing quote.
    epub_src = epub_src.strip().strip("'\"").strip()
    if epub_src.lower() == "e":
        print("程序已退出")
        sys.exit()
    if not os.path.isfile(epub_src):
        print("错误: 找不到指定的EPUB文件，请确认文件路径是否正确并重新输入！")
        return
    ret = run(epub_src)
    # run() returns "skip", 0 on success, or the caught exception object on
    # failure. It never returns the string "e", so anything that is neither
    # "skip" nor 0 is reported as a failure.
    if ret == "skip":
        print("已跳过该文件")
    elif ret == 0:
        print("操作成功！")
    else:
        print("操作失败，请检查日志！")
977 | 
978 | 
if __name__ == "__main__":
    # Print the feature summary once, then keep prompting for files.
    # main() ends the process itself when the user enters 'e'.
    print(
        "\n".join(
            (
                "【脚本功能】",
                "1、 将epub目录结构规范化至sigil规范格式。",
                "2、 将没有列入manifest项的epub有效文件自动列入manifest项。",
                "3、 自动清除manifest中携带重复ID或多余ID的无效项。",
                "    脚本将优先保留spine或metadata中关联的ID。",
                "4、 自动检查并提醒spine节点中引用无效ID的itemref项。",
                "5、 自动检查并提醒manifest节点中xhtml类型文件不被spine节点引用的情况。",
                "6、 自动检测并纠正实际文件名与对应的引用链接大小写不一致的问题。",
                "7、 自动检测并提醒找不到对应文件的链接。",
                "8、 反名称混淆，使sigil可以打开修改。",
            )
        )
    )
    while True:
        main()
994 | 
995 | 


--------------------------------------------------------------------------------
/utils/encrypt_epub.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # !/usr/bin/env python
  3 | # 源码: sigil吧ID: 遥遥心航
  4 | # 二改: cnwxi
  5 | 
  6 | import zipfile
  7 | import re, sys
  8 | from os import path, mkdir, getcwd
  9 | from urllib.parse import unquote
 10 | from xml.etree import ElementTree
 11 | import os
 12 | import hashlib
 13 | 
 14 | try:
 15 |     from utils.log import logwriter
 16 | except:
 17 |     from log import logwriter
 18 | 
 19 | logger = logwriter()
 20 | 
 21 | 
 22 | class EpubTool:
 23 | 
 24 |     def __init__(self, epub_src):
 25 |         self.encrypted = False
 26 |         self.epub = zipfile.ZipFile(epub_src)
 27 |         self.epub_src = epub_src
 28 |         self.epub_name = path.basename(epub_src)
 29 |         self.ebook_root = path.dirname(epub_src)
 30 |         self.output_path = self.ebook_root
 31 |         self.epub_type = ""
 32 |         self.temp_dir = ""
 33 |         self._init_namelist()
 34 |         self._init_mime_map()
 35 |         self._init_opf()
 36 |         self.manifest_list = []  # (id,opf_href,mime,properties)
 37 |         self.toc_rn = {}
 38 |         self.all_mixed = {}
 39 |         self.id_to_href = {}  # { id : href.lower, ... }
 40 |         self.href_to_id = {}  # { href.lower : id, ...}
 41 |         self.text_list = []  # (id,opf_href,properties)
 42 |         self.css_list = []  # (id,opf_href,properties)
 43 |         self.image_list = []  # (id,opf_href,properties)
 44 |         self.font_list = []  # (id,opf_href,properties)
 45 |         self.audio_list = []  # (id,opf_href,properties)
 46 |         self.video_list = []  # (id,opf_href,properties)
 47 |         self.spine_list = []  # (sid, linear, properties)
 48 |         self.other_list = []  # (id,opf_href,mime,properties)
 49 |         self.errorOPF_log = []  # (error_type,error_value)
 50 |         self.errorLink_log = {}  # {filepath:[(error_link,correct_link || None),...]}
 51 |         self._parse_opf()
 52 | 
 53 |     def set_output_path(self, output_path):
 54 |         if output_path is not None and os.path.isdir(output_path):
 55 |             self.output_path = output_path
 56 | 
 57 |     def _init_namelist(self):
 58 |         self.namelist = self.epub.namelist()
 59 | 
 60 |     def _init_mime_map(self):
 61 |         self.mime_map = {
 62 |             ".html": "application/xhtml+xml",
 63 |             ".xhtml": "application/xhtml+xml",
 64 |             ".css": "text/css",
 65 |             ".js": "application/javascript",
 66 |             ".jpg": "image/jpeg",
 67 |             ".jpeg": "image/jpeg",
 68 |             ".bmp": "image/bmp",
 69 |             ".png": "image/png",
 70 |             ".gif": "image/gif",
 71 |             ".webp": "image/webp",
 72 |             ".ttf": "font/ttf",
 73 |             ".otf": "font/otf",
 74 |             ".woff": "font/woff",
 75 |             ".ncx": "application/x-dtbncx+xml",
 76 |             ".mp3": "audio/mpeg",
 77 |             ".mp4": "video/mp4",
 78 |             ".smil": "application/smil+xml",
 79 |             ".pls": "application/pls+xml",
 80 |         }
 81 | 
 82 |     def _init_opf(self):
 83 |         # 通过 container.xml 读取 opf 文件
 84 |         container_xml = self.epub.read("META-INF/container.xml").decode("utf-8")
 85 |         rf = re.match(r'<rootfile[^>]*full-path="(?i:(.*?\.opf))"', container_xml)
 86 |         if rf is not None:
 87 |             self.opfpath = rf.group(1)
 88 |             self.opf = self.epub.read(self.opfpath).decode("utf-8")
 89 |             return
 90 |         # 通过路径首个 opf 读取 opf 文件
 91 |         for bkpath in self.namelist:
 92 |             if bkpath.lower().endswith(".opf"):
 93 |                 self.opfpath = bkpath
 94 |                 self.opf = self.epub.read(self.opfpath).decode("utf-8")
 95 |                 return
 96 |         raise RuntimeError("无法发现opf文件")
 97 | 
 98 |     def _parse_opf(self):
 99 |         self.etree_opf = {"package": ElementTree.fromstring(self.opf)}
100 | 
101 |         for child in self.etree_opf["package"]:
102 |             tag = re.sub(r"\{.*?\}", r"", child.tag)
103 |             self.etree_opf[tag] = child
104 |         self._parse_metadata()
105 |         self._parse_manifest()
106 |         self._parse_spine()
107 |         self._clear_duplicate_id_href()
108 |         self._parse_hrefs_not_in_epub()
109 |         self._add_files_not_in_opf()
110 | 
111 |         self.manifest_list = []  # (id,opf_href,mime,properties)
112 |         for id in self.id_to_h_m_p:
113 |             href, mime, properties = self.id_to_h_m_p[id]
114 |             self.manifest_list.append((id, href, mime, properties))
115 | 
116 |         epub_type = self.etree_opf["package"].get("version")
117 | 
118 |         if epub_type is not None and epub_type in ["2.0", "3.0"]:
119 |             self.epub_type = epub_type
120 |         else:
121 |             raise RuntimeError("此脚本不支持该EPUB类型")
122 | 
123 |         # 寻找epub2 toc 文件的id。epub3的nav文件直接当做xhtml处理。
124 |         self.tocpath = ""
125 |         self.tocid = ""
126 |         tocid = self.etree_opf["spine"].get("toc")
127 |         self.tocid = tocid if tocid is not None else ""
128 | 
129 |         # opf item分类
130 |         opf_dir = path.dirname(self.opfpath)
131 | 
132 |         # 生成新的href
133 |         ############################################################
134 |         def creatNewHerf(_id, _href):
135 |             _id_name = _id.split(".")[0]
136 |             _filename, _file_extension = _href.rsplit(".", 1)
137 |             _true_filename = _filename.rsplit("/", 1)[-1]
138 |             if _true_filename.endswith("slim") or _id_name.endswith("slim"):
139 |                 image_slim = "~slim"
140 |                 # _true_filename=_true_filename.rstrip("~slim").rstrip("-slim").rstrip("_slim").rstrip("slim")
141 |                 _id_name = (
142 |                     _id_name.rstrip("~slim")
143 |                     .rstrip("-slim")
144 |                     .rstrip("_slim")
145 |                     .rstrip("slim")
146 |                 )
147 |                 # :*:*:**::**::::******::***::***:*:**::***::*:*::::::**::::**:*.webp
148 |                 # :*:*:**::**::::******::***::***:*:**::***::*:*::::::**::::**:*~slim.webp
149 |             else:
150 |                 image_slim = ""
151 |             _href_hash = hashlib.md5(_id_name.encode()).digest()
152 |             _href_hash = int.from_bytes(_href_hash, byteorder="big")
153 |             bin_hash = bin(_href_hash)
154 |             new_href = (
155 |                 bin_hash.replace("-", "*")
156 |                 .replace("0b", "")
157 |                 .replace("1", "*")
158 |                 .replace("0", ":")
159 |             )
160 |             # 加_为了防止Windows系统异常
161 |             new_href = f"_{new_href}{image_slim}.{_file_extension.lower()}"
162 |             if new_href not in self.toc_rn.values():
163 |                 self.toc_rn[href] = new_href
164 |                 logger.write(f"encrypt href: {_id}:{_href} -> {self.toc_rn[href]}")
165 |             else:
166 |                 self.toc_rn[href] = new_href
167 |                 logger.write(f"encrypt href: {_id}:{_href} -> {new_href} 重复")
168 |             return new_href
169 | 
170 |         ############################################################
171 | 
172 |         for id, href, mime, properties in self.manifest_list:
173 |             bkpath = opf_dir + "/" + href if opf_dir else href
174 |             if re.search(r'[\\/:*?"<>|]', href.rsplit("/")[-1]):
175 |                 self.encrypted = True
176 |             if mime == "application/xhtml+xml":
177 |                 self.text_list.append((id, href, properties, creatNewHerf(id, href)))
178 |             elif mime == "text/css":
179 |                 self.css_list.append((id, href, properties, creatNewHerf(id, href)))
180 |             elif "image/" in mime:
181 |                 self.image_list.append((id, href, properties, creatNewHerf(id, href)))
182 |             elif "font/" in mime or href.lower().endswith((".ttf", ".otf", ".woff")):
183 |                 self.font_list.append((id, href, properties, creatNewHerf(id, href)))
184 |             elif "audio/" in mime:
185 |                 self.audio_list.append((id, href, properties, creatNewHerf(id, href)))
186 |             elif "video/" in mime:
187 |                 self.video_list.append((id, href, properties, creatNewHerf(id, href)))
188 |             elif self.tocid != "" and id == self.tocid:
189 |                 opf_dir = path.dirname(self.opfpath)
190 |                 self.tocpath = opf_dir + "/" + href if opf_dir else href
191 |             else:
192 |                 self.other_list.append(
193 |                     (id, href, mime, properties, creatNewHerf(id, href))
194 |                 )
195 | 
196 |         self._check_manifest_and_spine()
197 | 
198 |     def _parse_metadata(self):
199 |         self.metadata = {}
200 |         for key in [
201 |             "title",
202 |             "creator",
203 |             "language",
204 |             "subject",
205 |             "source",
206 |             "identifier",
207 |             "cover",
208 |         ]:
209 |             self.metadata[key] = ""
210 |         for meta in self.etree_opf["metadata"]:
211 |             tag = re.sub(r"\{.*?\}", r"", meta.tag)
212 |             if tag in [
213 |                 "title",
214 |                 "creator",
215 |                 "language",
216 |                 "subject",
217 |                 "source",
218 |                 "identifier",
219 |             ]:
220 |                 self.metadata[tag] = meta.text
221 |             elif tag == "meta":
222 |                 if meta.get("name") and meta.get("content"):
223 |                     self.metadata["cover"] = meta.get("content")
224 | 
225 |     def _parse_manifest(self):
226 |         self.id_to_h_m_p = {}  # { id : (href,mime,properties) , ... }
227 |         self.id_to_href = {}  # { id : href.lower, ... }
228 |         self.href_to_id = {}  # { href.lower : id, ...}
229 |         if_error = False
230 |         for item in self.etree_opf["manifest"]:
231 |             # 检查opf文件中是否存在错误
232 |             try:
233 |                 id = item.get("id")
234 |                 href = unquote(item.get("href"))
235 |             except Exception as e:
236 |                 str_item = (
237 |                     ElementTree.tostring(item, encoding="unicode")
238 |                     .replace("\n", "")
239 |                     .replace("\r", "")
240 |                     .replace("\t", "")
241 |                 )
242 |                 logger.write(f"item: {str_item} error: {e}")
243 |                 if_error = True
244 |                 continue
245 |             mime = item.get("media-type")
246 |             properties = item.get("properties") if item.get("properties") else ""
247 | 
248 |             self.id_to_h_m_p[id] = (href, mime, properties)
249 |             self.id_to_href[id] = href.lower()
250 |             self.href_to_id[href.lower()] = id
251 |         if if_error:
252 |             logger.write("opf文件中存在错误，请检查！")
253 | 
254 |     def _parse_spine(self):
255 |         self.spine_list = []  # [ (sid, linear, properties) , ... ]
256 |         for itemref in self.etree_opf["spine"]:
257 |             sid = itemref.get("idref")
258 |             linear = itemref.get("linear") if itemref.get("linear") else ""
259 |             properties = itemref.get("properties") if itemref.get("properties") else ""
260 |             self.spine_list.append((sid, linear, properties))
261 | 
262 |     def _clear_duplicate_id_href(self):
263 | 
264 |         # id_used = [ id_in_spine + cover_id ]
265 |         id_used = [x[0] for x in self.spine_list]
266 |         if self.metadata["cover"]:
267 |             id_used.append(self.metadata["cover"])
268 | 
269 |         del_id = []
270 |         for id, href in self.id_to_href.items():
271 |             if self.href_to_id[href] != id:  # 该href拥有多个id,此id已被覆盖。
272 |                 if id in id_used and self.href_to_id[href] not in id_used:
273 |                     if id not in del_id:
274 |                         del_id.append(self.href_to_id[href])
275 |                     self.href_to_id[href] = id
276 |                 elif id in id_used and self.href_to_id[href] in id_used:
277 |                     continue
278 |                 else:
279 |                     if id not in del_id:
280 |                         del_id.append(id)
281 | 
282 |         for id in del_id:
283 |             self.errorOPF_log.append(("duplicate_id", id))
284 |             del self.id_to_href[id]
285 |             del self.id_to_h_m_p[id]
286 | 
287 |     def _add_files_not_in_opf(self):
288 | 
289 |         hrefs_not_in_opf = []
290 |         for archive_path in self.namelist:
291 |             if archive_path.lower().endswith(
292 |                 (
293 |                     ".html",
294 |                     ".xhtml",
295 |                     ".css",
296 |                     ".jpg",
297 |                     ".jpeg",
298 |                     ".bmp",
299 |                     ".gif",
300 |                     ".png",
301 |                     ".webp",
302 |                     ".svg",
303 |                     ".ttf",
304 |                     ".otf",
305 |                     ".js",
306 |                     ".mp3",
307 |                     ".mp4",
308 |                     ".smil",
309 |                 )
310 |             ):
311 |                 opf_href = get_relpath(self.opfpath, archive_path)
312 |                 if opf_href.lower() not in self.href_to_id.keys():
313 |                     hrefs_not_in_opf.append(opf_href)
314 | 
315 |         def allocate_id(href):  # 自动分配不重复id
316 |             basename = path.basename(href)
317 |             if "A" <= basename[0] <= "Z" or "a" <= basename[0] <= "z":
318 |                 new_id = basename
319 |             else:
320 |                 new_id = "x" + basename
321 |             pre, suf = path.splitext(new_id)
322 |             pre_ = pre
323 |             i = 0
324 |             while pre_ + suf in self.id_to_href.keys():
325 |                 i += 1
326 |                 pre_ = pre + "_" + str(i)
327 |             new_id = pre_ + suf
328 |             return new_id
329 | 
330 |         for href in hrefs_not_in_opf:
331 |             new_id = allocate_id("newsrc")
332 |             self.id_to_href[new_id] = href.lower()
333 |             self.href_to_id[href.lower()] = new_id
334 |             ext = path.splitext(href)[1]
335 |             ext = ext.lower()
336 |             try:
337 |                 mime = self.mime_map[ext]
338 |             except KeyError:
339 |                 mime = "text/plain"
340 |             self.id_to_h_m_p[new_id] = (href, mime, "")
341 | 
342 |     def _check_manifest_and_spine(self):
343 |         spine_idrefs = [i for i, j, k in self.spine_list]
344 | 
345 |         for idref in spine_idrefs:
346 |             if not self.id_to_h_m_p.get(idref):  # spine 引用无效ID
347 |                 self.errorOPF_log.append(("invalid_idref", idref))
348 | 
349 |         for mid, opf_href, mime, properties in self.manifest_list:
350 |             if mime == "application/xhtml+xml":
351 |                 if mid not in spine_idrefs:
352 |                     self.errorOPF_log.append(("xhtml_not_in_spine", mid))
353 | 
354 |     def _parse_hrefs_not_in_epub(self):
355 |         del_id = []
356 |         namelist = [x.lower() for x in self.epub.namelist()]
357 |         for id, href in self.id_to_href.items():
358 |             bkpath = get_bookpath(href, self.opfpath)
359 |             if bkpath.lower() not in namelist:
360 |                 del_id.append(id)
361 |                 del self.href_to_id[href]
362 |         for id in del_id:
363 |             del self.id_to_href[id]
364 |             del self.id_to_h_m_p[id]
365 | 
366 |     def create_tgt_epub(self):
367 |         output_path = self.output_path
368 |         logger.write(f"输出路径: {output_path}")
369 |         return zipfile.ZipFile(
370 |             path.join(output_path, self.epub_name.replace(".epub", "_encrypt.epub")),
371 |             "w",
372 |             zipfile.ZIP_STORED,
373 |         )
374 | 
375 |     # 重构
376 |     def restructure(self):
377 |         self.tgt_epub = self.create_tgt_epub()
378 |         # mimetype
379 |         mimetype = self.epub.read("mimetype")
380 |         self.tgt_epub.writestr("mimetype", mimetype, zipfile.ZIP_DEFLATED)
381 |         # META-INF
382 |         metainf_data = self.epub.read("META-INF/container.xml").decode("utf-8")
383 |         metainf_data = re.sub(
384 |             r'<rootfile[^>]*media-type="application/oebps-[^>]*/>',
385 |             r'<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>',
386 |             metainf_data,
387 |         )
388 |         self.tgt_epub.writestr(
389 |             "META-INF/container.xml",
390 |             bytes(metainf_data, encoding="utf-8"),
391 |             zipfile.ZIP_DEFLATED,
392 |         )
393 |         # OEBPS
394 |         re_path_map = {
395 |             "text": {},
396 |             "css": {},
397 |             "image": {},
398 |             "font": {},
399 |             "audio": {},
400 |             "video": {},
401 |             "other": {},
402 |         }  # { ori_bkpath : re_basename }
403 |         basename_log = {
404 |             "text": [],
405 |             "css": [],
406 |             "image": [],
407 |             "font": [],
408 |             "audio": [],
409 |             "video": [],
410 |             "other": [],
411 |         }
412 |         lowerPath_to_originPath = (
413 |             {}
414 |         )  # 如果路径大小写不一致，则登记为 { 小写路径 : 原始路径 }
415 | 
416 |         def auto_rename(id, href, ftype):
417 |             filename, ext = path.splitext(path.basename(href))
418 |             filename_ = filename
419 |             num = 0
420 |             while filename_ + ext in basename_log[ftype]:
421 |                 num += 1
422 |                 filename_ = filename + "_" + str(num)
423 |             basename = filename_ + ext
424 |             basename_log[ftype].append(basename)
425 |             return basename
426 | 
427 |         def check_link(filename, bkpath, href, self, target_id=""):
428 |             if href == "" or href.startswith(
429 |                 ("http://", "https://", "res:/", "file:/", "data:")
430 |             ):
431 |                 return None
432 |             if bkpath.lower() in lowerPath_to_originPath.keys():
433 |                 if bkpath != lowerPath_to_originPath[bkpath.lower()]:  # 大小写不一致
434 |                     correct_path = lowerPath_to_originPath[bkpath.lower()]
435 |                     self.errorLink_log.setdefault(filename, [])
436 |                     self.errorLink_log[filename].append(
437 |                         (href + target_id, correct_path)
438 |                     )
439 |                     bkpath = correct_path
440 |             else:  # 链接路径找不到对应文件
441 |                 self.errorLink_log.setdefault(filename, [])
442 |                 self.errorLink_log[filename].append((href + target_id, None))
443 |                 return None
444 |             return bkpath
445 | 
446 |         # xhtml文件，关联 toc文件，一切 xhtml中的<a>元素
447 |         for id, href, properties, newhref in self.text_list:
448 |             bkpath = get_bookpath(href, self.opfpath)
449 |             basename = auto_rename(id, newhref, "text")
450 |             re_path_map["text"][bkpath] = basename
451 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
452 | 
453 |         # css 文件，关联 xhtml文件的link，css文件中的@import
454 |         for id, href, properties, newhref in self.css_list:
455 |             bkpath = get_bookpath(href, self.opfpath)
456 |             basename = auto_rename(id, newhref, "css")
457 |             re_path_map["css"][bkpath] = basename
458 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
459 | 
460 |         # 图片，关联css中的url，xhtml文件中的href
461 |         for id, href, properties, newhref in self.image_list:
462 |             bkpath = get_bookpath(href, self.opfpath)
463 |             basename = auto_rename(id, newhref, "image")
464 |             re_path_map["image"][bkpath] = basename
465 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
466 |         # 字体，关联css中的url
467 |         for id, href, properties, newhref in self.font_list:
468 |             bkpath = get_bookpath(href, self.opfpath)
469 |             basename = auto_rename(id, newhref, "font")
470 |             re_path_map["font"][bkpath] = basename
471 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
472 | 
473 |         # 音频
474 |         for id, href, properties, newhref in self.audio_list:
475 |             bkpath = get_bookpath(href, self.opfpath)
476 |             basename = auto_rename(id, newhref, "audio")
477 |             re_path_map["audio"][bkpath] = basename
478 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
479 | 
480 |         # 视频
481 |         for id, href, properties, newhref in self.video_list:
482 |             bkpath = get_bookpath(href, self.opfpath)
483 |             basename = auto_rename(id, newhref, "video")
484 |             re_path_map["video"][bkpath] = basename
485 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
486 | 
487 |         # 其他文件
488 |         for id, href, mime, properties, newhref in self.other_list:
489 |             bkpath = get_bookpath(href, self.opfpath)
490 |             basename = auto_rename(id, newhref, "other")
491 |             re_path_map["other"][bkpath] = basename
492 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
493 | 
494 |         # xhtml文件
495 |         for xhtml_bkpath, new_name in re_path_map["text"].items():
496 |             text = self.epub.read(xhtml_bkpath).decode("utf-8")
497 |             if not text.startswith("<?xml"):
498 |                 text = '<?xml version="1.0" encoding="utf-8"?>\n' + text
499 |             if not re.match(r"(?s).*<!DOCTYPE html", text):
500 |                 text = re.sub(
501 |                     r"(<\?xml.*?>)\n*",
502 |                     r'\1\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"\n  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
503 |                     text,
504 |                     1,
505 |                 )
506 | 
507 |             # 修改a[href]
508 | 
509 |             def re_href(match):
510 |                 href = match.group(3)
511 |                 href = unquote(href).strip()
512 |                 if "#" in href:
513 |                     href, target_id = href.split("#")
514 |                     target_id = "#" + target_id
515 |                 else:
516 |                     target_id = ""
517 | 
518 |                 bkpath = get_bookpath(href, xhtml_bkpath)
519 |                 bkpath = check_link(xhtml_bkpath, bkpath, href, self, target_id)
520 |                 if not bkpath:
521 |                     return match.group()
522 | 
523 |                 if href.lower().endswith(
524 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp")
525 |                 ):
526 |                     filename = re_path_map["image"][bkpath]
527 |                     return match.group(1) + "../Images/" + filename + match.group(4)
528 |                 elif href.lower().endswith(".css"):
529 |                     filename = re_path_map["css"][bkpath]
530 |                     return (
531 |                         '<link href="../Styles/'
532 |                         + filename
533 |                         + '" type="text/css" rel="stylesheet"/>'
534 |                     )
535 |                 elif href.lower().endswith((".xhtml", ".html")):
536 |                     filename = re_path_map["text"][bkpath]
537 |                     return match.group(1) + filename + target_id + match.group(4)
538 |                 else:
539 |                     return match.group()
540 | 
541 |             text = re.sub(r"(<[^>]*href=([\'\"]))(.*?)(\2[^>]*>)", re_href, text)
542 | 
543 |             # 修改src
544 |             def re_src(match):
545 |                 href = match.group(3)
546 |                 href = unquote(href).strip()
547 |                 bkpath = get_bookpath(href, xhtml_bkpath)
548 |                 bkpath = check_link(xhtml_bkpath, bkpath, href, self)
549 |                 if not bkpath:
550 |                     return match.group()
551 | 
552 |                 if href.lower().endswith(
553 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg")
554 |                 ):
555 |                     filename = re_path_map["image"][bkpath]
556 |                     return match.group(1) + "../Images/" + filename + match.group(4)
557 |                 elif href.lower().endswith(".mp3"):
558 |                     filename = re_path_map["audio"][bkpath]
559 |                     return match.group(1) + "../Audio/" + filename + match.group(4)
560 |                 elif href.lower().endswith(".mp4"):
561 |                     filename = re_path_map["video"][bkpath]
562 |                     return match.group(1) + "../Video/" + filename + match.group(4)
563 |                 elif href.lower().endswith(".js"):
564 |                     filename = re_path_map["other"][bkpath]
565 |                     return match.group(1) + "../Misc/" + filename + match.group(4)
566 |                 else:
567 |                     return match.group()
568 | 
569 |             def re_poster(match):
570 |                 href = match.group(3)
571 |                 href = unquote(href).strip()
572 |                 bkpath = get_bookpath(href, xhtml_bkpath)
573 |                 bkpath = check_link(xhtml_bkpath, bkpath, href, self)
574 |                 if not bkpath:
575 |                     return match.group()
576 |                 if href.lower().endswith(
577 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg")
578 |                 ):
579 |                     filename = re_path_map["image"][bkpath]
580 |                     return match.group(1) + "../Images/" + filename + match.group(4)
581 |                 else:
582 |                     return match.group()
583 | 
584 |             text = re.sub(r"(<[^>]* src=([\'\"]))(.*?)(\2[^>]*>)", re_src, text)
585 |             text = re.sub(r"(<[^>]* poster=([\'\"]))(.*?)(\2[^>]*>)", re_poster, text)
586 | 
587 |             # 修改 text
588 |             def re_url(match):
589 |                 url = match.group(2)
590 |                 url = unquote(url).strip()
591 |                 bkpath = get_bookpath(url, xhtml_bkpath)
592 |                 bkpath = check_link(xhtml_bkpath, bkpath, url, self)
593 |                 if not bkpath:
594 |                     return match.group()
595 | 
596 |                 if url.lower().endswith((".ttf", ".otf")):
597 |                     filename = re_path_map["font"][bkpath]
598 |                     return match.group(1) + "../Fonts/" + filename + match.group(3)
599 |                 elif url.lower().endswith(
600 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg")
601 |                 ):
602 |                     filename = re_path_map["image"][bkpath]
603 |                     return match.group(1) + "../Images/" + filename + match.group(3)
604 |                 else:
605 |                     return match.group()
606 | 
607 |             text = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_url, text)
608 |             self.tgt_epub.writestr(
609 |                 "OEBPS/Text/" + new_name,
610 |                 bytes(text, encoding="utf-8"),
611 |                 zipfile.ZIP_DEFLATED,
612 |             )
613 |         # css文件
614 |         for css_bkpath, new_name in re_path_map["css"].items():
615 |             try:
616 |                 css = self.epub.read(css_bkpath).decode("utf-8")
617 |             except:
618 |                 continue
619 | 
620 |             # 修改 @import
621 |             def re_import(match):
622 |                 href = match.group(2) if match.group(2) else match.group(3)
623 |                 href = unquote(href).strip()
624 |                 if not href.lower().endswith(".css"):
625 |                     return match.group()
626 |                 bkpath = get_bookpath(href, css_bkpath)
627 |                 bkpath = check_link(css_bkpath, bkpath, href, self)
628 |                 if not bkpath:
629 |                     return match.group()
630 |                 filename = re_path_map.get("css", {}).get(bkpath, path.basename(href))
631 |                 if match.group(2):
632 |                     return '@import "{}"'.format(filename)
633 |                 else:
634 |                     return '@import url("{}")'.format(filename)
635 | 
636 |             css = re.sub(
637 |                 r"@import +([\'\"])(.*?)\1|@import +url\([\'\"]?(.*?)[\'\"]?\)",
638 |                 re_import,
639 |                 css,
640 |             )
641 | 
642 |             # 修改 css的url
643 |             def re_css_url(match):
644 |                 url = match.group(2)
645 |                 url = unquote(url).strip()
646 |                 bkpath = get_bookpath(url, css_bkpath)
647 |                 bkpath = check_link(css_bkpath, bkpath, url, self)
648 |                 if not bkpath:
649 |                     return match.group()
650 |                 if url.lower().endswith((".ttf", ".otf")):
651 |                     filename = re_path_map["font"][bkpath]
652 |                     return match.group(1) + "../Fonts/" + filename + match.group(3)
653 |                 elif url.lower().endswith(
654 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg")
655 |                 ):
656 |                     filename = re_path_map["image"][bkpath]
657 |                     return match.group(1) + "../Images/" + filename + match.group(3)
658 |                 else:
659 |                     return match.group()
660 | 
661 |             css = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_css_url, css)
662 |             self.tgt_epub.writestr(
663 |                 "OEBPS/Styles/" + new_name,
664 |                 bytes(css, encoding="utf-8"),
665 |                 zipfile.ZIP_DEFLATED,
666 |             )
667 |         # 图片
668 |         for img_bkpath, new_name in re_path_map["image"].items():
669 |             try:
670 |                 img = self.epub.read(img_bkpath)
671 |             except:
672 |                 continue
673 |             self.tgt_epub.writestr(
674 |                 "OEBPS/Images/" + new_name, img, zipfile.ZIP_DEFLATED
675 |             )
676 |         # 字体
677 |         for font_bkpath, new_name in re_path_map["font"].items():
678 |             try:
679 |                 font = self.epub.read(font_bkpath)
680 |             except:
681 |                 continue
682 |             self.tgt_epub.writestr(
683 |                 "OEBPS/Fonts/" + new_name, font, zipfile.ZIP_DEFLATED
684 |             )
685 |         # 音频
686 |         for audio_bkpath, new_name in re_path_map["audio"].items():
687 |             try:
688 |                 audio = self.epub.read(audio_bkpath)
689 |             except:
690 |                 continue
691 |             self.tgt_epub.writestr(
692 |                 "OEBPS/Audio/" + new_name, audio, zipfile.ZIP_DEFLATED
693 |             )
694 |         # 视频
695 |         for video_bkpath, new_name in re_path_map["video"].items():
696 |             try:
697 |                 video = self.epub.read(video_bkpath)
698 |             except:
699 |                 continue
700 |             self.tgt_epub.writestr(
701 |                 "OEBPS/Video/" + new_name, video, zipfile.ZIP_DEFLATED
702 |             )
703 |         # 其他
704 |         for font_bkpath, new_name in re_path_map["other"].items():
705 |             try:
706 |                 other = self.epub.read(font_bkpath)
707 |             except:
708 |                 continue
709 |             self.tgt_epub.writestr(
710 |                 "OEBPS/Misc/" + new_name, other, zipfile.ZIP_DEFLATED
711 |             )
712 | 
713 |         # 读取文件并修改关联
714 |         # toc文件
715 |         if self.tocpath:
716 |             toc = self.epub.read(self.tocpath).decode("utf-8")
717 |             toc_dir = path.dirname(self.tocpath)
718 | 
719 |             def re_toc_href(match):
720 |                 href = match.group(2)
721 |                 href = unquote(href).strip()
722 |                 parts = href.split("#", 1)
723 |                 href_base = parts[0]
724 |                 target_id = "#" + parts[1] if len(parts) > 1 else ""
725 |                 href_base = (
726 |                     self.toc_rn[href_base] if href_base in self.toc_rn else href_base
727 |                 )
728 |                 bkpath = get_bookpath(href_base, self.tocpath)
729 | 
730 |                 if not bkpath:
731 |                     return match.group()
732 |                 filename = path.basename(bkpath)
733 |                 return 'src="Text/' + filename + target_id + '"'
734 | 
735 |             toc = re.sub(r"src=([\'\"])(.*?)\1", re_toc_href, toc)
736 |             self.tgt_epub.writestr(
737 |                 "OEBPS/toc.ncx", bytes(toc, encoding="utf-8"), zipfile.ZIP_DEFLATED
738 |             )
739 | 
740 |         # OPF
741 |         manifest_text = "<manifest>"
742 | 
743 |         for id, href, mime, prop in self.manifest_list:
744 |             bkpath = get_bookpath(href, self.opfpath)
745 |             prop_ = ' properties="' + prop + '"' if prop else ""
746 |             if mime == "application/xhtml+xml":
747 |                 filename = re_path_map["text"][bkpath]
748 |                 manifest_text += f'\n    <item id="{id}" href="Text/{filename}" media-type="{mime}"{prop_}/>'
749 |             elif mime == "text/css":
750 |                 filename = re_path_map["css"][bkpath]
751 |                 manifest_text += f'\n    <item id="{id}" href="Styles/{filename}" media-type="{mime}"{prop_}/>'
752 |             elif "image/" in mime:
753 |                 filename = re_path_map["image"][bkpath]
754 |                 manifest_text += f'\n    <item id="{id}" href="Images/{filename}" media-type="{mime}"{prop_}/>'
755 |             elif "font/" in mime or href.lower().endswith((".ttf", ".otf", ".woff")):
756 |                 filename = re_path_map["font"][bkpath]
757 |                 manifest_text += f'\n    <item id="{id}" href="Fonts/{filename}" media-type="{mime}"{prop_}/>'
758 |             elif "audio/" in mime:
759 |                 filename = re_path_map["audio"][bkpath]
760 |                 manifest_text += f'\n    <item id="{id}" href="Audio/{filename}" media-type="{mime}"{prop_}/>'
761 |             elif "video/" in mime:
762 |                 filename = re_path_map["video"][bkpath]
763 |                 manifest_text += f'\n    <item id="{id}" href="Video/{filename}" media-type="{mime}"{prop_}/>'
764 |             elif id == self.tocid:
765 |                 manifest_text += f'\n    <item id="{id}" href="toc.ncx" media-type="application/x-dtbncx+xml"/>'
766 |             else:
767 |                 filename = re_path_map["other"][bkpath]
768 |                 manifest_text += f'\n    <item id="{id}" href="Misc/{filename}" media-type="{mime}"{prop_}/>'
769 | 
770 |         manifest_text += "\n  </manifest>"
771 |         opf = re.sub(r"(?s)<manifest.*?>.*?</manifest>", manifest_text, self.opf, 1)
772 | 
773 |         def re_refer(match):
774 |             href = match.group(3)
775 |             href = unquote(href).strip()
776 |             basename = path.basename(href)
777 |             filename = unquote(basename)
778 |             if not basename.endswith(".ncx"):
779 |                 if href.startswith("/"):
780 |                     href = href[1:]
781 |                 elif href.startswith("./"):
782 |                     href = href[2:]
783 |                 elif href.startswith("../"):
784 |                     href = href[3:]
785 |                 return match.group(1) + "Text/" + self.toc_rn[href] + match.group(4)
786 |             else:
787 |                 return match.group()
788 | 
789 |         opf = re.sub(r"(<reference[^>]*href=([\'\"]))(.*?)(\2[^>]*/>)", re_refer, opf)
790 |         self.tgt_epub.writestr(
791 |             "OEBPS/content.opf", bytes(opf, encoding="utf-8"), zipfile.ZIP_DEFLATED
792 |         )
793 |         self.tgt_epub.close()
794 |         self.epub.close()
795 | 
796 | 
797 | # 相对路径计算函数
def get_relpath(from_path, to_path):
    """Return the relative link that leads from one book file to another.

    Both paths must be measured from the same root; forward and backward
    slashes are both accepted as separators.

    Args:
        from_path: path of the file that contains the link.
        to_path: path of the link target.

    Returns:
        ``to_path`` expressed relative to the directory of ``from_path``,
        always joined with "/".
    """
    from_parts = re.split(r"[\\/]", from_path)
    to_parts = re.split(r"[\\/]", to_path)
    # Drop the shared leading directories, but never the last component of
    # either path: popping a list empty used to raise IndexError when the two
    # paths were identical or one was a prefix of the other.
    while (
        len(from_parts) > 1
        and len(to_parts) > 1
        and from_parts[0] == to_parts[0]
    ):
        from_parts.pop(0)
        to_parts.pop(0)
    # Every directory left on the "from" side costs one "../".
    return "../" * (len(from_parts) - 1) + "/".join(to_parts)
806 | 
807 | 
808 | # 计算bookpath
def get_bookpath(relative_path, refer_bkpath):
    """Resolve a relative href against the book path of the referring file.

    Args:
        relative_path: the link as written in the referring file, usually
            an href; forward and backward slashes are both accepted.
        refer_bkpath: book path (path from the archive root) of the file
            that contains the link.

    Returns:
        The "/"-joined book path the link points to.  Leading ".." segments
        that would climb above the archive root are simply dropped.
    """
    relative_ = re.split(r"[\\/]", relative_path)
    refer_ = re.split(r"[\\/]", refer_bkpath)

    # Count and strip the leading ".." segments.  The emptiness check keeps
    # an href made only of ".." segments from raising IndexError.
    back_step = 0
    while relative_ and relative_[0] == "..":
        back_step += 1
        relative_.pop(0)

    # The referring file sits at the archive root: nothing to climb out of.
    if len(refer_) <= 1:
        return "/".join(relative_)

    # Drop the referring file's own name, keeping only its directories.
    refer_.pop()

    # More ".." than directories available: resolve from the root.
    if back_step > len(refer_):
        return "/".join(relative_)

    if back_step:
        del refer_[-back_step:]
    return "/".join(refer_ + relative_)
837 | 
838 | 
def epub_sources():
    """Collect the EPUB files named on the command line.

    Returns:
        ``sys.argv`` itself when no argument was given.  Otherwise a list
        whose first item is the directory of the running script, followed by
        every argument that ends in ".epub" (case-insensitive) and exists on
        disk.
    """
    argv = sys.argv
    if len(argv) < 2:
        return argv

    found = [path.dirname(argv[0])]
    found.extend(
        candidate
        for candidate in argv[1:]
        if path.splitext(path.basename(candidate))[1].lower() == ".epub"
        and path.exists(candidate)
    )
    return found
852 | 
853 | 
def run(epub_src, output_path=None):
    """Restructure one EPUB and log every problem recorded while doing so.

    Args:
        epub_src: path of the EPUB file to process.
        output_path: forwarded to ``EpubTool.set_output_path``.

    Returns:
        ``"skip"`` when the file name ends with "_encrypt.epub" or the tool
        reports the book as already encrypted; the caught exception object
        when anything raises; ``0`` after a successful run.
    """
    try:
        logger.write(f"\n正在尝试加密EPUB: {epub_src}")
        if epub_src.lower().endswith("_encrypt.epub"):
            logger.write("警告: 该文件已加密，无需再次处理！")
            return "skip"

        epub = EpubTool(epub_src)
        epub.set_output_path(output_path)
        if epub.encrypted == True:
            logger.write("警告: 该文件已加密，无需再次处理！")
            return "skip"

        epub.restructure()

        # Build the link report.  For CSS files only the entries that carry
        # a corrected path are kept, and a CSS file left without entries is
        # not reported at all.
        link_issues = {}
        for file_path, log in epub.errorLink_log.items():
            if file_path.lower().endswith(".css"):
                log = [entry for entry in log if entry[1] is not None]
                if log == []:
                    continue
            link_issues[file_path] = log

        if epub.errorOPF_log:
            logger.write("-------在 OPF文件 发现问题------:")
            for error_type, error_value in epub.errorOPF_log:
                if error_type == "duplicate_id":
                    logger.write(
                        f"问题: 发现manifest节点内部存在重复ID {error_value} !!!"
                    )
                    logger.write("措施: 已自动清除重复ID对应的manifest项。")
                elif error_type == "invalid_idref":
                    logger.write(
                        f"问题: 发现spine节点内部存在无效引用ID {error_value} !!!"
                    )
                    logger.write(
                        "措施: 请自行检查spine内的itemref节点并手动修改，确保引用的ID存在于manifest的item项。\n大小写不一致也会导致引用无效。）"
                    )
                elif error_type == "xhtml_not_in_spine":
                    logger.write(
                        f"问题: 发现ID为 {error_value} 的文件manifest中登记为application/xhtml+xml类型，但不被spine节点的项所引用"
                    )
                    logger.write(
                        f"措施: 自行检查该文件是否需要被spine引用。部分阅读器中，如果存在xhtml文件不被spine引用，可能导致epub无法打开。"
                    )

        for file_path, log in link_issues.items():
            basename = path.basename(file_path)
            logger.write(f"-----在 {basename} 发现问题链接-----:")
            for href, correct_path in log:
                if correct_path is None:
                    logger.write(f"链接: {href}\n问题: 未能找到对应文件！！！")
                else:
                    logger.write(
                        f"链接: {href}\n问题: 与实际文件名大小写不一致！\n措施: 程序已自动纠正链接。"
                    )
    except Exception as e:
        logger.write(f"{epub_src} 重构EPUB失败: {e}")
        return e
    else:
        logger.write(f"{epub_src} 重构EPUB成功")
    return 0
916 | 
917 | 
def main():
    """Ask for one EPUB path on stdin, process it and print the outcome.

    Typing "e" (any case) terminates the process through ``sys.exit``.  A
    path that is not an existing file prints an error and returns, so the
    caller can prompt again.
    """
    epub_src = input("【使用说明】请把EPUB文件拖曳到本窗口上（输入'e'退出）")
    # Trim whitespace before the quotes: a dragged path may arrive as
    # `"C:\book.epub" ` and the trailing blank would otherwise shield the
    # closing quote from being stripped.
    epub_src = epub_src.strip().strip("'").strip('"').strip()
    if epub_src.lower() == "e":
        print("程序已退出")
        sys.exit()
    if not os.path.isfile(epub_src):
        print("错误: 找不到指定的EPUB文件，请确认文件路径是否正确并重新输入！")
        return
    ret = run(epub_src)
    if ret == "skip":
        print("已跳过该文件")
    elif isinstance(ret, Exception):
        # run() hands back the caught exception object on failure; the
        # former comparison with the string "e" never matched, so failures
        # were reported as success.
        print("操作失败，请检查日志！")
    else:
        print("操作成功！")
934 | 
935 | 
if __name__ == "__main__":
    # Show the feature summary once, then keep prompting for files; main()
    # ends the process itself when the user types 'e'.
    banner = (
        "【脚本功能】\n"
        "1、 将epub目录结构规范化至sigil规范格式。\n"
        "2、 将没有列入manifest项的epub有效文件自动列入manifest项。\n"
        "3、 自动清除manifest中携带重复ID或多余ID的无效项。\n"
        "    脚本将优先保留spine或metadata中关联的ID。\n"
        "4、 自动检查并提醒spine节点中引用无效ID的itemref项。\n"
        "5、 自动检查并提醒manifest节点中xhtml类型文件不被spine节点引用的情况。\n"
        "6、 自动检测并纠正实际文件名与对应的引用链接大小写不一致的问题。\n"
        "7、 自动检测并提醒找不到对应文件的链接。\n"
        "8、 加入名称混淆，使sigil无法打开修改。"
    )
    print(banner)
    while True:
        main()
951 | 


--------------------------------------------------------------------------------
/utils/encrypt_font.py:
--------------------------------------------------------------------------------
  1 | import zipfile
  2 | import os
  3 | from bs4 import BeautifulSoup
  4 | import tinycss2
  5 | # from tinycss2 import parse_component_value_list
  6 | import emoji
  7 | import re
  8 | from fontTools.ttLib import TTFont
  9 | from fontTools.fontBuilder import FontBuilder
 10 | from fontTools.pens.ttGlyphPen import TTGlyphPen
 11 | from io import BytesIO
 12 | import random
 13 | import traceback
 14 | import html
 15 | from datetime import datetime
 16 | 
 17 | try:
 18 |     from utils.log import logwriter
 19 | except:
 20 |     from log import logwriter
 21 | 
 22 | logger = logwriter()
 23 | 
 24 | class FontEncrypt:
 25 | 
 26 |     def __init__(self, epub_path, output_path):
 27 |         if not os.path.exists(epub_path):
 28 |             raise Exception("EPUB文件不存在")
 29 |         
 30 |         self.epub_path = os.path.normpath(epub_path)
 31 |         self.epub = zipfile.ZipFile(epub_path)
 32 |         if output_path and os.path.exists(output_path):
 33 |             if os.path.isfile(output_path):
 34 |                 raise Exception("输出路径不能是文件")
 35 |             if not os.path.exists(output_path):
 36 |                 raise Exception(f"输出路径{output_path}不存在")
 37 |         else:
 38 |             output_path=os.path.dirname(epub_path)
 39 |             logger.write(f"输出路径不存在，使用默认路径: {output_path}")
 40 |         self.output_path = os.path.normpath(output_path)
 41 |         self.file_write_path=os.path.join(self.output_path, os.path.basename(self.epub_path).replace('.epub','_font_encrypt.epub'))
 42 |         if os.path.exists(self.file_write_path):
 43 |             os.remove(self.file_write_path)
 44 |         self.htmls = []
 45 |         self.css = []
 46 |         self.fonts = []
 47 |         self.ori_files = []
 48 |         self.missing_chars = []
 49 |         self.font_to_font_family_mapping = {}
 50 |         self.css_selector_to_font_mapping = {}
 51 |         self.font_to_char_mapping = {}
 52 |         # self.font_to_unchanged_file_mapping = {}
 53 |         self.target_epub = None
 54 |         for file in self.epub.namelist():
 55 |             if file.lower().endswith('.html') or file.endswith('.xhtml'):
 56 |                 self.htmls.append(file)
 57 |             elif file.lower().endswith('.css'):
 58 |                 self.ori_files.append(file)
 59 |                 self.css.append(file)
 60 |             elif file.lower().endswith((".ttf", ".otf", ".woff")):
 61 |                 self.fonts.append(file)
 62 |             else:
 63 |                 self.ori_files.append(file)
 64 |         
 65 |     
 66 |     def create_target_epub(self):
 67 |         self.target_epub = zipfile.ZipFile(self.file_write_path,"w",
 68 |             zipfile.ZIP_STORED,zipfile.ZIP_STORED,)
 69 | 
 70 |     def find_local_fonts_mapping(self):
 71 |         font_face_rules = []
 72 |         for css in self.css:
 73 |             with self.epub.open(css) as f:
 74 |                 content = f.read().decode('utf-8')
 75 |                 rules = tinycss2.parse_stylesheet(content)
 76 |                 # 遍历所有规则，查找 @font-face
 77 |                 for rule in rules:
 78 |                     all_count = 0
 79 |                     if rule.type == 'at-rule' and rule.lower_at_keyword == 'font-face':
 80 |                         tmp_font_face = tinycss2.serialize(rule.content)
 81 | 
 82 |                         local_count, url_count = tmp_font_face.count(
 83 |                             'local'), tmp_font_face.count('url')
 84 |                         all_count += local_count + url_count
 85 |                         if all_count == 1:
 86 |                             tmp_list = []
 87 | 
 88 |                             for item in tmp_font_face.split(';'):
 89 |                                 if len(item.strip()) > 0:
 90 |                                     tmp_list.append(item.strip())
 91 |                             font_face_rules.append(tmp_list)
 92 |         mapping = {}
 93 |         for font in self.fonts:
 94 |             font_name = os.path.basename(font)
 95 |             for j in font_face_rules:
 96 |                 if font_name in j[1]:
 97 |                     font_family = j[0].split(':')[1].strip().replace(
 98 |                         '"', '').replace("'", '')
 99 |                     mapping[font_family] = font
100 |         self.font_to_font_family_mapping = mapping
101 | 
102 |     def find_selector_to_font_mapping(self):
103 |         mapping = {}
104 |         for css in self.css:
105 |             with self.epub.open(css) as f:
106 |                 content = f.read().decode('utf-8')
107 |                 rules = tinycss2.parse_stylesheet(content)
108 |                 for rule in rules:
109 |                     if rule.type == "qualified-rule":  # 确保是样式规则
110 |                         # 获取选择器
111 |                         selector = tinycss2.serialize(rule.prelude).strip()
112 |                         declarations = tinycss2.parse_declaration_list(
113 |                             rule.content)
114 |                         for declaration in declarations:
115 |                             if declaration.type == "declaration" and declaration.lower_name == "font-family":
116 |                                 # 提取 font-family 的值
117 |                                 font_family_values = [
118 |                                     token.value for token in declaration.value
119 |                                     if token.type == "string"
120 |                                     or token.type == "ident"
121 |                                 ]
122 | 
123 |                                 # 提取第一个字体名称
124 |                                 primary_font = font_family_values[0].strip(
125 |                                     "'\"")
126 | 
127 |                                 # 如果该字体在映射中
128 |                                 if primary_font in self.font_to_font_family_mapping:
129 |                                     # print(
130 |                                     #     f"选择器 '{selector}' 使用了字体文件: {self.font_to_font_family_mapping[primary_font]}"
131 |                                     # )
132 |                                     if primary_font not in mapping:
133 |                                         mapping[
134 |                                             selector] = self.font_to_font_family_mapping[
135 |                                                 primary_font]
136 |         self.css_selector_to_font_mapping = dict(
137 |             sorted(mapping.items(), reverse=True))
138 | 
139 |     def remove_duplicates(self, s):
140 |         seen = set()
141 |         result = []
142 |         for char in s:
143 |             if char not in seen:
144 |                 seen.add(char)
145 |                 result.append(char)
146 |         return ''.join(result)
147 | 
148 |     def find_char_mapping(self):
149 |         mapping = {}
150 |         for one_html in self.htmls:
151 |             with self.epub.open(one_html) as f:
152 |                 content = f.read().decode('utf-8')
153 |                 soup = BeautifulSoup(content, 'html.parser')
154 |                 for css_selector, font_file in self.css_selector_to_font_mapping.items(
155 |                 ):
156 |                     # 使用 CSS 选择器查找对应的标签
157 |                     elements = soup.select(css_selector)
158 | 
159 |                     # 提取每个标签的文字内容
160 |                     text_contents = [
161 |                         element.get_text(strip=True) for element in elements
162 |                     ]
163 |                     combined_sentence = "".join(text_contents)
164 |                     if font_file not in mapping:
165 |                         mapping[font_file] = self.remove_duplicates(
166 |                             combined_sentence)
167 |                     else:
168 |                         mapping[font_file] = self.remove_duplicates(''.join(
169 |                             [mapping[font_file], combined_sentence]))
170 |         self.font_to_char_mapping = mapping
171 | 
172 |     def get_mapping(self):
173 |         self.find_local_fonts_mapping()
174 |         self.find_selector_to_font_mapping()
175 |         self.find_char_mapping()
176 |         logger.write(f"字体文件映射: {self.font_to_font_family_mapping}")
177 |         logger.write(f"CSS选择器映射: {self.css_selector_to_font_mapping}")
178 |         logger.write(f"字体文件到字符映射: {self.font_to_char_mapping}")
179 |         return self.font_to_font_family_mapping, self.css_selector_to_font_mapping, self.font_to_char_mapping
180 | 
181 |     def clean_text(self):
182 |         for key in self.font_to_char_mapping:
183 |             text = self.font_to_char_mapping[key]
184 |             # 去除转义字符和换行符
185 |             self.font_to_char_mapping[key] = text.replace('\n', '').replace(
186 |                 '\r', '').replace('\t', '')
187 |             # 去除标点符号和特殊字符
188 |             self.font_to_char_mapping[key] = re.sub(
189 |                 r'[^\u4e00-\u9fa5a-zA-Z0-9]', '', text)
190 |             self.font_to_char_mapping[key] = emoji.replace_emoji(text,
191 |                                                                  replace='')
192 |         logger.write(f"清理后的文本: {self.font_to_char_mapping}")
193 | 
194 |     # 修改自https://github.com/solarhell/fontObfuscator
195 |     def ensure_cmap_has_all_text(self, cmap: dict, s: str) -> bool:
196 |         missing_chars = []
197 |         exsit_chars = []
198 |         for char in s:
199 |             if ord(char) not in cmap:
200 |                 # raise Exception(f'字库缺少{char}这个字 {ord(char)}')
201 |                 missing_chars.append(char)
202 |             else:
203 |                 exsit_chars.append(char)
204 |         return missing_chars, ''.join(exsit_chars)
205 | 
206 |     # def is_cjk_font(self, font):
207 |     #     """
208 |     #     判断字体文件是否包含CJK字符。
209 |         
210 |     #     :param font_path: 字体文件路径
211 |     #     :return: 如果字体包含CJK字符返回True，否则返回False
212 |     #     """
213 |     #     # 加载字体文件
214 |     #     # font = TTFont(font_io)
215 | 
216 |     #     # 获取所有字符映射表
217 |     #     cmap_tables = font['cmap'].tables
218 | 
219 |     #     # 定义CJK字符的Unicode范围
220 |     #     cjk_ranges = [
221 |     #         (0x4E00, 0x9FFF),  # CJK Unified Ideographs
222 |     #         # (0x3400, 0x4DBF),  # CJK Unified Ideographs Extension A
223 |     #         # (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
224 |     #         # (0x2A700, 0x2B73F),  # CJK Unified Ideographs Extension C
225 |     #         # (0x2B740, 0x2B81F),  # CJK Unified Ideographs Extension D
226 |     #         # (0x2B820, 0x2CEAF),  # CJK Unified Ideographs Extension E
227 |     #         # (0xF900, 0xFAFF),  # CJK Compatibility Ideographs
228 |     #         # (0x2F800, 0x2FA1F)  # CJK Compatibility Ideographs Supplement
229 |     #     ]
230 |     #     # 遍历所有字符映射表
231 |     #     for table in cmap_tables:
232 |     #         # 获取当前表中的字符到字形名称的映射
233 |     #         char_to_glyph = table.cmap
234 | 
235 |     #         # 检查是否存在CJK范围内的字符
236 |     #         available_ranges = []
237 |     #         for code_point in char_to_glyph.keys():
238 |     #             if any(start <= code_point <= end
239 |     #                    for start, end in cjk_ranges):
240 |     #                 available_ranges.append(code_point)
241 |     #         if len(available_ranges) > 0:
242 |     #             # 如果找到CJK字符，返回True
243 |     #             # print(f"找到CJK字符: {available_ranges}")
244 |     #             return True, available_ranges
245 | 
246 |     #     return False, None  # 未找到CJK字符
247 | 
248 | 
249 |     def set_timestamps(self, font):
250 |         # 设置 'head' 表的时间戳
251 |         head_table = font['head']
252 |         current_time = int(datetime.now().timestamp())
253 |         # print(f"原始时间戳: {head_table.created}, {head_table.modified}")
254 |         created_datetime = datetime.fromtimestamp(head_table.created).strftime('%Y-%m-%d %H:%M:%S')
255 |         modified_datetime = datetime.fromtimestamp(head_table.modified).strftime('%Y-%m-%d %H:%M:%S')
256 |         logger.write(f"原始时间戳: {created_datetime}, {modified_datetime}")
257 |         # print(f"转换UTC时间，: {created_datetime}")
258 |         # print(f"转换UTC时间，: {modified_datetime}")
259 |         head_table.created = current_time
260 |         head_table.modified = current_time
261 |         logger.write(f"转换后时间戳 {datetime.fromtimestamp(current_time).strftime('%Y-%m-%d %H:%M:%S')}")
262 | 
263 |     # 修改自https://github.com/solarhell/fontObfuscator
    def encrypt_font(self):
        """Rebuild every mapped font with shuffled glyphs and record the code table.

        For each ``font path -> text`` pair in ``self.font_to_char_mapping``
        a new TrueType font is built with ``FontBuilder`` in which the
        outline of every used character is stored under the glyph name of
        another used character and is reachable only through a randomly
        chosen code point.  The new font is written to ``self.target_epub``
        under the original path, and the entry in
        ``self.font_to_char_mapping`` is replaced by a
        ``{character: "&#x…" entity text}`` table.

        The output archive is created first via ``create_target_epub()``.
        """
        self.create_target_epub()
        for i, (font_path, plain_text) in enumerate(
                self.font_to_char_mapping.items()):
            original_font = TTFont(BytesIO(self.epub.read(font_path)))
            # Reuse the family (nameID 1) and style (nameID 2) names of the
            # source font; the first record found for each wins.
            name_table = original_font['name']
            family_name = None
            style_name = None
            for record in name_table.names:
                if record.nameID == 1:
                    family_name = record.string.decode(record.getEncoding())
                elif record.nameID == 2:
                    style_name = record.string.decode(record.getEncoding())

                if family_name and style_name:
                    break
            # Fallbacks for fonts that carry neither record.
            if family_name is None:
                family_name = f'ETFamily_{i}'
            if style_name is None:
                style_name = 'Regular'

            NAME_STRING = {
                'familyName': family_name,
                'styleName': style_name,
                'psName': family_name + '-' + style_name,
                'copyright': 'Created by EpubTool',
                'version': 'Version 1.0',
                'vendorURL': 'https://EpubTool.com/',
            }
            # Characters the font has no cmap entry for are logged and left
            # out; from here on plain_text holds only covered characters.
            original_cmap: dict = original_font.getBestCmap()
            miss_char, plain_text = self.ensure_cmap_has_all_text(
                original_cmap, plain_text)
            if len(miss_char) > 0:
                logger.write(f'字体文件{font_path}缺少字符{miss_char}')
            available_ranges= [ord(char) for char in plain_text]
            glyphs, metrics, cmap = {}, {}, {}
            # One distinct random code point per character, drawn from
            # U+AC00..U+D7AE (despite the variable name this is the Hangul
            # Syllables range, not a Private Use Area).  random.sample raises
            # ValueError if plain_text is longer than that range.
            private_codes = random.sample(range(0xAC00, 0xD7AF), len(plain_text))
            # A random permutation of the text's own code points: it decides
            # under which existing glyph name each outline is stored.
            cjk_codes = random.sample(available_ranges, len(plain_text))

            glyph_set = original_font.getGlyphSet()
            pen = TTGlyphPen(glyph_set)
            glyph_order = original_font.getGlyphOrder()
            final_shadow_text: list = []
            spescial_glyphs= ['null', '.notdef', 'minus', 'dotlessi','uni0307','quotesingle','zero.dnom','fraction','uni0237']

            # Copy these glyphs unchanged (outline and metrics) when the
            # source font defines them.
            for special_glyph in spescial_glyphs:
                if special_glyph in glyph_order:
                    glyph_set[special_glyph].draw(pen)
                    glyphs[special_glyph] = pen.glyph()
                    metrics[special_glyph] = original_font['hmtx'][special_glyph]
                    final_shadow_text += [special_glyph]

            html_entities = []

            for index, plain in enumerate(plain_text):
                # NOTE(review): cjk_codes only holds code points that were
                # just found in original_cmap, so this KeyError looks
                # unreachable; if it did fire, shadow_cmap_name would be
                # unbound or stale on the lines below.
                try:
                    shadow_cmap_name = original_cmap[cjk_codes[index]]
                except KeyError:
                    logger.write(f"字体文件缺少字符，unicode:{cjk_codes[index]}，请检查")

                # Store the outline and metrics of `plain` under the shuffled
                # glyph name and expose it through the random code point.
                # NOTE(review): if two code points share one glyph name in
                # original_cmap, the later outline overwrites the earlier
                # one here; confirm whether that can occur.
                final_shadow_text += [shadow_cmap_name]
                glyph_set[original_cmap[ord(plain)]].draw(pen)
                glyphs[shadow_cmap_name] = pen.glyph()
                metrics[shadow_cmap_name] = original_font['hmtx'][original_cmap[ord(plain)]]
                cmap[private_codes[index]] = shadow_cmap_name
                # Entity text such as "&#xac00" (written without a trailing
                # semicolon), in the same order as plain_text.
                html_entities += [hex(private_codes[index]).replace('0x', '&#x')]

            horizontal_header = {
                'ascent': original_font['hhea'].ascent,
                'descent': original_font['hhea'].descent,
            }
            # Safety net: give any collected name that has no outline a glyph
            # taken from the pen's current state and zero metrics.
            missing_glyphs = [glyph for glyph in final_shadow_text if glyph not in glyphs]
            if missing_glyphs:
                logger.write(f"以下字形在 glyphs 中缺失: {missing_glyphs}")
                for glyph in missing_glyphs:
                    glyphs[glyph] = pen.glyph()
                    metrics[glyph] = (0, 0)

            # Keep the source font's glyph order, restricted to the glyphs
            # that were rebuilt above.
            glyf_table = original_font['glyf']
            glyphs_to_keep = set(glyphs.keys())
            new_glyph_order = [glyph for glyph in glyph_order if glyph in glyphs_to_keep]
            original_font.setGlyphOrder(new_glyph_order)

            # Remove the glyphs that are not needed from the source font's
            # tables.
            # NOTE(review): the font that gets saved below is the one
            # assembled by FontBuilder; whether these edits to original_font
            # influence the output is not evident from this block.
            for glyph in glyph_order:
                if glyph not in glyphs_to_keep:
                    if glyph in glyf_table.glyphs:
                        del glyf_table.glyphs[glyph]
                    if glyph in original_font['hmtx'].metrics:
                        del original_font['hmtx'].metrics[glyph]
                    loca_index = glyph_order.index(glyph)
                    if 0 <= loca_index < len(original_font['loca'].locations):
                        original_font['loca'].locations[loca_index] = 0

            # Update the maxp table.
            original_font['maxp'].numGlyphs = len(new_glyph_order)

            self.set_timestamps(original_font)

            # Assemble the replacement font from the collected pieces; only
            # unitsPerEm, ascent and descent are carried over from the source.
            fb = FontBuilder(original_font['head'].unitsPerEm, isTTF=True)
            fb.setupGlyphOrder(new_glyph_order)
            fb.setupCharacterMap(cmap)
            fb.setupGlyf(glyphs)
            fb.setupHorizontalMetrics(metrics)
            fb.setupHorizontalHeader(**horizontal_header)
            fb.setupNameTable(NAME_STRING)
            fb.setupOS2()
            fb.setupPost()
            font_stream = BytesIO()
            fb.save(font_stream)

            # The rebuilt font takes the place of the original inside the
            # output archive.
            self.target_epub.writestr(font_path, font_stream.getvalue(),zipfile.ZIP_DEFLATED)
            # From now on this font's entry maps each character to its
            # entity text instead of holding the plain character string.
            text_list = list(plain_text)
            replace_table = {}
            for a0, a1 in zip(text_list, html_entities):
                replace_table[a0] = a1
            self.font_to_char_mapping[font_path] = replace_table
            logger.write(f"字体文件{font_path}的加密映射: \n{replace_table}")
384 | 
385 |     def close_file(self):
386 |         self.epub.close()
387 |         self.target_epub.close()
388 | 
389 | 
390 |     def read_html(self):
391 |         for one_html in self.htmls:
392 |             with self.epub.open(one_html) as f:
393 |                 content = f.read().decode('utf-8')
394 |             soup = BeautifulSoup(content, 'html.parser')
395 | 
396 |             for css_selector in self.css_selector_to_font_mapping.keys():
397 |                 font_file = self.css_selector_to_font_mapping[css_selector]
398 |                 replace_table = self.font_to_char_mapping[font_file]
399 |                 trans_table = str.maketrans(replace_table)
400 |                 if '.' in css_selector:
401 |                     selector, selector_class = css_selector.split('.', 1)
402 |                     selector_tags = soup.find_all(selector,
403 |                                                     class_=selector_class)
404 |                 else:
405 |                     selector, selector_class = css_selector, None
406 |                     # print(selector, selector_class)
407 |                     selector_tags = soup.find_all(selector)
408 |                 for tag in selector_tags:
409 |                     ori_text = ''.join(str(item) for item in tag.contents)
410 |                     new_text = ori_text.translate(trans_table)
411 |                     parsed_new_text = BeautifulSoup(
412 |                         html.unescape(new_text), 'html.parser')
413 |                     # print(f"ori_text:{ori_text}\nnew_text:{new_text}")
414 |                     tag.clear()  # 清空当前标签内容
415 |                     tag.append(parsed_new_text)  # 插入新的内容
416 |                     # print(tag.get_text(strip=True))
417 |             formatted_html = soup.prettify(formatter="html")
418 |             self.target_epub.writestr(one_html, formatted_html.encode('utf-8'),zipfile.ZIP_DEFLATED)
419 |         for item in self.ori_files:
420 |             if item in self.epub.namelist():
421 |                 with self.epub.open(item) as f:
422 |                     content = f.read()
423 |                 self.target_epub.writestr(item, content,zipfile.ZIP_DEFLATED)
424 |         self.close_file()
425 |         logger.write(f"EPUB文件处理完成，输出文件路径: {self.file_write_path}")
426 | 
427 |     # def read_unchanged_fonts(self,font_file_mapping=None):
428 |     #    self.font_to_unchanged_file_mapping = font_file_mapping if font_file_mapping else {}
429 | 
def run_epub_font_encrypt(epub_path, output_path):
    """Run the font-obfuscation pipeline for one EPUB and report the outcome.

    Args:
        epub_path: Passed unchanged to ``FontEncrypt`` as the input EPUB.
        output_path: Passed unchanged to ``FontEncrypt`` as the output location.

    Returns:
        ``"skip"`` when the EPUB exposes no fonts, an error-message string when
        font encryption or HTML processing raises, and ``0`` on success.
    """
    logger.write(f"\n正在尝试加密EPUB字体: {epub_path}")
    fe = FontEncrypt(epub_path, output_path)

    font_count = len(fe.fonts)
    if font_count == 0:
        logger.write("没有找到字体文件，退出")
        return "skip"
    font_names = ', '.join(fe.fonts)
    logger.write(f"此EPUB文件包含{font_count}个字体文件: {font_names}")

    fe.get_mapping()
    fe.clean_text()

    # Stage 1: rebuild the fonts.
    try:
        fe.encrypt_font()
        logger.write("字体加密成功")
    except Exception as e:
        failure = f"字体加密失败，错误信息: {e}"
        logger.write(failure)
        traceback.print_exc()
        fe.close_file()
        return failure

    # Stage 2: rewrite the HTML and finish the archive.
    try:
        fe.read_html()
        logger.write("EPUB文件处理成功")
        fe.close_file()
    except Exception as e:
        failure = f"EPUB文件处理失败，错误信息: {e}"
        logger.write(failure)
        fe.close_file()
        return failure

    return 0
456 | 
if __name__ == '__main__':
    # Interactive command-line entry point: ask for the input EPUB and the
    # output directory, then run the same stages as run_epub_font_encrypt().
    epub_read_path = input("1、请输入EPUB文件路径（如: ./test.epub）: ")

    file_write_dir = input(
        "2、请输入输出文件夹路径（如: ./dist）: ")

    fe = FontEncrypt(epub_read_path, file_write_dir)
    fe.get_mapping()
    # Join outside the f-string: a backslash inside an f-string replacement
    # field is a SyntaxError before Python 3.12, and because it is raised at
    # compile time it would also break importing this module.
    font_listing = '\n'.join(fe.fonts)
    print(f"3、此EPUB文件包含{len(fe.fonts)}个字体文件:\n{font_listing}")
    # NOTE: an optional interactive step that mapped each embedded font to an
    # external font file used to sit here; it is disabled together with
    # FontEncrypt.read_unchanged_fonts.
    fe.clean_text()
    try:
        fe.encrypt_font()
        print("4、字体加密成功")
    except Exception as e:
        print(f"4、字体加密失败，错误信息: {e}")
        traceback.print_exc()
        fe.close_file()
        # SystemExit instead of exit(): exit() is injected by the site module
        # and is not guaranteed to exist (python -S, frozen builds).
        raise SystemExit(1)
    try:
        fe.read_html()
        print("5、EPUB文件处理成功")
    except Exception as e:
        print(f"5、EPUB文件处理失败，错误信息: {e}")
        fe.close_file()
        raise SystemExit(1)
505 | 


--------------------------------------------------------------------------------
/utils/log.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import time
 4 | 
 5 | 
 6 | class logwriter:
 7 |     def __init__(self):
 8 |         self.path = os.path.join(
 9 |             os.path.dirname(os.path.abspath(sys.argv[0])), "log.txt"
10 |         )
11 |         # print(self.path)
12 |         with open(self.path, "w",encoding='utf-8') as f:
13 |             current_time = time.strftime(
14 |                 "%Y-%m-%d %H:%M:%S", time.localtime(time.time())
15 |             )
16 |             f.write(f"time: {current_time}\n")
17 | 
18 |     def write(self, text):
19 |         with open(self.path, "a", encoding='utf-8') as f:
20 |             f.write(f"{text}\n")
21 | 
22 | 
if __name__ == "__main__":
    # Manual smoke test: start a new log file and append one line to it.
    logwriter().write("hello world")
26 | 


--------------------------------------------------------------------------------
/utils/reformat_epub.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #!/usr/bin/env python
  3 | # 源码: sigil吧ID: 遥遥心航
  4 | 
  5 | import zipfile
  6 | import re, sys
  7 | from os import path, mkdir, getcwd
  8 | from urllib.parse import unquote
  9 | from xml.etree import ElementTree
 10 | import os
 11 | try:
 12 |     from utils.log import logwriter
 13 | except:
 14 |     from log import logwriter
 15 | 
 16 | logger = logwriter()
 17 | 
 18 | class EpubTool:
 19 |     def __init__(self, epub_src):
 20 |         self.epub = zipfile.ZipFile(epub_src)
 21 |         self.epub_src = epub_src
 22 |         self.epub_name = path.basename(epub_src)
 23 |         self.ebook_root = path.dirname(epub_src)
 24 |         self.output_path = self.ebook_root
 25 |         self.epub_type = ""
 26 |         self.temp_dir = ""
 27 |         self._init_namelist()
 28 |         self._init_mime_map()
 29 |         self._init_opf()
 30 |         self.manifest_list = []  # (id,opf_href,mime,properties)
 31 |         self.id_to_href = {}  # { id : href.lower, ... }
 32 |         self.href_to_id = {}  # { href.lower : id, ...}
 33 |         self.text_list = []  # (id,opf_href,properties)
 34 |         self.css_list = []  # (id,opf_href,properties)
 35 |         self.image_list = []  # (id,opf_href,properties)
 36 |         self.font_list = []  # (id,opf_href,properties)
 37 |         self.audio_list = []  # (id,opf_href,properties)
 38 |         self.video_list = []  # (id,opf_href,properties)
 39 |         self.spine_list = []  # (sid, linear, properties)
 40 |         self.other_list = []  # (id,opf_href,mime,properties)
 41 |         self.errorOPF_log = []  # (error_type,error_value)
 42 |         self.errorLink_log = {}  # {filepath:[(error_link,correct_link || None),...]}
 43 |         self._parse_opf()
 44 | 
 45 |     def set_output_path(self, output_path):
 46 |         if output_path is not None and os.path.isdir(output_path):
 47 |             self.output_path = output_path
 48 | 
 49 |     def _init_namelist(self):
 50 |         self.namelist = self.epub.namelist()
 51 | 
 52 |     def _init_mime_map(self):
 53 |         self.mime_map = {
 54 |             ".html": "application/xhtml+xml",
 55 |             ".xhtml": "application/xhtml+xml",
 56 |             ".css": "text/css",
 57 |             ".js": "application/javascript",
 58 |             ".jpg": "image/jpeg",
 59 |             ".jpeg": "image/jpeg",
 60 |             ".bmp": "image/bmp",
 61 |             ".png": "image/png",
 62 |             ".gif": "image/gif",
 63 |             ".webp": "image/webp",
 64 |             ".ttf": "font/ttf",
 65 |             ".otf": "font/otf",
 66 |             ".woff": "font/woff",
 67 |             ".ncx": "application/x-dtbncx+xml",
 68 |             ".mp3": "audio/mpeg",
 69 |             ".mp4": "video/mp4",
 70 |             ".smil": "application/smil+xml",
 71 |             ".pls": "application/pls+xml",
 72 |         }
 73 | 
 74 |     def _init_opf(self):
 75 |         # 通过 container.xml 读取 opf 文件
 76 |         container_xml = self.epub.read("META-INF/container.xml").decode("utf-8")
 77 |         rf = re.match(r'<rootfile[^>]*full-path="(?i:(.*?\.opf))"', container_xml)
 78 |         if rf is not None:
 79 |             self.opfpath = rf.group(1)
 80 |             self.opf = self.epub.read(self.opfpath).decode("utf-8")
 81 |             return
 82 |         # 通过路径首个 opf 读取 opf 文件
 83 |         for bkpath in self.namelist:
 84 |             if bkpath.lower().endswith(".opf"):
 85 |                 self.opfpath = bkpath
 86 |                 self.opf = self.epub.read(self.opfpath).decode("utf-8")
 87 |                 return
 88 |         raise RuntimeError("无法发现opf文件")
 89 | 
 90 |     def _parse_opf(self):
 91 |         self.etree_opf = {"package": ElementTree.fromstring(self.opf)}
 92 | 
 93 |         for child in self.etree_opf["package"]:
 94 |             tag = re.sub(r"\{.*?\}", r"", child.tag)
 95 |             self.etree_opf[tag] = child
 96 | 
 97 |         self._parse_metadata()
 98 |         self._parse_manifest()
 99 |         self._parse_spine()
100 |         self._clear_duplicate_id_href()
101 |         self._parse_hrefs_not_in_epub()
102 |         self._add_files_not_in_opf()
103 | 
104 |         self.manifest_list = []  # (id,opf_href,mime,properties)
105 |         for id in self.id_to_h_m_p:
106 |             href, mime, properties = self.id_to_h_m_p[id]
107 |             self.manifest_list.append((id, href, mime, properties))
108 | 
109 |         epub_type = self.etree_opf["package"].get("version")
110 | 
111 |         if epub_type is not None and epub_type in ["2.0", "3.0"]:
112 |             self.epub_type = epub_type
113 |         else:
114 |             raise RuntimeError("此脚本不支持该EPUB类型")
115 | 
116 |         # 寻找epub2 toc 文件的id。epub3的nav文件直接当做xhtml处理。
117 |         self.tocpath = ""
118 |         self.tocid = ""
119 |         tocid = self.etree_opf["spine"].get("toc")
120 |         self.tocid = tocid if tocid is not None else ""
121 | 
122 |         # opf item分类
123 |         opf_dir = path.dirname(self.opfpath)
124 |         for id, href, mime, properties in self.manifest_list:
125 | 
126 |             bkpath = opf_dir + "/" + href if opf_dir else href
127 |             if mime == "application/xhtml+xml":
128 |                 self.text_list.append((id, href, properties))
129 |             elif mime == "text/css":
130 |                 self.css_list.append((id, href, properties))
131 |             elif "image/" in mime:
132 |                 self.image_list.append((id, href, properties))
133 |             elif "font/" in mime or href.lower().endswith((".ttf", ".otf", ".woff")):
134 |                 self.font_list.append((id, href, properties))
135 |             elif "audio/" in mime:
136 |                 self.audio_list.append((id, href, properties))
137 |             elif "video/" in mime:
138 |                 self.video_list.append((id, href, properties))
139 |             elif self.tocid != "" and id == self.tocid:
140 |                 opf_dir = path.dirname(self.opfpath)
141 |                 self.tocpath = opf_dir + "/" + href if opf_dir else href
142 |             else:
143 |                 self.other_list.append((id, href, mime, properties))
144 | 
145 |         self._check_manifest_and_spine()
146 | 
147 |     def _parse_metadata(self):
148 |         self.metadata = {}
149 |         for key in [
150 |             "title",
151 |             "creator",
152 |             "language",
153 |             "subject",
154 |             "source",
155 |             "identifier",
156 |             "cover",
157 |         ]:
158 |             self.metadata[key] = ""
159 |         for meta in self.etree_opf["metadata"]:
160 |             tag = re.sub(r"\{.*?\}", r"", meta.tag)
161 |             if tag in [
162 |                 "title",
163 |                 "creator",
164 |                 "language",
165 |                 "subject",
166 |                 "source",
167 |                 "identifier",
168 |             ]:
169 |                 self.metadata[tag] = meta.text
170 |             elif tag == "meta":
171 |                 if meta.get("name") and meta.get("content"):
172 |                     self.metadata["cover"] = meta.get("content")
173 | 
174 |     def _parse_manifest(self):
175 |         self.id_to_h_m_p = {}  # { id : (href,mime,properties) , ... }
176 |         self.id_to_href = {}  # { id : href.lower, ... }
177 |         self.href_to_id = {}  # { href.lower : id, ...}
178 |         if_error = False
179 |         for item in self.etree_opf["manifest"]:
180 |             # 检查opf文件中是否存在错误
181 |             try:
182 |                 id = item.get("id")
183 |                 href = unquote(item.get("href"))
184 |             except Exception as e:
185 |                 str_item = (
186 |                     ElementTree.tostring(item, encoding="unicode")
187 |                     .replace("\n", "")
188 |                     .replace("\r", "")
189 |                     .replace("\t", "")
190 |                 )
191 |                 logger.write(f"item:{str_item} error:{e}")
192 |                 if_error = True
193 |                 continue
194 |             mime = item.get("media-type")
195 |             properties = item.get("properties") if item.get("properties") else ""
196 | 
197 |             self.id_to_h_m_p[id] = (href, mime, properties)
198 |             self.id_to_href[id] = href.lower()
199 |             self.href_to_id[href.lower()] = id
200 |         if if_error:
201 |             logger.write("opf文件中存在错误，请检查！")
202 | 
203 |     def _parse_spine(self):
204 |         self.spine_list = []  # [ (sid, linear, properties) , ... ]
205 |         for itemref in self.etree_opf["spine"]:
206 |             sid = itemref.get("idref")
207 |             linear = itemref.get("linear") if itemref.get("linear") else ""
208 |             properties = itemref.get("properties") if itemref.get("properties") else ""
209 |             self.spine_list.append((sid, linear, properties))
210 | 
211 |     def _clear_duplicate_id_href(self):
212 | 
213 |         # id_used = [ id_in_spine + cover_id ]
214 |         id_used = [x[0] for x in self.spine_list]
215 |         if self.metadata["cover"]:
216 |             id_used.append(self.metadata["cover"])
217 | 
218 |         del_id = []
219 |         for id, href in self.id_to_href.items():
220 |             if self.href_to_id[href] != id:  # 该href拥有多个id,此id已被覆盖。
221 |                 if id in id_used and self.href_to_id[href] not in id_used:
222 |                     if id not in del_id:
223 |                         del_id.append(self.href_to_id[href])
224 |                     self.href_to_id[href] = id
225 |                 elif id in id_used and self.href_to_id[href] in id_used:
226 |                     continue
227 |                 else:
228 |                     if id not in del_id:
229 |                         del_id.append(id)
230 | 
231 |         for id in del_id:
232 |             self.errorOPF_log.append(("duplicate_id", id))
233 |             del self.id_to_href[id]
234 |             del self.id_to_h_m_p[id]
235 | 
236 |     def _add_files_not_in_opf(self):
237 | 
238 |         hrefs_not_in_opf = []
239 |         for archive_path in self.namelist:
240 |             if archive_path.lower().endswith(
241 |                 (
242 |                     ".html",
243 |                     ".xhtml",
244 |                     ".css",
245 |                     ".jpg",
246 |                     ".jpeg",
247 |                     ".bmp",
248 |                     ".gif",
249 |                     ".png",
250 |                     ".webp",
251 |                     ".svg",
252 |                     ".ttf",
253 |                     ".otf",
254 |                     ".js",
255 |                     ".mp3",
256 |                     ".mp4",
257 |                     ".smil",
258 |                 )
259 |             ):
260 |                 opf_href = get_relpath(self.opfpath, archive_path)
261 |                 if opf_href.lower() not in self.href_to_id.keys():
262 |                     hrefs_not_in_opf.append(opf_href)
263 | 
264 |         def allocate_id(href):  # 自动分配不重复id
265 |             basename = path.basename(href)
266 |             if "A" <= basename[0] <= "Z" or "a" <= basename[0] <= "z":
267 |                 new_id = basename
268 |             else:
269 |                 new_id = "x" + basename
270 |             pre, suf = path.splitext(new_id)
271 |             pre_ = pre
272 |             i = 0
273 |             while pre_ + suf in self.id_to_href.keys():
274 |                 i += 1
275 |                 pre_ = pre + "_" + str(i)
276 |             new_id = pre_ + suf
277 |             return new_id
278 | 
279 |         for href in hrefs_not_in_opf:
280 |             new_id = allocate_id(href)
281 |             self.id_to_href[new_id] = href.lower()
282 |             self.href_to_id[href.lower()] = new_id
283 |             ext = path.splitext(href)[1]
284 |             ext = ext.lower()
285 |             try:
286 |                 mime = self.mime_map[ext]
287 |             except KeyError:
288 |                 mime = "text/plain"
289 |             self.id_to_h_m_p[new_id] = (href, mime, "")
290 | 
291 |     def _check_manifest_and_spine(self):
292 |         spine_idrefs = [i for i, j, k in self.spine_list]
293 | 
294 |         for idref in spine_idrefs:
295 |             if not self.id_to_h_m_p.get(idref):  # spine 引用无效ID
296 |                 self.errorOPF_log.append(("invalid_idref", idref))
297 | 
298 |         for mid, opf_href, mime, properties in self.manifest_list:
299 |             if mime == "application/xhtml+xml":
300 |                 if mid not in spine_idrefs:
301 |                     self.errorOPF_log.append(("xhtml_not_in_spine", mid))
302 | 
303 |     def _parse_hrefs_not_in_epub(self):
304 |         del_id = []
305 |         namelist = [x.lower() for x in self.epub.namelist()]
306 |         for id, href in self.id_to_href.items():
307 |             bkpath = get_bookpath(href, self.opfpath)
308 |             if bkpath.lower() not in namelist:
309 |                 del_id.append(id)
310 |                 del self.href_to_id[href]
311 |         for id in del_id:
312 |             del self.id_to_href[id]
313 |             del self.id_to_h_m_p[id]
314 | 
315 |     def create_tgt_epub(self):
316 |         output_path = self.output_path
317 |         logger.write(f"输出路径: {output_path}")
318 |         return zipfile.ZipFile(
319 |             path.join(output_path, self.epub_name.replace(".epub", "_reformat.epub")),
320 |             "w",
321 |             zipfile.ZIP_STORED,
322 |         )
323 | 
324 |     # 重构
325 |     def restructure(self):
326 |         self.tgt_epub = self.create_tgt_epub()
327 |         # mimetype
328 |         mimetype = self.epub.read("mimetype")
329 |         self.tgt_epub.writestr("mimetype", mimetype, zipfile.ZIP_DEFLATED)
330 |         # META-INF
331 |         metainf_data = self.epub.read("META-INF/container.xml").decode("utf-8")
332 |         metainf_data = re.sub(
333 |             r'<rootfile[^>]*media-type="application/oebps-[^>]*/>',
334 |             r'<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>',
335 |             metainf_data,
336 |         )
337 |         self.tgt_epub.writestr(
338 |             "META-INF/container.xml",
339 |             bytes(metainf_data, encoding="utf-8"),
340 |             zipfile.ZIP_DEFLATED,
341 |         )
342 |         # OEBPS
343 |         re_path_map = {
344 |             "text": {},
345 |             "css": {},
346 |             "image": {},
347 |             "font": {},
348 |             "audio": {},
349 |             "video": {},
350 |             "other": {},
351 |         }  # { ori_bkpath : re_basename }
352 |         basename_log = {
353 |             "text": [],
354 |             "css": [],
355 |             "image": [],
356 |             "font": [],
357 |             "audio": [],
358 |             "video": [],
359 |             "other": [],
360 |         }
361 |         lowerPath_to_originPath = (
362 |             {}
363 |         )  # 如果路径大小写不一致，则登记为 { 小写路径 : 原始路径 }
364 | 
365 |         def auto_rename(id, href, ftype):
366 |             filename, ext = path.splitext(path.basename(href))
367 |             filename_ = filename
368 |             num = 0
369 |             while filename_ + ext in basename_log[ftype]:
370 |                 num += 1
371 |                 filename_ = filename + "_" + str(num)
372 |             basename = filename_ + ext
373 |             basename_log[ftype].append(basename)
374 |             return basename
375 | 
376 |         def check_link(filename, bkpath, href, self, target_id=""):
377 |             if href == "" or href.startswith(
378 |                 ("http://", "https://", "res:/", "file:/", "data:")
379 |             ):
380 |                 return None
381 |             if bkpath.lower() in lowerPath_to_originPath.keys():
382 |                 if bkpath != lowerPath_to_originPath[bkpath.lower()]:  # 大小写不一致
383 |                     correct_path = lowerPath_to_originPath[bkpath.lower()]
384 |                     self.errorLink_log.setdefault(filename, [])
385 |                     self.errorLink_log[filename].append(
386 |                         (href + target_id, correct_path)
387 |                     )
388 |                     bkpath = correct_path
389 |             else:  # 链接路径找不到对应文件
390 |                 self.errorLink_log.setdefault(filename, [])
391 |                 self.errorLink_log[filename].append((href + target_id, None))
392 |                 return None
393 |             return bkpath
394 | 
395 |         # xhtml文件，关联 toc文件，一切 xhtml中的<a>元素
396 |         for id, href, properties in self.text_list:
397 |             bkpath = get_bookpath(href, self.opfpath)
398 |             basename = auto_rename(id, href, "text")
399 |             re_path_map["text"][bkpath] = basename
400 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
401 | 
402 |         # css 文件，关联 xhtml文件的link，css文件中的@import
403 |         for id, href, properties in self.css_list:
404 |             bkpath = get_bookpath(href, self.opfpath)
405 |             basename = auto_rename(id, href, "css")
406 |             re_path_map["css"][bkpath] = basename
407 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
408 | 
409 |         # 图片，关联css中的url，xhtml文件中的href
410 |         for id, href, properties in self.image_list:
411 |             bkpath = get_bookpath(href, self.opfpath)
412 |             basename = auto_rename(id, href, "image")
413 |             re_path_map["image"][bkpath] = basename
414 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
415 | 
416 |         # 字体，关联css中的url
417 |         for id, href, properties in self.font_list:
418 |             bkpath = get_bookpath(href, self.opfpath)
419 |             basename = auto_rename(id, href, "font")
420 |             re_path_map["font"][bkpath] = basename
421 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
422 | 
423 |         # 音频
424 |         for id, href, properties in self.audio_list:
425 |             bkpath = get_bookpath(href, self.opfpath)
426 |             basename = auto_rename(id, href, "audio")
427 |             re_path_map["audio"][bkpath] = basename
428 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
429 | 
430 |         # 视频
431 |         for id, href, properties in self.video_list:
432 |             bkpath = get_bookpath(href, self.opfpath)
433 |             basename = auto_rename(id, href, "video")
434 |             re_path_map["video"][bkpath] = basename
435 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
436 | 
437 |         # 其他文件
438 |         for id, href, mime, properties in self.other_list:
439 |             bkpath = get_bookpath(href, self.opfpath)
440 |             basename = auto_rename(id, href, "other")
441 |             re_path_map["other"][bkpath] = basename
442 |             lowerPath_to_originPath[bkpath.lower()] = bkpath
443 | 
444 |         # 读取文件并修改关联
445 |         # toc文件
446 |         if self.tocpath:
447 |             toc = self.epub.read(self.tocpath).decode("utf-8")
448 |             toc_dir = path.dirname(self.tocpath)
449 | 
450 |             def re_toc_href(match):
451 |                 href = match.group(2)
452 |                 href = unquote(href).strip()
453 |                 if "#" in href:
454 |                     href, target_id = href.split("#")
455 |                     target_id = "#" + target_id
456 |                 else:
457 |                     target_id = ""
458 |                 bkpath = get_bookpath(href, self.tocpath)
459 |                 bkpath = check_link(self.tocpath, bkpath, href, self, target_id)
460 |                 if not bkpath:
461 |                     return match.group()
462 |                 filename = path.basename(bkpath)
463 |                 return 'src="Text/' + filename + '"' + target_id
464 | 
465 |             toc = re.sub(r"src=([\'\"])(.*?)\1", re_toc_href, toc)
466 |             self.tgt_epub.writestr(
467 |                 "OEBPS/toc.ncx", bytes(toc, encoding="utf-8"), zipfile.ZIP_DEFLATED
468 |             )
469 | 
470 |         # xhtml文件
471 |         for xhtml_bkpath, new_name in re_path_map["text"].items():
472 |             text = self.epub.read(xhtml_bkpath).decode("utf-8")
473 |             if not text.startswith("<?xml"):
474 |                 text = '<?xml version="1.0" encoding="utf-8"?>\n' + text
475 |             if not re.match(r"(?s).*<!DOCTYPE html", text):
476 |                 text = re.sub(
477 |                     r"(<\?xml.*?>)\n*",
478 |                     r'\1\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"\n  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
479 |                     text,
480 |                     1,
481 |                 )
482 |             # 修改a[href]
483 | 
484 |             def re_href(match):
485 |                 href = match.group(3)
486 |                 href = unquote(href).strip()
487 |                 if "#" in href:
488 |                     href, target_id = href.split("#")
489 |                     target_id = "#" + target_id
490 |                 else:
491 |                     target_id = ""
492 | 
493 |                 bkpath = get_bookpath(href, xhtml_bkpath)
494 |                 bkpath = check_link(xhtml_bkpath, bkpath, href, self, target_id)
495 |                 if not bkpath:
496 |                     return match.group()
497 | 
498 |                 if href.lower().endswith(
499 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp")
500 |                 ):
501 |                     filename = re_path_map["image"][bkpath]
502 |                     return match.group(1) + "../Images/" + filename + match.group(4)
503 |                 elif href.lower().endswith(".css"):
504 |                     filename = re_path_map["css"][bkpath]
505 |                     return (
506 |                         '<link href="../Styles/'
507 |                         + filename
508 |                         + '" type="text/css" rel="stylesheet"/>'
509 |                     )
510 |                 elif href.lower().endswith((".xhtml", ".html")):
511 |                     filename = re_path_map["text"][bkpath]
512 |                     return match.group(1) + filename + target_id + match.group(4)
513 |                 else:
514 |                     return match.group()
515 | 
516 |             text = re.sub(r"(<[^>]*href=([\'\"]))(.*?)(\2[^>]*>)", re_href, text)
517 | 
518 |             # 修改src
519 |             def re_src(match):
520 |                 href = match.group(3)
521 |                 href = unquote(href).strip()
522 |                 bkpath = get_bookpath(href, xhtml_bkpath)
523 |                 bkpath = check_link(xhtml_bkpath, bkpath, href, self)
524 |                 if not bkpath:
525 |                     return match.group()
526 | 
527 |                 if href.lower().endswith(
528 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg")
529 |                 ):
530 |                     filename = re_path_map["image"][bkpath]
531 |                     return match.group(1) + "../Images/" + filename + match.group(4)
532 |                 elif href.lower().endswith(".mp3"):
533 |                     filename = re_path_map["audio"][bkpath]
534 |                     return match.group(1) + "../Audio/" + filename + match.group(4)
535 |                 elif href.lower().endswith(".mp4"):
536 |                     filename = re_path_map["video"][bkpath]
537 |                     return match.group(1) + "../Video/" + filename + match.group(4)
538 |                 elif href.lower().endswith(".js"):
539 |                     filename = re_path_map["other"][bkpath]
540 |                     return match.group(1) + "../Misc/" + filename + match.group(4)
541 |                 else:
542 |                     return match.group()
543 | 
544 |             text = re.sub(r"(<[^>]* src=([\'\"]))(.*?)(\2[^>]*>)", re_src, text)
545 | 
546 |             # 修改 url
547 |             def re_url(match):
548 |                 url = match.group(2)
549 |                 url = unquote(url).strip()
550 |                 bkpath = get_bookpath(url, xhtml_bkpath)
551 |                 bkpath = check_link(xhtml_bkpath, bkpath, url, self)
552 |                 if not bkpath:
553 |                     return match.group()
554 | 
555 |                 if url.lower().endswith((".ttf", ".otf")):
556 |                     filename = re_path_map["font"][bkpath]
557 |                     return match.group(1) + "../Fonts/" + filename + match.group(3)
558 |                 elif url.lower().endswith(
559 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg")
560 |                 ):
561 |                     filename = re_path_map["image"][bkpath]
562 |                     return match.group(1) + "../Images/" + filename + match.group(3)
563 |                 else:
564 |                     return match.group()
565 | 
566 |             text = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_url, text)
567 |             self.tgt_epub.writestr(
568 |                 "OEBPS/Text/" + new_name,
569 |                 bytes(text, encoding="utf-8"),
570 |                 zipfile.ZIP_DEFLATED,
571 |             )
572 |         # css文件
573 |         for css_bkpath, new_name in re_path_map["css"].items():
574 |             try:
575 |                 css = self.epub.read(css_bkpath).decode("utf-8")
576 |             except:
577 |                 continue
578 | 
579 |             # 修改 @import
580 |             def re_import(match):
581 |                 if match.group(2):
582 |                     href = match.group(2)
583 |                 else:
584 |                     href = match.group(3)
585 |                 href = unquote(href).strip()
586 |                 if not href.lower().endswith(".css"):
587 |                     return match.group()
588 |                 filename = path.basename(href)
589 |                 return '@import "' + filename + '"'
590 | 
591 |             css = re.sub(
592 |                 r"@import ([\'\"])(.*?)\1|@import url\([\'\"]?(.*?)[\'\"]?\)",
593 |                 re_import,
594 |                 css,
595 |             )
596 | 
597 |             # 修改 css的url
598 |             def re_css_url(match):
599 |                 url = match.group(2)
600 |                 url = unquote(url).strip()
601 |                 bkpath = get_bookpath(url, css_bkpath)
602 |                 bkpath = check_link(css_bkpath, bkpath, url, self)
603 |                 if not bkpath:
604 |                     return match.group()
605 |                 if url.lower().endswith((".ttf", ".otf")):
606 |                     filename = re_path_map["font"][bkpath]
607 |                     return match.group(1) + "../Fonts/" + filename + match.group(3)
608 |                 elif url.lower().endswith(
609 |                     (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg")
610 |                 ):
611 |                     filename = re_path_map["image"][bkpath]
612 |                     return match.group(1) + "../Images/" + filename + match.group(3)
613 |                 else:
614 |                     return match.group()
615 | 
616 |             css = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_css_url, css)
617 |             self.tgt_epub.writestr(
618 |                 "OEBPS/Styles/" + new_name,
619 |                 bytes(css, encoding="utf-8"),
620 |                 zipfile.ZIP_DEFLATED,
621 |             )
622 |         # 图片
623 |         for img_bkpath, new_name in re_path_map["image"].items():
624 |             try:
625 |                 img = self.epub.read(img_bkpath)
626 |             except:
627 |                 continue
628 |             self.tgt_epub.writestr(
629 |                 "OEBPS/Images/" + new_name, img, zipfile.ZIP_DEFLATED
630 |             )
631 |         # 字体
632 |         for font_bkpath, new_name in re_path_map["font"].items():
633 |             try:
634 |                 font = self.epub.read(font_bkpath)
635 |             except:
636 |                 continue
637 |             self.tgt_epub.writestr(
638 |                 "OEBPS/Fonts/" + new_name, font, zipfile.ZIP_DEFLATED
639 |             )
640 |         # 音频
641 |         for audio_bkpath, new_name in re_path_map["audio"].items():
642 |             try:
643 |                 audio = self.epub.read(audio_bkpath)
644 |             except:
645 |                 continue
646 |             self.tgt_epub.writestr(
647 |                 "OEBPS/Audio/" + new_name, audio, zipfile.ZIP_DEFLATED
648 |             )
649 |         # 视频
650 |         for video_bkpath, new_name in re_path_map["video"].items():
651 |             try:
652 |                 video = self.epub.read(video_bkpath)
653 |             except:
654 |                 continue
655 |             self.tgt_epub.writestr(
656 |                 "OEBPS/Video/" + new_name, video, zipfile.ZIP_DEFLATED
657 |             )
658 |         # 其他
659 |         for font_bkpath, new_name in re_path_map["other"].items():
660 |             try:
661 |                 other = self.epub.read(font_bkpath)
662 |             except:
663 |                 continue
664 |             self.tgt_epub.writestr(
665 |                 "OEBPS/Misc/" + new_name, other, zipfile.ZIP_DEFLATED
666 |             )
667 |         # OPF
668 |         manifest_text = "<manifest>"
669 | 
670 |         for id, href, mime, prop in self.manifest_list:
671 |             bkpath = get_bookpath(href, self.opfpath)
672 |             prop_ = ' properties="' + prop + '"' if prop else ""
673 |             if mime == "application/xhtml+xml":
674 |                 filename = re_path_map["text"][bkpath]
675 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
676 |                     id=id, href="Text/" + filename, mime=mime, prop=prop_
677 |                 )
678 |             elif mime == "text/css":
679 |                 filename = re_path_map["css"][bkpath]
680 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
681 |                     id=id, href="Styles/" + filename, mime=mime, prop=prop_
682 |                 )
683 |             elif "image/" in mime:
684 |                 filename = re_path_map["image"][bkpath]
685 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
686 |                     id=id, href="Images/" + filename, mime=mime, prop=prop_
687 |                 )
688 |             elif "font/" in mime or href.lower().endswith((".ttf", ".otf", ".woff")):
689 |                 filename = re_path_map["font"][bkpath]
690 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
691 |                     id=id, href="Fonts/" + filename, mime=mime, prop=prop_
692 |                 )
693 |             elif "audio/" in mime:
694 |                 filename = re_path_map["audio"][bkpath]
695 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
696 |                     id=id, href="Audio/" + filename, mime=mime, prop=prop_
697 |                 )
698 |             elif "video/" in mime:
699 |                 filename = re_path_map["video"][bkpath]
700 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
701 |                     id=id, href="Video/" + filename, mime=mime, prop=prop_
702 |                 )
703 |             elif id == self.tocid:
704 |                 manifest_text += '\n    <item id="{id}" href="toc.ncx" media-type="application/x-dtbncx+xml"/>'.format(
705 |                     id=id
706 |                 )
707 |             else:
708 |                 filename = re_path_map["other"][bkpath]
709 |                 manifest_text += '\n    <item id="{id}" href="{href}" media-type="{mime}"{prop}/>'.format(
710 |                     id=id, href="Misc/" + filename, mime=mime, prop=prop_
711 |                 )
712 | 
713 |         manifest_text += "\n  </manifest>"
714 |         opf = re.sub(r"(?s)<manifest.*?>.*?</manifest>", manifest_text, self.opf, 1)
715 | 
716 |         def re_refer(match):
717 |             href = match.group(3)
718 |             href = unquote(href).strip()
719 |             basename = path.basename(href)
720 |             filename = unquote(basename)
721 |             if not basename.endswith(".ncx"):
722 |                 return match.group(1) + "../Text/" + filename + match.group(4)
723 |             else:
724 |                 return match.group()
725 | 
726 |         opf = re.sub(r"(<reference[^>]*href=([\'\"]))(.*?)(\2[^>]*/>)", re_refer, opf)
727 |         self.tgt_epub.writestr(
728 |             "OEBPS/content.opf", bytes(opf, encoding="utf-8"), zipfile.ZIP_DEFLATED
729 |         )
730 |         self.tgt_epub.close()
731 |         self.epub.close()
732 | 
733 | 
# Relative-path helper.
def get_relpath(from_path, to_path):
    """Return the relative link that leads from one file to another.

    Both arguments must be paths measured from the same root; forward and
    back slashes are both accepted as separators.

    Args:
        from_path: Path of the file that contains the link.
        to_path: Path of the file being linked to.

    Returns:
        The "/"-separated path of ``to_path`` relative to the directory of
        ``from_path``.  Identical paths yield the bare file name.
    """
    from_parts = re.split(r"[\\/]", from_path)
    to_parts = re.split(r"[\\/]", to_path)

    # Count the shared leading directories.  The scan stops one short of the
    # end of the shorter path so the final component of each path is never
    # consumed; the previous pop-based loop ran off the end of the list (and
    # raised IndexError) when the paths were identical or one was a prefix of
    # the other.
    limit = min(len(from_parts), len(to_parts)) - 1
    shared = 0
    while shared < limit and from_parts[shared] == to_parts[shared]:
        shared += 1

    # One "../" for every directory of from_path below the shared prefix.
    climbs = len(from_parts) - shared - 1
    return "../" * climbs + "/".join(to_parts[shared:])
743 | 
744 | 
# Book-path resolution helper.
def get_bookpath(relative_path, refer_bkpath):
    """Resolve a link written inside one file to a "/"-separated book path.

    Args:
        relative_path: The link as written, typically an href value.
        refer_bkpath: Path of the file that contains the link; its last
            component (the file name) is discarded before resolving.

    Returns:
        The directories of ``refer_bkpath`` joined with ``relative_path``,
        after removing one directory per leading ".." segment.  When the
        link climbs above the top of ``refer_bkpath`` the extra ".."
        segments are dropped and only the remainder of the link is returned.

    Only leading ".." segments are interpreted; "." segments and ".."
    segments in the middle of the link are kept as written.
    """
    rel_parts = re.split(r"[\\/]", relative_path)
    # Directory components of the referring file (empty for a root-level file).
    base_dirs = re.split(r"[\\/]", refer_bkpath)[:-1]

    # Count leading ".." segments.  The bound on the index matters: a link
    # made only of ".." segments used to exhaust the list and raise
    # IndexError.
    back_step = 0
    while back_step < len(rel_parts) and rel_parts[back_step] == "..":
        back_step += 1
    rel_parts = rel_parts[back_step:]

    if back_step > len(base_dirs):
        # Climbs past the root: fall back to the link without its ".." part.
        return "/".join(rel_parts)

    kept_dirs = base_dirs[: len(base_dirs) - back_step]
    return "/".join(kept_dirs + rel_parts)
774 | 
775 | 
def epub_sources():
    """Collect the EPUB files named on the command line.

    With no command-line arguments, ``sys.argv`` itself is returned.
    Otherwise the result is a new list whose first element is the directory
    of ``sys.argv[0]``, followed by every argument that has a ".epub"
    extension (compared case-insensitively) and exists on disk, in the order
    given.
    """
    if len(sys.argv) < 2:
        return sys.argv

    found = [path.dirname(sys.argv[0])]
    for candidate in sys.argv[1:]:
        extension = path.splitext(path.basename(candidate))[1]
        if extension.lower() != ".epub":
            continue
        if not path.exists(candidate):
            continue
        found.append(candidate)
    return found
789 | 
790 | 
def run(epub_src, output_path=None):
    """Restructure one EPUB file and log every problem that was recorded.

    Args:
        epub_src: Path of the EPUB to process.
        output_path: Forwarded to ``EpubTool.set_output_path``.

    Returns:
        "skip" when the file name already ends with "_reformat.epub", the
        caught exception object when processing fails, and 0 on success.
    """
    try:
        logger.write(f"\n正在尝试重构EPUB: {epub_src}")
        if epub_src.lower().endswith("_reformat.epub"):
            logger.write("警告: 该文件已经重排，无需再次处理！")
            return "skip"

        epub = EpubTool(epub_src)
        epub.set_output_path(output_path)
        epub.restructure()

        # Build the link report.  For CSS files, entries whose corrected path
        # is None are left out, and a CSS file with nothing left is omitted.
        link_report = {}
        for file_path, log in epub.errorLink_log.items():
            if file_path.lower().endswith(".css"):
                log = [entry for entry in log if entry[1] is not None]
                if not log:
                    continue
            link_report[file_path] = log

        if epub.errorOPF_log:
            logger.write("-------在 OPF文件 发现问题------:")
            for error_type, error_value in epub.errorOPF_log:
                # Each known error type maps to a problem line and an action line.
                if error_type == "duplicate_id":
                    messages = (
                        f"问题: 发现manifest节点内部存在重复ID {error_value} !!!",
                        "措施: 已自动清除重复ID对应的manifest项。",
                    )
                elif error_type == "invalid_idref":
                    messages = (
                        f"问题: 发现spine节点内部存在无效引用ID {error_value} !!!",
                        "措施: 请自行检查spine内的itemref节点并手动修改，确保引用的ID存在于manifest的item项。\n"
                        + "      （大小写不一致也会导致引用无效。）",
                    )
                elif error_type == "xhtml_not_in_spine":
                    messages = (
                        f"问题: 发现ID为 {error_value} 的文件manifest中登记为application/xhtml+xml类型，但不被spine节点的项所引用",
                        "措施: 自行检查该文件是否需要被spine引用。部分阅读器中，如果存在xhtml文件不被spine引用，可能导致epub无法打开。",
                    )
                else:
                    messages = ()
                for message in messages:
                    logger.write(message)

        for file_path, log in link_report.items():
            basename = path.basename(file_path)
            logger.write(f"-----在 {basename} 发现问题链接-----:")
            for href, correct_path in log:
                if correct_path is None:
                    logger.write(f"链接: {href}\n问题: 未能找到对应文件！！！")
                else:
                    logger.write(
                        f"链接: {href}\n问题: 与实际文件名大小写不一致！\n措施: 程序已自动纠正链接。"
                    )
    except Exception as e:
        logger.write(f"{epub_src} 重构EPUB失败: {e}")
        return e

    # Reached only when the try body ran to completion without returning.
    logger.write(f"{epub_src} 重构EPUB成功")
    return 0
847 | 
848 | 
def main():
    """Prompt for one EPUB path, restructure it and print the outcome.

    Entering "e" (any case) terminates the process via ``sys.exit``.  A path
    that is not an existing file prints an error and returns so the caller
    can prompt again.
    """
    epub_src = input("【使用说明】请把EPUB文件拖曳到本窗口上（输入'e'退出）: ")
    # Trim surrounding whitespace first: with a quoted path followed by a
    # trailing space, stripping the quotes first left the closing quote in
    # place.
    epub_src = epub_src.strip().strip("'").strip('"').strip()
    if epub_src.lower() == "e":
        print("程序已退出")
        sys.exit()
    if not os.path.isfile(epub_src):
        print("错误: 找不到指定的EPUB文件，请确认文件路径是否正确并重新输入！")
        return
    ret = run(epub_src)
    # run() returns "skip", 0 on success, or the caught exception object on
    # failure.  The old comparison against the string "e" never matched, so
    # failures were reported as success.
    if ret == "skip":
        print("已跳过该文件")
    elif ret == 0:
        print("操作成功！")
    else:
        print("操作失败，请检查日志！")
865 | 
866 | 
867 | 
if __name__ == "__main__":
    # Print the feature summary once, then prompt for files indefinitely;
    # main() ends the process itself when the user enters 'e'.
    print(
        "【脚本功能】\n"
        "1、 将epub目录结构规范化至sigil规范格式。\n"
        "2、 将没有列入manifest项的epub有效文件自动列入manifest项。\n"
        "3、 自动清除manifest中携带重复ID或多余ID的无效项。\n"
        "    脚本将优先保留spine或metadata中关联的ID。\n"
        "4、 自动检查并提醒spine节点中引用无效ID的itemref项。\n"
        "5、 自动检查并提醒manifest节点中xhtml类型文件不被spine节点引用的情况。\n"
        "6、 自动检测并纠正实际文件名与对应的引用链接大小写不一致的问题。\n"
        "7、 自动检测并提醒找不到对应文件的链接。"
    )
    while True:
        main()
882 | 


--------------------------------------------------------------------------------