├── .github └── workflows │ └── build.yml ├── .gitignore ├── Epub_Tool_Console.py ├── Epub_Tool_TKUI.py ├── LICENSE ├── README.md ├── build_tool ├── pyinstallerForMac.sh └── pyinstallerForWindows.txt ├── img ├── how_to_use.gif ├── icon.icns ├── icon.ico ├── icon.jpeg ├── icon.png └── icon_backup.png ├── requirements.txt └── utils ├── decrypt_epub.py ├── encrypt_epub.py ├── encrypt_font.py ├── log.py └── reformat_epub.py /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build And Release 2 | 3 | on: 4 | # push: 5 | # branches: 6 | # - main 7 | # paths: 8 | # - '**.yml' 9 | workflow_dispatch: 10 | inputs: 11 | release: 12 | description: 'Create a new release' 13 | required: true 14 | default: true 15 | type: boolean 16 | version: 17 | description: 'Release version' 18 | required: true 19 | default: '2025.03.01.v1' 20 | body: 21 | description: 'Release body text' 22 | required: true 23 | default: '详情请查看更新日志;Epub_Tool_TKUI为可视化程序,不再支持命令行程序。有需要请直接下载python执行源码。' 24 | 25 | jobs: 26 | build: 27 | runs-on: ${{ matrix.os }} 28 | strategy: 29 | matrix: # os: [ubuntu-latest, macos-latest, windows-latest] 暂时去除Ubuntu系统 30 | os: [macos-latest, windows-latest] 31 | steps: 32 | - uses: actions/checkout@v4 33 | 34 | - name: Set release outputs 35 | id: set_release_outputs 36 | run: | 37 | if [ "${{ github.event.inputs.release }}" == "true" ]; then 38 | echo "TAG=${{ github.event.inputs.version }}" >> $GITHUB_ENV 39 | echo "BODY=${{ github.event.inputs.body }}" >> $GITHUB_ENV 40 | else 41 | echo "TAG=$(date +'%Y.%m.%d.v2')" >> $GITHUB_ENV 42 | echo "BODY=详情请查看更新日志;Epub_Tool_TKUI为可视化程序,Epub_Tool_Console为命令行程序。" >> $GITHUB_ENV 43 | fi 44 | shell: bash 45 | 46 | - name: Set up Python 47 | uses: actions/setup-python@v5 48 | with: 49 | python-version: 3.13.1 50 | 51 | - name: Install Pyinstaller 52 | run: | 53 | python -m pip install pyinstaller 54 | python -m pip install -r requirements.txt 55 | 56 | - name: Build the 
executables 57 | run: | 58 | if [ "${{ matrix.os }}" == "macos-latest" ]; then 59 | pyinstaller -w -i ./img/icon.icns Epub_Tool_TKUI.py -n Epub_Tool_TKUI; 60 | else 61 | pyinstaller -w -F -i ./img/icon.ico Epub_Tool_TKUI.py -n Epub_Tool_TKUI; 62 | fi 63 | shell: bash 64 | 65 | - name: Rename artifacts with OS prefix (macOS) # Epub_Tool_TKUI.app为文件夹,zip压缩 66 | if: matrix.os == 'macos-latest' 67 | run: | 68 | brew install create-dmg 69 | cd ./dist 70 | create-dmg \ 71 | --volname "Epub_Tool_TKUI" \ 72 | --window-pos 400 200 \ 73 | --window-size 660 400 \ 74 | --icon-size 100 \ 75 | --icon "Epub_Tool_TKUI.app" 160 185 \ 76 | --hide-extension "Epub_Tool_TKUI.app"\ 77 | --app-drop-link 500 185 \ 78 | --no-internet-enable \ 79 | "./Epub_Tool_TKUI.dmg" \ 80 | "./Epub_Tool_TKUI.app"; 81 | rm -rf ./Epub_Tool_TKUI.app; 82 | rm -rf ./Epub_Tool_TKUI; 83 | cd .. 84 | for file in dist/*; do 85 | mv "$file" "dist/${{ runner.os }}_$(basename "$file")" 86 | done 87 | shell: bash 88 | 89 | # 无Ubuntu系统,暂时不知道Ubuntu版本下打包的可执行文件是什么格式、是否可用 90 | # - name: Rename artifacts with OS prefix (Ubuntu) 91 | # if: matrix.os == 'ubuntu-latest' 92 | # run: | 93 | # for file in dist/*; do 94 | # mv "$file" "dist/${{ runner.os }}_$(basename "$file")" 95 | # done 96 | # shell: bash 97 | 98 | - name: Rename artifacts with OS prefix (Windows) 99 | if: matrix.os == 'windows-latest' 100 | run: | 101 | Get-ChildItem -Path dist | Rename-Item -NewName { '${{ runner.os }}_' + $_.Name } 102 | shell: pwsh 103 | 104 | # - name: Create zip archive (Windows) 105 | # if: matrix.os == 'windows-latest' 106 | # run: | 107 | # Compress-Archive -Path dist\* -DestinationPath "dist\${{ runner.os }}_all.zip" 108 | # shell: pwsh 109 | 110 | # - name: Create tar.gz archive (Linux and macOS) 111 | # if: matrix.os != 'windows-latest' 112 | # run: | 113 | # tar -czvf "${{ runner.os }}_all.tar.gz" -C dist $(ls dist) 114 | # mv "${{ runner.os }}_all.tar.gz" dist/ 115 | # shell: bash 116 | 117 | - name: Create Release 118 | uses: 
def prepare_args():
    """Parse the command-line options of the console tool.

    Returns:
        argparse.Namespace: ``i`` is the input path (or ``None``), ``e``/``d``/``r``
        are boolean operation flags and ``m`` is the free-form mode letter.
    """
    parser = argparse.ArgumentParser(description="Epub Tool")
    parser.add_argument("-i", help="input file/folder")
    parser.add_argument("-e", action="store_true", help="encrypt epub file")
    parser.add_argument("-d", action="store_true", help="decrypt epub file")
    parser.add_argument("-r", action="store_true", help="reformat epub file")
    parser.add_argument(
        "-m",
        help="mode: e: encrypt, d: decrypt, r: reformat, c: choose files, a: all files",
    )
    return parser.parse_args()


def clean_input_path(input_path):
    """Return *input_path* without surrounding whitespace and quote characters.

    Whitespace is removed first so that a quoted path followed by a space
    (what terminals typically produce on drag-and-drop) is still unquoted.
    """
    return input_path.strip().strip("'\"").strip()


def _find_epubs(folder):
    """Return every file below *folder* whose extension is ``.epub`` (any case)."""
    return [
        os.path.join(dirpath, name)
        for dirpath, _, names in os.walk(folder)
        for name in names
        if name.lower().endswith(".epub")
    ]


def _ask_selection_mode(preset):
    """Return ``"c"`` (pick files) or ``"a"`` (all files), prompting if needed.

    *preset* is the ``-m`` value. An operation letter (e/d/r) is not an error
    here: it is simply not a selection mode, so the user is asked.
    """
    if preset in ("c", "a"):
        return preset
    if preset and preset not in ("e", "d", "r"):
        print("输入错误,请输入 'c' 或 'a'")
    while True:
        answer = input("请输入操作(c:手动选择,a:全部文件):").strip().lower()
        if answer in ("c", "a"):
            return answer
        print("输入错误,请输入 'c' 或 'a'")


def _choose_files(candidates):
    """List *candidates* and return the ones the user picks by 1-based number."""
    print("以下是文件夹中的epub文件:")
    for number, path in enumerate(candidates, start=1):
        print(f"{number}: {path}")

    while True:
        tokens = input(
            "请输入你想要处理的文件序号(多个序号请用空格分开):"
        ).split()
        try:
            numbers = [int(token) for token in tokens]
        except ValueError:
            numbers = []
        # Reject 0 / negative numbers explicitly: they would otherwise wrap
        # around and silently select files from the end of the list.
        if numbers and all(1 <= n <= len(candidates) for n in numbers):
            return [candidates[n - 1] for n in numbers]
        print("输入错误,请确保输入的是有效的文件序号")


def check_args(args):
    """Resolve ``args.i`` into a list of existing files, prompting when needed.

    A folder is scanned recursively for epub files and the user chooses
    between all files and a manual selection. ``args.m`` is left untouched so
    that an operation passed as ``-m e/d/r`` survives for ``check_mode``.

    Returns:
        The same *args* object with ``args.i`` replaced by a list of paths.
    """
    while True:
        if not args.i:
            args.i = input("请输入epub文件路径或文件夹路径:")
        args.i = clean_input_path(args.i)

        if os.path.isdir(args.i):
            epubs = _find_epubs(args.i)
            if not epubs:
                print("文件夹中没有找到任何epub文件,请重新输入")
                args.i = None
                continue
            selection = _ask_selection_mode(args.m)
            mode = "手动选择" if selection == "c" else "全部文件"
            print(f"处理模式:{mode}")
            args.i = _choose_files(epubs) if selection == "c" else epubs
            return args

        if os.path.exists(args.i):
            args.i = [args.i]
            return args

        print(f"输入路径不存在:{args.i},请重新输入")
        args.i = None


def check_mode(args):
    """Return ``(label, function)`` for the requested operation.

    The flags ``-e``/``-d``/``-r`` and ``-m`` are checked in the order
    encrypt, decrypt, reformat; if none matches the user is prompted.
    """
    operations = {
        "e": ("加密", encrypt_run),
        "d": ("解密", decrypt_run),
        "r": ("重排", reformat_run),
    }
    while True:
        for key, flag in (("e", args.e), ("d", args.d), ("r", args.r)):
            if flag or args.m == key:
                return operations[key]
        answer = input("请输入操作(e:加密,d:解密,r:重排):").strip().lower()
        if answer in operations:
            args.m = answer
        else:
            print("输入错误,请输入 'e'、'd' 或 'r'")


def main():
    """Run the interactive console workflow and write ``result.txt``."""
    print("-欢迎使用Epub Tool-")
    print("-此程序由cnwxi提供-")
    args = check_args(prepare_args())
    process, func = check_mode(args)
    print(f"处理模式:{process}")
    results = []

    with tqdm(total=len(args.i), ncols=100, desc=f"{process}文件") as pbar:
        for file in args.i:
            # One failing book must not abort the batch, hence the broad catch.
            try:
                ret = func(file)
                if ret == 0:
                    result = f"^_^ {file} 成功"
                elif ret == "skip":
                    result = f"O_O {file} 跳过:已{process}"
                else:
                    result = f"T_T {file} 失败:{ret}"
            except Exception as e:
                result = f"X_X {file} 处理时发生错误:{e}"
            results.append(result)
            pbar.update(1)

    print(f"{process}结果:")
    for result in results:
        print(result)

    with open("result.txt", "w", encoding="utf-8") as f:
        f.writelines(f"{result}\n" for result in results)

    input("按下回车退出...")


if __name__ == "__main__":
    main()
# Frame that holds the introduction banner at the top of the window.
intro_frame = ttk.Frame(root)
intro_frame.pack(padx=10, pady=10)

# NOTE(review): ttk style options are normally spelled ``foreground``; ``fg``
# looks like it has no effect on a TLabel. Left as is because forcing a dark
# color could hurt readability on dark themes -- confirm the intent.
style.configure(
    "Intro.TLabel",
    font=("TkDefaultFont", 14, "bold"),
    fg="#333",
    padding=10,
)
intro_label = ttk.Label(
    intro_frame,
    text="欢迎使用 Epub Tool\n此工具可帮助您处理电子书文件",
    style="Intro.TLabel",
    justify="center",
)
intro_label.pack(side=tk.TOP)


def open_link(event):
    """Open the project's GitHub repository in the default web browser."""
    webbrowser.open_new("https://github.com/cnwxi/epub_tool")


style.configure(
    "Link.TLabel",
    foreground="royalblue",
    font=("TkDefaultFont", 10, "underline"),
)
link_label = ttk.Label(
    intro_frame,
    text="访问本项目GITHUB仓库",
    style="Link.TLabel",
    cursor="hand2",
)
link_label.pack(side=tk.TOP)
# An empty event sequence is rejected by Tk; a hyperlink-style label is
# activated with a primary-button click.
link_label.bind("<Button-1>", open_link)

# Horizontal divider below the banner.
separator = ttk.Separator(root, orient="horizontal")
separator.pack(fill="x", padx=5, pady=5)

add_frame = ttk.Frame(root)
add_frame.pack(padx=10, pady=5)


def display_added_file(files):
    """Rebuild the input Treeview so that it shows exactly *files*.

    Each row holds a 1-based number, the file name without its extension and
    the full path (the path is what the other handlers read back).
    """
    for item in file_list.get_children():
        file_list.delete(item)
    for number, file_path in enumerate(files, start=1):
        book_name = os.path.basename(file_path).rsplit(".", 1)[0]
        file_list.insert(
            "",
            "end",
            values=(
                f" {number} ",
                f" {book_name} ",
                file_path,
            ),
        )


def store_file(files):
    """Remember *files* in ``tmp_files_dic``; the dict keys prevent duplicates."""
    for file in files:
        tmp_files_dic.setdefault(file, 1)


def add_file():
    """Let the user pick one or more epub files and add them to the list."""
    chosen = filedialog.askopenfilenames(
        title="选择文件", filetypes=[("EPUB files", "*.epub *.EPUB")]
    )
    store_file(
        [os.path.normpath(path) for path in chosen if path.lower().endswith(".epub")]
    )
    display_added_file(tmp_files_dic.keys())


def add_dir():
    """Let the user pick a folder and add every epub file found below it."""
    folder = filedialog.askdirectory(title="选择文件夹")
    if not folder:
        # Dialog was cancelled: nothing to scan.
        return
    found = [
        os.path.normpath(os.path.join(dirpath, name))
        for dirpath, _, names in os.walk(folder)
        for name in names
        if name.lower().endswith(".epub")
    ]
    store_file(found)
    display_added_file(tmp_files_dic.keys())


def delete_selected():
    """Remove the selected rows from the input list (warns if none is selected)."""
    selected_items = file_list.selection()
    if not selected_items:
        messagebox.showwarning("Warning", "未选中任何文件")
        return
    for item in selected_items:
        tmp_files_dic.pop(file_list.item(item, "values")[2], None)
    # The refresh rebuilds the Treeview and renumbers the remaining rows.
    display_added_file(tmp_files_dic.keys())


def delete_all():
    """Clear the input list and the backing dictionary."""
    for item in file_list.get_children():
        file_list.delete(item)
    tmp_files_dic.clear()


add_files_btn = ttk.Button(
    add_frame,
    text="添加文件",
    command=add_file,
)
add_files_btn.pack(side=tk.LEFT, padx=5)

select_dir_btn = ttk.Button(
    add_frame,
    text="添加文件夹",
    command=add_dir,
)
select_dir_btn.pack(side=tk.LEFT, padx=5)

delete_button = ttk.Button(
    add_frame,
    text="删除所选",
    command=delete_selected,
)
delete_button.pack(side=tk.LEFT, padx=5)

delete_all_button = ttk.Button(
    add_frame,
    text="删除全部",
    command=delete_all,
)
delete_all_button.pack(side=tk.LEFT, padx=5)
# Frame that holds the input Treeview and its scrollbar.
listbox_frame = ttk.Frame(root)
listbox_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

file_list = ttk.Treeview(
    listbox_frame,
    selectmode="extended",
    columns=(
        "index",
        "file_name",
        "path",
    ),
    show="headings",
)
file_list.heading("index", text="序号", anchor="center")
file_list.column("index", width=int(min_width * 0.1), anchor="center", stretch=False)
file_list.heading("file_name", text="书名", anchor="center")
file_list.column("file_name", anchor="w", stretch=True)
file_list.heading("path", text="文件路径")
file_list.column("path", width=1, stretch=False)
# The path column stays in the data (handlers read values[2]) but is hidden.
file_list["displaycolumns"] = ("index", "file_name")
file_list.grid(row=1, column=0, sticky=tk.NSEW)


def show_context_menu(event):
    """Pop up the context menu for the row under the mouse pointer.

    A row that is already part of the selection keeps the whole selection, so
    the menu actions can work on several files at once.
    """
    item = file_list.identify_row(event.y)
    if item:
        if item not in file_list.selection():
            file_list.selection_set(item)
        context_menu.post(event.x_root, event.y_root)


def open_selected_file_dir():
    """Open the folder of every selected input file in the file manager."""
    selected_items = file_list.selection()
    if not selected_items:
        messagebox.showwarning("Warning", "未选中任何文件")
        return
    # dict.fromkeys de-duplicates while keeping order, so a folder that holds
    # several selected books is opened only once.
    folders = dict.fromkeys(
        os.path.dirname(file_list.item(item, "values")[2]) for item in selected_items
    )
    for file_path in folders:
        if not os.path.exists(file_path):
            messagebox.showwarning("Warning", f"文件不存在: {file_path}")
            continue
        try:
            if sys.platform.startswith("darwin"):  # macOS
                subprocess.run(["open", file_path])
            elif os.name == "nt":  # Windows
                os.startfile(file_path)
            elif os.name == "posix":  # Linux
                subprocess.run(["xdg-open", file_path])
            else:
                messagebox.showerror("Warning", "不支持的操作系统")
        except Exception as e:
            messagebox.showerror("Warning", f"无法打开路径: {e}")


context_menu = tk.Menu(file_list, tearoff=0)
context_menu.add_command(label="打开所在文件夹", command=open_selected_file_dir)
context_menu.add_command(label="删除选中文件", command=delete_selected)
# The secondary mouse button is Button-2 on macOS (Aqua Tk) and Button-3 on
# Windows and X11. NOTE(review): the sequences were empty strings in this copy
# of the file (Tk rejects those); confirm against the upstream source.
if sys.platform.startswith("darwin"):
    file_list.bind("<Button-2>", show_context_menu)
else:
    file_list.bind("<Button-3>", show_context_menu)


class Tooltip:
    """Show a small pop-up with a cell's hidden detail while hovering a Treeview.

    The value shown for display column ``#n`` is ``values[(n - 1) * 2]``. With
    the column layouts used in this file that maps the visible name column to
    the hidden path column and the visible result column to the hidden info
    column.
    """

    def __init__(self, widget):
        self.widget = widget
        self.tooltip_window = None  # currently displayed Toplevel, if any
        self.widget.bind("<Motion>", self.show_tooltip)
        self.widget.bind("<Leave>", self.hide_tooltip)

        # Default font; kept for width measurements.
        self.font = Font(font="TkDefaultFont")

    def show_tooltip(self, event=None):
        """Replace the current tooltip with one for the cell under the pointer."""
        self.hide_tooltip()
        if event is None:
            return

        row_id = self.widget.identify_row(event.y)
        column = self.widget.identify_column(event.x)
        if not row_id or not column:
            return

        try:
            cell_value = self.widget.item(row_id, "values")[(int(column[1:]) - 1) * 2]
        except IndexError:
            return
        # The first display column never gets a tooltip.
        if column == "#1" or cell_value == "":
            return

        box = self.widget.bbox(row_id, column)
        if not box:
            # bbox yields nothing when the cell is not visible.
            return
        x, y, w, h = box[0], box[1], box[2], box[3]
        if x + (w / 2) < 0 or y + (h / 2) < 0:
            return

        # Place the tooltip just below the cell, in screen coordinates.
        x += self.widget.winfo_rootx()
        y += self.widget.winfo_rooty() + h

        self.tooltip_window = tw = tk.Toplevel(self.widget)
        tw.wm_overrideredirect(True)  # no window decorations
        tw.wm_geometry(f"+{x}+{y}")

        label = tk.Label(
            tw,
            text=cell_value,
            background="lightyellow",
            relief="solid",
            borderwidth=1,
        )
        label.pack()

    def hide_tooltip(self, event=None):
        """Destroy the tooltip window if one is shown."""
        if self.tooltip_window:
            self.tooltip_window.destroy()
            self.tooltip_window = None


Tooltip(file_list)

# Vertical scrollbar for the input list.
v_scrollbar = ttk.Scrollbar(
    listbox_frame,
    orient=tk.VERTICAL,
    command=file_list.yview,
)
v_scrollbar.grid(row=1, column=1, sticky=tk.NS)
file_list.configure(yscrollcommand=v_scrollbar.set)

# Let the Treeview column absorb extra horizontal space.
listbox_frame.grid_columnconfigure(0, weight=1)

separator = ttk.Separator(root, orient="horizontal")
separator.pack(fill="x", padx=5, pady=5)


def select_output_dir():
    """Ask for an output folder and remember it in ``defalut_output_dir``."""
    global defalut_output_dir
    chosen = filedialog.askdirectory(title="选择输出文件夹")
    # A cancelled dialog returns an empty value; check before normalising,
    # because normpath("") would turn it into ".".
    if chosen:
        output_dir = os.path.normpath(chosen)
        if os.path.exists(output_dir):
            defalut_output_dir = output_dir
            if len(output_dir) > 30:
                # Long paths are shortened to their first and last 15 characters.
                output_dir = output_dir[:15] + "..." + output_dir[-15:]
            output_dir_label.config(text=f"输出路径: {output_dir}")
            style.configure(
                "FileLink.TLabel",
                font=("TkDefaultFont", 10, "underline"),
                foreground="royalblue",
            )
            output_dir_label.config(style="FileLink.TLabel", cursor="hand2")
            output_dir_label.update()
            result_list.insert(
                "",
                "end",
                values=(
                    "^_^",
                    "",
                    "",
                    "已设置路径",
                    f"成功设置输出路径为 {defalut_output_dir}",
                ),
            )
    root.update_idletasks()


def open_output_dir(event):
    """Open the configured output folder; do nothing when none is set."""
    path = defalut_output_dir
    if path is None or not os.path.exists(path):
        return
    try:
        if sys.platform.startswith("darwin"):  # macOS
            subprocess.run(["open", path])
        elif os.name == "nt":  # Windows
            os.startfile(path)
        elif os.name == "posix":  # Linux
            subprocess.run(["xdg-open", path])
        else:
            messagebox.showerror("Warning", "不支持的操作系统")
    except Exception as e:
        messagebox.showerror("Warning", f"无法打开路径: {e}")


def reset_output_dir():
    """Forget the output folder so results go next to each source file."""
    global defalut_output_dir
    defalut_output_dir = None
    output_dir_label.config(text="输出路径: 默认文件所在路径")
    style.configure(
        "FileLink.TLabel", font=("TkDefaultFont", 10, "underline"), foreground="DimGray"
    )
    output_dir_label.config(style="FileLink.TLabel", cursor="")
    output_dir_label.update()
    result_list.insert(
        "",
        "end",
        values=(
            "^_^",
            "",
            "",
            "已重置路径",
            "重置输出路径为原文件所在目录",
        ),
    )
    root.update_idletasks()


outdir_frame = tk.Frame(root)
outdir_frame.pack(padx=10, pady=5)
show_btn = ttk.Button(
    outdir_frame,
    text="选择输出路径",
    command=select_output_dir,
)
show_btn.pack(side=tk.LEFT, padx=5)

reset_btn = ttk.Button(
    outdir_frame,
    text="重置输出路径",
    command=reset_output_dir,
)
reset_btn.pack(side=tk.LEFT, padx=5)
frame4 = tk.Frame(root)
frame4.pack(pady=5)

style.configure(
    "FileLink.TLabel", font=("TkDefaultFont", 10, "underline"), foreground="DimGray"
)
output_dir_label = ttk.Label(
    frame4,
    text="输出路径: 默认文件所在路径",
    style="FileLink.TLabel",
)
output_dir_label.pack(side=tk.LEFT, padx=5)
# Clicking the label opens the configured output folder (an empty event
# sequence is rejected by Tk).
output_dir_label.bind("<Button-1>", open_output_dir)

separator = ttk.Separator(root, orient="horizontal")
separator.pack(fill="x", pady=5, padx=5)


def start_progress(func, func_name, output_dir, *args):
    """Run ``run_in_thread`` with the given arguments on a new thread."""
    thread = threading.Thread(
        target=run_in_thread, args=(func, func_name, output_dir, *args)
    )
    thread.start()


def run_in_thread(func, func_name, output_dir, *args):
    """Apply *func* to every file in the input list and log one row per file.

    Args:
        func: callable invoked as ``func(file_path, output_dir, *args)``; the
            code treats ``0`` as success, ``"skip"`` as already processed and
            anything else as an error description.
        func_name: operation name used in the result messages.
        output_dir: target folder, or ``None`` to write next to each source.
    """
    children = file_list.get_children()
    file_count = len(children)
    if file_count == 0:
        messagebox.showwarning("Warning", "未添加任何文件")
        return
    progress["value"] = 0
    progress["maximum"] = file_count
    root.update_idletasks()

    for item in children:
        file_path = file_list.item(item, "values")[2]
        file_list.delete(item)
        tmp_files_dic.pop(file_path, None)
        file_name = os.path.basename(file_path).rsplit(".", 1)[0]
        # Resolve the folder before calling func so the result row carries a
        # usable path even when func raises.
        if output_dir is None:
            tmp_output_dir = os.path.dirname(file_path)
        else:
            tmp_output_dir = output_dir
        try:
            ret = func(file_path, output_dir, *args)
        except Exception as e:
            # One failing book must not abort the batch.
            emoji = "@_@"
            result = " 错误 "
            info = f"{func_name}错误,错误信息:{e}"
        else:
            if ret == 0:
                emoji = "^_^"
                result = " 成功 "
                info = f"{func_name}成功,输出路径:{tmp_output_dir}"
            elif ret == "skip":
                emoji = "O_o"
                result = " 跳过 "
                info = f"文件已被{func_name}处理,跳过{func_name}操作"
            else:
                emoji = "T_T"
                result = " 失败"
                info = f"{func_name}失败,错误信息:{ret}"

        result_list.insert(
            "",
            "end",
            values=(
                emoji,
                file_name,
                tmp_output_dir,
                result,
                info,
            ),
        )
        progress["value"] += 1
        root.update_idletasks()


op_frame = ttk.Frame(root)
op_frame.pack(padx=10, pady=5)
reformat_btn = ttk.Button(
    op_frame,
    text="格式化",
    command=lambda: start_progress(reformat_run, "格式化", defalut_output_dir),
)
reformat_btn.pack(side=tk.LEFT, padx=5)

decrypt_btn = ttk.Button(
    op_frame,
    text="文件名解密",
    command=lambda: start_progress(decrypt_run, "解密", defalut_output_dir),
)
decrypt_btn.pack(side=tk.LEFT, padx=5)

encrypt_btn = ttk.Button(
    op_frame,
    text="文件名加密",
    command=lambda: start_progress(encrypt_run, "加密", defalut_output_dir),
)
encrypt_btn.pack(side=tk.LEFT, padx=5)


def run_font_encrypt():
    """Run font obfuscation on every file in the input list.

    Unlike the other operations this runs on the calling (UI) thread.
    """
    children = file_list.get_children()
    file_count = len(children)
    if file_count == 0:
        messagebox.showwarning("Warning", "未添加任何文件")
        return
    progress["value"] = 0
    progress["maximum"] = file_count
    root.update_idletasks()

    for item in children:
        file_path = file_list.item(item, "values")[2]
        file_list.delete(item)
        tmp_files_dic.pop(file_path, None)
        file_name = os.path.basename(file_path).rsplit(".", 1)[0]
        # Resolved up front: the error branch needs it too, and it must not
        # leak over from a previous iteration.
        if defalut_output_dir is None:
            outdir = os.path.dirname(file_path)
        else:
            outdir = defalut_output_dir
        try:
            ret = run_epub_font_encrypt(file_path, defalut_output_dir)
        except Exception as e:
            emoji, result, info = "@_@", "失败", f"字体加密失败,错误信息:{e}"
        else:
            if ret == 0:
                emoji, result, info = "^_^", "成功", f"字体加密成功,输出路径:{outdir}"
            elif ret == "skip":
                emoji, result, info = "O_o", "跳过", "无字体文件,跳过字体加密操作"
            else:
                emoji, result, info = "T_T", "失败", f"{ret}"

        result_list.insert(
            "",
            "end",
            values=(
                emoji,
                file_name,
                outdir,
                result,
                info,
            ),
        )
        progress["value"] += 1
        root.update_idletasks()


font_encrypt_btn = ttk.Button(
    op_frame,
    text="字体加密",
    command=run_font_encrypt,
)
font_encrypt_btn.pack(side=tk.LEFT, padx=5)

# Frame that holds the progress bar.
progress_frame = ttk.Frame(root)
progress_frame.pack(fill=tk.X, padx=10, pady=0)

progress = ttk.Progressbar(
    progress_frame,
    orient=tk.HORIZONTAL,
    length=400,
    mode="determinate",
)
progress.pack(fill=tk.X, padx=5, pady=0)

# Frame that holds the result Treeview and its scrollbar.
result_box_frame = ttk.Frame(root)
result_box_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

result_list = ttk.Treeview(
    result_box_frame,
    columns=(
        "emoji",
        "file_name",
        "file_path",
        "result",
        "info",
    ),
    show="headings",
)
result_list.heading("emoji", text="状态", anchor="center")
result_list.column("emoji", width=int(min_width * 0.1), anchor="center", stretch=False)
result_list.heading("file_name", text="书名", anchor="center")
result_list.column("file_name", anchor="w", stretch=True)
result_list.column("file_path", width=0, stretch=False)
result_list.heading("result", text="执行结果", anchor="center")
result_list.column(
    "result", width=int(min_width * 0.15), anchor="center", stretch=False
)
result_list.column("info", width=0, stretch=False)
# file_path and info stay in the row data but are not displayed as columns.
result_list["displaycolumns"] = ("emoji", "file_name", "result")
result_list.grid(row=1, column=0, sticky=tk.NSEW)


def show_context_menu_result(event):
    """Select the result row under the pointer and pop up its context menu."""
    item = result_list.identify_row(event.y)
    if item:
        result_list.selection_set(item)
        context_menu_result.post(event.x_root, event.y_root)


def _open_with_system(path):
    """Open *path* with the platform's default handler, reporting failures."""
    if not os.path.exists(path):
        messagebox.showwarning("Warning", f"文件不存在: {path}")
        return
    try:
        if sys.platform.startswith("darwin"):  # macOS
            subprocess.run(["open", path])
        elif os.name == "nt":  # Windows
            os.startfile(path)
        elif os.name == "posix":  # Linux
            subprocess.run(["xdg-open", path])
        else:
            messagebox.showerror("Warning", "不支持的操作系统")
    except Exception as e:
        messagebox.showerror("Warning", f"无法打开路径: {e}")


def open_log_file():
    """Open ``log.txt`` located next to the running script/executable."""
    log_path = os.path.join(
        os.path.dirname(os.path.abspath(sys.argv[0])), "log.txt"
    )
    _open_with_system(log_path)


def open_selected_file_output_dir():
    """Open the output folder recorded in each selected result row."""
    selected_items = result_list.selection()
    if not selected_items:
        messagebox.showwarning("Warning", "未选中任何文件")
        return
    for item in selected_items:
        file_path = result_list.item(item, "values")[2]
        if not file_path:
            # Status rows (e.g. "output path set") carry no folder.
            continue
        _open_with_system(file_path)


context_menu_result = tk.Menu(result_list, tearoff=0)
context_menu_result.add_command(
    label="打开输出文件夹", command=open_selected_file_output_dir
)
context_menu_result.add_command(
    label="打开日志文件", command=open_log_file
)

# The secondary mouse button is Button-2 on macOS (Aqua Tk) and Button-3 on
# Windows and X11. NOTE(review): the sequences were empty strings in this copy
# of the file (Tk rejects those); confirm against the upstream source.
if sys.platform.startswith("darwin"):
    result_list.bind("<Button-2>", show_context_menu_result)
else:
    result_list.bind("<Button-3>", show_context_menu_result)

# Vertical scrollbar for the result list.
v_scrollbar_result = ttk.Scrollbar(
    result_box_frame,
    orient=tk.VERTICAL,
    command=result_list.yview,
)
v_scrollbar_result.grid(row=1, column=1, sticky=tk.NS)
result_list.config(yscrollcommand=v_scrollbar_result.set)

# Let the Treeview column absorb extra horizontal space.
result_box_frame.grid_columnconfigure(0, weight=1)


def adjust_column_width(event):
    """Re-apply the fixed widths of the narrow columns after a window resize.

    The name columns stretch on their own; only the fixed-width columns are
    reset, based on the minimum window width.
    """
    # A binding on the toplevel also fires for its descendants; only react to
    # the window's own resize events.
    if event.widget is not root:
        return
    file_list.column(
        "index", width=int(min_width * 0.1), anchor="center", stretch=False
    )
    result_list.column(
        "emoji", width=int(min_width * 0.1), anchor="center", stretch=False
    )
    result_list.column(
        "result", width=int(min_width * 0.15), anchor="center", stretch=False
    )


# Window resize notifications.
root.bind("<Configure>", adjust_column_width)
Tooltip(result_list)
root.mainloop()
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 XiangyuWang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Ⅰ epub_tool介绍
4 | 5 | 6 |
7 | icon 8 |
9 | 10 | 11 | [![GitHub Releases](https://img.shields.io/github/v/release/cnwxi/epub_tool)](https://github.com/cnwxi/epub_tool/releases/latest) 12 | [![GitHub stars](https://img.shields.io/github/stars/cnwxi/epub_tool)](https://github.com/cnwxi/epub_tool/stargazers) 13 | [![GitHub forks](https://img.shields.io/github/forks/cnwxi/epub_tool)](https://github.com/cnwxi/epub_tool/network/members) 14 | 15 | Epub Tool->ET->E-Book Thor->📖🔨 16 | 17 |
18 | 包含一些可用的epub工具,用于epub文件的重构、解密、加密、字体混淆。 19 |

20 | 21 | 22 | 1. `重构epub为规范格式_v2.8.3.py`->`utils\reformat_epub.py`
23 | 作用:见原文件名。
24 | 原始的百度贴吧帖子链接:[遥遥心航的帖子](https://jump2.bdimg.com/p/8090221625)。
25 | 遥遥心航提供的原始文件:[蓝奏云网盘链接](https://wwb.lanzoub.com/b01k016hg) 密码:`i89p`。
26 | 2. `重构epub并反文件名混淆.py`->`utils\decrypt_epub.py`
27 | 作用:见原文件名。
28 | 3. `重构epub并加入文件名混淆.py`->`utils\encrypt_epub.py`
29 | 作用:见原文件名。
30 | 4. `Epub_Tool_Console.py`
31 | 作用:整合上述工具(不包括字体混淆)的命令行程序。
32 | 5. `utils\encrypt_font.py`
33 | 作用:对epub文件中指定内嵌字体的文字进行字体混淆。[https://github.com/cnwxi/epub_tool/issues/21]
34 | 6. `Epub_Tool_TKUI.py`
35 | 作用:整合上述工具、带操作界面的程序。
36 | 37 | 注:重构会严格保证文件夹分类和文件名后缀。[https://github.com/cnwxi/epub_tool/issues/13] 38 |

39 |
40 | 41 | ## Ⅱ 怎么使用?(仅针对最新版本)
42 | 43 |
44 | python源码执行 45 |

46 | 47 | 1. 下载python(推荐3.8或更高版本);
48 | 2. 使用`git clone https://github.com/cnwxi/epub_tool.git`克隆本仓库;或直接在网页下载源码压缩包,解压后得到py文件;
49 | 3. 准备依赖库,在终端输入`python -m pip install -r requirements.txt`;
50 | 4. 终端切换工作路径为解压后文件夹所在路径; 51 | 5. 执行py文件`python ./***.py`、`python ./utils/***.py`。
52 | 57 | 58 |

59 | 60 | >(注:会在对应工作路径生成日志文件`log.txt`,每次执行py文件会覆盖写入该文件,无需担心此文件过分占用存储空间)
61 | 62 |
63 | 64 | 114 | 115 |
116 | 可视化界面程序(推荐) 117 |

118 | 119 | 120 | >(注:同样会在可执行程序所在路径生成日志文件`log.txt`,每次启动程序会覆盖写入该文件,无需担心此文件过分占用存储空间,mac文件写入位置为`/Applications/Epub_Tool_TKUI.app/Contents/MacOS/log.txt`,win文件写入位置为`Epub_Tool_TKUI.exe所在目录`)
121 | 122 | 123 | > (Mac安装后运行若提示无法验证安全性,请参考[Apple 无法检查 App 是否包含恶意软件(来自Apple官网Mac使用手册)](https://support.apple.com/zh-cn/guide/mac-help/mchleab3a043/mac),进入系统设置-隐私与安全性-安全性-点击“仍要打开”;Windows若报告病毒文件请忽略警告,允许文件保留本地。) 124 | 125 | ![操作演示](./img/how_to_use.gif) 126 | 127 | 134 | 135 |

136 |
137 | 138 | ## Ⅲ 执行遇到错误? 139 | 140 |
141 | epub无法正常规范/混淆/反混淆
142 |

143 | 1、优先解压文件,查看其中content.opf文件,检查是否存在问题。若无法解决,在Issues区提交issue并附带原文件。[https://github.com/cnwxi/epub_tool/issues/8 https://github.com/cnwxi/epub_tool/issues/10] 144 |

145 |

146 | 2、若下载文件名带“精品”二字,且解压后文件夹内包含“/META-INF/encryption.xml”,检查此文件内是否有“ZhangYue.Inc”字样。若满足则此文件为掌阅加密书籍,为规避版权问题,此处不提供解密程序,请使用「掌阅」打开阅读。[https://github.com/cnwxi/epub_tool/issues/19] 147 |

148 |
149 | 150 |
151 | epub字体混淆出现异常
152 |

153 | 1、字体混淆按标签名称的字典逆序依次处理。例如存在如下标签时:<h2>、<p>、<p class="p1">、<span>、<span class="s1">,会按照span.s1、span、p.p1、p、h2的顺序进行字体混淆,其余标签以此类推。请据此规划样式标签的命名,以保证嵌套标签中的文字能够正常混淆;同时最好避免过分复杂的标签嵌套。
154 |

155 |
156 | 157 | ## Ⅳ 更新日志
158 |
159 | 点击以展开 160 |

161 | 162 | ### 2025.04.27
163 | 界面更新,使用ttk控件替换tk以实现跨平台统一;功能更新,增加右键点击快速打开所在/输出文件夹、删除已添加文件、查看日志文件等功能,添加鼠标悬停显示更多信息功能,添加字体加密功能。
164 | 使用 `20230418《ePub指南——从入门到放弃》编著:赤霓(第2版)`进行字体加密测试,执行过程无报错;但检查输出内容时发现,部分字符因样式复杂而被意外混淆。 [下载链接](https://wwxq.lanzov.com/b0nz4q13i) 密码:8vfp
165 | ### 2025.04.23
166 | 移除命令行程序编译;移除mac编译-F参数;添加icon.icns图标适配macOS,优化显示效果;移除Ubuntu(Linux系统)编译。
167 | ### 2025.03.20
168 | 修复失效的自定义输出路径。
169 | ### 2025.03.01
170 | 图标打包进可执行文件。
171 | ### 2025.02.20
172 | 更新图标。
173 | ### 2024.12.25
174 | 修复在mac上的日志文件写入位置,更改日志写入方式,清理原始脚本中重复的无效循环。
175 | ### 2024.12.24
176 | Update build.yml。https://github.com/cnwxi/epub_tool/pull/17
177 | ### 2024.12.23
178 | 调整UI、取消push自动构建。
179 | ### 2024.12.17
180 | 修复UI显示问题,分支整合。
181 | ### 2024.12.16
182 | 创建新分支TKUI,实现基本UI DEMO,功能已整合。
183 | ### 2024.11.17
184 | 输入文件夹时可手动选择需要处理的文件:通过输入文件序号进行选择,不再默认处理文件夹内全部epub文件;同时添加了输入检测提示,输入错误后会返回重新输入。https://github.com/cnwxi/epub_tool/pull/15
185 | ### 2024.10.24
186 | 修复未处理输入时拖入文件带引号导致的文件路径检查错误。
187 | ### 2024.09.09
188 | 因额外依赖库未打包到可执行文件,重新打包可执行文件。
189 | 更新相关使用教程。
190 | ### 2024.09.08
191 | 为避免有人不会使用命令行工具,更新Windows系统下相关操作的基础流程。
192 | 程序允许直接双击执行,后续再输入参数。
193 | 各操作会跳过文件名已带有对应固定后缀(`_encrypt`、`_decrypt`、`_reformat`)的文件,不再处理。
194 | ### 2024.08.29
195 | 修复混淆ID导致的反混淆不完全。
196 | 修复存在异常opf时程序闪退问题。
197 | 更新日志记录。
198 | ### 2024.08.28
199 | 整合代码,使用命令行批量处理epub文件。
200 | 支持输入单个epub文件或epub文件所在文件夹,支持子目录遍历。
201 | 修改输出路径,现为原epub文件同级路径,通过添加不同后缀(`_encrypt`、`_decrypt`、`_reformat`)区分原文件和处理后文件。
202 | ### 2024.06.19
203 | 代码更新,使用相似度计算覆盖opf文件中未混淆的其他文件名情况。
204 | ### 2024.06.13
205 | 更新yml文件。https://github.com/cnwxi/epub_tool/pull/9
206 | ### 2024.06.12
207 | 针对cover页面未混淆的情况做更改。
208 | 修改自动发布逻辑,修改py文件不触发CI,仅修改yml后触发。修改yml,无需手动执行才执行发布。
209 | ### 2024.06.08
210 | CI配置文件更新。https://github.com/cnwxi/epub_tool/pull/6 https://github.com/cnwxi/epub_tool/pull/7
211 | ### 2024.06.07
212 | 修改主函数逻辑,防止epub文件不存在导致的程序崩溃。https://github.com/cnwxi/epub_tool/pull/4
213 | 加入CI自动构建。https://github.com/cnwxi/epub_tool/pull/5
214 | 加入CI自动发布。
215 | ### 2024.05.28
216 | 修正`重构epub为规范格式_v2.8.3.py`中生成的content.opf文件内容格式。https://github.com/cnwxi/epub_tool/pull/3
217 | ### 2024.05.16
218 | 更改文件输出路径。https://github.com/cnwxi/epub_tool/pull/2
219 | ### 2024.05.09
220 | 针对多看~slim文件进行修改,处理html中使用`../`、`./`、`/`开头的链接。
221 | ### 2024.04.23
222 | 初始化仓库。
223 | 224 |

225 |
226 | 227 | ## Ⅴ 鸣谢
228 | 感谢以下用户对此项目的贡献: 229 | - [遥遥心航](https://tieba.baidu.com/home/main?id=tb.1.7f262ae1.5_dXQ2Jp0F0MH9YJtgM2Ew) 230 | - [lgernier](https://github.com/lgernierO)
231 | -------------------------------------------------------------------------------- /build_tool/pyinstallerForMac.sh: -------------------------------------------------------------------------------- 1 | # for mac 2 | pyinstaller --noconfirm -w -i ./img/icon.icns Epub_Tool_TKUI.py -n Epub_Tool_TKUI; 3 | cd ./dist 4 | 5 | # hdiutil create -volname Epub_Tool_TKUI -srcfolder ./Epub_Tool_TKUI.app -ov -format UDZO ./Epub_Tool_TKUI.dmg; 6 | 7 | create-dmg \ 8 | --volname "Epub_Tool_TKUI" \ 9 | --window-pos 400 200 \ 10 | --window-size 660 400 \ 11 | --icon-size 100 \ 12 | --icon "Epub_Tool_TKUI.app" 160 185 \ 13 | --hide-extension "Epub_Tool_TKUI.app"\ 14 | --app-drop-link 500 185 \ 15 | --no-internet-enable \ 16 | "./Epub_Tool_TKUI.dmg" \ 17 | "./Epub_Tool_TKUI.app"; 18 | 19 | # zip -r Epub_Tool_TKUI.zip ./Epub_Tool_TKUI.app; 20 | rm -rf ./Epub_Tool_TKUI.app; 21 | rm -rf ./Epub_Tool_TKUI; 22 | echo "------finished------"; 23 | cd .. 24 | rm -rf ./build; 25 | rm ./Epub_Tool_TKUI.spec; -------------------------------------------------------------------------------- /build_tool/pyinstallerForWindows.txt: -------------------------------------------------------------------------------- 1 | # for windows 2 | pyinstaller -F -w -i ./img/icon.ico Epub_Tool_TKUI.py -n Epub_Tool_TKUI; 3 | rm ./build; 4 | rm ./Epub_Tool_TKUI.spec; -------------------------------------------------------------------------------- /img/how_to_use.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/how_to_use.gif -------------------------------------------------------------------------------- /img/icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/icon.icns -------------------------------------------------------------------------------- 
/img/icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/icon.ico -------------------------------------------------------------------------------- /img/icon.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/icon.jpeg -------------------------------------------------------------------------------- /img/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/icon.png -------------------------------------------------------------------------------- /img/icon_backup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnwxi/epub_tool/4c4d2e2eacc28bac2ac833492b0c3eb65392a6cd/img/icon_backup.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | emoji 3 | fonttools 4 | tinycss2 5 | tqdm 6 | pillow -------------------------------------------------------------------------------- /utils/decrypt_epub.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python 3 | # 源码: sigil吧ID: 遥遥心航 4 | # 二改: cnwxi 5 | # 额外感谢: 故里 6 | 7 | import zipfile 8 | import re, sys 9 | from os import path, mkdir, getcwd 10 | from urllib.parse import unquote 11 | from xml.etree import ElementTree 12 | import copy 13 | import os 14 | import difflib 15 | import hashlib 16 | try: 17 | from utils.log import logwriter 18 | except: 19 | from log import logwriter 20 | 21 | logger = logwriter() 22 | 23 | 24 | class EpubTool: 25 
| 26 | def __init__(self, epub_src): 27 | self.encrypted = False 28 | self.epub = zipfile.ZipFile(epub_src) 29 | self.epub_src = epub_src 30 | self.epub_name = path.basename(epub_src) 31 | self.ebook_root = path.dirname(epub_src) 32 | self.output_path = self.ebook_root 33 | self.epub_type = "" 34 | self.temp_dir = "" 35 | self._init_namelist() 36 | self._init_mime_map() 37 | self._init_opf() 38 | self.manifest_list = [] # (id,opf_href,mime,properties) 39 | self.toc_rn = {} 40 | self.id_to_href = {} # { id : href.lower, ... } 41 | self.href_to_id = {} # { href.lower : id, ...} 42 | self.text_list = [] # (id,opf_href,properties) 43 | self.css_list = [] # (id,opf_href,properties) 44 | self.image_list = [] # (id,opf_href,properties) 45 | self.font_list = [] # (id,opf_href,properties) 46 | self.audio_list = [] # (id,opf_href,properties) 47 | self.video_list = [] # (id,opf_href,properties) 48 | self.spine_list = [] # (sid, linear, properties) 49 | self.other_list = [] # (id,opf_href,mime,properties) 50 | self.errorOPF_log = [] # (error_type,error_value) 51 | self.errorLink_log = {} # {filepath:[(error_link,correct_link || None),...]} 52 | self._parse_opf() 53 | 54 | def set_output_path(self, output_path): 55 | if output_path is not None and os.path.isdir(output_path): 56 | self.output_path = output_path 57 | 58 | def _init_namelist(self): 59 | self.namelist = self.epub.namelist() 60 | 61 | def _init_mime_map(self): 62 | self.mime_map = { 63 | ".html": "application/xhtml+xml", 64 | ".xhtml": "application/xhtml+xml", 65 | ".css": "text/css", 66 | ".js": "application/javascript", 67 | ".jpg": "image/jpeg", 68 | ".jpeg": "image/jpeg", 69 | ".bmp": "image/bmp", 70 | ".png": "image/png", 71 | ".gif": "image/gif", 72 | ".webp": "image/webp", 73 | ".ttf": "font/ttf", 74 | ".otf": "font/otf", 75 | ".woff": "font/woff", 76 | ".ncx": "application/x-dtbncx+xml", 77 | ".mp3": "audio/mpeg", 78 | ".mp4": "video/mp4", 79 | ".smil": "application/smil+xml", 80 | ".pls": 
"application/pls+xml", 81 | } 82 | 83 | def _init_opf(self): 84 | # 通过 container.xml 读取 opf 文件 85 | container_xml = self.epub.read("META-INF/container.xml").decode("utf-8") 86 | rf = re.match(r']*full-path="(?i:(.*?\.opf))"', container_xml) 87 | if rf is not None: 88 | self.opfpath = rf.group(1) 89 | self.opf = self.epub.read(self.opfpath).decode("utf-8") 90 | return 91 | # 通过路径首个 opf 读取 opf 文件 92 | for bkpath in self.namelist: 93 | if bkpath.lower().endswith(".opf"): 94 | self.opfpath = bkpath 95 | self.opf = self.epub.read(self.opfpath).decode("utf-8") 96 | return 97 | raise RuntimeError("无法发现opf文件") 98 | 99 | def _parse_opf(self): 100 | self.etree_opf = {"package": ElementTree.fromstring(self.opf)} 101 | 102 | for child in self.etree_opf["package"]: 103 | tag = re.sub(r"\{.*?\}", r"", child.tag) 104 | self.etree_opf[tag] = child 105 | self._parse_metadata() 106 | self._parse_manifest() 107 | self._parse_spine() 108 | self._clear_duplicate_id_href() 109 | self._parse_hrefs_not_in_epub() 110 | self._add_files_not_in_opf() 111 | 112 | self.manifest_list = [] # (id,opf_href,mime,properties) 113 | for id in self.id_to_h_m_p: 114 | href, mime, properties = self.id_to_h_m_p[id] 115 | self.manifest_list.append((id, href, mime, properties)) 116 | 117 | epub_type = self.etree_opf["package"].get("version") 118 | 119 | if epub_type is not None and epub_type in ["2.0", "3.0"]: 120 | self.epub_type = epub_type 121 | else: 122 | raise RuntimeError("此脚本不支持该EPUB类型") 123 | 124 | # 寻找epub2 toc 文件的id。epub3的nav文件直接当做xhtml处理。 125 | self.tocpath = "" 126 | self.tocid = "" 127 | tocid = self.etree_opf["spine"].get("toc") 128 | self.tocid = tocid if tocid is not None else "" 129 | 130 | # opf item分类 131 | opf_dir = path.dirname(self.opfpath) 132 | 133 | # 生成新的href 134 | ############################################################ 135 | def creatNewHerf(_id, _href): 136 | file_parts = _href.rsplit(".", 1) 137 | if len(_id.split(".")) == 1: 138 | _id_name = copy.deepcopy(_id) 139 | if 
_id.rsplit(".", 1)[-1].lower().endswith("slim"): 140 | image_silm = "~slim" 141 | # 如果_id_name中有slim,去掉 142 | _id_name = ( 143 | _id_name.lower() 144 | .rstrip("~slim") 145 | .rstrip("-slim") 146 | .rstrip("_slim") 147 | .rstrip("slim") 148 | ) 149 | else: 150 | image_silm = "" 151 | new_href = f"{_id_name}{image_silm}.{file_parts[-1].lower()}" 152 | else: 153 | _id_name, _id_extension = _id.rsplit(".", 1) 154 | if _id_extension.lower() != file_parts[-1].lower(): 155 | _id_extension = file_parts[-1] 156 | # 如果id或者href中有slim,则为多看处理~slim 157 | if _href.rsplit(".", 1)[-1].lower().endswith("slim") or _id_name.rsplit( 158 | ".", 1 159 | )[-1].lower().endswith("slim"): 160 | image_silm = "~slim" 161 | # 如果id中有slim,去掉 162 | _id_name = ( 163 | _id_name.lower() 164 | .rstrip("~slim") 165 | .rstrip("-slim") 166 | .rstrip("_slim") 167 | .rstrip("slim") 168 | ) 169 | else: 170 | image_silm = "" 171 | # 判断_id_name是否合法 172 | if re.search(r'[\\/:*?"<>|]', _id_name): 173 | logger.write(f"ID: {_id} 中包含非法字符") 174 | _id_name = hashlib.md5(_id_name.encode()).hexdigest() 175 | logger.write(f"ID: {_id} 替换为 {_id_name}") 176 | new_href = f"{_id_name}{image_silm}.{_id_extension.lower()}" 177 | logger.write(f"decrypt href: {_id}:{_href} -> {new_href}") 178 | return new_href 179 | 180 | ############################################################ 181 | for id, href, mime, properties in self.manifest_list: 182 | bkpath = opf_dir + "/" + href if opf_dir else href 183 | # 判断herf是否包含特殊字符 184 | if re.search(r'[\\/:*?"<>|]', href.rsplit("/")[-1]): 185 | self.encrypted = True 186 | if mime == "application/xhtml+xml": 187 | new_href = creatNewHerf(id, href) 188 | self.text_list.append((id, href, properties, new_href)) 189 | self.toc_rn[href] = new_href 190 | elif mime == "text/css": 191 | self.css_list.append((id, href, properties, creatNewHerf(id, href))) 192 | elif "image/" in mime: 193 | self.image_list.append((id, href, properties, creatNewHerf(id, href))) 194 | elif "font/" in mime or 
href.lower().endswith((".ttf", ".otf", ".woff")): 195 | self.font_list.append((id, href, properties, creatNewHerf(id, href))) 196 | elif "audio/" in mime: 197 | self.audio_list.append((id, href, properties, creatNewHerf(id, href))) 198 | elif "video/" in mime: 199 | self.video_list.append((id, href, properties, creatNewHerf(id, href))) 200 | elif self.tocid != "" and id == self.tocid: 201 | opf_dir = path.dirname(self.opfpath) 202 | self.tocpath = opf_dir + "/" + href if opf_dir else href 203 | else: 204 | self.other_list.append( 205 | (id, href, mime, properties, creatNewHerf(id, href)) 206 | ) 207 | 208 | self._check_manifest_and_spine() 209 | 210 | def _parse_metadata(self): 211 | self.metadata = {} 212 | for key in [ 213 | "title", 214 | "creator", 215 | "language", 216 | "subject", 217 | "source", 218 | "identifier", 219 | "cover", 220 | ]: 221 | self.metadata[key] = "" 222 | for meta in self.etree_opf["metadata"]: 223 | tag = re.sub(r"\{.*?\}", r"", meta.tag) 224 | if tag in [ 225 | "title", 226 | "creator", 227 | "language", 228 | "subject", 229 | "source", 230 | "identifier", 231 | ]: 232 | self.metadata[tag] = meta.text 233 | elif tag == "meta": 234 | if meta.get("name") and meta.get("content"): 235 | self.metadata["cover"] = meta.get("content") 236 | 237 | def _parse_manifest(self): 238 | self.id_to_h_m_p = {} # { id : (href,mime,properties) , ... } 239 | self.id_to_href = {} # { id : href.lower, ... 
} 240 | self.href_to_id = {} # { href.lower : id, ...} 241 | if_error = False 242 | for item in self.etree_opf["manifest"]: 243 | # 检查opf文件中是否存在错误 244 | try: 245 | id = item.get("id") 246 | href = unquote(item.get("href")) 247 | except Exception as e: 248 | str_item = ( 249 | ElementTree.tostring(item, encoding="unicode") 250 | .replace("\n", "") 251 | .replace("\r", "") 252 | .replace("\t", "") 253 | ) 254 | logger.write(f"item: {str_item} error: {e}") 255 | if_error = True 256 | continue 257 | mime = item.get("media-type") 258 | properties = item.get("properties") if item.get("properties") else "" 259 | 260 | self.id_to_h_m_p[id] = (href, mime, properties) 261 | self.id_to_href[id] = href.lower() 262 | self.href_to_id[href.lower()] = id 263 | if if_error: 264 | logger.write("opf文件中存在错误,请检查!") 265 | 266 | def _parse_spine(self): 267 | self.spine_list = [] # [ (sid, linear, properties) , ... ] 268 | for itemref in self.etree_opf["spine"]: 269 | sid = itemref.get("idref") 270 | linear = itemref.get("linear") if itemref.get("linear") else "" 271 | properties = itemref.get("properties") if itemref.get("properties") else "" 272 | self.spine_list.append((sid, linear, properties)) 273 | 274 | def _clear_duplicate_id_href(self): 275 | 276 | # id_used = [ id_in_spine + cover_id ] 277 | id_used = [x[0] for x in self.spine_list] 278 | if self.metadata["cover"]: 279 | id_used.append(self.metadata["cover"]) 280 | 281 | del_id = [] 282 | for id, href in self.id_to_href.items(): 283 | if self.href_to_id[href] != id: # 该href拥有多个id,此id已被覆盖。 284 | if id in id_used and self.href_to_id[href] not in id_used: 285 | if id not in del_id: 286 | del_id.append(self.href_to_id[href]) 287 | self.href_to_id[href] = id 288 | elif id in id_used and self.href_to_id[href] in id_used: 289 | continue 290 | else: 291 | if id not in del_id: 292 | del_id.append(id) 293 | 294 | for id in del_id: 295 | self.errorOPF_log.append(("duplicate_id", id)) 296 | del self.id_to_href[id] 297 | del 
self.id_to_h_m_p[id] 298 | 299 | def _add_files_not_in_opf(self): 300 | 301 | hrefs_not_in_opf = [] 302 | for archive_path in self.namelist: 303 | if archive_path.lower().endswith( 304 | ( 305 | ".html", 306 | ".xhtml", 307 | ".css", 308 | ".jpg", 309 | ".jpeg", 310 | ".bmp", 311 | ".gif", 312 | ".png", 313 | ".webp", 314 | ".svg", 315 | ".ttf", 316 | ".otf", 317 | ".js", 318 | ".mp3", 319 | ".mp4", 320 | ".smil", 321 | ) 322 | ): 323 | opf_href = get_relpath(self.opfpath, archive_path) 324 | if opf_href.lower() not in self.href_to_id.keys(): 325 | hrefs_not_in_opf.append(opf_href) 326 | 327 | def allocate_id(href): # 自动分配不重复id 328 | basename = path.basename(href) 329 | if "A" <= basename[0] <= "Z" or "a" <= basename[0] <= "z": 330 | new_id = basename 331 | else: 332 | new_id = "x" + basename 333 | pre, suf = path.splitext(new_id) 334 | pre_ = pre 335 | i = 0 336 | while pre_ + suf in self.id_to_href.keys(): 337 | i += 1 338 | pre_ = pre + "_" + str(i) 339 | new_id = pre_ + suf 340 | return new_id 341 | 342 | for href in hrefs_not_in_opf: 343 | new_id = allocate_id("newsrc") 344 | self.id_to_href[new_id] = href.lower() 345 | self.href_to_id[href.lower()] = new_id 346 | ext = path.splitext(href)[1] 347 | ext = ext.lower() 348 | try: 349 | mime = self.mime_map[ext] 350 | except KeyError: 351 | mime = "text/plain" 352 | self.id_to_h_m_p[new_id] = (href, mime, "") 353 | 354 | def _check_manifest_and_spine(self): 355 | spine_idrefs = [i for i, j, k in self.spine_list] 356 | 357 | for idref in spine_idrefs: 358 | if not self.id_to_h_m_p.get(idref): # spine 引用无效ID 359 | self.errorOPF_log.append(("invalid_idref", idref)) 360 | 361 | for mid, opf_href, mime, properties in self.manifest_list: 362 | if mime == "application/xhtml+xml": 363 | if mid not in spine_idrefs: 364 | self.errorOPF_log.append(("xhtml_not_in_spine", mid)) 365 | 366 | def _parse_hrefs_not_in_epub(self): 367 | del_id = [] 368 | namelist = [x.lower() for x in self.epub.namelist()] 369 | for id, href in 
self.id_to_href.items(): 370 | bkpath = get_bookpath(href, self.opfpath) 371 | if bkpath.lower() not in namelist: 372 | del_id.append(id) 373 | del self.href_to_id[href] 374 | for id in del_id: 375 | del self.id_to_href[id] 376 | del self.id_to_h_m_p[id] 377 | 378 | def create_tgt_epub(self): 379 | output_path = self.output_path 380 | logger.write(f"输出路径: {output_path}") 381 | return zipfile.ZipFile( 382 | path.join(output_path, self.epub_name.replace(".epub", "_decrypt.epub")), 383 | "w", 384 | zipfile.ZIP_STORED, 385 | ) 386 | 387 | # 重构 388 | def restructure(self): 389 | self.tgt_epub = self.create_tgt_epub() 390 | # mimetype 391 | mimetype = self.epub.read("mimetype") 392 | self.tgt_epub.writestr("mimetype", mimetype, zipfile.ZIP_DEFLATED) 393 | # META-INF 394 | metainf_data = self.epub.read("META-INF/container.xml").decode("utf-8") 395 | metainf_data = re.sub( 396 | r']*media-type="application/oebps-[^>]*/>', 397 | r'', 398 | metainf_data, 399 | ) 400 | self.tgt_epub.writestr( 401 | "META-INF/container.xml", 402 | bytes(metainf_data, encoding="utf-8"), 403 | zipfile.ZIP_DEFLATED, 404 | ) 405 | # OEBPS 406 | re_path_map = { 407 | "text": {}, 408 | "css": {}, 409 | "image": {}, 410 | "font": {}, 411 | "audio": {}, 412 | "video": {}, 413 | "other": {}, 414 | } # { ori_bkpath : re_basename } 415 | basename_log = { 416 | "text": [], 417 | "css": [], 418 | "image": [], 419 | "font": [], 420 | "audio": [], 421 | "video": [], 422 | "other": [], 423 | } 424 | lowerPath_to_originPath = ( 425 | {} 426 | ) # 如果路径大小写不一致,则登记为 { 小写路径 : 原始路径 } 427 | 428 | def auto_rename(id, href, ftype): 429 | filename, ext = path.splitext(path.basename(href)) 430 | filename_ = filename 431 | num = 0 432 | while filename_ + ext in basename_log[ftype]: 433 | num += 1 434 | filename_ = filename + "_" + str(num) 435 | basename = filename_ + ext 436 | basename_log[ftype].append(basename) 437 | return basename 438 | 439 | def check_link(filename, bkpath, href, self, target_id=""): 440 | if href 
== "" or href.startswith( 441 | ("http://", "https://", "res:/", "file:/", "data:") 442 | ): 443 | return None 444 | if bkpath.lower() in lowerPath_to_originPath.keys(): 445 | if bkpath != lowerPath_to_originPath[bkpath.lower()]: # 大小写不一致 446 | correct_path = lowerPath_to_originPath[bkpath.lower()] 447 | self.errorLink_log.setdefault(filename, []) 448 | self.errorLink_log[filename].append( 449 | (href + target_id, correct_path) 450 | ) 451 | bkpath = correct_path 452 | else: # 链接路径找不到对应文件 453 | self.errorLink_log.setdefault(filename, []) 454 | self.errorLink_log[filename].append((href + target_id, None)) 455 | return None 456 | return bkpath 457 | 458 | # xhtml文件,关联 toc文件,一切 xhtml中的元素 459 | for id, href, properties, newhref in self.text_list: 460 | bkpath = get_bookpath(href, self.opfpath) 461 | basename = auto_rename(id, newhref, "text") 462 | re_path_map["text"][bkpath] = basename 463 | lowerPath_to_originPath[bkpath.lower()] = bkpath 464 | 465 | # css 文件,关联 xhtml文件的link,css文件中的@import 466 | for id, href, properties, newhref in self.css_list: 467 | bkpath = get_bookpath(href, self.opfpath) 468 | basename = auto_rename(id, newhref, "css") 469 | re_path_map["css"][bkpath] = basename 470 | lowerPath_to_originPath[bkpath.lower()] = bkpath 471 | 472 | # 图片,关联css中的url,xhtml文件中的href 473 | for id, href, properties, newhref in self.image_list: 474 | bkpath = get_bookpath(href, self.opfpath) 475 | basename = auto_rename(id, newhref, "image") 476 | re_path_map["image"][bkpath] = basename 477 | lowerPath_to_originPath[bkpath.lower()] = bkpath 478 | # 字体,关联css中的url 479 | for id, href, properties, newhref in self.font_list: 480 | bkpath = get_bookpath(href, self.opfpath) 481 | basename = auto_rename(id, newhref, "font") 482 | re_path_map["font"][bkpath] = basename 483 | lowerPath_to_originPath[bkpath.lower()] = bkpath 484 | 485 | # 音频 486 | for id, href, properties, newhref in self.audio_list: 487 | bkpath = get_bookpath(href, self.opfpath) 488 | basename = auto_rename(id, 
newhref, "audio") 489 | re_path_map["audio"][bkpath] = basename 490 | lowerPath_to_originPath[bkpath.lower()] = bkpath 491 | 492 | # 视频 493 | for id, href, properties, newhref in self.video_list: 494 | bkpath = get_bookpath(href, self.opfpath) 495 | basename = auto_rename(id, newhref, "video") 496 | re_path_map["video"][bkpath] = basename 497 | lowerPath_to_originPath[bkpath.lower()] = bkpath 498 | 499 | # 其他文件 500 | for id, href, mime, properties, newhref in self.other_list: 501 | bkpath = get_bookpath(href, self.opfpath) 502 | basename = auto_rename(id, newhref, "other") 503 | re_path_map["other"][bkpath] = basename 504 | lowerPath_to_originPath[bkpath.lower()] = bkpath 505 | 506 | # xhtml文件 507 | for xhtml_bkpath, new_name in re_path_map["text"].items(): 508 | text = self.epub.read(xhtml_bkpath).decode("utf-8") 509 | if not text.startswith("\n' + text 511 | if not re.match(r"(?s).*)\n*", 514 | r'\1\n\n', 515 | text, 516 | 1, 517 | ) 518 | 519 | # 修改a[href] 520 | 521 | def re_href(match): 522 | href = match.group(3) 523 | href = unquote(href).strip() 524 | if "#" in href: 525 | href, target_id = href.split("#") 526 | target_id = "#" + target_id 527 | else: 528 | target_id = "" 529 | 530 | bkpath = get_bookpath(href, xhtml_bkpath) 531 | bkpath = check_link(xhtml_bkpath, bkpath, href, self, target_id) 532 | if not bkpath: 533 | return match.group() 534 | 535 | if href.lower().endswith( 536 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp") 537 | ): 538 | filename = re_path_map["image"][bkpath] 539 | return match.group(1) + "../Images/" + filename + match.group(4) 540 | elif href.lower().endswith(".css"): 541 | filename = re_path_map["css"][bkpath] 542 | return ( 543 | '' 546 | ) 547 | elif href.lower().endswith((".xhtml", ".html")): 548 | filename = re_path_map["text"][bkpath] 549 | return match.group(1) + filename + target_id + match.group(4) 550 | else: 551 | return match.group() 552 | 553 | text = re.sub(r"(<[^>]*href=([\'\"]))(.*?)(\2[^>]*>)", re_href, text) 
554 | 555 | # 修改src 556 | def re_src(match): 557 | href = match.group(3) 558 | href = unquote(href).strip() 559 | bkpath = get_bookpath(href, xhtml_bkpath) 560 | bkpath = check_link(xhtml_bkpath, bkpath, href, self) 561 | if not bkpath: 562 | return match.group() 563 | 564 | if href.lower().endswith( 565 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg") 566 | ): 567 | filename = re_path_map["image"][bkpath] 568 | return match.group(1) + "../Images/" + filename + match.group(4) 569 | elif href.lower().endswith(".mp3"): 570 | filename = re_path_map["audio"][bkpath] 571 | return match.group(1) + "../Audio/" + filename + match.group(4) 572 | elif href.lower().endswith(".mp4"): 573 | filename = re_path_map["video"][bkpath] 574 | return match.group(1) + "../Video/" + filename + match.group(4) 575 | elif href.lower().endswith(".js"): 576 | filename = re_path_map["other"][bkpath] 577 | return match.group(1) + "../Misc/" + filename + match.group(4) 578 | else: 579 | return match.group() 580 | 581 | def re_poster(match): 582 | href = match.group(3) 583 | href = unquote(href).strip() 584 | bkpath = get_bookpath(href, xhtml_bkpath) 585 | bkpath = check_link(xhtml_bkpath, bkpath, href, self) 586 | if not bkpath: 587 | return match.group() 588 | if href.lower().endswith( 589 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg") 590 | ): 591 | filename = re_path_map["image"][bkpath] 592 | return match.group(1) + "../Images/" + filename + match.group(4) 593 | else: 594 | return match.group() 595 | 596 | text = re.sub(r"(<[^>]* src=([\'\"]))(.*?)(\2[^>]*>)", re_src, text) 597 | text = re.sub(r"(<[^>]* poster=([\'\"]))(.*?)(\2[^>]*>)", re_poster, text) 598 | 599 | # 修改 url 600 | def re_url(match): 601 | url = match.group(2) 602 | url = unquote(url).strip() 603 | bkpath = get_bookpath(url, xhtml_bkpath) 604 | bkpath = check_link(xhtml_bkpath, bkpath, url, self) 605 | if not bkpath: 606 | return match.group() 607 | 608 | if url.lower().endswith((".ttf", ".otf")): 609 
| filename = re_path_map["font"][bkpath] 610 | return match.group(1) + "../Fonts/" + filename + match.group(3) 611 | elif url.lower().endswith( 612 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg") 613 | ): 614 | filename = re_path_map["image"][bkpath] 615 | return match.group(1) + "../Images/" + filename + match.group(3) 616 | else: 617 | return match.group() 618 | 619 | text = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_url, text) 620 | self.tgt_epub.writestr( 621 | "OEBPS/Text/" + new_name, 622 | bytes(text, encoding="utf-8"), 623 | zipfile.ZIP_DEFLATED, 624 | ) 625 | # css文件 626 | for css_bkpath, new_name in re_path_map["css"].items(): 627 | try: 628 | css = self.epub.read(css_bkpath).decode("utf-8") 629 | except: 630 | continue 631 | 632 | # 修改 @import 633 | def re_import(match): 634 | href = match.group(2) if match.group(2) else match.group(3) 635 | href = unquote(href).strip() 636 | if not href.lower().endswith(".css"): 637 | return match.group() 638 | bkpath = get_bookpath(href, css_bkpath) 639 | bkpath = check_link(css_bkpath, bkpath, href, self) 640 | if not bkpath: 641 | return match.group() 642 | filename = re_path_map.get("css", {}).get(bkpath, path.basename(href)) 643 | if match.group(2): 644 | return '@import "{}"'.format(filename) 645 | else: 646 | return '@import url("{}")'.format(filename) 647 | 648 | css = re.sub( 649 | r"@import +([\'\"])(.*?)\1|@import +url\([\'\"]?(.*?)[\'\"]?\)", 650 | re_import, 651 | css, 652 | ) 653 | 654 | # 修改 css的url 655 | def re_css_url(match): 656 | url = match.group(2) 657 | url = unquote(url).strip() 658 | bkpath = get_bookpath(url, css_bkpath) 659 | bkpath = check_link(css_bkpath, bkpath, url, self) 660 | if not bkpath: 661 | return match.group() 662 | if url.lower().endswith((".ttf", ".otf")): 663 | filename = re_path_map["font"][bkpath] 664 | return match.group(1) + "../Fonts/" + filename + match.group(3) 665 | elif url.lower().endswith( 666 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg") 
667 | ): 668 | filename = re_path_map["image"][bkpath] 669 | return match.group(1) + "../Images/" + filename + match.group(3) 670 | else: 671 | return match.group() 672 | 673 | css = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_css_url, css) 674 | self.tgt_epub.writestr( 675 | "OEBPS/Styles/" + new_name, 676 | bytes(css, encoding="utf-8"), 677 | zipfile.ZIP_DEFLATED, 678 | ) 679 | # 图片 680 | for img_bkpath, new_name in re_path_map["image"].items(): 681 | try: 682 | img = self.epub.read(img_bkpath) 683 | except: 684 | continue 685 | self.tgt_epub.writestr( 686 | "OEBPS/Images/" + new_name, img, zipfile.ZIP_DEFLATED 687 | ) 688 | # 字体 689 | for font_bkpath, new_name in re_path_map["font"].items(): 690 | try: 691 | font = self.epub.read(font_bkpath) 692 | except: 693 | continue 694 | self.tgt_epub.writestr( 695 | "OEBPS/Fonts/" + new_name, font, zipfile.ZIP_DEFLATED 696 | ) 697 | # 音频 698 | for audio_bkpath, new_name in re_path_map["audio"].items(): 699 | try: 700 | audio = self.epub.read(audio_bkpath) 701 | except: 702 | continue 703 | self.tgt_epub.writestr( 704 | "OEBPS/Audio/" + new_name, audio, zipfile.ZIP_DEFLATED 705 | ) 706 | # 视频 707 | for video_bkpath, new_name in re_path_map["video"].items(): 708 | try: 709 | video = self.epub.read(video_bkpath) 710 | except: 711 | continue 712 | self.tgt_epub.writestr( 713 | "OEBPS/Video/" + new_name, video, zipfile.ZIP_DEFLATED 714 | ) 715 | # 其他 716 | for font_bkpath, new_name in re_path_map["other"].items(): 717 | try: 718 | other = self.epub.read(font_bkpath) 719 | except: 720 | continue 721 | self.tgt_epub.writestr( 722 | "OEBPS/Misc/" + new_name, other, zipfile.ZIP_DEFLATED 723 | ) 724 | 725 | # 读取文件并修改关联 726 | # toc文件 727 | if self.tocpath: 728 | toc = self.epub.read(self.tocpath).decode("utf-8") 729 | toc_dir = path.dirname(self.tocpath) 730 | 731 | def re_toc_href(match): 732 | href = match.group(2) 733 | href = unquote(href).strip() 734 | parts = href.split("#", 1) 735 | href_base = parts[0] 736 | target_id = "#" 
+ parts[1] if len(parts) > 1 else "" 737 | href_base = ( 738 | self.toc_rn[href_base] if href_base in self.toc_rn else href_base 739 | ) 740 | bkpath = get_bookpath(href_base, self.tocpath) 741 | 742 | if not bkpath: 743 | return match.group() 744 | filename = path.basename(bkpath) 745 | return 'src="Text/' + filename + target_id + '"' 746 | 747 | toc = re.sub(r"src=([\'\"])(.*?)\1", re_toc_href, toc) 748 | self.tgt_epub.writestr( 749 | "OEBPS/toc.ncx", bytes(toc, encoding="utf-8"), zipfile.ZIP_DEFLATED 750 | ) 751 | 752 | # OPF 753 | manifest_text = "" 754 | 755 | for id, href, mime, prop in self.manifest_list: 756 | bkpath = get_bookpath(href, self.opfpath) 757 | prop_ = ' properties="' + prop + '"' if prop else "" 758 | if mime == "application/xhtml+xml": 759 | filename = re_path_map["text"][bkpath] 760 | manifest_text += '\n '.format( 761 | id=id, href="Text/" + filename, mime=mime, prop=prop_ 762 | ) 763 | elif mime == "text/css": 764 | filename = re_path_map["css"][bkpath] 765 | manifest_text += '\n '.format( 766 | id=id, href="Styles/" + filename, mime=mime, prop=prop_ 767 | ) 768 | elif "image/" in mime: 769 | filename = re_path_map["image"][bkpath] 770 | manifest_text += '\n '.format( 771 | id=id, href="Images/" + filename, mime=mime, prop=prop_ 772 | ) 773 | elif "font/" in mime or href.lower().endswith((".ttf", ".otf", ".woff")): 774 | filename = re_path_map["font"][bkpath] 775 | manifest_text += '\n '.format( 776 | id=id, href="Fonts/" + filename, mime=mime, prop=prop_ 777 | ) 778 | elif "audio/" in mime: 779 | filename = re_path_map["audio"][bkpath] 780 | manifest_text += '\n '.format( 781 | id=id, href="Audio/" + filename, mime=mime, prop=prop_ 782 | ) 783 | elif "video/" in mime: 784 | filename = re_path_map["video"][bkpath] 785 | manifest_text += '\n '.format( 786 | id=id, href="Video/" + filename, mime=mime, prop=prop_ 787 | ) 788 | elif id == self.tocid: 789 | manifest_text += '\n '.format( 790 | id=id 791 | ) 792 | else: 793 | filename = 
# Relative-path helper
def get_relpath(from_path, to_path):
    """Return the relative href leading from the file ``from_path`` to ``to_path``.

    Both arguments are paths counted from the same root (for example the
    archive root), written with either ``/`` or ``\\`` separators.  The result
    always uses ``/`` and is relative to the directory containing
    ``from_path``.

    Args:
        from_path: Path of the referring file.
        to_path: Path of the file being referred to.

    Returns:
        The relative path as a string, with one ``../`` for every directory
        of ``from_path`` that is not shared with ``to_path``.
    """
    from_parts = re.split(r"[\\/]", from_path)
    to_parts = re.split(r"[\\/]", to_path)

    # Only leading *directories* may be treated as shared.  The last component
    # of each path is a file name and must never be consumed; the previous
    # implementation popped it and then indexed an empty list (IndexError)
    # when the paths were identical or one was a prefix of the other.
    limit = min(len(from_parts), len(to_parts)) - 1
    shared = 0
    while shared < limit and from_parts[shared] == to_parts[shared]:
        shared += 1

    # Directories left on the referring side each cost one "../".
    climbs = len(from_parts) - shared - 1
    return "../" * climbs + "/".join(to_parts[shared:])
def epub_sources():
    """Collect the EPUB files named on the command line.

    Returns:
        ``sys.argv`` itself when no arguments were given.  Otherwise a list
        whose first element is the directory of the running script
        (``sys.argv[0]``) followed by every argument that has an ``.epub``
        extension (case-insensitive) and exists on disk, in argument order.
    """
    argv = sys.argv
    if len(argv) < 2:
        return argv

    script, *candidates = argv
    existing_epubs = [
        candidate
        for candidate in candidates
        if path.splitext(path.basename(candidate))[1].lower() == ".epub"
        and path.exists(candidate)
    ]
    return [path.dirname(script)] + existing_epubs
"duplicate_id": 923 | logger.write( 924 | f"问题: 发现manifest节点内部存在重复ID {error_value} !!!" 925 | ) 926 | logger.write("措施: 已自动清除重复ID对应的manifest项。") 927 | elif error_type == "invalid_idref": 928 | logger.write( 929 | f"问题: 发现spine节点内部存在无效引用ID {error_value} !!!" 930 | ) 931 | logger.write( 932 | "措施: 请自行检查spine内的itemref节点并手动修改,确保引用的ID存在于manifest的item项。\n(大小写不一致也会导致引用无效。)" 933 | ) 934 | elif error_type == "xhtml_not_in_spine": 935 | logger.write( 936 | f"问题: 发现ID为 {error_value} 的文件manifest中登记为application/xhtml+xml类型,但不被spine节点的项所引用" 937 | ) 938 | logger.write( 939 | "措施: 自行检查该文件是否需要被spine引用。部分阅读器中,如果存在xhtml文件不被spine引用,可能导致epub无法打开。" 940 | ) 941 | 942 | if el: 943 | for file_path, log in el.items(): 944 | basename = path.basename(file_path) 945 | logger.write(f"-----在 {basename} 发现问题链接-----:") 946 | for href, correct_path in log: 947 | if correct_path is not None: 948 | logger.write( 949 | f"链接: {href}\n问题: 与实际文件名大小写不一致!\n措施: 程序已自动纠正链接为: {correct_path}。" 950 | ) 951 | else: 952 | logger.write(f"链接: {href}\n问题: 未能找到对应文件!!!") 953 | except Exception as e: 954 | logger.write(f"{epub_src} 重构EPUB失败: {e}") 955 | return e 956 | else: 957 | logger.write(f"{epub_src} 重构EPUB成功") 958 | return 0 959 | 960 | 961 | def main(): 962 | epub_src = input("【使用说明】请把EPUB文件拖曳到本窗口上(输入'e'退出): ") 963 | epub_src = epub_src.strip("'").strip('"').strip() 964 | if epub_src.lower() == "e": 965 | print("程序已退出") 966 | sys.exit() 967 | if not os.path.isfile(epub_src): 968 | print("错误: 找不到指定的EPUB文件,请确认文件路径是否正确并重新输入!") 969 | return 970 | ret = run(epub_src) 971 | if ret == "skip": 972 | print("已跳过该文件") 973 | elif ret == "e": 974 | print("操作失败,请检查日志!") 975 | else: 976 | print("操作成功!") 977 | 978 | 979 | if __name__ == "__main__": 980 | print( 981 | "【脚本功能】\n" 982 | + "1、 将epub目录结构规范化至sigil规范格式。\n" 983 | + "2、 将没有列入manifest项的epub有效文件自动列入manifest项。\n" 984 | + "3、 自动清除manifest中携带重复ID或多余ID的无效项。\n" 985 | + " 脚本将优先保留spine或metadata中关联的ID。\n" 986 | + "4、 自动检查并提醒spine节点中引用无效ID的itemref项。\n" 987 | + "5、 
自动检查并提醒manifest节点中xhtml类型文件不被spine节点引用的情况。\n" 988 | + "6、 自动检测并纠正实际文件名与对应的引用链接大小写不一致的问题。\n" 989 | + "7、 自动检测并提醒找不到对应文件的链接。\n" 990 | + "8、 反名称混淆,使sigil可以打开修改。" 991 | ) 992 | while True: 993 | main() 994 | 995 | -------------------------------------------------------------------------------- /utils/encrypt_epub.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python 3 | # 源码: sigil吧ID: 遥遥心航 4 | # 二改: cnwxi 5 | 6 | import zipfile 7 | import re, sys 8 | from os import path, mkdir, getcwd 9 | from urllib.parse import unquote 10 | from xml.etree import ElementTree 11 | import os 12 | import hashlib 13 | 14 | try: 15 | from utils.log import logwriter 16 | except: 17 | from log import logwriter 18 | 19 | logger = logwriter() 20 | 21 | 22 | class EpubTool: 23 | 24 | def __init__(self, epub_src): 25 | self.encrypted = False 26 | self.epub = zipfile.ZipFile(epub_src) 27 | self.epub_src = epub_src 28 | self.epub_name = path.basename(epub_src) 29 | self.ebook_root = path.dirname(epub_src) 30 | self.output_path = self.ebook_root 31 | self.epub_type = "" 32 | self.temp_dir = "" 33 | self._init_namelist() 34 | self._init_mime_map() 35 | self._init_opf() 36 | self.manifest_list = [] # (id,opf_href,mime,properties) 37 | self.toc_rn = {} 38 | self.all_mixed = {} 39 | self.id_to_href = {} # { id : href.lower, ... 
def _init_mime_map(self):
    """Build ``self.mime_map``, the lower-case extension -> media-type table.

    The table is used to assign a media type to files that are present in
    the archive but missing from the OPF manifest.
    """
    self.mime_map = {
        ".html": "application/xhtml+xml",
        ".xhtml": "application/xhtml+xml",
        ".css": "text/css",
        ".js": "application/javascript",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".bmp": "image/bmp",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
        # .svg files are collected by extension when missing from the
        # manifest; without an entry here they fell back to "text/plain"
        # and were filed as "other" instead of as images.
        ".svg": "image/svg+xml",
        ".ttf": "font/ttf",
        ".otf": "font/otf",
        ".woff": "font/woff",
        ".ncx": "application/x-dtbncx+xml",
        ".mp3": "audio/mpeg",
        ".mp4": "video/mp4",
        ".smil": "application/smil+xml",
        ".pls": "application/pls+xml",
    }
def _parse_opf(self):
    """Parse the OPF document and sort every manifest item into a typed list.

    Steps, in order:
      1. Parse ``self.opf`` and index the direct children of the package
         element by their namespace-less tag name in ``self.etree_opf``.
      2. Run the metadata / manifest / spine parsers and the manifest
         clean-up passes.
      3. Rebuild ``self.manifest_list`` from ``self.id_to_h_m_p``.
      4. Validate the package ``version`` attribute.
      5. Generate an obfuscated file name for every item and append the item
         to ``text_list`` / ``css_list`` / ``image_list`` / ``font_list`` /
         ``audio_list`` / ``video_list`` / ``other_list``.  The item whose id
         equals the spine ``toc`` attribute only sets ``self.tocpath``.
      6. Cross-check manifest and spine.

    ``self.encrypted`` is set to True when any manifest href already has a
    basename containing one of the characters ``\\ / : * ? " < > |``.

    Raises:
        RuntimeError: if the package version is neither "2.0" nor "3.0".
    """
    self.etree_opf = {"package": ElementTree.fromstring(self.opf)}

    # Strip the "{namespace}" prefix so children can be looked up by bare tag.
    for child in self.etree_opf["package"]:
        tag = re.sub(r"\{.*?\}", r"", child.tag)
        self.etree_opf[tag] = child
    self._parse_metadata()
    self._parse_manifest()
    self._parse_spine()
    self._clear_duplicate_id_href()
    self._parse_hrefs_not_in_epub()
    self._add_files_not_in_opf()

    # (id, opf_href, mime, properties)
    self.manifest_list = [
        (item_id, *h_m_p) for item_id, h_m_p in self.id_to_h_m_p.items()
    ]

    epub_type = self.etree_opf["package"].get("version")

    if epub_type is not None and epub_type in ["2.0", "3.0"]:
        self.epub_type = epub_type
    else:
        raise RuntimeError("此脚本不支持该EPUB类型")

    # Find the id of the EPUB 2 toc file.  An EPUB 3 nav document is simply
    # handled as an ordinary xhtml file.
    self.tocpath = ""
    self.tocid = ""
    tocid = self.etree_opf["spine"].get("toc")
    self.tocid = tocid if tocid is not None else ""

    opf_dir = path.dirname(self.opfpath)

    def creatNewHerf(_id, _href):
        """Return the obfuscated file name for a manifest item.

        The name is "_" + the MD5 of the item id (text before the first ".")
        rendered in binary with "1" -> "*" and "0" -> ":", followed by an
        optional "~slim" marker and the lower-cased original extension.
        The mapping is also recorded in ``self.toc_rn`` keyed by ``_href``.
        """
        _id_name = _id.split(".")[0]
        _filename, _file_extension = _href.rsplit(".", 1)
        _true_filename = _filename.rsplit("/", 1)[-1]
        if _true_filename.endswith("slim") or _id_name.endswith("slim"):
            image_slim = "~slim"
            # NOTE(review): str.rstrip removes a trailing run of *characters*
            # from the given set, not a suffix, so ids whose stem ends in
            # s/l/i/m/_/-/~ are trimmed further than "slim".  Left unchanged
            # because it determines the generated names.
            _id_name = (
                _id_name.rstrip("~slim")
                .rstrip("-slim")
                .rstrip("_slim")
                .rstrip("slim")
            )
        else:
            image_slim = ""
        _href_hash = hashlib.md5(_id_name.encode()).digest()
        _href_hash = int.from_bytes(_href_hash, byteorder="big")
        bin_hash = bin(_href_hash)
        new_href = (
            bin_hash.replace("-", "*")
            .replace("0b", "")
            .replace("1", "*")
            .replace("0", ":")
        )
        # The leading "_" avoids problems with such names on Windows.
        new_href = f"_{new_href}{image_slim}.{_file_extension.lower()}"
        already_used = new_href in self.toc_rn.values()
        # Key on the function's own argument; the previous code used the
        # enclosing loop variable `href`, which only worked because the
        # helper happened to be called with that same value.
        self.toc_rn[_href] = new_href
        if already_used:
            logger.write(f"encrypt href: {_id}:{_href} -> {new_href} 重复")
        else:
            logger.write(f"encrypt href: {_id}:{_href} -> {new_href}")
        return new_href

    # Classify every manifest item by media type.
    for item_id, href, mime, properties in self.manifest_list:
        # A basename holding characters that are illegal in file names marks
        # the book as already obfuscated.
        if re.search(r'[\\/:*?"<>|]', href.rsplit("/")[-1]):
            self.encrypted = True
        if mime == "application/xhtml+xml":
            self.text_list.append(
                (item_id, href, properties, creatNewHerf(item_id, href))
            )
        elif mime == "text/css":
            self.css_list.append(
                (item_id, href, properties, creatNewHerf(item_id, href))
            )
        elif "image/" in mime:
            self.image_list.append(
                (item_id, href, properties, creatNewHerf(item_id, href))
            )
        elif "font/" in mime or href.lower().endswith((".ttf", ".otf", ".woff")):
            self.font_list.append(
                (item_id, href, properties, creatNewHerf(item_id, href))
            )
        elif "audio/" in mime:
            self.audio_list.append(
                (item_id, href, properties, creatNewHerf(item_id, href))
            )
        elif "video/" in mime:
            self.video_list.append(
                (item_id, href, properties, creatNewHerf(item_id, href))
            )
        elif self.tocid != "" and item_id == self.tocid:
            # The toc file keeps its name; only its book path is recorded.
            self.tocpath = opf_dir + "/" + href if opf_dir else href
        else:
            self.other_list.append(
                (item_id, href, mime, properties, creatNewHerf(item_id, href))
            )

    self._check_manifest_and_spine()
def _parse_metadata(self):
    """Extract the basic metadata fields from the OPF metadata element.

    Fills ``self.metadata`` with the keys ``title``, ``creator``,
    ``language``, ``subject``, ``source``, ``identifier`` and ``cover``.
    Every key starts as ``""``.  The first six take the text of the child
    element with the same namespace-less tag (the last occurrence wins);
    ``cover`` takes the ``content`` attribute of the ``meta`` element whose
    ``name`` attribute is ``"cover"``.
    """
    text_keys = ("title", "creator", "language", "subject", "source", "identifier")
    self.metadata = dict.fromkeys(text_keys + ("cover",), "")

    for meta in self.etree_opf["metadata"]:
        tag = re.sub(r"\{.*?\}", r"", meta.tag)
        if tag in text_keys:
            self.metadata[tag] = meta.text
        elif tag == "meta":
            # Only the name="cover" entry identifies the cover image.  The
            # previous check accepted any meta with name and content, so
            # unrelated entries overwrote the cover id.
            if meta.get("name") == "cover" and meta.get("content"):
                self.metadata["cover"] = meta.get("content")
def _clear_duplicate_id_href(self):
    """Remove manifest ids that point at an href already claimed by another id.

    ``self.href_to_id`` holds one id per lower-cased href, while
    ``self.id_to_href`` holds every id, so an id whose href maps back to a
    different id is a duplicate.  Ids referenced by the spine or recorded as
    the metadata cover are preferred when deciding which one to keep.  Each
    removed id is logged in ``self.errorOPF_log`` as ``("duplicate_id", id)``
    and deleted from ``self.id_to_href`` and ``self.id_to_h_m_p``.
    """

    # id_used = ids referenced by the spine, plus the cover id if there is one
    id_used = [x[0] for x in self.spine_list]
    if self.metadata["cover"]:
        id_used.append(self.metadata["cover"])

    del_id = []
    for id, href in self.id_to_href.items():
        if self.href_to_id[href] != id:  # this href has several ids and this one was overwritten
            if id in id_used and self.href_to_id[href] not in id_used:
                # This id is in use but the one currently registered is not:
                # drop the registered id and register this one instead.
                # NOTE(review): the guard tests `id`, while the value appended
                # is the other id - confirm the same id cannot be queued twice.
                if id not in del_id:
                    del_id.append(self.href_to_id[href])
                    self.href_to_id[href] = id
            elif id in id_used and self.href_to_id[href] in id_used:
                # Both ids are in use: keep both.
                continue
            else:
                # This id is not in use: drop it.
                if id not in del_id:
                    del_id.append(id)

    for id in del_id:
        self.errorOPF_log.append(("duplicate_id", id))
        del self.id_to_href[id]
        del self.id_to_h_m_p[id]
def _parse_hrefs_not_in_epub(self):
    """Drop manifest entries whose target file is absent from the archive.

    Each href in ``self.id_to_href`` is resolved against the OPF location
    with ``get_bookpath`` and compared case-insensitively with the archive's
    name list.  Entries with no matching file are removed from
    ``self.href_to_id``, ``self.id_to_href`` and ``self.id_to_h_m_p``.
    """
    # A set gives constant-time membership tests.
    archive_names = {name.lower() for name in self.epub.namelist()}

    missing_ids = [
        item_id
        for item_id, href in self.id_to_href.items()
        if get_bookpath(href, self.opfpath).lower() not in archive_names
    ]

    for item_id in missing_ids:
        # Two surviving ids can share one href, so the reverse entry may
        # already be gone; pop() tolerates that where `del` raised KeyError.
        self.href_to_id.pop(self.id_to_href[item_id], None)
        del self.id_to_href[item_id]
        del self.id_to_h_m_p[item_id]
r']*media-type="application/oebps-[^>]*/>', 385 | r'', 386 | metainf_data, 387 | ) 388 | self.tgt_epub.writestr( 389 | "META-INF/container.xml", 390 | bytes(metainf_data, encoding="utf-8"), 391 | zipfile.ZIP_DEFLATED, 392 | ) 393 | # OEBPS 394 | re_path_map = { 395 | "text": {}, 396 | "css": {}, 397 | "image": {}, 398 | "font": {}, 399 | "audio": {}, 400 | "video": {}, 401 | "other": {}, 402 | } # { ori_bkpath : re_basename } 403 | basename_log = { 404 | "text": [], 405 | "css": [], 406 | "image": [], 407 | "font": [], 408 | "audio": [], 409 | "video": [], 410 | "other": [], 411 | } 412 | lowerPath_to_originPath = ( 413 | {} 414 | ) # 如果路径大小写不一致,则登记为 { 小写路径 : 原始路径 } 415 | 416 | def auto_rename(id, href, ftype): 417 | filename, ext = path.splitext(path.basename(href)) 418 | filename_ = filename 419 | num = 0 420 | while filename_ + ext in basename_log[ftype]: 421 | num += 1 422 | filename_ = filename + "_" + str(num) 423 | basename = filename_ + ext 424 | basename_log[ftype].append(basename) 425 | return basename 426 | 427 | def check_link(filename, bkpath, href, self, target_id=""): 428 | if href == "" or href.startswith( 429 | ("http://", "https://", "res:/", "file:/", "data:") 430 | ): 431 | return None 432 | if bkpath.lower() in lowerPath_to_originPath.keys(): 433 | if bkpath != lowerPath_to_originPath[bkpath.lower()]: # 大小写不一致 434 | correct_path = lowerPath_to_originPath[bkpath.lower()] 435 | self.errorLink_log.setdefault(filename, []) 436 | self.errorLink_log[filename].append( 437 | (href + target_id, correct_path) 438 | ) 439 | bkpath = correct_path 440 | else: # 链接路径找不到对应文件 441 | self.errorLink_log.setdefault(filename, []) 442 | self.errorLink_log[filename].append((href + target_id, None)) 443 | return None 444 | return bkpath 445 | 446 | # xhtml文件,关联 toc文件,一切 xhtml中的元素 447 | for id, href, properties, newhref in self.text_list: 448 | bkpath = get_bookpath(href, self.opfpath) 449 | basename = auto_rename(id, newhref, "text") 450 | re_path_map["text"][bkpath] 
= basename 451 | lowerPath_to_originPath[bkpath.lower()] = bkpath 452 | 453 | # css 文件,关联 xhtml文件的link,css文件中的@import 454 | for id, href, properties, newhref in self.css_list: 455 | bkpath = get_bookpath(href, self.opfpath) 456 | basename = auto_rename(id, newhref, "css") 457 | re_path_map["css"][bkpath] = basename 458 | lowerPath_to_originPath[bkpath.lower()] = bkpath 459 | 460 | # 图片,关联css中的url,xhtml文件中的href 461 | for id, href, properties, newhref in self.image_list: 462 | bkpath = get_bookpath(href, self.opfpath) 463 | basename = auto_rename(id, newhref, "image") 464 | re_path_map["image"][bkpath] = basename 465 | lowerPath_to_originPath[bkpath.lower()] = bkpath 466 | # 字体,关联css中的url 467 | for id, href, properties, newhref in self.font_list: 468 | bkpath = get_bookpath(href, self.opfpath) 469 | basename = auto_rename(id, newhref, "font") 470 | re_path_map["font"][bkpath] = basename 471 | lowerPath_to_originPath[bkpath.lower()] = bkpath 472 | 473 | # 音频 474 | for id, href, properties, newhref in self.audio_list: 475 | bkpath = get_bookpath(href, self.opfpath) 476 | basename = auto_rename(id, newhref, "audio") 477 | re_path_map["audio"][bkpath] = basename 478 | lowerPath_to_originPath[bkpath.lower()] = bkpath 479 | 480 | # 视频 481 | for id, href, properties, newhref in self.video_list: 482 | bkpath = get_bookpath(href, self.opfpath) 483 | basename = auto_rename(id, newhref, "video") 484 | re_path_map["video"][bkpath] = basename 485 | lowerPath_to_originPath[bkpath.lower()] = bkpath 486 | 487 | # 其他文件 488 | for id, href, mime, properties, newhref in self.other_list: 489 | bkpath = get_bookpath(href, self.opfpath) 490 | basename = auto_rename(id, newhref, "other") 491 | re_path_map["other"][bkpath] = basename 492 | lowerPath_to_originPath[bkpath.lower()] = bkpath 493 | 494 | # xhtml文件 495 | for xhtml_bkpath, new_name in re_path_map["text"].items(): 496 | text = self.epub.read(xhtml_bkpath).decode("utf-8") 497 | if not text.startswith("\n' + text 499 | if not 
re.match(r"(?s).*)\n*", 502 | r'\1\n\n', 503 | text, 504 | 1, 505 | ) 506 | 507 | # 修改a[href] 508 | 509 | def re_href(match): 510 | href = match.group(3) 511 | href = unquote(href).strip() 512 | if "#" in href: 513 | href, target_id = href.split("#") 514 | target_id = "#" + target_id 515 | else: 516 | target_id = "" 517 | 518 | bkpath = get_bookpath(href, xhtml_bkpath) 519 | bkpath = check_link(xhtml_bkpath, bkpath, href, self, target_id) 520 | if not bkpath: 521 | return match.group() 522 | 523 | if href.lower().endswith( 524 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp") 525 | ): 526 | filename = re_path_map["image"][bkpath] 527 | return match.group(1) + "../Images/" + filename + match.group(4) 528 | elif href.lower().endswith(".css"): 529 | filename = re_path_map["css"][bkpath] 530 | return ( 531 | '' 534 | ) 535 | elif href.lower().endswith((".xhtml", ".html")): 536 | filename = re_path_map["text"][bkpath] 537 | return match.group(1) + filename + target_id + match.group(4) 538 | else: 539 | return match.group() 540 | 541 | text = re.sub(r"(<[^>]*href=([\'\"]))(.*?)(\2[^>]*>)", re_href, text) 542 | 543 | # 修改src 544 | def re_src(match): 545 | href = match.group(3) 546 | href = unquote(href).strip() 547 | bkpath = get_bookpath(href, xhtml_bkpath) 548 | bkpath = check_link(xhtml_bkpath, bkpath, href, self) 549 | if not bkpath: 550 | return match.group() 551 | 552 | if href.lower().endswith( 553 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg") 554 | ): 555 | filename = re_path_map["image"][bkpath] 556 | return match.group(1) + "../Images/" + filename + match.group(4) 557 | elif href.lower().endswith(".mp3"): 558 | filename = re_path_map["audio"][bkpath] 559 | return match.group(1) + "../Audio/" + filename + match.group(4) 560 | elif href.lower().endswith(".mp4"): 561 | filename = re_path_map["video"][bkpath] 562 | return match.group(1) + "../Video/" + filename + match.group(4) 563 | elif href.lower().endswith(".js"): 564 | filename = 
re_path_map["other"][bkpath] 565 | return match.group(1) + "../Misc/" + filename + match.group(4) 566 | else: 567 | return match.group() 568 | 569 | def re_poster(match): 570 | href = match.group(3) 571 | href = unquote(href).strip() 572 | bkpath = get_bookpath(href, xhtml_bkpath) 573 | bkpath = check_link(xhtml_bkpath, bkpath, href, self) 574 | if not bkpath: 575 | return match.group() 576 | if href.lower().endswith( 577 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg") 578 | ): 579 | filename = re_path_map["image"][bkpath] 580 | return match.group(1) + "../Images/" + filename + match.group(4) 581 | else: 582 | return match.group() 583 | 584 | text = re.sub(r"(<[^>]* src=([\'\"]))(.*?)(\2[^>]*>)", re_src, text) 585 | text = re.sub(r"(<[^>]* poster=([\'\"]))(.*?)(\2[^>]*>)", re_poster, text) 586 | 587 | # 修改 text 588 | def re_url(match): 589 | url = match.group(2) 590 | url = unquote(url).strip() 591 | bkpath = get_bookpath(url, xhtml_bkpath) 592 | bkpath = check_link(xhtml_bkpath, bkpath, url, self) 593 | if not bkpath: 594 | return match.group() 595 | 596 | if url.lower().endswith((".ttf", ".otf")): 597 | filename = re_path_map["font"][bkpath] 598 | return match.group(1) + "../Fonts/" + filename + match.group(3) 599 | elif url.lower().endswith( 600 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg") 601 | ): 602 | filename = re_path_map["image"][bkpath] 603 | return match.group(1) + "../Images/" + filename + match.group(3) 604 | else: 605 | return match.group() 606 | 607 | text = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_url, text) 608 | self.tgt_epub.writestr( 609 | "OEBPS/Text/" + new_name, 610 | bytes(text, encoding="utf-8"), 611 | zipfile.ZIP_DEFLATED, 612 | ) 613 | # css文件 614 | for css_bkpath, new_name in re_path_map["css"].items(): 615 | try: 616 | css = self.epub.read(css_bkpath).decode("utf-8") 617 | except: 618 | continue 619 | 620 | # 修改 @import 621 | def re_import(match): 622 | href = match.group(2) if match.group(2) else 
match.group(3) 623 | href = unquote(href).strip() 624 | if not href.lower().endswith(".css"): 625 | return match.group() 626 | bkpath = get_bookpath(href, css_bkpath) 627 | bkpath = check_link(css_bkpath, bkpath, href, self) 628 | if not bkpath: 629 | return match.group() 630 | filename = re_path_map.get("css", {}).get(bkpath, path.basename(href)) 631 | if match.group(2): 632 | return '@import "{}"'.format(filename) 633 | else: 634 | return '@import url("{}")'.format(filename) 635 | 636 | css = re.sub( 637 | r"@import +([\'\"])(.*?)\1|@import +url\([\'\"]?(.*?)[\'\"]?\)", 638 | re_import, 639 | css, 640 | ) 641 | 642 | # 修改 css的url 643 | def re_css_url(match): 644 | url = match.group(2) 645 | url = unquote(url).strip() 646 | bkpath = get_bookpath(url, css_bkpath) 647 | bkpath = check_link(css_bkpath, bkpath, url, self) 648 | if not bkpath: 649 | return match.group() 650 | if url.lower().endswith((".ttf", ".otf")): 651 | filename = re_path_map["font"][bkpath] 652 | return match.group(1) + "../Fonts/" + filename + match.group(3) 653 | elif url.lower().endswith( 654 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg") 655 | ): 656 | filename = re_path_map["image"][bkpath] 657 | return match.group(1) + "../Images/" + filename + match.group(3) 658 | else: 659 | return match.group() 660 | 661 | css = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_css_url, css) 662 | self.tgt_epub.writestr( 663 | "OEBPS/Styles/" + new_name, 664 | bytes(css, encoding="utf-8"), 665 | zipfile.ZIP_DEFLATED, 666 | ) 667 | # 图片 668 | for img_bkpath, new_name in re_path_map["image"].items(): 669 | try: 670 | img = self.epub.read(img_bkpath) 671 | except: 672 | continue 673 | self.tgt_epub.writestr( 674 | "OEBPS/Images/" + new_name, img, zipfile.ZIP_DEFLATED 675 | ) 676 | # 字体 677 | for font_bkpath, new_name in re_path_map["font"].items(): 678 | try: 679 | font = self.epub.read(font_bkpath) 680 | except: 681 | continue 682 | self.tgt_epub.writestr( 683 | "OEBPS/Fonts/" + new_name, font, 
zipfile.ZIP_DEFLATED 684 | ) 685 | # 音频 686 | for audio_bkpath, new_name in re_path_map["audio"].items(): 687 | try: 688 | audio = self.epub.read(audio_bkpath) 689 | except: 690 | continue 691 | self.tgt_epub.writestr( 692 | "OEBPS/Audio/" + new_name, audio, zipfile.ZIP_DEFLATED 693 | ) 694 | # 视频 695 | for video_bkpath, new_name in re_path_map["video"].items(): 696 | try: 697 | video = self.epub.read(video_bkpath) 698 | except: 699 | continue 700 | self.tgt_epub.writestr( 701 | "OEBPS/Video/" + new_name, video, zipfile.ZIP_DEFLATED 702 | ) 703 | # 其他 704 | for font_bkpath, new_name in re_path_map["other"].items(): 705 | try: 706 | other = self.epub.read(font_bkpath) 707 | except: 708 | continue 709 | self.tgt_epub.writestr( 710 | "OEBPS/Misc/" + new_name, other, zipfile.ZIP_DEFLATED 711 | ) 712 | 713 | # 读取文件并修改关联 714 | # toc文件 715 | if self.tocpath: 716 | toc = self.epub.read(self.tocpath).decode("utf-8") 717 | toc_dir = path.dirname(self.tocpath) 718 | 719 | def re_toc_href(match): 720 | href = match.group(2) 721 | href = unquote(href).strip() 722 | parts = href.split("#", 1) 723 | href_base = parts[0] 724 | target_id = "#" + parts[1] if len(parts) > 1 else "" 725 | href_base = ( 726 | self.toc_rn[href_base] if href_base in self.toc_rn else href_base 727 | ) 728 | bkpath = get_bookpath(href_base, self.tocpath) 729 | 730 | if not bkpath: 731 | return match.group() 732 | filename = path.basename(bkpath) 733 | return 'src="Text/' + filename + target_id + '"' 734 | 735 | toc = re.sub(r"src=([\'\"])(.*?)\1", re_toc_href, toc) 736 | self.tgt_epub.writestr( 737 | "OEBPS/toc.ncx", bytes(toc, encoding="utf-8"), zipfile.ZIP_DEFLATED 738 | ) 739 | 740 | # OPF 741 | manifest_text = "" 742 | 743 | for id, href, mime, prop in self.manifest_list: 744 | bkpath = get_bookpath(href, self.opfpath) 745 | prop_ = ' properties="' + prop + '"' if prop else "" 746 | if mime == "application/xhtml+xml": 747 | filename = re_path_map["text"][bkpath] 748 | manifest_text += f'\n ' 749 | elif 
def get_relpath(from_path, to_path):
    """Return the relative href that leads from one archive file to another.

    Both arguments are archive paths whose last component is a file name;
    either ``/`` or ``\\`` is accepted as separator. The result always uses
    ``/`` and is prefixed with one ``../`` for every directory that has to
    be left, e.g. ``("OEBPS/Text/a.xhtml", "OEBPS/Images/b.png")`` gives
    ``"../Images/b.png"``.
    """
    from_parts = re.split(r"[\\/]", from_path)
    to_parts = re.split(r"[\\/]", to_path)

    # Skip the shared leading directories, but never consume the final
    # component of either path: the previous pop-until-different loop raised
    # IndexError when the paths were identical or one was a prefix of the
    # other.
    shared = 0
    limit = min(len(from_parts), len(to_parts)) - 1
    while shared < limit and from_parts[shared] == to_parts[shared]:
        shared += 1

    climbs = len(from_parts) - shared - 1
    return "../" * climbs + "/".join(to_parts[shared:])


def get_bookpath(relative_path, refer_bkpath):
    """Resolve an href against the archive path of the file that contains it.

    Args:
        relative_path: The (already unquoted) href, usually taken from an
            ``href``/``src`` attribute.
        refer_bkpath: Archive path of the referring file.

    Returns:
        The archive path the href points to, joined with ``/``. Leading
        ``..`` segments that would climb above the archive root are dropped.
    """
    rel_parts = re.split(r"[\\/]", relative_path)
    refer_dirs = re.split(r"[\\/]", refer_bkpath)[:-1]  # drop the file name

    # Count and strip leading "..". The emptiness guard keeps an href made
    # only of ".." segments from raising IndexError.
    back_step = 0
    while rel_parts and rel_parts[0] == "..":
        back_step += 1
        rel_parts.pop(0)

    keep = max(len(refer_dirs) - back_step, 0)
    return "/".join(refer_dirs[:keep] + rel_parts)


def epub_sources():
    """Collect the EPUB files passed on the command line.

    Returns:
        ``sys.argv`` unchanged when no argument was given; otherwise a list
        whose first element is the directory of the running script, followed
        by every existing argument path with an ``.epub`` extension.
    """
    if len(sys.argv) <= 1:
        return sys.argv
    epub_srcs = [path.dirname(sys.argv[0])]
    for epub_src in sys.argv[1:]:
        ext = path.splitext(path.basename(epub_src))[1]
        if ext.lower() == ".epub" and path.exists(epub_src):
            epub_srcs.append(epub_src)
    return epub_srcs


def run(epub_src, output_path=None):
    """Process one EPUB and log every problem found on the way.

    Args:
        epub_src: Path of the EPUB to process.
        output_path: Optional output directory handed to
            ``EpubTool.set_output_path``.

    Returns:
        ``"skip"`` when the file is already marked as encrypted, ``0`` on
        success, or the caught exception object on failure.
    """
    try:
        logger.write(f"\n正在尝试加密EPUB: {epub_src}")
        if epub_src.lower().endswith("_encrypt.epub"):
            logger.write("警告: 该文件已加密,无需再次处理!")
            return "skip"
        epub = EpubTool(epub_src)
        epub.set_output_path(output_path)
        if epub.encrypted == True:  # noqa: E712 - keep the original comparison
            logger.write("警告: 该文件已加密,无需再次处理!")
            return "skip"
        epub.restructure()

        # For CSS files only the links that were auto-corrected are worth
        # reporting; CSS entries left without any such link are dropped.
        el = epub.errorLink_log.copy()
        del_keys = []
        for file_path, log in epub.errorLink_log.items():
            if file_path.lower().endswith(".css"):
                el[file_path] = [entry for entry in log if entry[1] is not None]
                if el[file_path] == []:
                    del_keys.append(file_path)
        for key in del_keys:
            del el[key]

        if epub.errorOPF_log:
            logger.write("-------在 OPF文件 发现问题------:")
            for error_type, error_value in epub.errorOPF_log:
                if error_type == "duplicate_id":
                    logger.write(
                        f"问题: 发现manifest节点内部存在重复ID {error_value} !!!"
                    )
                    logger.write("措施: 已自动清除重复ID对应的manifest项。")
                elif error_type == "invalid_idref":
                    logger.write(
                        f"问题: 发现spine节点内部存在无效引用ID {error_value} !!!"
                    )
                    logger.write(
                        "措施: 请自行检查spine内的itemref节点并手动修改,确保引用的ID存在于manifest的item项。\n大小写不一致也会导致引用无效。)"
                    )
                elif error_type == "xhtml_not_in_spine":
                    logger.write(
                        f"问题: 发现ID为 {error_value} 的文件manifest中登记为application/xhtml+xml类型,但不被spine节点的项所引用"
                    )
                    logger.write(
                        "措施: 自行检查该文件是否需要被spine引用。部分阅读器中,如果存在xhtml文件不被spine引用,可能导致epub无法打开。"
                    )

        if el:
            for file_path, log in el.items():
                basename = path.basename(file_path)
                logger.write(f"-----在 {basename} 发现问题链接-----:")
                for href, correct_path in log:
                    if correct_path is not None:
                        logger.write(
                            f"链接: {href}\n问题: 与实际文件名大小写不一致!\n措施: 程序已自动纠正链接。"
                        )
                    else:
                        logger.write(f"链接: {href}\n问题: 未能找到对应文件!!!")
    except Exception as e:
        logger.write(f"{epub_src} 重构EPUB失败: {e}")
        return e
    else:
        logger.write(f"{epub_src} 重构EPUB成功")
        return 0


def main():
    """Prompt for one EPUB path, process it and print the outcome.

    Entering ``e`` exits the program. A path that is not an existing file
    prints an error and returns so the caller can prompt again.
    """
    epub_src = input("【使用说明】请把EPUB文件拖曳到本窗口上(输入'e'退出)")
    epub_src = epub_src.strip("'").strip('"').strip()
    if epub_src.lower() == "e":
        print("程序已退出")
        sys.exit()
    if not os.path.isfile(epub_src):
        print("错误: 找不到指定的EPUB文件,请确认文件路径是否正确并重新输入!")
        return
    ret = run(epub_src)
    if isinstance(ret, Exception):
        # run() hands back the exception object itself; the former
        # comparison with the string "e" never matched, so failures were
        # reported as success.
        print("操作失败,请检查日志!")
    elif ret == "skip":
        print("已跳过该文件")
    else:
        print("操作成功!")
自动检查并提醒spine节点中引用无效ID的itemref项。\n" 944 | + "5、 自动检查并提醒manifest节点中xhtml类型文件不被spine节点引用的情况。\n" 945 | + "6、 自动检测并纠正实际文件名与对应的引用链接大小写不一致的问题。\n" 946 | + "7、 自动检测并提醒找不到对应文件的链接。\n" 947 | + "8、 加入名称混淆,使sigil无法打开修改。" 948 | ) 949 | while True: 950 | main() 951 | -------------------------------------------------------------------------------- /utils/encrypt_font.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | import os 3 | from bs4 import BeautifulSoup 4 | import tinycss2 5 | # from tinycss2 import parse_component_value_list 6 | import emoji 7 | import re 8 | from fontTools.ttLib import TTFont 9 | from fontTools.fontBuilder import FontBuilder 10 | from fontTools.pens.ttGlyphPen import TTGlyphPen 11 | from io import BytesIO 12 | import random 13 | import traceback 14 | import html 15 | from datetime import datetime 16 | 17 | try: 18 | from utils.log import logwriter 19 | except: 20 | from log import logwriter 21 | 22 | logger = logwriter() 23 | 24 | class FontEncrypt: 25 | 26 | def __init__(self, epub_path, output_path): 27 | if not os.path.exists(epub_path): 28 | raise Exception("EPUB文件不存在") 29 | 30 | self.epub_path = os.path.normpath(epub_path) 31 | self.epub = zipfile.ZipFile(epub_path) 32 | if output_path and os.path.exists(output_path): 33 | if os.path.isfile(output_path): 34 | raise Exception("输出路径不能是文件") 35 | if not os.path.exists(output_path): 36 | raise Exception(f"输出路径{output_path}不存在") 37 | else: 38 | output_path=os.path.dirname(epub_path) 39 | logger.write(f"输出路径不存在,使用默认路径: {output_path}") 40 | self.output_path = os.path.normpath(output_path) 41 | self.file_write_path=os.path.join(self.output_path, os.path.basename(self.epub_path).replace('.epub','_font_encrypt.epub')) 42 | if os.path.exists(self.file_write_path): 43 | os.remove(self.file_write_path) 44 | self.htmls = [] 45 | self.css = [] 46 | self.fonts = [] 47 | self.ori_files = [] 48 | self.missing_chars = [] 49 | self.font_to_font_family_mapping = {} 
50 | self.css_selector_to_font_mapping = {} 51 | self.font_to_char_mapping = {} 52 | # self.font_to_unchanged_file_mapping = {} 53 | self.target_epub = None 54 | for file in self.epub.namelist(): 55 | if file.lower().endswith('.html') or file.endswith('.xhtml'): 56 | self.htmls.append(file) 57 | elif file.lower().endswith('.css'): 58 | self.ori_files.append(file) 59 | self.css.append(file) 60 | elif file.lower().endswith((".ttf", ".otf", ".woff")): 61 | self.fonts.append(file) 62 | else: 63 | self.ori_files.append(file) 64 | 65 | 66 | def create_target_epub(self): 67 | self.target_epub = zipfile.ZipFile(self.file_write_path,"w", 68 | zipfile.ZIP_STORED,zipfile.ZIP_STORED,) 69 | 70 | def find_local_fonts_mapping(self): 71 | font_face_rules = [] 72 | for css in self.css: 73 | with self.epub.open(css) as f: 74 | content = f.read().decode('utf-8') 75 | rules = tinycss2.parse_stylesheet(content) 76 | # 遍历所有规则,查找 @font-face 77 | for rule in rules: 78 | all_count = 0 79 | if rule.type == 'at-rule' and rule.lower_at_keyword == 'font-face': 80 | tmp_font_face = tinycss2.serialize(rule.content) 81 | 82 | local_count, url_count = tmp_font_face.count( 83 | 'local'), tmp_font_face.count('url') 84 | all_count += local_count + url_count 85 | if all_count == 1: 86 | tmp_list = [] 87 | 88 | for item in tmp_font_face.split(';'): 89 | if len(item.strip()) > 0: 90 | tmp_list.append(item.strip()) 91 | font_face_rules.append(tmp_list) 92 | mapping = {} 93 | for font in self.fonts: 94 | font_name = os.path.basename(font) 95 | for j in font_face_rules: 96 | if font_name in j[1]: 97 | font_family = j[0].split(':')[1].strip().replace( 98 | '"', '').replace("'", '') 99 | mapping[font_family] = font 100 | self.font_to_font_family_mapping = mapping 101 | 102 | def find_selector_to_font_mapping(self): 103 | mapping = {} 104 | for css in self.css: 105 | with self.epub.open(css) as f: 106 | content = f.read().decode('utf-8') 107 | rules = tinycss2.parse_stylesheet(content) 108 | for rule in 
def remove_duplicates(self, s):
    """Return ``s`` with repeated characters removed, keeping first-seen order."""
    # dict preserves insertion order, so this is an ordered de-duplication.
    return "".join(dict.fromkeys(s))


def find_char_mapping(self):
    """Collect, per font file, the distinct characters rendered with it.

    Every HTML document in ``self.htmls`` is parsed and each selector in
    ``self.css_selector_to_font_mapping`` is applied to it; the text of all
    matched elements is accumulated under the selector's font file. The
    result is stored in ``self.font_to_char_mapping`` as
    ``{font_file: "distinct characters"}``.
    """
    mapping = {}
    for one_html in self.htmls:
        with self.epub.open(one_html) as f:
            content = f.read().decode('utf-8')
        soup = BeautifulSoup(content, 'html.parser')
        for css_selector, font_file in self.css_selector_to_font_mapping.items():
            matched_text = "".join(
                element.get_text(strip=True)
                for element in soup.select(css_selector)
            )
            mapping[font_file] = self.remove_duplicates(
                mapping.get(font_file, "") + matched_text
            )
    self.font_to_char_mapping = mapping


def get_mapping(self):
    """Build and log the three lookup tables used for font obfuscation.

    Returns:
        A tuple ``(font_to_font_family_mapping,
        css_selector_to_font_mapping, font_to_char_mapping)``.
    """
    self.find_local_fonts_mapping()
    self.find_selector_to_font_mapping()
    self.find_char_mapping()
    logger.write(f"字体文件映射: {self.font_to_font_family_mapping}")
    logger.write(f"CSS选择器映射: {self.css_selector_to_font_mapping}")
    logger.write(f"字体文件到字符映射: {self.font_to_char_mapping}")
    return (
        self.font_to_font_family_mapping,
        self.css_selector_to_font_mapping,
        self.font_to_char_mapping,
    )


def clean_text(self):
    """Reduce every collected character set to the characters to obfuscate.

    Line breaks and tabs are removed, then everything that is not a CJK
    ideograph (U+4E00-U+9FA5), an ASCII letter or a digit, then any
    remaining emoji. Each step works on the result of the previous one; the
    earlier version fed the untouched input to every step, so only the
    emoji removal took effect.
    """
    for font_file, text in self.font_to_char_mapping.items():
        # Strip line breaks and tabs.
        text = text.replace('\n', '').replace('\r', '').replace('\t', '')
        # Strip punctuation and other special characters.
        text = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9]', '', text)
        text = emoji.replace_emoji(text, replace='')
        self.font_to_char_mapping[font_file] = text
    logger.write(f"清理后的文本: {self.font_to_char_mapping}")


# Adapted from https://github.com/solarhell/fontObfuscator
def ensure_cmap_has_all_text(self, cmap: dict, s: str) -> tuple:
    """Split ``s`` into characters the font lacks and characters it covers.

    Args:
        cmap: Mapping whose keys are the code points the font provides.
        s: The characters that should be rendered with the font.

    Returns:
        ``(missing_chars, existing_chars)``: a list of the characters whose
        code point is not a key of ``cmap``, and a string of the remaining
        characters in their original order.
    """
    missing_chars = []
    existing_chars = []
    for char in s:
        if ord(char) in cmap:
            existing_chars.append(char)
        else:
            missing_chars.append(char)
    return missing_chars, ''.join(existing_chars)
def set_timestamps(self, font):
    """Log the font's creation/modification times and reset both to now.

    The ``head`` table stores ``created`` and ``modified`` as seconds since
    1904-01-01 00:00 UTC, not as Unix timestamps. The previous code read
    them with ``datetime.fromtimestamp`` directly and wrote a Unix
    timestamp back, so the logged dates were 66 years off and the stored
    value decoded to a date in the 1950s.

    Args:
        font: An opened font object exposing a ``head`` table with
            ``created`` and ``modified`` attributes.
    """
    seconds_1904_to_1970 = 2082844800  # offset between the two epochs
    stamp_format = '%Y-%m-%d %H:%M:%S'

    def describe(font_timestamp):
        # Corrupt fonts may carry values outside the platform's supported
        # range; fall back to the raw number instead of aborting.
        try:
            unix_seconds = font_timestamp - seconds_1904_to_1970
            return datetime.fromtimestamp(unix_seconds).strftime(stamp_format)
        except (OverflowError, OSError, ValueError):
            return str(font_timestamp)

    head_table = font['head']
    unix_now = int(datetime.now().timestamp())
    logger.write(
        f"原始时间戳: {describe(head_table.created)}, {describe(head_table.modified)}"
    )
    head_table.created = unix_now + seconds_1904_to_1970
    head_table.modified = unix_now + seconds_1904_to_1970
    logger.write(
        f"转换后时间戳 {datetime.fromtimestamp(unix_now).strftime(stamp_format)}"
    )
name_table.names: 273 | if record.nameID == 1: 274 | family_name = record.string.decode(record.getEncoding()) 275 | elif record.nameID == 2: 276 | style_name = record.string.decode(record.getEncoding()) 277 | 278 | if family_name and style_name: 279 | break 280 | if family_name is None: 281 | family_name = f'ETFamily_{i}' 282 | if style_name is None: 283 | style_name = 'Regular' 284 | 285 | NAME_STRING = { 286 | 'familyName': family_name, 287 | 'styleName': style_name, 288 | 'psName': family_name + '-' + style_name, 289 | 'copyright': 'Created by EpubTool', 290 | 'version': 'Version 1.0', 291 | 'vendorURL': 'https://EpubTool.com/', 292 | } 293 | original_cmap: dict = original_font.getBestCmap() 294 | miss_char, plain_text = self.ensure_cmap_has_all_text( 295 | original_cmap, plain_text) 296 | if len(miss_char) > 0: 297 | logger.write(f'字体文件{font_path}缺少字符{miss_char}') 298 | available_ranges= [ord(char) for char in plain_text] 299 | glyphs, metrics, cmap = {}, {}, {} 300 | private_codes = random.sample(range(0xAC00, 0xD7AF), len(plain_text)) 301 | cjk_codes = random.sample(available_ranges, len(plain_text)) 302 | 303 | glyph_set = original_font.getGlyphSet() 304 | pen = TTGlyphPen(glyph_set) 305 | glyph_order = original_font.getGlyphOrder() 306 | final_shadow_text: list = [] 307 | spescial_glyphs= ['null', '.notdef', 'minus', 'dotlessi','uni0307','quotesingle','zero.dnom','fraction','uni0237'] 308 | 309 | for special_glyph in spescial_glyphs: 310 | if special_glyph in glyph_order: 311 | glyph_set[special_glyph].draw(pen) 312 | glyphs[special_glyph] = pen.glyph() 313 | metrics[special_glyph] = original_font['hmtx'][special_glyph] 314 | final_shadow_text += [special_glyph] 315 | 316 | html_entities = [] 317 | 318 | for index, plain in enumerate(plain_text): 319 | try: 320 | shadow_cmap_name = original_cmap[cjk_codes[index]] 321 | except KeyError: 322 | logger.write(f"字体文件缺少字符,unicode:{cjk_codes[index]},请检查") 323 | 324 | final_shadow_text += [shadow_cmap_name] 325 | 
glyph_set[original_cmap[ord(plain)]].draw(pen) 326 | glyphs[shadow_cmap_name] = pen.glyph() 327 | metrics[shadow_cmap_name] = original_font['hmtx'][original_cmap[ord(plain)]] 328 | cmap[private_codes[index]] = shadow_cmap_name 329 | html_entities += [hex(private_codes[index]).replace('0x', '&#x')] 330 | 331 | horizontal_header = { 332 | 'ascent': original_font['hhea'].ascent, 333 | 'descent': original_font['hhea'].descent, 334 | } 335 | missing_glyphs = [glyph for glyph in final_shadow_text if glyph not in glyphs] 336 | if missing_glyphs: 337 | logger.write(f"以下字形在 glyphs 中缺失: {missing_glyphs}") 338 | for glyph in missing_glyphs: 339 | glyphs[glyph] = pen.glyph() 340 | metrics[glyph] = (0, 0) 341 | 342 | glyf_table = original_font['glyf'] 343 | glyphs_to_keep = set(glyphs.keys()) 344 | new_glyph_order = [glyph for glyph in glyph_order if glyph in glyphs_to_keep] 345 | original_font.setGlyphOrder(new_glyph_order) 346 | 347 | # 删除不必要的字形 348 | for glyph in glyph_order: 349 | if glyph not in glyphs_to_keep: 350 | if glyph in glyf_table.glyphs: 351 | del glyf_table.glyphs[glyph] 352 | if glyph in original_font['hmtx'].metrics: 353 | del original_font['hmtx'].metrics[glyph] 354 | loca_index = glyph_order.index(glyph) 355 | if 0 <= loca_index < len(original_font['loca'].locations): 356 | original_font['loca'].locations[loca_index] = 0 357 | 358 | # 更新 maxp 表 359 | original_font['maxp'].numGlyphs = len(new_glyph_order) 360 | 361 | self.set_timestamps(original_font) 362 | 363 | fb = FontBuilder(original_font['head'].unitsPerEm, isTTF=True) 364 | fb.setupGlyphOrder(new_glyph_order) 365 | fb.setupCharacterMap(cmap) 366 | fb.setupGlyf(glyphs) 367 | fb.setupHorizontalMetrics(metrics) 368 | fb.setupHorizontalHeader(**horizontal_header) 369 | fb.setupNameTable(NAME_STRING) 370 | fb.setupOS2() 371 | fb.setupPost() 372 | font_stream = BytesIO() 373 | fb.save(font_stream) 374 | # print(plain_text, html_entities) 375 | # print(f"write {font_path}") 376 | 377 | 
self.target_epub.writestr(font_path, font_stream.getvalue(),zipfile.ZIP_DEFLATED) 378 | text_list = list(plain_text) 379 | replace_table = {} 380 | for a0, a1 in zip(text_list, html_entities): 381 | replace_table[a0] = a1 382 | self.font_to_char_mapping[font_path] = replace_table 383 | logger.write(f"字体文件{font_path}的加密映射: \n{replace_table}") 384 | 385 | def close_file(self): 386 | self.epub.close() 387 | self.target_epub.close() 388 | 389 | 390 | def read_html(self): 391 | for one_html in self.htmls: 392 | with self.epub.open(one_html) as f: 393 | content = f.read().decode('utf-8') 394 | soup = BeautifulSoup(content, 'html.parser') 395 | 396 | for css_selector in self.css_selector_to_font_mapping.keys(): 397 | font_file = self.css_selector_to_font_mapping[css_selector] 398 | replace_table = self.font_to_char_mapping[font_file] 399 | trans_table = str.maketrans(replace_table) 400 | if '.' in css_selector: 401 | selector, selector_class = css_selector.split('.', 1) 402 | selector_tags = soup.find_all(selector, 403 | class_=selector_class) 404 | else: 405 | selector, selector_class = css_selector, None 406 | # print(selector, selector_class) 407 | selector_tags = soup.find_all(selector) 408 | for tag in selector_tags: 409 | ori_text = ''.join(str(item) for item in tag.contents) 410 | new_text = ori_text.translate(trans_table) 411 | parsed_new_text = BeautifulSoup( 412 | html.unescape(new_text), 'html.parser') 413 | # print(f"ori_text:{ori_text}\nnew_text:{new_text}") 414 | tag.clear() # 清空当前标签内容 415 | tag.append(parsed_new_text) # 插入新的内容 416 | # print(tag.get_text(strip=True)) 417 | formatted_html = soup.prettify(formatter="html") 418 | self.target_epub.writestr(one_html, formatted_html.encode('utf-8'),zipfile.ZIP_DEFLATED) 419 | for item in self.ori_files: 420 | if item in self.epub.namelist(): 421 | with self.epub.open(item) as f: 422 | content = f.read() 423 | self.target_epub.writestr(item, content,zipfile.ZIP_DEFLATED) 424 | self.close_file() 425 | 
logger.write(f"EPUB文件处理完成,输出文件路径: {self.file_write_path}") 426 | 427 | # def read_unchanged_fonts(self,font_file_mapping=None): 428 | # self.font_to_unchanged_file_mapping = font_file_mapping if font_file_mapping else {} 429 | 430 | def run_epub_font_encrypt(epub_path, output_path): 431 | logger.write(f"\n正在尝试加密EPUB字体: {epub_path}") 432 | fe = FontEncrypt(epub_path, output_path) 433 | if len(fe.fonts) == 0: 434 | logger.write("没有找到字体文件,退出") 435 | return "skip" 436 | logger.write(f"此EPUB文件包含{len(fe.fonts)}个字体文件: {', '.join(fe.fonts)}") 437 | fe.get_mapping() 438 | fe.clean_text() 439 | try: 440 | fe.encrypt_font() 441 | logger.write("字体加密成功") 442 | except Exception as e: 443 | logger.write(f"字体加密失败,错误信息: {e}") 444 | traceback.print_exc() 445 | fe.close_file() 446 | return f"字体加密失败,错误信息: {e}" 447 | try: 448 | fe.read_html() 449 | logger.write("EPUB文件处理成功") 450 | fe.close_file() 451 | except Exception as e: 452 | logger.write(f"EPUB文件处理失败,错误信息: {e}") 453 | fe.close_file() 454 | return f"EPUB文件处理失败,错误信息: {e}" 455 | return 0 456 | 457 | if __name__ == '__main__': 458 | epub_read_path = input("1、请输入EPUB文件路径(如: ./test.epub): ") 459 | 460 | file_write_dir = input( 461 | "2、请输入输出文件夹路径(如: ./dist): ") 462 | 463 | # epub_read_path= './crazy.epub' 464 | # file_write_dir = './dist' 465 | 466 | fe = FontEncrypt(epub_read_path, file_write_dir) 467 | fe.get_mapping() 468 | # the_font_file_mapping = {} 469 | print(f"3、此EPUB文件包含{len(fe.fonts)}个字体文件:\n{'\n'.join(fe.fonts)}") 470 | # for i,font_file in enumerate(fe.fonts): 471 | # if font_file in fe.font_to_char_mapping.keys(): 472 | # raw_input = None 473 | # while True: 474 | # raw_input= input( 475 | # f"3.{i+1}、请输入字体文件{font_file}对应的文件路径(如: ./font/font.ttf)或输入 Q/q 跳过: \n(若已对内嵌字体进行过字体子集化,请不要跳过此流程)\n") 476 | # if raw_input.lower() == 'q': 477 | # print(f"跳过{font_file}的映射") 478 | # break 479 | # raw_input = raw_input.strip() 480 | # raw_input = os.path.normpath(raw_input) 481 | # if os.path.exists(raw_input): 482 | # 
the_font_file_mapping[font_file] = raw_input 483 | # print(f"已将{font_file}映射到{raw_input}") 484 | # break 485 | # else: 486 | # print(f"文件{raw_input}不存在,请重新输入") 487 | # continue 488 | # fe.read_unchanged_fonts(the_font_file_mapping) 489 | fe.clean_text() 490 | try: 491 | fe.encrypt_font() 492 | print("4、字体加密成功") 493 | except Exception as e: 494 | print(f"4、字体加密失败,错误信息: {e}") 495 | traceback.print_exc() 496 | fe.close_file() 497 | exit(1) 498 | try: 499 | fe.read_html() 500 | print("5、EPUB文件处理成功") 501 | except Exception as e: 502 | print(f"5、EPUB文件处理失败,错误信息: {e}") 503 | fe.close_file() 504 | exit(1) 505 | -------------------------------------------------------------------------------- /utils/log.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | 5 | 6 | class logwriter: 7 | def __init__(self): 8 | self.path = os.path.join( 9 | os.path.dirname(os.path.abspath(sys.argv[0])), "log.txt" 10 | ) 11 | # print(self.path) 12 | with open(self.path, "w",encoding='utf-8') as f: 13 | current_time = time.strftime( 14 | "%Y-%m-%d %H:%M:%S", time.localtime(time.time()) 15 | ) 16 | f.write(f"time: {current_time}\n") 17 | 18 | def write(self, text): 19 | with open(self.path, "a", encoding='utf-8') as f: 20 | f.write(f"{text}\n") 21 | 22 | 23 | if __name__ == "__main__": 24 | log = logwriter() 25 | log.write("hello world") 26 | -------------------------------------------------------------------------------- /utils/reformat_epub.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/env python 3 | # 源码: sigil吧ID: 遥遥心航 4 | 5 | import zipfile 6 | import re, sys 7 | from os import path, mkdir, getcwd 8 | from urllib.parse import unquote 9 | from xml.etree import ElementTree 10 | import os 11 | try: 12 | from utils.log import logwriter 13 | except: 14 | from log import logwriter 15 | 16 | logger = logwriter() 17 | 18 | class EpubTool: 19 
def __init__(self, epub_src):
    """Open ``epub_src`` and parse its OPF package document.

    Args:
        epub_src: Path of the EPUB file to read.

    Raises:
        Whatever ``zipfile.ZipFile``, ``_init_opf`` or ``_parse_opf``
        raise. The archive handle is closed before the error propagates so
        a failed construction does not leave the file open.
    """
    self.epub = zipfile.ZipFile(epub_src)
    try:
        self.epub_src = epub_src
        self.epub_name = path.basename(epub_src)
        self.ebook_root = path.dirname(epub_src)
        self.output_path = self.ebook_root
        self.epub_type = ""
        self.temp_dir = ""
        self._init_namelist()
        self._init_mime_map()
        self._init_opf()
        self.manifest_list = []  # (id, opf_href, mime, properties)
        self.id_to_href = {}  # { id : href.lower, ... }
        self.href_to_id = {}  # { href.lower : id, ... }
        self.text_list = []  # (id, opf_href, properties)
        self.css_list = []  # (id, opf_href, properties)
        self.image_list = []  # (id, opf_href, properties)
        self.font_list = []  # (id, opf_href, properties)
        self.audio_list = []  # (id, opf_href, properties)
        self.video_list = []  # (id, opf_href, properties)
        self.spine_list = []  # (sid, linear, properties)
        self.other_list = []  # (id, opf_href, mime, properties)
        self.errorOPF_log = []  # (error_type, error_value)
        self.errorLink_log = {}  # { filepath : [(error_link, correct_link or None), ...] }
        self._parse_opf()
    except BaseException:
        self.epub.close()
        raise


def set_output_path(self, output_path):
    """Use ``output_path`` for output if it is an existing directory.

    Any other value (including ``None``) leaves the current output path
    untouched.
    """
    if output_path is not None and os.path.isdir(output_path):
        self.output_path = output_path


def _init_namelist(self):
    """Cache the archive's member names in ``self.namelist``."""
    self.namelist = self.epub.namelist()


def _init_mime_map(self):
    """Populate ``self.mime_map`` with the extension -> media-type table.

    Keys are lower-case extensions including the leading dot. ``.svg`` is
    included because ``_add_files_not_in_opf`` registers SVG files through
    this table and would otherwise fall back to ``text/plain``.
    """
    self.mime_map = {
        ".html": "application/xhtml+xml",
        ".xhtml": "application/xhtml+xml",
        ".css": "text/css",
        ".js": "application/javascript",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".bmp": "image/bmp",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
        ".svg": "image/svg+xml",
        ".ttf": "font/ttf",
        ".otf": "font/otf",
        ".woff": "font/woff",
        ".ncx": "application/x-dtbncx+xml",
        ".mp3": "audio/mpeg",
        ".mp4": "video/mp4",
        ".smil": "application/smil+xml",
        ".pls": "application/pls+xml",
    }
container.xml 读取 opf 文件 76 | container_xml = self.epub.read("META-INF/container.xml").decode("utf-8") 77 | rf = re.match(r']*full-path="(?i:(.*?\.opf))"', container_xml) 78 | if rf is not None: 79 | self.opfpath = rf.group(1) 80 | self.opf = self.epub.read(self.opfpath).decode("utf-8") 81 | return 82 | # 通过路径首个 opf 读取 opf 文件 83 | for bkpath in self.namelist: 84 | if bkpath.lower().endswith(".opf"): 85 | self.opfpath = bkpath 86 | self.opf = self.epub.read(self.opfpath).decode("utf-8") 87 | return 88 | raise RuntimeError("无法发现opf文件") 89 | 90 | def _parse_opf(self): 91 | self.etree_opf = {"package": ElementTree.fromstring(self.opf)} 92 | 93 | for child in self.etree_opf["package"]: 94 | tag = re.sub(r"\{.*?\}", r"", child.tag) 95 | self.etree_opf[tag] = child 96 | 97 | self._parse_metadata() 98 | self._parse_manifest() 99 | self._parse_spine() 100 | self._clear_duplicate_id_href() 101 | self._parse_hrefs_not_in_epub() 102 | self._add_files_not_in_opf() 103 | 104 | self.manifest_list = [] # (id,opf_href,mime,properties) 105 | for id in self.id_to_h_m_p: 106 | href, mime, properties = self.id_to_h_m_p[id] 107 | self.manifest_list.append((id, href, mime, properties)) 108 | 109 | epub_type = self.etree_opf["package"].get("version") 110 | 111 | if epub_type is not None and epub_type in ["2.0", "3.0"]: 112 | self.epub_type = epub_type 113 | else: 114 | raise RuntimeError("此脚本不支持该EPUB类型") 115 | 116 | # 寻找epub2 toc 文件的id。epub3的nav文件直接当做xhtml处理。 117 | self.tocpath = "" 118 | self.tocid = "" 119 | tocid = self.etree_opf["spine"].get("toc") 120 | self.tocid = tocid if tocid is not None else "" 121 | 122 | # opf item分类 123 | opf_dir = path.dirname(self.opfpath) 124 | for id, href, mime, properties in self.manifest_list: 125 | 126 | bkpath = opf_dir + "/" + href if opf_dir else href 127 | if mime == "application/xhtml+xml": 128 | self.text_list.append((id, href, properties)) 129 | elif mime == "text/css": 130 | self.css_list.append((id, href, properties)) 131 | elif "image/" in 
def _parse_metadata(self):
    """Extract the basic metadata fields from the OPF ``metadata`` element.

    ``self.metadata`` is rebuilt with the keys ``title``, ``creator``,
    ``language``, ``subject``, ``source``, ``identifier`` and ``cover``,
    each defaulting to ``""``. Element text is stored as-is, so it may be
    ``None`` for an empty element, and the last occurrence of a repeated
    element wins. ``cover`` is the ``content`` of
    ``<meta name="cover" content="..."/>``.
    """
    text_fields = (
        "title",
        "creator",
        "language",
        "subject",
        "source",
        "identifier",
    )
    self.metadata = dict.fromkeys(text_fields + ("cover",), "")

    for meta in self.etree_opf["metadata"]:
        # Drop the "{namespace}" prefix ElementTree puts in front of tags.
        tag = re.sub(r"\{.*?\}", r"", meta.tag)
        if tag in text_fields:
            self.metadata[tag] = meta.text
        elif tag == "meta":
            # Only the meta named "cover" carries the cover id. The earlier
            # check accepted any meta with both attributes, so unrelated
            # entries overwrote the cover id.
            if meta.get("name") == "cover" and meta.get("content"):
                self.metadata["cover"] = meta.get("content")
} 177 | self.href_to_id = {} # { href.lower : id, ...} 178 | if_error = False 179 | for item in self.etree_opf["manifest"]: 180 | # 检查opf文件中是否存在错误 181 | try: 182 | id = item.get("id") 183 | href = unquote(item.get("href")) 184 | except Exception as e: 185 | str_item = ( 186 | ElementTree.tostring(item, encoding="unicode") 187 | .replace("\n", "") 188 | .replace("\r", "") 189 | .replace("\t", "") 190 | ) 191 | logger.write(f"item:{str_item} error:{e}") 192 | if_error = True 193 | continue 194 | mime = item.get("media-type") 195 | properties = item.get("properties") if item.get("properties") else "" 196 | 197 | self.id_to_h_m_p[id] = (href, mime, properties) 198 | self.id_to_href[id] = href.lower() 199 | self.href_to_id[href.lower()] = id 200 | if if_error: 201 | logger.write("opf文件中存在错误,请检查!") 202 | 203 | def _parse_spine(self): 204 | self.spine_list = [] # [ (sid, linear, properties) , ... ] 205 | for itemref in self.etree_opf["spine"]: 206 | sid = itemref.get("idref") 207 | linear = itemref.get("linear") if itemref.get("linear") else "" 208 | properties = itemref.get("properties") if itemref.get("properties") else "" 209 | self.spine_list.append((sid, linear, properties)) 210 | 211 | def _clear_duplicate_id_href(self): 212 | 213 | # id_used = [ id_in_spine + cover_id ] 214 | id_used = [x[0] for x in self.spine_list] 215 | if self.metadata["cover"]: 216 | id_used.append(self.metadata["cover"]) 217 | 218 | del_id = [] 219 | for id, href in self.id_to_href.items(): 220 | if self.href_to_id[href] != id: # 该href拥有多个id,此id已被覆盖。 221 | if id in id_used and self.href_to_id[href] not in id_used: 222 | if id not in del_id: 223 | del_id.append(self.href_to_id[href]) 224 | self.href_to_id[href] = id 225 | elif id in id_used and self.href_to_id[href] in id_used: 226 | continue 227 | else: 228 | if id not in del_id: 229 | del_id.append(id) 230 | 231 | for id in del_id: 232 | self.errorOPF_log.append(("duplicate_id", id)) 233 | del self.id_to_href[id] 234 | del 
self.id_to_h_m_p[id] 235 | 236 | def _add_files_not_in_opf(self): 237 | 238 | hrefs_not_in_opf = [] 239 | for archive_path in self.namelist: 240 | if archive_path.lower().endswith( 241 | ( 242 | ".html", 243 | ".xhtml", 244 | ".css", 245 | ".jpg", 246 | ".jpeg", 247 | ".bmp", 248 | ".gif", 249 | ".png", 250 | ".webp", 251 | ".svg", 252 | ".ttf", 253 | ".otf", 254 | ".js", 255 | ".mp3", 256 | ".mp4", 257 | ".smil", 258 | ) 259 | ): 260 | opf_href = get_relpath(self.opfpath, archive_path) 261 | if opf_href.lower() not in self.href_to_id.keys(): 262 | hrefs_not_in_opf.append(opf_href) 263 | 264 | def allocate_id(href): # 自动分配不重复id 265 | basename = path.basename(href) 266 | if "A" <= basename[0] <= "Z" or "a" <= basename[0] <= "z": 267 | new_id = basename 268 | else: 269 | new_id = "x" + basename 270 | pre, suf = path.splitext(new_id) 271 | pre_ = pre 272 | i = 0 273 | while pre_ + suf in self.id_to_href.keys(): 274 | i += 1 275 | pre_ = pre + "_" + str(i) 276 | new_id = pre_ + suf 277 | return new_id 278 | 279 | for href in hrefs_not_in_opf: 280 | new_id = allocate_id(href) 281 | self.id_to_href[new_id] = href.lower() 282 | self.href_to_id[href.lower()] = new_id 283 | ext = path.splitext(href)[1] 284 | ext = ext.lower() 285 | try: 286 | mime = self.mime_map[ext] 287 | except KeyError: 288 | mime = "text/plain" 289 | self.id_to_h_m_p[new_id] = (href, mime, "") 290 | 291 | def _check_manifest_and_spine(self): 292 | spine_idrefs = [i for i, j, k in self.spine_list] 293 | 294 | for idref in spine_idrefs: 295 | if not self.id_to_h_m_p.get(idref): # spine 引用无效ID 296 | self.errorOPF_log.append(("invalid_idref", idref)) 297 | 298 | for mid, opf_href, mime, properties in self.manifest_list: 299 | if mime == "application/xhtml+xml": 300 | if mid not in spine_idrefs: 301 | self.errorOPF_log.append(("xhtml_not_in_spine", mid)) 302 | 303 | def _parse_hrefs_not_in_epub(self): 304 | del_id = [] 305 | namelist = [x.lower() for x in self.epub.namelist()] 306 | for id, href in 
def create_tgt_epub(self):
    """Open the output archive for the restructured book.

    The archive is created inside ``self.output_path`` and named after
    ``self.epub_name`` with ``_reformat`` inserted before the extension
    (``book.epub`` -> ``book_reformat.epub``).

    Returns:
        A ``zipfile.ZipFile`` opened for writing with ``ZIP_STORED`` as the
        default compression.
    """
    output_path = self.output_path
    logger.write(f"输出路径: {output_path}")
    # Split on the real extension rather than replacing the text ".epub":
    # the replace was case-sensitive, so "Book.EPUB" kept its name and the
    # output could be opened on top of the source file; it also rewrote
    # every ".epub" occurring in the middle of a name.
    stem, ext = path.splitext(self.epub_name)
    return zipfile.ZipFile(
        path.join(output_path, stem + "_reformat" + ext),
        "w",
        zipfile.ZIP_STORED,
    )
== "" or href.startswith( 378 | ("http://", "https://", "res:/", "file:/", "data:") 379 | ): 380 | return None 381 | if bkpath.lower() in lowerPath_to_originPath.keys(): 382 | if bkpath != lowerPath_to_originPath[bkpath.lower()]: # 大小写不一致 383 | correct_path = lowerPath_to_originPath[bkpath.lower()] 384 | self.errorLink_log.setdefault(filename, []) 385 | self.errorLink_log[filename].append( 386 | (href + target_id, correct_path) 387 | ) 388 | bkpath = correct_path 389 | else: # 链接路径找不到对应文件 390 | self.errorLink_log.setdefault(filename, []) 391 | self.errorLink_log[filename].append((href + target_id, None)) 392 | return None 393 | return bkpath 394 | 395 | # xhtml文件,关联 toc文件,一切 xhtml中的元素 396 | for id, href, properties in self.text_list: 397 | bkpath = get_bookpath(href, self.opfpath) 398 | basename = auto_rename(id, href, "text") 399 | re_path_map["text"][bkpath] = basename 400 | lowerPath_to_originPath[bkpath.lower()] = bkpath 401 | 402 | # css 文件,关联 xhtml文件的link,css文件中的@import 403 | for id, href, properties in self.css_list: 404 | bkpath = get_bookpath(href, self.opfpath) 405 | basename = auto_rename(id, href, "css") 406 | re_path_map["css"][bkpath] = basename 407 | lowerPath_to_originPath[bkpath.lower()] = bkpath 408 | 409 | # 图片,关联css中的url,xhtml文件中的href 410 | for id, href, properties in self.image_list: 411 | bkpath = get_bookpath(href, self.opfpath) 412 | basename = auto_rename(id, href, "image") 413 | re_path_map["image"][bkpath] = basename 414 | lowerPath_to_originPath[bkpath.lower()] = bkpath 415 | 416 | # 字体,关联css中的url 417 | for id, href, properties in self.font_list: 418 | bkpath = get_bookpath(href, self.opfpath) 419 | basename = auto_rename(id, href, "font") 420 | re_path_map["font"][bkpath] = basename 421 | lowerPath_to_originPath[bkpath.lower()] = bkpath 422 | 423 | # 音频 424 | for id, href, properties in self.audio_list: 425 | bkpath = get_bookpath(href, self.opfpath) 426 | basename = auto_rename(id, href, "audio") 427 | re_path_map["audio"][bkpath] = 
basename 428 | lowerPath_to_originPath[bkpath.lower()] = bkpath 429 | 430 | # 视频 431 | for id, href, properties in self.video_list: 432 | bkpath = get_bookpath(href, self.opfpath) 433 | basename = auto_rename(id, href, "video") 434 | re_path_map["video"][bkpath] = basename 435 | lowerPath_to_originPath[bkpath.lower()] = bkpath 436 | 437 | # 其他文件 438 | for id, href, mime, properties in self.other_list: 439 | bkpath = get_bookpath(href, self.opfpath) 440 | basename = auto_rename(id, href, "other") 441 | re_path_map["other"][bkpath] = basename 442 | lowerPath_to_originPath[bkpath.lower()] = bkpath 443 | 444 | # 读取文件并修改关联 445 | # toc文件 446 | if self.tocpath: 447 | toc = self.epub.read(self.tocpath).decode("utf-8") 448 | toc_dir = path.dirname(self.tocpath) 449 | 450 | def re_toc_href(match): 451 | href = match.group(2) 452 | href = unquote(href).strip() 453 | if "#" in href: 454 | href, target_id = href.split("#") 455 | target_id = "#" + target_id 456 | else: 457 | target_id = "" 458 | bkpath = get_bookpath(href, self.tocpath) 459 | bkpath = check_link(self.tocpath, bkpath, href, self, target_id) 460 | if not bkpath: 461 | return match.group() 462 | filename = path.basename(bkpath) 463 | return 'src="Text/' + filename + '"' + target_id 464 | 465 | toc = re.sub(r"src=([\'\"])(.*?)\1", re_toc_href, toc) 466 | self.tgt_epub.writestr( 467 | "OEBPS/toc.ncx", bytes(toc, encoding="utf-8"), zipfile.ZIP_DEFLATED 468 | ) 469 | 470 | # xhtml文件 471 | for xhtml_bkpath, new_name in re_path_map["text"].items(): 472 | text = self.epub.read(xhtml_bkpath).decode("utf-8") 473 | if not text.startswith("\n' + text 475 | if not re.match(r"(?s).*)\n*", 478 | r'\1\n\n', 479 | text, 480 | 1, 481 | ) 482 | # 修改a[href] 483 | 484 | def re_href(match): 485 | href = match.group(3) 486 | href = unquote(href).strip() 487 | if "#" in href: 488 | href, target_id = href.split("#") 489 | target_id = "#" + target_id 490 | else: 491 | target_id = "" 492 | 493 | bkpath = get_bookpath(href, xhtml_bkpath) 494 
| bkpath = check_link(xhtml_bkpath, bkpath, href, self, target_id) 495 | if not bkpath: 496 | return match.group() 497 | 498 | if href.lower().endswith( 499 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp") 500 | ): 501 | filename = re_path_map["image"][bkpath] 502 | return match.group(1) + "../Images/" + filename + match.group(4) 503 | elif href.lower().endswith(".css"): 504 | filename = re_path_map["css"][bkpath] 505 | return ( 506 | '' 509 | ) 510 | elif href.lower().endswith((".xhtml", ".html")): 511 | filename = re_path_map["text"][bkpath] 512 | return match.group(1) + filename + target_id + match.group(4) 513 | else: 514 | return match.group() 515 | 516 | text = re.sub(r"(<[^>]*href=([\'\"]))(.*?)(\2[^>]*>)", re_href, text) 517 | 518 | # 修改src 519 | def re_src(match): 520 | href = match.group(3) 521 | href = unquote(href).strip() 522 | bkpath = get_bookpath(href, xhtml_bkpath) 523 | bkpath = check_link(xhtml_bkpath, bkpath, href, self) 524 | if not bkpath: 525 | return match.group() 526 | 527 | if href.lower().endswith( 528 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg") 529 | ): 530 | filename = re_path_map["image"][bkpath] 531 | return match.group(1) + "../Images/" + filename + match.group(4) 532 | elif href.lower().endswith(".mp3"): 533 | filename = re_path_map["audio"][bkpath] 534 | return match.group(1) + "../Audio/" + filename + match.group(4) 535 | elif href.lower().endswith(".mp4"): 536 | filename = re_path_map["video"][bkpath] 537 | return match.group(1) + "../Video/" + filename + match.group(4) 538 | elif href.lower().endswith(".js"): 539 | filename = re_path_map["other"][bkpath] 540 | return match.group(1) + "../Misc/" + filename + match.group(4) 541 | else: 542 | return match.group() 543 | 544 | text = re.sub(r"(<[^>]* src=([\'\"]))(.*?)(\2[^>]*>)", re_src, text) 545 | 546 | # 修改 url 547 | def re_url(match): 548 | url = match.group(2) 549 | url = unquote(url).strip() 550 | bkpath = get_bookpath(url, xhtml_bkpath) 551 | bkpath = 
check_link(xhtml_bkpath, bkpath, url, self) 552 | if not bkpath: 553 | return match.group() 554 | 555 | if url.lower().endswith((".ttf", ".otf")): 556 | filename = re_path_map["font"][bkpath] 557 | return match.group(1) + "../Fonts/" + filename + match.group(3) 558 | elif url.lower().endswith( 559 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg") 560 | ): 561 | filename = re_path_map["image"][bkpath] 562 | return match.group(1) + "../Images/" + filename + match.group(3) 563 | else: 564 | return match.group() 565 | 566 | text = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_url, text) 567 | self.tgt_epub.writestr( 568 | "OEBPS/Text/" + new_name, 569 | bytes(text, encoding="utf-8"), 570 | zipfile.ZIP_DEFLATED, 571 | ) 572 | # css文件 573 | for css_bkpath, new_name in re_path_map["css"].items(): 574 | try: 575 | css = self.epub.read(css_bkpath).decode("utf-8") 576 | except: 577 | continue 578 | 579 | # 修改 @import 580 | def re_import(match): 581 | if match.group(2): 582 | href = match.group(2) 583 | else: 584 | href = match.group(3) 585 | href = unquote(href).strip() 586 | if not href.lower().endswith(".css"): 587 | return match.group() 588 | filename = path.basename(href) 589 | return '@import "' + filename + '"' 590 | 591 | css = re.sub( 592 | r"@import ([\'\"])(.*?)\1|@import url\([\'\"]?(.*?)[\'\"]?\)", 593 | re_import, 594 | css, 595 | ) 596 | 597 | # 修改 css的url 598 | def re_css_url(match): 599 | url = match.group(2) 600 | url = unquote(url).strip() 601 | bkpath = get_bookpath(url, css_bkpath) 602 | bkpath = check_link(css_bkpath, bkpath, url, self) 603 | if not bkpath: 604 | return match.group() 605 | if url.lower().endswith((".ttf", ".otf")): 606 | filename = re_path_map["font"][bkpath] 607 | return match.group(1) + "../Fonts/" + filename + match.group(3) 608 | elif url.lower().endswith( 609 | (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".svg") 610 | ): 611 | filename = re_path_map["image"][bkpath] 612 | return match.group(1) + "../Images/" + 
filename + match.group(3) 613 | else: 614 | return match.group() 615 | 616 | css = re.sub(r"(url\([\'\"]?)(.*?)([\'\"]?\))", re_css_url, css) 617 | self.tgt_epub.writestr( 618 | "OEBPS/Styles/" + new_name, 619 | bytes(css, encoding="utf-8"), 620 | zipfile.ZIP_DEFLATED, 621 | ) 622 | # 图片 623 | for img_bkpath, new_name in re_path_map["image"].items(): 624 | try: 625 | img = self.epub.read(img_bkpath) 626 | except: 627 | continue 628 | self.tgt_epub.writestr( 629 | "OEBPS/Images/" + new_name, img, zipfile.ZIP_DEFLATED 630 | ) 631 | # 字体 632 | for font_bkpath, new_name in re_path_map["font"].items(): 633 | try: 634 | font = self.epub.read(font_bkpath) 635 | except: 636 | continue 637 | self.tgt_epub.writestr( 638 | "OEBPS/Fonts/" + new_name, font, zipfile.ZIP_DEFLATED 639 | ) 640 | # 音频 641 | for audio_bkpath, new_name in re_path_map["audio"].items(): 642 | try: 643 | audio = self.epub.read(audio_bkpath) 644 | except: 645 | continue 646 | self.tgt_epub.writestr( 647 | "OEBPS/Audio/" + new_name, audio, zipfile.ZIP_DEFLATED 648 | ) 649 | # 视频 650 | for video_bkpath, new_name in re_path_map["video"].items(): 651 | try: 652 | video = self.epub.read(video_bkpath) 653 | except: 654 | continue 655 | self.tgt_epub.writestr( 656 | "OEBPS/Video/" + new_name, video, zipfile.ZIP_DEFLATED 657 | ) 658 | # 其他 659 | for font_bkpath, new_name in re_path_map["other"].items(): 660 | try: 661 | other = self.epub.read(font_bkpath) 662 | except: 663 | continue 664 | self.tgt_epub.writestr( 665 | "OEBPS/Misc/" + new_name, other, zipfile.ZIP_DEFLATED 666 | ) 667 | # OPF 668 | manifest_text = "" 669 | 670 | for id, href, mime, prop in self.manifest_list: 671 | bkpath = get_bookpath(href, self.opfpath) 672 | prop_ = ' properties="' + prop + '"' if prop else "" 673 | if mime == "application/xhtml+xml": 674 | filename = re_path_map["text"][bkpath] 675 | manifest_text += '\n '.format( 676 | id=id, href="Text/" + filename, mime=mime, prop=prop_ 677 | ) 678 | elif mime == "text/css": 679 | filename 
= re_path_map["css"][bkpath] 680 | manifest_text += '\n '.format( 681 | id=id, href="Styles/" + filename, mime=mime, prop=prop_ 682 | ) 683 | elif "image/" in mime: 684 | filename = re_path_map["image"][bkpath] 685 | manifest_text += '\n '.format( 686 | id=id, href="Images/" + filename, mime=mime, prop=prop_ 687 | ) 688 | elif "font/" in mime or href.lower().endswith((".ttf", ".otf", ".woff")): 689 | filename = re_path_map["font"][bkpath] 690 | manifest_text += '\n '.format( 691 | id=id, href="Fonts/" + filename, mime=mime, prop=prop_ 692 | ) 693 | elif "audio/" in mime: 694 | filename = re_path_map["audio"][bkpath] 695 | manifest_text += '\n '.format( 696 | id=id, href="Audio/" + filename, mime=mime, prop=prop_ 697 | ) 698 | elif "video/" in mime: 699 | filename = re_path_map["video"][bkpath] 700 | manifest_text += '\n '.format( 701 | id=id, href="Video/" + filename, mime=mime, prop=prop_ 702 | ) 703 | elif id == self.tocid: 704 | manifest_text += '\n '.format( 705 | id=id 706 | ) 707 | else: 708 | filename = re_path_map["other"][bkpath] 709 | manifest_text += '\n '.format( 710 | id=id, href="Misc/" + filename, mime=mime, prop=prop_ 711 | ) 712 | 713 | manifest_text += "\n " 714 | opf = re.sub(r"(?s).*?", manifest_text, self.opf, 1) 715 | 716 | def re_refer(match): 717 | href = match.group(3) 718 | href = unquote(href).strip() 719 | basename = path.basename(href) 720 | filename = unquote(basename) 721 | if not basename.endswith(".ncx"): 722 | return match.group(1) + "../Text/" + filename + match.group(4) 723 | else: 724 | return match.group() 725 | 726 | opf = re.sub(r"(]*href=([\'\"]))(.*?)(\2[^>]*/>)", re_refer, opf) 727 | self.tgt_epub.writestr( 728 | "OEBPS/content.opf", bytes(opf, encoding="utf-8"), zipfile.ZIP_DEFLATED 729 | ) 730 | self.tgt_epub.close() 731 | self.epub.close() 732 | 733 | 734 | # 相对路径计算函数 735 | def get_relpath(from_path, to_path): 736 | # from_path 和 to_path 都需要是绝对路径 737 | from_path = re.split(r"[\\/]", from_path) 738 | to_path = 
def get_bookpath(relative_path, refer_bkpath):
    """Resolve a relative link into a path from the archive root.

    Args:
        relative_path: The link as written in the referring file, usually
            an href.  ``/`` and ``\\`` are both accepted as separators.
        refer_bkpath: Archive path of the file that contains the link.

    Returns:
        The ``/``-joined archive path of the link target.  Leading ``..``
        segments climb out of the referring file's directory; when there
        are more of them than directory levels, the remainder of the link
        is returned as is.
    """
    relative_ = re.split(r"[\\/]", relative_path)
    refer_ = re.split(r"[\\/]", refer_bkpath)

    # Count and drop leading "..".  The emptiness guard matters for links
    # made only of ".." segments (e.g. ".."), which used to raise
    # IndexError once the list ran out.
    back_step = 0
    while relative_ and relative_[0] == "..":
        back_step += 1
        relative_.pop(0)

    # The referring file sits at the archive root: nothing to prepend.
    if len(refer_) <= 1:
        return "/".join(relative_)

    refer_.pop()  # drop the referring file's own name, keep its directory

    if back_step > len(refer_):
        return "/".join(relative_)

    # Climb back_step directory levels (a no-op when back_step is 0).
    del refer_[len(refer_) - back_step:]
    return "/".join(refer_ + relative_)
def main():
    """Prompt for one EPUB path, restructure it and report the outcome.

    Entering ``e`` exits the program.  A path that is not an existing file
    prints an error and returns so the caller can prompt again.
    """
    epub_src = input("【使用说明】请把EPUB文件拖曳到本窗口上(输入'e'退出): ")
    # Trim whitespace before the quotes: a dragged path may arrive as
    # "'...' " and the quote would otherwise survive behind the space.
    epub_src = epub_src.strip().strip("'").strip('"').strip()
    if epub_src.lower() == "e":
        print("程序已退出")
        sys.exit()
    if not os.path.isfile(epub_src):
        print("错误: 找不到指定的EPUB文件,请确认文件路径是否正确并重新输入!")
        return
    ret = run(epub_src)
    # run() returns "skip", 0 on success, or the caught exception object;
    # the old check against the string "e" never matched, so failures were
    # announced as success.
    if ret == "skip":
        print("已跳过该文件")
    elif ret == 0:
        print("操作成功!")
    else:
        print("操作失败,请检查日志!")
将epub目录结构规范化至sigil规范格式。\n" 872 | + "2、 将没有列入manifest项的epub有效文件自动列入manifest项。\n" 873 | + "3、 自动清除manifest中携带重复ID或多余ID的无效项。\n" 874 | + " 脚本将优先保留spine或metadata中关联的ID。\n" 875 | + "4、 自动检查并提醒spine节点中引用无效ID的itemref项。\n" 876 | + "5、 自动检查并提醒manifest节点中xhtml类型文件不被spine节点引用的情况。\n" 877 | + "6、 自动检测并纠正实际文件名与对应的引用链接大小写不一致的问题。\n" 878 | + "7、 自动检测并提醒找不到对应文件的链接。" 879 | ) 880 | while True: 881 | main() 882 | --------------------------------------------------------------------------------