├── .cross_platform ├── auto_mdx_builder.py └── ebook_utils.py ├── LICENSE ├── README.md ├── auto_mdx_builder.py ├── ebook_utils.py ├── func_lib.py ├── images ├── amb_folder.png ├── auto_split.png ├── img_dict_atmpl.gif ├── img_dict_btmpl.gif ├── imgs_order.png ├── index.png ├── index_all.png ├── settings.png ├── syns.png ├── text_dict_ctmpl.png ├── text_dict_dtmpl.gif ├── toc.png └── work_dir_tree.png ├── lib ├── FreePic2Pdf.ini ├── MuPDF_pcs.txt ├── PDFPatcher_AppConfig.json ├── Pdg2Pic.ini ├── atmpl.css ├── auto_split_2.css ├── bkmk │ ├── FreePic2Pdf.itf │ └── FreePic2Pdf_bkmk.txt ├── bkmk_utf16le │ ├── FreePic2Pdf.itf │ └── FreePic2Pdf_bkmk.txt ├── btmpl.css ├── build.toml ├── ctmpl.css └── dtmpl.css ├── requirements.txt ├── settings.py └── templates ├── __init__.py ├── img_dict_atmpl.py ├── img_dict_btmpl.py ├── text_dict_ctmpl.py └── text_dict_dtmpl.py /.cross_platform/auto_mdx_builder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:17 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.5 7 | 8 | import logging 9 | import traceback 10 | import os 11 | import re 12 | import shutil 13 | from colorama import Fore, just_fix_windows_console 14 | from settings import Settings 15 | from func_lib import FuncLib 16 | from img_dict_atmpl import ImgDictAtmpl 17 | from img_dict_btmpl import ImgDictBtmpl 18 | from text_dict_ctmpl import TextDictCtmpl 19 | from text_dict_dtmpl import TextDictDtmpl 20 | from ebook_utils import EbookUtils 21 | 22 | 23 | class AutoMdxBuilder: 24 | """图像词典制作程序""" 25 | def __init__(self): 26 | self.settings = Settings() 27 | self.func = FuncLib(self) 28 | self.utils = EbookUtils(self) 29 | 30 | def auto_processing(self, sel): 31 | """ 根据选择自动处理 """ 32 | if sel == 1: 33 | # --- 解包 mdx/mdd 文件 --- 34 | mfile = input("请输入要解包的 mdx/mdd 文件路径: ").strip('"') 35 | if self.utils.export_mdx(mfile): 36 | print(Fore.GREEN + "\n已输出在同目录下: " + Fore.RESET + os.path.splitext(mfile)[0]) 37 | elif sel == 2: 38 | # --- 将源 txt 文件打包成 mdx 文件 --- 39 | file_final_txt = input("请输入要打包的 txt 文件路径: ").strip('"') 40 | if self.func.text_file_check(file_final_txt) == 2: 41 | # 检查数据文件夹 42 | dir_curr, fname_txt = os.path.split(file_final_txt) 43 | dir_data = os.path.join(dir_curr, 'data') 44 | if not os.path.exists(dir_data): 45 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"文件夹 {dir_data} 不存在, 已默认不打包 mdd") 46 | dir_data = None 47 | elif os.path.exists(dir_data) and len(os.listdir(dir_data)) == 0: 48 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"文件夹 {dir_data} 为空, 已默认不打包 mdd") 49 | dir_data = None 50 | # 生成 info.html 51 | file_info_raw = None 52 | for fname in os.listdir(dir_curr): 53 | if fname == 'info.html': 54 | file_info_raw = os.path.join(dir_curr, fname) 55 | elif fname.endswith('.html') and fname.startswith(os.path.splitext(fname_txt)[0]): 56 | file_info_raw = os.path.join(dir_curr, fname) 57 | break 58 | file_dict_info = self.func.generate_info_html(os.path.splitext(fname_txt)[0], file_info_raw, None) 59 | # 打包 60 | print('\n------------------\n开始打包……\n') 61 | done_flg = self.utils.pack_to_mdict(file_final_txt, file_dict_info, dir_data, dir_curr) 62 | if done_flg: 63 | print(Fore.GREEN + "\n打包完毕。" + Fore.RESET) 64 | else: 65 | print(Fore.RED + "\n材料检查不通过, 请确保材料准备无误再执行程序" + Fore.RESET) 66 | elif sel == 3: 67 | # --- 将资料包文件夹打包成 mdd 文件 --- 68 | dir_data = input("请输入要打包的资料文件夹路径: ").strip('"\\').rstrip('/') 69 | dir_data = 
dir_data.rstrip('\\') 70 | dir_data = dir_data.rstrip('/') 71 | print('\n------------------\n开始打包……\n') 72 | done_flg = self.utils.pack_to_mdd(dir_data, None) 73 | if done_flg: 74 | print(Fore.GREEN + "\n打包完毕。" + Fore.RESET) 75 | # elif sel == 10: 76 | # # --- 从 PDF文件/pdg文件夹 生成预备原材料 --- 77 | # p = input("请输入 pdf文件/pdg文件夹 路径: ").strip('"\\').rstrip('/') 78 | # if os.path.isfile(p) and os.path.splitext(p)[1] == '.pdf': 79 | # self.pdf_to_amb(p) 80 | # elif os.path.isdir(p): 81 | # self.pdf_to_amb(p, False) 82 | # else: 83 | # print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 84 | elif sel == 11: 85 | # --- 从 toc_all.txt 生成 index_all.txt --- 86 | file_toc_all = input("请输入 toc_all.txt 的文件路径: ").strip('"') 87 | file_index_all = os.path.join(os.path.split(file_toc_all)[0], 'index_all.txt') 88 | if self.func.toc_all_to_index(file_toc_all, file_index_all): 89 | print(Fore.GREEN + "\n处理完成, 生成在同目录下" + Fore.RESET) 90 | else: 91 | print(Fore.RED + "\n文件检查不通过, 请确保文件准备无误再执行程序" + Fore.RESET) 92 | elif sel == 12: 93 | # --- 合并 toc.txt 和 index.txt 为 index_all.txt --- 94 | file_toc = input("(1) 请输入 toc.txt 的文件路径: ").strip('"') 95 | file_index = input("(2) 请输入 index.txt 的文件路径: ").strip('"') 96 | file_index_all = os.path.join(os.path.split(file_index)[0], 'index_all.txt') 97 | self.func.merge_to_index_all(file_toc, file_index, file_index_all) 98 | elif sel == 20: 99 | # --- 生成词典 --- 100 | p = input("请输入原材料文件夹路径或 build.toml 文件路径: ").strip('"\\').rstrip('/') 101 | if os.path.split(p)[1] == 'build.toml': 102 | if self.settings.load_build_toml(p, False, False): 103 | self._build_mdict() 104 | elif os.path.isdir(p): 105 | file_toml = os.path.join(p, 'build.toml') 106 | if os.path.isfile(file_toml): 107 | if self.settings.load_build_toml(file_toml, False, True): 108 | self._build_mdict() 109 | else: 110 | print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 build.toml 文件") 111 | else: 112 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 113 | elif sel == 30: 114 | # --- 从词典还原原材料 --- 115 | p = input("请输入词典的文件夹或 mdx/mdd 文件路径: ").strip('"\\').rstrip('/') 116 | if os.path.isfile(p) and os.path.splitext(p)[1] == '.mdx': 117 | self._restore_raw(p, False) 118 | elif os.path.isfile(p) and os.path.splitext(p)[1] == '.mdd': 119 | if os.path.isfile(p[:-1]+'x'): 120 | self._restore_raw(p[:-1]+'x', False) 121 | elif os.path.isdir(p): 122 | for m in os.listdir(p): 123 | if m.endswith('.mdx'): 124 | self._restore_raw(os.path.join(p, m), True) 125 | break 126 | else: 127 | print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 mdx 文件") 128 | else: 129 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 130 | # elif sel == 31: 131 | # # --- 从原材料还原 PDF --- 132 | # p = input("请输入原材料文件夹路径或 build.toml 文件路径: ").strip('"\\').rstrip('/') 133 | # if os.path.split(p)[1] == 'build.toml': 134 | # if self.settings.load_build_toml(p, True): 135 | # self.amb_to_pdf(file_toml, False) 136 | # elif os.path.isdir(p): 137 | # file_toml = os.path.join(p, 'build.toml') 138 | # if os.path.isfile(file_toml): 139 | # if self.settings.load_build_toml(file_toml, True): 140 | # self.amb_to_pdf(file_toml, True) 141 | # else: 142 | # print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 build.toml 文件") 143 | # else: 144 | # print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 145 | elif sel == 32: 146 | # --- 从 index_all.txt 还原 toc_all.txt --- 147 | file_index_all = input("请输入 index_all.txt 的文件路径: ").strip('"') 148 | file_toc_all = os.path.join(os.path.split(file_index_all)[0], 'toc_all.txt') 149 | if self.func.index_to_toc(file_index_all, file_toc_all): 150 | 
print(Fore.GREEN + "\n处理完成, 生成在同目录下" + Fore.RESET) 151 | else: 152 | print(Fore.RED + "\n文件检查不通过, 请确保所有词目都有对应页码" + Fore.RESET) 153 | elif sel == 41: 154 | # --- 从 PDF 提取图片 (MuPDF) --- 155 | p = input("请输入 PDF 文件路径: ").strip('"\\').rstrip('/') 156 | if os.path.isfile(p) and p.lower().endswith('.pdf'): 157 | fname = os.path.split(p)[1] 158 | out_dir = os.path.join(os.path.split(p)[0], fname.split('.')[0]) 159 | self.utils.extract_pdf_to_imgs_fitz(p, out_dir) 160 | else: 161 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 162 | elif sel == 42: 163 | # --- 将 PDF 转换成图片 (MuPDF) --- 164 | p = input("请输入 PDF 文件路径: ").strip('"\\').rstrip('/') 165 | if os.path.isfile(p) and p.lower().endswith('.pdf'): 166 | fname = os.path.split(p)[1] 167 | out_dir = os.path.join(os.path.split(p)[0], fname.split('.')[0]) 168 | dpi = input("请输入要生成图片的 DPI(回车则默认300): ") 169 | if re.match(r'^\d+$', dpi): 170 | self.utils.convert_pdf_to_imgs_fitz(p, out_dir, int(dpi)) 171 | else: 172 | self.utils.convert_pdf_to_imgs_fitz(p, out_dir) 173 | else: 174 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 175 | # elif sel == 43: 176 | # # --- 将 图片 合成 PDF (MuPDF) --- 177 | # p = input("请输入图片所在文件夹路径: ").strip('"\\').rstrip('/') 178 | # if os.path.isdir(p): 179 | # out_file = p+'.pdf' 180 | # self.utils.combine_img_to_pdf(p, out_file) 181 | # else: 182 | # print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 183 | # elif sel == 44: 184 | # # --- PDF 书签导出/导入(FreePic2Pdf) --- 185 | # file_pdf = input("请输入 PDF 文件路径: ").strip('"\\').rstrip('/') 186 | # dir_bkmk = input("请输入书签文件夹路径(导出则直接回车): ").strip('"\\').rstrip('/') 187 | # if os.path.isdir(dir_bkmk): 188 | # self.utils.eximport_bkmk_fp2p(file_pdf, dir_bkmk, False) 189 | # elif dir_bkmk is None or len(dir_bkmk) == 0: 190 | # fname = os.path.split(file_pdf)[1] 191 | # dir_bkmk = os.path.join(os.path.split(file_pdf)[0], fname.split('.')[0]+'_bkmk') 192 | # self.utils.eximport_bkmk_fp2p(file_pdf, dir_bkmk) 193 | # else: 194 | # print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 195 | else: 196 | pass 197 | 198 | def _build_mdict(self): 199 | done_flg = False 200 | if self.settings.templ_choice in ('a', 'A'): 201 | """ 制作图像词典 (模板A) """ 202 | # 生成 txt 源文本 203 | proc_flg, file_final_txt, dir_imgs_out, file_dict_info = ImgDictAtmpl(self).make_source_file() 204 | if proc_flg: 205 | # 创建输出文件夹 206 | if not os.path.exists(self.settings.dir_output): 207 | os.makedirs(self.settings.dir_output) 208 | # 拷贝模板 css 文件 209 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_atmpl) 210 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 211 | shutil.copy(file_css_tmpl, file_css) 212 | # 开始打包 213 | print('\n------------------\n开始打包……\n') 214 | done_flg = self.utils.pack_to_mdict(file_final_txt, file_dict_info, dir_imgs_out, self.settings.dir_output) 215 | elif self.settings.templ_choice in ('b', 'B'): 216 | """ 制作图像词典 (模板B) """ 217 | # 生成 txt 源文本 218 | proc_flg, file_final_txt, dir_imgs_out, file_dict_info = ImgDictBtmpl(self).make_source_file() 219 | if proc_flg: 220 | # 创建输出文件夹 221 | if not os.path.exists(self.settings.dir_output): 222 | os.makedirs(self.settings.dir_output) 223 | # 拷贝模板 css 文件 224 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_btmpl) 225 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 226 | shutil.copy(file_css_tmpl, file_css) 227 | # 开始打包 228 | print('\n------------------\n开始打包……\n') 229 | done_flg = self.utils.pack_to_mdict(file_final_txt, file_dict_info, dir_imgs_out, self.settings.dir_output) 230 | elif 
self.settings.templ_choice in ('c', 'C'): 231 | """ 制作文本词典 (模板C) """ 232 | # 生成 txt 源文本 233 | proc_flg, file_final_txt, file_dict_info = TextDictCtmpl(self).make_source_file() 234 | if proc_flg: 235 | # 创建输出文件夹 236 | if not os.path.exists(self.settings.dir_output): 237 | os.makedirs(self.settings.dir_output) 238 | # 拷贝模板 css 文件 239 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_ctmpl) 240 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 241 | shutil.copy(file_css_tmpl, file_css) 242 | # 开始打包 243 | print('\n------------------\n开始打包……\n') 244 | dir_data = os.path.join(self.settings.dir_input, self.settings.dname_data) 245 | if not os.path.exists(dir_data) or len(os.listdir(dir_data)) == 0: 246 | dir_data = None 247 | done_flg = self.utils.pack_to_mdict(file_final_txt, file_dict_info, dir_data, self.settings.dir_output) 248 | elif self.settings.templ_choice in ('d', 'D'): 249 | """ 制作文本词典 (模板D) """ 250 | # 生成 txt 源文本 251 | proc_flg, file_final_txt, file_dict_info = TextDictDtmpl(self).make_source_file() 252 | if proc_flg: 253 | # 创建输出文件夹 254 | if not os.path.exists(self.settings.dir_output): 255 | os.makedirs(self.settings.dir_output) 256 | # 拷贝模板 css 文件 257 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_dtmpl) 258 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 259 | shutil.copy(file_css_tmpl, file_css) 260 | # 开始打包 261 | print('\n------------------\n开始打包……\n') 262 | dir_data = os.path.join(self.settings.dir_input, self.settings.dname_data) 263 | if not os.path.exists(dir_data) or len(os.listdir(dir_data)) == 0: 264 | dir_data = None 265 | done_flg = self.utils.pack_to_mdict(file_final_txt, file_dict_info, dir_data, self.settings.dir_output) 266 | else: 267 | pass 268 | if done_flg: 269 | print("\n打包完毕。" + Fore.GREEN + "\n\n恭喜, 词典已生成!" + Fore.RESET) 270 | 271 | def _restore_raw(self, xfile, outside_flg): 272 | """ 将词典还原为原材料 """ 273 | # 1.准备参数 274 | extract_flg = False 275 | dict_name = None 276 | templ_choice = None 277 | dir_input, fname = os.path.split(xfile) 278 | # 2.分析 mdx 文件 279 | tmp_restore = os.path.join(self.settings.dir_output_tmp, 'restore') 280 | if not os.path.exists(tmp_restore): 281 | os.makedirs(tmp_restore) 282 | tmp_xfile = os.path.join(tmp_restore, fname) 283 | tmp_xdir = os.path.splitext(tmp_xfile)[0] 284 | if os.path.exists(tmp_xdir): 285 | shutil.rmtree(tmp_xdir) 286 | shutil.copy(xfile, tmp_xfile) 287 | if self.utils.export_mdx(tmp_xfile): 288 | tmp_final_txt = os.path.join(tmp_xdir, fname.split('.')[0]+'.txt') 289 | # 分析 info 信息, 确定是否支持还原 290 | for f in os.listdir(tmp_xdir): 291 | fp = os.path.join(tmp_xdir, f) 292 | text = '' 293 | if fp.endswith('.info.html'): 294 | with open(fp, 'r', encoding='utf-8') as fr: 295 | pat = re.compile(r'

([^><]*?), built with AutoMdxBuilder[^><]*?based on template ([A-D])\.
', flags=re.I) 296 | text = fr.read() 297 | if pat.search(text): 298 | # 符合条件, 支持还原 299 | dict_name = pat.search(text).group(1) 300 | templ_choice = pat.search(text).group(2) 301 | text = pat.sub('', text) 302 | extract_flg = True 303 | break 304 | # 3.开始提取 305 | if extract_flg: 306 | # 创建目标文件夹 307 | if outside_flg: 308 | out_dir = os.path.join(os.path.split(dir_input)[0], fname.split('.')[0]) + '_amb' 309 | else: 310 | out_dir = os.path.splitext(xfile)[0] + '_amb' 311 | if not os.path.exists(out_dir): 312 | os.makedirs(out_dir) 313 | # 提取 info.html 314 | if not re.match(r'^\s*$', text): 315 | with open(os.path.join(out_dir, 'info.html'), 'w', encoding='utf-8') as fw: 316 | fw.write(text) 317 | # 提取 index, index_all, syns 等信息 318 | if tmp_final_txt: 319 | # 选择函数进行处理 320 | if templ_choice == 'A': 321 | ImgDictAtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name) 322 | elif templ_choice == 'B': 323 | ImgDictBtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name) 324 | elif templ_choice == 'C': 325 | TextDictCtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name) 326 | elif templ_choice == 'D': 327 | TextDictDtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name) 328 | # 处理 mdd 329 | file_mdd = os.path.splitext(xfile)[0] + '.mdd' 330 | if os.path.isfile(file_mdd) and templ_choice in ('A', 'B'): 331 | dir_data = os.path.join(out_dir, "imgs") 332 | if os.path.exists(dir_data): 333 | shutil.rmtree(dir_data) 334 | self.utils.mdict(['-x', file_mdd, '-d', dir_data]) 335 | elif os.path.isfile(file_mdd) and templ_choice in ('C', 'D'): 336 | dir_data = os.path.join(out_dir, "data") 337 | if os.path.exists(dir_data): 338 | shutil.rmtree(dir_data) 339 | self.utils.mdict(['-x', file_mdd, '-d', dir_data]) 340 | else: 341 | print(Fore.YELLOW + "WARN: " + Fore.RESET + "同路径下未找到相应的 mdd 文件, 将不会生成 imgs/data 文件夹") 342 | print(Fore.GREEN + "\n已提取原材料至目录: " + Fore.RESET + out_dir) 343 | else: 344 | print(Fore.RED + "ERROR: " + Fore.RESET + "词典并非由 AutoMdxBuilder 制作, 不支持还原") 345 | shutil.rmtree(tmp_restore) 346 | 347 | # def pdf_to_amb(self, input_path, pdf_flg=True): 348 | # """ 从 PDF文件/pdg文件夹 生成 amb 文件夹 """ 349 | # # 0.准备路径相关 350 | # dir_bkmk = os.path.join(self.settings.dir_output_tmp, 'bkmk') 351 | # if not os.path.exists(dir_bkmk): 352 | # os.makedirs(dir_bkmk) 353 | # # 开始处理 354 | # if pdf_flg: 355 | # fname = os.path.split(input_path)[1] 356 | # out_dir = os.path.join(os.path.split(input_path)[0], fname.split('.')[0]+'_amb') 357 | # if not os.path.exists(out_dir): 358 | # os.makedirs(out_dir) 359 | # # 1.导出书签 360 | # cur_path = os.getcwd() 361 | # self.utils.eximport_bkmk_fp2p(input_path, os.path.join(cur_path, dir_bkmk)) 362 | # try: 363 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'r', encoding='utf-16le') as fr: 364 | # text = fr.read() 365 | # line_num = len(re.findall(r'^', text, flags=re.M)) 366 | # if line_num <= 3: 367 | # print(Fore.YELLOW + "INFO: " + Fore.RESET + "未识别到目录, 将不会生成 toc.txt") 368 | # else: 369 | # with open(os.path.join(out_dir, 'toc.txt'), 'w', encoding='utf-8') as fw: 370 | # fw.write(text) 371 | # if line_num > 500: 372 | # print(Fore.YELLOW + "INFO: " + Fore.RESET + "书签超过 500 行, 请后续确认是否包含索引, 是的话建议改名为 toc_all.txt") 373 | # except UnicodeDecodeError: 374 | # shutil.copy(os.path.join(dir_bkmk, "FreePic2Pdf_bkmk.txt"), os.path.join(out_dir, "[utf-16]toc.txt")) 375 | # print(Fore.YELLOW + "WARN: " + Fore.RESET + "书签中存在无法识别的字符, 已输出为 utf-16 编码") 376 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'r', encoding='utf-16le') as 
fr: 377 | # mt = re.search(r'(?<=BasePage=)(\d+)', fr.read()) 378 | # if mt: 379 | # body_start = mt.group(0) 380 | # else: 381 | # body_start = 1 382 | # print(Fore.YELLOW + "INFO: " + Fore.RESET + "未识别到正文起始页码, 已设置默认值 1") 383 | # # 2.生成 build.toml 384 | # shutil.copy(os.path.join(self.settings.dir_lib, "build.toml"), os.path.join(out_dir, "build.toml")) 385 | # with open(os.path.join(out_dir, "build.toml"), 'r+', encoding='utf-8') as fr: 386 | # text = fr.read() 387 | # text = re.sub(r'^templ_choice = "\w"', 'templ_choice = "A"', text, flags=re.I+re.M) 388 | # text = re.sub(r'^name = "[^"]+?"', f'name = "{fname.split(".")[0]}"', text, flags=re.I+re.M) 389 | # text = re.sub(r'^name_abbr = "[^"]+?"', 'name_abbr = "XXXXXX"', text, flags=re.I+re.M) 390 | # text = re.sub(r'^body_start = \d+', f'body_start = {str(body_start)}', text, flags=re.I+re.M) 391 | # fr.seek(0) 392 | # fr.truncate() 393 | # fr.write(text) 394 | # # 3.导出图片 395 | # if not os.path.exists(os.path.join(out_dir, 'imgs')): 396 | # os.makedirs(os.path.join(out_dir, 'imgs')) 397 | # self.utils.pdf_to_imgs(input_path, os.path.join(out_dir, 'imgs')) 398 | # else: 399 | # out_dir = input_path+'_amb' 400 | # if not os.path.exists(out_dir): 401 | # os.makedirs(out_dir) 402 | # # 1.pdg 转 img 403 | # if not os.path.exists(os.path.join(out_dir, 'imgs')): 404 | # os.makedirs(os.path.join(out_dir, 'imgs')) 405 | # print(os.path.join(out_dir, 'imgs')) 406 | # self.utils.convert_pdg_to_img(input_path, os.path.join(out_dir, 'imgs')) 407 | # # 2.识别词典信息 408 | # bkmk_itf = os.path.join(os.path.join(out_dir, 'imgs'), 'FreePic2Pdf.itf') 409 | # if os.path.isfile(bkmk_itf): 410 | # with open(bkmk_itf, 'r', encoding='utf-16le') as fr: 411 | # text = fr.read() 412 | # mt_body_start = re.search(r'(?<=TextPage=)(\d+)', text) 413 | # mt_name = re.search(r'(?<=Title=)(.+)', text) 414 | # if mt_body_start: 415 | # body_start = mt_body_start.group(0) 416 | # else: 417 | # body_start = 1 418 | # print(Fore.YELLOW + "INFO: " + Fore.RESET + "未识别到正文起始页码, 已设置默认值 1") 419 | # if mt_name: 420 | # name = mt_name.group(0) 421 | # else: 422 | # name = os.path.split(input_path)[1] 423 | # os.remove(bkmk_itf) 424 | # else: 425 | # print(Fore.YELLOW + "INFO: " + Fore.RESET + "未识别到书籍信息") 426 | # # 3.生成 build.toml 427 | # shutil.copy(os.path.join(self.settings.dir_lib, "build.toml"), os.path.join(out_dir, "build.toml")) 428 | # with open(os.path.join(out_dir, "build.toml"), 'r+', encoding='utf-8') as fr: 429 | # text = fr.read() 430 | # text = re.sub(r'^templ_choice = "\w"', 'templ_choice = "A"', text, flags=re.I+re.M) 431 | # text = re.sub(r'^name = "[^"]+?"', f'name = "{name}"', text, flags=re.I+re.M) 432 | # text = re.sub(r'^name_abbr = "[^"]+?"', 'name_abbr = "XXXXXX"', text, flags=re.I+re.M) 433 | # text = re.sub(r'^body_start = \d+', f'body_start = {str(body_start)}', text, flags=re.I+re.M) 434 | # fr.seek(0) 435 | # fr.truncate() 436 | # fr.write(text) 437 | # shutil.rmtree(dir_bkmk) 438 | # print(Fore.GREEN + "\n\n预备原材料生成完毕!" 
+ Fore.RESET) 439 | 440 | # def amb_to_pdf(self, file_toml, outside_flg): 441 | # """ 从 amb 文件夹合成 PDF 文件 """ 442 | # # 0.准备路径相关 443 | # dir_amb = os.path.split(file_toml)[0] 444 | # if outside_flg: 445 | # out_file = os.path.join(os.path.split(dir_amb)[0], self.settings.name+'.pdf') 446 | # else: 447 | # out_file = os.path.join(dir_amb, self.settings.name+'.pdf') 448 | # dir_bkmk_bk = os.path.join(self.settings.dir_lib, 'bkmk') 449 | # dir_bkmk = os.path.join(self.settings.dir_output_tmp, 'bkmk') 450 | # if not os.path.exists(dir_bkmk): 451 | # os.makedirs(dir_bkmk) 452 | # shutil.copy(os.path.join(dir_bkmk_bk, "FreePic2Pdf.itf"), os.path.join(dir_bkmk, "FreePic2Pdf.itf")) 453 | # shutil.copy(os.path.join(dir_bkmk_bk, "FreePic2Pdf_bkmk.txt"), os.path.join(dir_bkmk, "FreePic2Pdf_bkmk.txt")) 454 | # # 1.生成临时书签 455 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'r+', encoding='utf-8') as fr: 456 | # text = re.sub(r'(?<=BasePage=|TextPage=)\d+', str(self.settings.body_start), fr.read()) 457 | # fr.seek(0) 458 | # fr.truncate() 459 | # fr.write(text) 460 | # toc_flg = False 461 | # for fname in os.listdir(dir_amb): 462 | # if fname == 'toc.txt': 463 | # with open(os.path.join(dir_amb, fname), 'r', encoding='utf-8') as fr: 464 | # text = fr.read() 465 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'r+', encoding='utf-8') as fr: 466 | # fr.seek(0) 467 | # fr.truncate() 468 | # fr.write(text) 469 | # toc_flg = True 470 | # break 471 | # elif fname == 'index_all.txt': 472 | # toc_tmp = os.path.join(self.settings.dir_output_tmp, 'toc_all.txt') 473 | # if self.func.index_to_toc(os.path.join(dir_amb, fname), toc_tmp): 474 | # with open(toc_tmp, 'r', encoding='utf-8') as fr: 475 | # text = fr.read() 476 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'r+', encoding='utf-8') as fr: 477 | # fr.seek(0) 478 | # fr.truncate() 479 | # fr.write(text) 480 | # toc_flg = True 481 | # break 482 | # else: 483 | # pass 484 | # if not toc_flg: 485 | # print(Fore.YELLOW + "WARN: " + Fore.RESET + "未找到 toc.txt/index_all.txt, 生成的 PDF 将不带书签") 486 | # # 2.将图片合成PDF 487 | # if os.path.isdir(os.path.join(dir_amb, 'imgs')): 488 | # self.utils.combine_img_to_pdf_fp2p(os.path.join(dir_amb, 'imgs'), out_file) 489 | # # 3.给PDF挂书签 490 | # cur_path = os.getcwd() 491 | # self.utils.eximport_bkmk_fp2p(out_file, os.path.join(cur_path, dir_bkmk), False) 492 | # shutil.rmtree(dir_bkmk) 493 | # print(Fore.GREEN + "\n\nPDF生成完毕!" 
+ Fore.RESET) 494 | # else: 495 | # print(Fore.RED + "ERROR: " + Fore.RESET + "未找到 imgs 文件夹") 496 | 497 | 498 | def print_menu(): 499 | """ 打印选单 """ 500 | # 功能选单 501 | print("\n(〇) 打包/解包") 502 | print(Fore.CYAN + " 1" + Fore.RESET + ".解包 mdx/mdd 文件") 503 | print(Fore.CYAN + " 2" + Fore.RESET + ".将源 txt 文件打包成 mdx 文件") 504 | print(Fore.CYAN + " 3" + Fore.RESET + ".将资料包文件夹打包成 mdd 文件") 505 | print("\n(一) 准备原材料") 506 | # print(Fore.CYAN + " 10" + Fore.RESET + ".从 PDF文件/pdg文件夹 生成预备原材料" + Fore.YELLOW + " (还需手动检查完善)" + Fore.RESET) 507 | print(Fore.CYAN + " 11" + Fore.RESET + ".从 toc_all.txt 生成 index_all.txt") 508 | print(Fore.CYAN + " 12" + Fore.RESET + ".合并 toc.txt 和 index.txt 为 index_all.txt") 509 | print("\n(二) 制作词典") 510 | print(Fore.CYAN + " 20" + Fore.RESET + ".生成词典" + Fore.YELLOW + " (需准备好原材料)" + Fore.RESET) 511 | print("\n(三) 还原词典") 512 | print(Fore.CYAN + " 30" + Fore.RESET + ".从词典还原原材料" + Fore.YELLOW + " (仅支持 AMB 1.4 以上版本)" + Fore.RESET) 513 | # print(Fore.CYAN + " 31" + Fore.RESET + ".从原材料还原 PDF") 514 | print(Fore.CYAN + " 32" + Fore.RESET + ".从 index_all.txt 还原 toc_all.txt") 515 | print("\n(四) 其他工具") 516 | print(Fore.CYAN + " 41" + Fore.RESET + ".从 PDF 提取图片 (MuPDF)") 517 | print(Fore.CYAN + " 42" + Fore.RESET + ".将 PDF 转换成图片 (MuPDF)") 518 | # print(Fore.CYAN + " 43" + Fore.RESET + ".将 图片 合成 PDF (MuPDF)") 519 | # print(Fore.CYAN + " 44" + Fore.RESET + ".PDF书签导出/导入 (FreePic2Pdf)") 520 | 521 | 522 | def main(): 523 | just_fix_windows_console() 524 | # 程序开始 525 | print(Fore.CYAN + "欢迎使用 AutoMdxBuilder 1.5, 下面是功能选单:" + Fore.RESET) 526 | while True: 527 | print_menu() 528 | sel = input('\n请输入数字(回车或“0”退出程序): ') 529 | # 执行选择 530 | if re.match(r'^\d+$', sel) and int(sel) in range(1, 50): 531 | print('\n------------------') 532 | amb = AutoMdxBuilder() 533 | amb.auto_processing(int(sel)) 534 | print('\n\n------------------------------------') 535 | # 判断是否继续 536 | ctn = input(Fore.CYAN + "回车退出程序, 或输入 Y/y 继续使用 AMB: " + Fore.RESET) 537 | if ctn not in ['Y', 'y']: 538 | break 539 | else: 540 | break 541 | 542 | 543 | if __name__ == '__main__': 544 | logging.basicConfig(format='%(asctime)s | %(message)s', filename=Settings().file_log, filemode='w', level=logging.INFO) 545 | try: 546 | main() 547 | logging.info('The program worked fine.') 548 | except: 549 | logging.error(traceback.format_exc()) 550 | -------------------------------------------------------------------------------- /.cross_platform/ebook_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-15 18:43:07 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.5 7 | 8 | import os 9 | import re 10 | import shutil 11 | from colorama import Fore 12 | # from PIL import Image 13 | import sys 14 | from mdict_utils.__main__ import run as mdict_cmd 15 | import fitz 16 | from fitz.__main__ import main as fitz_command 17 | 18 | 19 | class EbookUtils: 20 | """ 电子书(PDF等)实用工具 """ 21 | def __init__(self, amb): 22 | self.settings = amb.settings 23 | 24 | # ========== (〇) mdict-utils ========== 25 | def mdict(self, parms): 26 | """ 执行 mdict-utils 程序 """ 27 | saved_parms = sys.argv[1:] 28 | sys.argv[1:] = parms 29 | mdict_cmd() 30 | sys.argv[1:] = saved_parms 31 | 32 | def export_mdx(self, mfile): 33 | """ 解包 mdx/mdd (取代 MdxExport.exe) """ 34 | done_flg = True 35 | if os.path.isfile(mfile) and mfile.endswith('.mdx'): 36 | out_dir = os.path.splitext(mfile)[0] 37 | self.mdict(['-x', mfile, '-d', 
out_dir]) 38 | for fname in os.listdir(out_dir): 39 | fp = os.path.join(out_dir, fname) 40 | if os.path.isfile(fp) and ('description' in fname.split('.')): 41 | fp_new = fp.replace('.description', '.info').replace('.mdx', '') 42 | os.rename(fp, fp_new) 43 | elif os.path.isfile(fp): 44 | fp_new = fp.replace('.mdx', '') 45 | os.rename(fp, fp_new) 46 | # 分析 info 信息, 确定是否支持词条顺序的还原 47 | order_flg = False 48 | for f in os.listdir(out_dir): 49 | fp = os.path.join(out_dir, f) 50 | text = '' 51 | if fp.endswith('.info.html'): 52 | with open(fp, 'r', encoding='utf-8') as fr: 53 | if re.search(r'

[^><]*?, (packed|built) with AutoMdxBuilder[^><]*?\.
', fr.read(), flags=re.I): 54 | # 符合条件, 支持词条顺序的还原 55 | order_flg = True 56 | break 57 | if order_flg: 58 | # 按编号精准还原源 txt 59 | xname = os.path.split(mfile)[1] 60 | file_final_txt = os.path.join(out_dir, xname.split('.')[0]+'.txt') 61 | entries = [] 62 | eid = '99999999' 63 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 64 | text = '' 65 | for line in fr: 66 | if re.match(r'^', line): 67 | eid = re.match(r'^', line).group(1) 68 | elif not re.match(r'^\s*$', line): 69 | text += line 70 | else: 71 | text += line 72 | entries.append({"eid": eid, "text": text}) 73 | eid = '99999999' 74 | text = '' 75 | if eid != '': 76 | entries.sort(key=lambda x: x["eid"], reverse=False) 77 | with open(file_final_txt, 'w', encoding='utf-8') as fw: 78 | for entry in entries: 79 | fw.write(entry["text"]) 80 | else: 81 | print(Fore.YELLOW + "WARN: " + Fore.RESET + "检测到词典并非由 AMB 生成, 不保证词条顺序的准确还原") 82 | elif os.path.isfile(mfile) and mfile.endswith('.mdd'): 83 | cur_dir, mname = os.path.split(mfile) 84 | out_dir = os.path.join(os.path.splitext(mfile)[0], 'data') 85 | if os.path.exists(out_dir): 86 | shutil.rmtree(out_dir) 87 | # 检查是否存在 mdd 分包 88 | multi_mdd_flg = False 89 | mdd_names = [mname] 90 | for fname in os.listdir(cur_dir): 91 | if re.search(r'\.\d+\.mdd$', fname.lower()): 92 | multi_mdd_flg = True 93 | mdd_names.append(fname) 94 | # 按检查结果区分处理 95 | if multi_mdd_flg and input('检查到目录下存在 mdd 分包, 是否全部解包 (Y/N): ') in ('Y', 'y'): 96 | mdd_names = list(set(mdd_names)) 97 | mdd_names.sort() 98 | for mdd_name in mdd_names: 99 | print(f"开始解压 '{mdd_name}' :\n") 100 | self.mdict(['-x', os.path.join(cur_dir, mdd_name), '-d', out_dir]) 101 | else: 102 | self.mdict(['-x', mfile, '-d', out_dir]) 103 | else: 104 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 105 | done_flg = False 106 | return done_flg 107 | 108 | def pack_to_mdict(self, file_final_txt, file_dict_info, dir_data, dir_output): 109 | """ 打包 mdx/mdd (取代 MdxBuilder.exe) """ 110 | mdx_flg = True 111 | mdd_flg = True 112 | # 打包 mdx 113 | print('正在生成 mdx 文件……\n') 114 | ftitle = os.path.join(dir_output, os.path.splitext(os.path.split(file_final_txt)[1])[0]) 115 | if os.path.exists(file_final_txt) and os.path.exists(file_dict_info): 116 | # 给词条添加编号信息 117 | tmp_final_txt = os.path.join(os.path.join(self.settings.dir_bundle, '_tmp'), 'tmp_final.txt') 118 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 119 | with open(tmp_final_txt, 'w', encoding='utf-8') as fw: 120 | n = 0 121 | link_flg = False 122 | for line in fr: 123 | if re.match(r'^@@@LINK=', line, flags=re.I): 124 | link_flg = True 125 | if (not link_flg) and re.match(r'^\s*$', line): 126 | n += 1 127 | fw.write(f'\n') 128 | link_flg = False 129 | fw.write(line) 130 | self.mdict(['--description', file_dict_info, '--encoding', 'utf-8', '-a', tmp_final_txt, ftitle+'.mdx']) 131 | else: 132 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件 {file_final_txt} 或 {file_dict_info} 不存在") 133 | mdx_flg = False 134 | # 打包 mdd 135 | if dir_data is not None: 136 | mdd_flg = self.pack_to_mdd(dir_data, ftitle) 137 | if mdx_flg and mdd_flg: 138 | return True 139 | else: 140 | return False 141 | 142 | def pack_to_mdd(self, dir_data, ftitle): 143 | """ 仅打包 mdd (取代 MdxBuilder.exe) """ 144 | done_flg = True 145 | pack_flg = True 146 | if ftitle is None: 147 | ftitle = dir_data 148 | # 判断是否打包 149 | if os.path.exists(dir_data) and len(os.listdir(dir_data)) > 0: 150 | if os.path.exists(ftitle+'.mdd'): 151 | a = input(f'文件 "{ftitle}.mdd" 已存在, 是否重新打包 mdd (Y/N): ') 152 | if a not in ('Y', 'y'): 153 | pack_flg = 
False 154 | else: 155 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件夹 {dir_data} 不存在或为空") 156 | pack_flg = False 157 | done_flg = False 158 | # 开始打包 159 | if pack_flg: 160 | print('正在生成 mdd 文件……\n') 161 | # 检查子文件夹的数量 162 | sub_dirs = [] 163 | for item in os.listdir(dir_data): 164 | if os.path.isdir(os.path.join(dir_data, item)): 165 | sub_dirs.append(os.path.join(dir_data, item)) 166 | # 如果有2个子文件夹以上, 再计算子文件夹大小, 如果大小超过 1.5G, 将分包 167 | split_flg = False 168 | size_sum = 0 169 | if len(sub_dirs) > 1: 170 | # 判断子文件夹大小 171 | for sub_dir in sub_dirs: 172 | for fname in os.listdir(sub_dir): 173 | if os.path.isfile(os.path.join(sub_dir, fname)): 174 | size_sum += os.path.getsize(os.path.join(sub_dir, fname)) 175 | if size_sum > 1536000000: 176 | split_flg = True 177 | break 178 | # 按检查结果开始处理 179 | if split_flg: 180 | size_sum = 0 181 | print(Fore.YELLOW + "INFO: " + Fore.RESET + "资料文件夹超过 1.5G, 将自动分包") 182 | # 创建临时文件夹 183 | tmp_dir = os.path.join(os.path.split(dir_data)[0], '_packing') 184 | if not os.path.exists(tmp_dir): 185 | os.makedirs(tmp_dir) 186 | pack_list = [] 187 | pack = [] 188 | n = 0 189 | # 对每个子文件夹作判断 190 | for i in range(len(sub_dirs)): 191 | for fname in os.listdir(sub_dirs[i]): 192 | if os.path.isfile(os.path.join(sub_dirs[i], fname)): 193 | size_sum += os.path.getsize(os.path.join(sub_dirs[i], fname)) 194 | if size_sum > 1024000000: 195 | size_sum = 0 196 | pack.append(sub_dirs[i]) 197 | pack_list.append(pack) 198 | pack = [] 199 | break 200 | pack.append(sub_dirs[i]) 201 | n = i 202 | # 1.打包子文件夹 203 | mdd_rk = 0 204 | for sds in pack_list: 205 | for sd in sds: 206 | # 移动到临时文件夹中 207 | os.rename(sd, os.path.join(tmp_dir, os.path.split(sd)[1])) 208 | # 移完之后打包 209 | if mdd_rk == 0: 210 | self.mdict(['-a', tmp_dir, ftitle+'.mdd']) 211 | else: 212 | self.mdict(['-a', tmp_dir, f'{ftitle}.{str(mdd_rk)}.mdd']) 213 | # 打包完再移回去 214 | for fname in os.listdir(tmp_dir): 215 | os.rename(os.path.join(tmp_dir, fname), os.path.join(dir_data, fname)) 216 | mdd_rk += 1 217 | # 1.打包剩余部分 218 | # 移动文件夹部分(如果有) 219 | if n == len(sub_dirs) - 1: 220 | for sd in pack: 221 | os.rename(sd, os.path.join(tmp_dir, os.path.split(sd)[1])) 222 | # 移动文件部分(如果有) 223 | for item in os.listdir(dir_data): 224 | if not os.path.isdir(os.path.join(dir_data, item)): 225 | os.rename(os.path.join(dir_data, item), os.path.join(tmp_dir, item)) 226 | # 打包 227 | if len(os.listdir(tmp_dir)) == 0: 228 | pass 229 | else: 230 | self.mdict(['-a', tmp_dir, f'{ftitle}.{str(mdd_rk)}.mdd']) 231 | # 移回去 232 | for fname in os.listdir(tmp_dir): 233 | os.rename(os.path.join(tmp_dir, fname), os.path.join(dir_data, fname)) 234 | # 删除临时文件夹 235 | if os.path.exists(tmp_dir): 236 | os.rmdir(tmp_dir) 237 | else: 238 | self.mdict(['-a', dir_data, ftitle+'.mdd']) 239 | return done_flg 240 | 241 | # ========== (一) From PDF to Images ========== 242 | # def pdf_to_imgs(self, file_pdf, dir_out): 243 | # """ 自动判断文字版/图片版PDF, 并选择最优方法导出图像 """ 244 | # # 准备环境 245 | # file_exe = os.path.join(os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'MuPDF'), 'mutool.exe') 246 | # dir_tmp = os.path.join(self.settings.dir_bundle, '_tmp') 247 | # if not os.path.exists(dir_tmp): 248 | # os.makedirs(dir_tmp) 249 | # dir_tmp_mp = os.path.join(dir_tmp, 'MuPDF_tmp') 250 | # if not os.path.exists(dir_tmp_mp): 251 | # os.makedirs(dir_tmp_mp) 252 | # tmp_txt = os.path.join(dir_tmp_mp, 'text.txt') 253 | # # 判断是文字版还是图片版PDF 254 | # img_pdf_flg = True 255 | # os.system(f'{file_exe} draw -o {tmp_txt} -F text {file_pdf} 2-11') 256 | # with open(tmp_txt, 'r', 
encoding='utf-8') as fr: 257 | # word = re.sub(r'[\r\n\s]', '', fr.read()) 258 | # if len(word) > 50: 259 | # img_pdf_flg = False 260 | # # 开始处理 261 | # if img_pdf_flg: 262 | # self.extract_pdf_to_imgs_pdfpatcher(file_pdf, dir_out) 263 | # else: 264 | # self.convert_pdf_to_imgs(file_pdf, dir_out) 265 | # shutil.rmtree(dir_tmp_mp) 266 | 267 | def convert_pdf_to_imgs_fitz(self, file_pdf, dir_out, dpi=300): 268 | """ 使用 fitz(mupdf), 按 DPI 等参数转换成图片 """ 269 | # 读取 pdf 270 | doc = fitz.open(file_pdf) 271 | mat = fitz.Matrix(1, 1) 272 | count = 0 273 | for p in doc: 274 | count += 1 275 | # 开始导出 276 | if not os.path.exists(dir_out): 277 | os.makedirs(dir_out) 278 | print('转换中……') 279 | for i in range(count): 280 | fname = f"{str(i+1).zfill(8)}.png" 281 | page = doc.load_page(i) 282 | pix = page.get_pixmap(matrix=mat, dpi=dpi, colorspace=fitz.csGRAY, alpha=False) 283 | pix.save(os.path.join(dir_out, fname)) 284 | doc.close() 285 | print('转换完成!') 286 | 287 | def extract_pdf_to_imgs_fitz(self, file_pdf, dir_out): 288 | """ 使用 fitz(mupdf), 如果生成了JBIG2加密的 jb2,则还需要使用 jbig2dec 解密成 png """ 289 | # 准备参数 290 | cmd = ['extract', str(file_pdf), '-images', '-output', str(dir_out)] 291 | saved_parms = sys.argv[1:] 292 | sys.argv[1:] = cmd 293 | # 开始导出 294 | if not os.path.exists(dir_out): 295 | os.makedirs(dir_out) 296 | print('提取中……') 297 | fitz_command() 298 | sys.argv[1:] = saved_parms 299 | print('提取完成!') 300 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Litles 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## AutoMdxBuilder 简介 2 | **自动化制作 mdx 词典工具,人人都可以制作电子词典**(支持 Windows/macOS/Linux) 3 | 4 | AutoMdxBuilder 是 [[Mdict]](https://www.mdict.cn/wp/?lang=en) 词典制作相关的工具,旨在自动化词典制作过程,同时降低制作门槛,该工具目前具备以下功能: 5 | 6 | **(一) 打包/解包** 7 | 8 | * 解包 mdx/mdd 文件。功能同 `MdxExport.exe`,支持自动解 mdd 分包,支持保留原始词条顺序。 9 | * 打包成 mdx/mdd 文件。功能同 `MdxBuilder.exe`,支持 mdd 自动分包,支持保留原始词条顺序。 10 | 11 | **(二) 制作词典** 12 | 13 | * 自动化制作词典 (目前有A-D四个可选模板, 均支持多卷/集合类型) 14 | * 一键从 PDF/pdg 等原料制作词典 15 | 16 | **(三) 还原词典** 17 | 18 | * 将 Mdict 词典逆向还原成原材料,方便词典的二次编辑 19 | * 将 Mdict 词典逆向还原成 PDF 20 | 21 | **(四) 其他实用工具** 22 | 23 | * PDF 与图片互转 24 | * PDF 书签管理 25 | 26 | ## 一、词典制作 27 | 28 | ### (〇) 成品预览 29 | #### 图像词典 (模板A,朴素版) 30 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/img_dict_atmpl.gif) 31 | 32 | #### 图像词典 (模板B,导航版) 33 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/img_dict_btmpl.gif) 34 | 35 | #### 文本词典 (模板C,朴素版) 36 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/text_dict_ctmpl.png) 37 | 38 | #### 文本词典 (模板D,导航版) 39 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/text_dict_dtmpl.gif) 40 | 41 | ### 词典制作概述 42 | 43 | 使用词典制作功能时,需要准备好原材料,将所需要的材料单独用一个文件夹收纳(不妨称它为 amb 文件夹)。词典制作的配置信息写在 build.toml 文件中,同样也放置在该文件夹中。下面是一个示例的 amb 文件夹结构: 44 | 45 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/amb_folder.png) 46 | 47 | ### (一) 原材料准备说明 48 | 49 | 制作不同模板的词典,所需的原材料也不尽相同,下面分模板列举: 50 | 51 | #### 1.图像词典 (模板A) 52 | 53 | * (必须) `imgs` 文件夹: 存放图像文件,不限定图片格式,png、jpg 等均可,也无特定的名称要求(顺序是对的就行); 54 | * (可选) `index.txt`: 索引文件 55 | * (可选) `toc.txt`: 目录文件 56 | 57 | > index 和 toc 二者中必须至少有一个, 如果你的 toc 目录文件比较全, 建议改名 toc_all 然后使用模板 B 58 | 59 | #### 2.图像词典 (模板B) 60 | 61 | * (必须)`imgs` 文件夹:存放图像文件,同模板A 62 | * (可选)`index_all.txt`: 全索引文件 63 | * (可选)`toc_all.txt`: 全目录文件 64 | * (可选)`index.txt`: 附加索引文件 65 | 66 | > index_all 与 toc_all 是等价的, 按偏好使用其中一种即可 67 | > 如果在 index_all 之外还有独立的词条, 可以设置 add_extra_index = true, 并将那些词条以 index.txt 文件的形式作为补充 68 | 69 | #### 3.文本词典 (模板C) 70 | 71 | * (必须)`index.txt`: 索引文件 72 | 73 | #### 4.文本词典 (模板D) 74 | 75 | * (必须)`index_all.txt`: 全索引文件 76 | 77 | **【通用可选】** 除上述各模板的材料准备之外,下面两个是通用材料,制作词典可按需添加: 78 | 79 | * (可选)`syns.txt` 文件:同义词文件; 80 | * (可选)`info.html` 文件:词典介绍等描述。 81 | 82 | **【注意事项】** 83 | 84 | * 凡涉及的文本文件(如`.txt`、`.html`),一律要求 **UTF-8 无 BOM** 的编码格式; 85 | * 原材料文件夹中只放置需要用到的文件/文件夹,**为避免误读取,不用到的不要出现在原材料文件夹内**; 86 | * 文件夹和文件的名称就按本说明所提的,不建议自定义名称。 87 | 88 | ### (二) 配置文件 `build.toml` 参数说明 89 | 90 | 可参见 lib/build.toml 中的初始配置,已有详细注释,制作词典时可直接拷贝修改, 也可以参考 demo 词典的配置情况。下面选取其中部分作为补充说明: 91 | 92 | * `simp_trad_flg`: 是否需要繁简通搜, 开启后将会把所有词头都添加繁体/简体跳转, 以确保 mdx 使用时能繁简通搜。 默认 false 不开启。 93 | * `multi_volume`: 是否是多卷的, true 则开启多卷模式(需要按多卷模式来准备原材料)。默认是 false 即单卷模式。 94 | * `body_start`: 正文起始图片序号, 比如正文第一页是 imgs 文件夹中的第 23 张图, 那么就设置为 `body_start = 23`。(多卷模式下该值是列表,比如 `body_start = [23, 19, 1, 1]`) 95 | * `auto_split_columns`: 是否开启自动分栏, 设置值 2 则自动分割成两栏,该功能是为方便手机等小屏移动设备的使用而设置。默认值 1 表示不开启自动分栏。 96 | * `body_end_page`: 当自动分栏开启时,该值确定了分栏的应用范围,分栏从正文第一页开启, 默认到辞书的最后一页。(多卷模式下该值是列表,比如 `body_end_page = [463, 501, 9999, 9999]`) 97 | 98 | 对于模板 A 的 `navi_items`,其中 `a` 的值是显示文字,`ref`的值是与 `toc.txt` 中词目对应的: 99 | 100 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/settings.png) 101 | 102 | 对于文本词典模板 C,D 中的 `add_headwords` 选项, 词条内容如果已经带有标题,可以将该项设置为 false。 103 | 104 | 105 | ## 二、相关文件格式 106 | 107 | ### 索引文件 `index.txt` 108 | 109 | 
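为直观起见,这里先补一个 index.txt 的内容示意(每行一个词条,词目与页码之间以制表符分隔;以下词目和页码均为虚构,仅作格式演示,并非仓库自带的示例):

```
阿房宫	1
安禄山	15
白居易	103
```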
格式 `词目<Tab>页码`(`<Tab>` 为制表符;页码数是相对正文起始页的,而不是图片序号):

![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/index.png)

> 如果是多卷模式, 则页码需要带分卷号前缀 `[n]` 以标识分卷(第一卷 `[1]` 可以省略不写),比如词条『刘备』是在第4卷第3页, 那么索引应写作 `刘备<Tab>[4]3`;

如果是制作文本词典 (模板C),用到的文件也叫 `index.txt`,只不过其中的 **页码** 换成了 **词条正文**,格式为 `词目<Tab>词条正文` 。

### 目录文件 `toc.txt`

格式 `[*]词目<Tab>页码`,大概像这样(行首 TAB 缩进表层级):

![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/toc.png)

格式同程序 `FreePic2Pdf.exe` 的书签文件 `FreePic2Pdf_bkmk.txt`,因此也可以直接用 `FreePic2Pdf.exe` 程序从 pdf 文件中导出。

> 与索引文件一样,多卷模式下, 页码需要带分卷号前缀 `[n]` 以标识分卷(第一卷 `[1]` 可以省略不写)

### 全索引文件 `index_all.txt`

是 `index.txt` 的拓展,格式同样是 `词目<Tab>页码` ,只不过 `index_all.txt` 是把 `toc.txt` 也并入进来,并且是严格有序的。

其中目录(章节)的词目要加 `【L<层级>】` 前缀标识,比如顶级章节“正文”前缀就是 `【L0】正文` ,“正文”的下一级“史前篇”的前缀就是 `【L1】史前篇` 。

> 章节词目可以没有对应页码,但要保留词目后的 `<Tab>`

![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/index_all.png)

> 与索引文件一样,多卷模式下, 页码需要带分卷号前缀 `[n]` 以标识分卷(第一卷 `[1]` 可以省略不写)

如果是制作文本词典 (模板D),用到的文件也叫 `index_all.txt`,只不过其中的 **页码** 换成了 **词条正文**,格式为 `词目<Tab>词条正文` 。

### 同义词文件 `syns.txt`

或说重定向文件,格式 `同义词<Tab>词目`:

![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/syns.png)

## 三、多卷模式补充说明

当在 `build.toml` 中设置 `multi_volume = true` 时,将会按照多卷模式制作词典,这时原材料的命名相比一般模式会有些许不同,下面按模板列举:

图像词典模板 A,B 在多卷模式下, 首先图像文件夹结构将是 imgs/vol_01, imgs/vol_02, imgs/vol_03... 即分卷子文件夹名称需采用 vol_ 加两位序号的形式

* 模板 A: 除可以使用全局索引/目录文件 index.txt, toc.txt 外,也可以使用分卷文件 index_01.txt, index_02.txt ... 和 toc_01.txt, toc_02.txt ... (分卷文件中的页码无需加`[n]`前缀)
* 模板 B: 除可以使用全局全索引/全目录文件 index_all.txt/toc_all.txt 外,也可以使用分卷文件 index_all_01.txt, index_all_02.txt ... 或 toc_all_01.txt, toc_all_02.txt ... 
(分卷文件中的页码无需加`[n]`前缀) 155 | * 模板 D: 同模板 B, 不过因为没有页码, 所以分卷文件和全局文件无区别 156 | 157 | > 还可以在目录文件、全索引或全目录文件名上标识分卷名称(这样就不用在 `build.toml` 中设置 vol_names 项), 比如 toc_01_军事卷、 toc_all_01_军事卷.txt 或 index_all_01_军事卷.txt, 这样, 程序将会从文件名中读取卷名 158 | 159 | ## 四、其他功能简介 160 | 161 | ## 参考 162 | 163 | + https://github.com/liuyug/mdict-utils 164 | + https://github.com/VimWei/MdxSourceBuilder 165 | -------------------------------------------------------------------------------- /auto_mdx_builder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:17 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | # import logging 9 | import traceback 10 | import os 11 | import re 12 | import shutil 13 | from colorama import Fore, just_fix_windows_console 14 | from settings import Settings 15 | from func_lib import FuncLib 16 | from templates.img_dict_atmpl import ImgDictAtmpl 17 | from templates.img_dict_btmpl import ImgDictBtmpl 18 | from templates.text_dict_ctmpl import TextDictCtmpl 19 | from templates.text_dict_dtmpl import TextDictDtmpl 20 | from ebook_utils import EbookUtils 21 | 22 | 23 | class AutoMdxBuilder: 24 | """图像词典制作程序""" 25 | def __init__(self): 26 | self.settings = Settings() 27 | self.func = FuncLib(self) 28 | self.utils = EbookUtils(self) 29 | 30 | def auto_processing(self, sel): 31 | """ 根据选择自动处理 """ 32 | if sel == 1: 33 | # --- 解包 mdx/mdd 文件 --- 34 | mfile = input("请输入要解包的 mdx/mdd 文件路径: ").strip('"') 35 | if self.utils.export_mdx(mfile): 36 | print(Fore.GREEN + "\n已输出在同目录下: " + Fore.RESET + os.path.splitext(mfile)[0]) 37 | elif sel == 2: 38 | # --- 将源 txt 文件打包成 mdx 文件 --- 39 | file_final_txt = input("请输入要打包的 txt 文件路径: ").strip('"') 40 | if self.func.text_file_check(file_final_txt) == 2: 41 | # 检查数据文件夹 42 | dir_curr, fname_txt = os.path.split(file_final_txt) 43 | dir_data = os.path.join(dir_curr, 'data') 44 | if not os.path.exists(dir_data): 45 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"文件夹 {dir_data} 不存在, 已默认不打包 mdd") 46 | dir_data = None 47 | elif os.path.exists(dir_data) and len(os.listdir(dir_data)) == 0: 48 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"文件夹 {dir_data} 为空, 已默认不打包 mdd") 49 | dir_data = None 50 | # 生成 info.html 51 | file_info_raw = None 52 | for fname in os.listdir(dir_curr): 53 | if fname == 'info.html': 54 | file_info_raw = os.path.join(dir_curr, fname) 55 | elif fname.endswith('.html') and fname.startswith(os.path.splitext(fname_txt)[0]): 56 | file_info_raw = os.path.join(dir_curr, fname) 57 | break 58 | file_dict_info = os.path.join(self.settings.dir_output_tmp, self.settings.fname_dict_info) 59 | self.func.generate_info_html(file_info_raw, file_dict_info, os.path.splitext(fname_txt)[0], None) 60 | # 打包 61 | print('\n------------------\n开始打包……\n') 62 | done_flg = self.utils.pack_to_mdict(dir_curr, file_final_txt, file_dict_info, dir_data) 63 | if done_flg: 64 | print(Fore.GREEN + "\n打包完毕。" + Fore.RESET) 65 | else: 66 | print(Fore.RED + "\n材料检查不通过, 请确保材料准备无误再执行程序" + Fore.RESET) 67 | elif sel == 3: 68 | # --- 将资料包文件夹打包成 mdd 文件 --- 69 | dir_data = input("请输入要打包的资料文件夹路径: ").strip('"').rstrip('\\/') 70 | print('\n------------------\n开始打包……\n') 71 | done_flg = self.utils.pack_to_mdd(dir_data, None) 72 | if done_flg: 73 | print(Fore.GREEN + "\n打包完毕。" + Fore.RESET) 74 | elif sel == 10: 75 | # --- 从 PDF文件/pdg文件夹 生成预备原材料 --- 76 | p = input("请输入 pdf文件/pdg文件夹 路径: ").strip('"').rstrip('\\/') 77 | if os.path.isfile(p) 
and os.path.splitext(p)[1] == '.pdf': 78 | self.pdf_to_amb(p) 79 | elif os.path.isdir(p): 80 | self.pdf_to_amb(p, False) 81 | else: 82 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 83 | elif sel == 11: 84 | # --- index_all/toc_all 互转 --- 85 | file_input = input("请输入 toc_all/index_all 的文件路径: ").strip('"') 86 | dir_input, fname = os.path.split(file_input) 87 | done_flg = True 88 | if 'index_all' in fname: 89 | file_toc_all = os.path.join(dir_input, fname.replace('index_all', 'toc_all')) 90 | done_flg = self.func.index_all_to_toc(file_input, file_toc_all) 91 | elif 'toc_all' in fname: 92 | file_index_all = os.path.join(dir_input, fname.replace('toc_all', 'index_all')) 93 | done_flg = self.func.toc_all_to_index(file_input, file_index_all) 94 | else: 95 | it = input("该文本文本是 index_all 吗(Y/N): ") 96 | if it in ('Y', 'y'): 97 | file_toc_all = os.path.join(dir_input, 'toc_all.txt') 98 | done_flg = self.func.index_all_to_toc(file_input, file_toc_all) 99 | elif it in ('N', 'n'): 100 | file_index_all = os.path.join(dir_input, 'index_all.txt') 101 | done_flg = self.func.toc_all_to_index(file_input, file_index_all) 102 | else: 103 | done_flg = False 104 | if done_flg: 105 | print(Fore.GREEN + "\n转换完成, 生成在同目录下" + Fore.RESET) 106 | else: 107 | print(Fore.RED + "\n未完成转换" + Fore.RESET) 108 | elif sel == 12: 109 | # --- 合并 toc 和 index 为 index_all --- 110 | file_toc = input("(1) 请输入 toc 文件的路径: ").strip('"') 111 | file_index = input("(2) 请输入 index 文件的路径: ").strip('"') 112 | file_index_all = os.path.join(os.path.split(file_index)[0], 'index_all.txt') 113 | self.func.merge_to_index_all(file_toc, file_index, file_index_all) 114 | elif sel == 13: 115 | # --- 索引扩充(通过标点符号等分词), 提升查得率 --- 116 | p = input("请输入词头文件路径: ").strip('"') 117 | if os.path.isfile(p) and self.func.text_file_check(p) == 2: 118 | file_result = os.path.splitext(p)[0]+'_split'+os.path.splitext(p)[1] 119 | inp = input("输入分词最少字符数(大于0, 回车默认长度为2): ") 120 | n_chars = 2 121 | if re.match(r'\d+$', inp) and int(inp) > 0: 122 | n_chars = int(inp) 123 | else: 124 | print(Fore.YELLOW + "INFO: " + Fore.RESET + "输入未识别, 已使用默认长度2") 125 | if self.func.make_relinks_split(p, file_result, n_chars): 126 | print(Fore.GREEN + "\n转换完成, 生成在同目录下" + Fore.RESET) 127 | else: 128 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 129 | elif sel == 14: 130 | # --- 繁体简体 txt 文本文件互转 --- 131 | p = input("请输入要转换的文本文件路径: ").strip('"') 132 | if os.path.isfile(p) and self.func.text_file_check(p) == 2: 133 | ts = input("将该文本转成繁体(T/t)还是简体(S/s):") 134 | if ts in ('T', 't'): 135 | file_result = os.path.splitext(p)[0]+'_trad'+os.path.splitext(p)[1] 136 | self.func.simp_trad_trans(p, file_result, 'T') 137 | elif ts in ('S', 's'): 138 | file_result = os.path.splitext(p)[0]+'_simp'+os.path.splitext(p)[1] 139 | self.func.simp_trad_trans(p, file_result, 'S') 140 | else: 141 | print(Fore.RED + "ERROR: " + Fore.RESET + "输入有误") 142 | else: 143 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 144 | elif sel == 20: 145 | # --- 生成词典 --- 146 | p = input("请输入原材料文件夹路径或 build.toml 文件路径: ").strip('"').rstrip('\\/') 147 | if os.path.split(p)[1] == 'build.toml': 148 | if self.settings.load_build_toml(p, False, False): 149 | self._build_mdict() 150 | elif os.path.isdir(p): 151 | file_toml = os.path.join(p, 'build.toml') 152 | if os.path.isfile(file_toml): 153 | if self.settings.load_build_toml(file_toml, False, True): 154 | self._build_mdict() 155 | else: 156 | print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 build.toml 文件") 157 | else: 158 | print(Fore.RED + "ERROR: " + Fore.RESET + 
"路径输入有误") 159 | elif sel == 30: 160 | # --- 从词典还原原材料 --- 161 | p = input("请输入词典的文件夹或 mdx/mdd 文件路径: ").strip('"').rstrip('\\/') 162 | if os.path.isfile(p) and os.path.splitext(p)[1] == '.mdx': 163 | self._restore_raw(p, False) 164 | elif os.path.isfile(p) and os.path.splitext(p)[1] == '.mdd': 165 | if os.path.isfile(p[:-1]+'x'): 166 | self._restore_raw(p[:-1]+'x', False) 167 | elif os.path.isdir(p): 168 | for m in os.listdir(p): 169 | if m.endswith('.mdx'): 170 | self._restore_raw(os.path.join(p, m), True) 171 | break 172 | else: 173 | print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 mdx 文件") 174 | else: 175 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 176 | elif sel == 31: 177 | # --- 从原材料还原 PDF --- 178 | p = input("请输入原材料文件夹路径或 build.toml 文件路径: ").strip('"').rstrip('\\/') 179 | if os.path.split(p)[1] == 'build.toml': 180 | if self.settings.load_build_toml(p, True): 181 | self.amb_to_pdf(file_toml, False) 182 | elif os.path.isdir(p): 183 | file_toml = os.path.join(p, 'build.toml') 184 | if os.path.isfile(file_toml): 185 | if self.settings.load_build_toml(file_toml, True): 186 | self.amb_to_pdf(file_toml, True) 187 | else: 188 | print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 build.toml 文件") 189 | else: 190 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 191 | elif sel == 41: 192 | # --- 从 PDF 提取图片 (PDF补丁丁) --- 193 | p = input("请输入 PDF 文件路径: ").strip('"').rstrip('\\/') 194 | if os.path.isfile(p) and p.lower().endswith('.pdf'): 195 | fname = os.path.split(p)[1] 196 | out_dir = os.path.join(os.path.split(p)[0], fname.split('.')[0]) 197 | self.utils.extract_pdf_to_imgs_pdfpatcher(p, out_dir) 198 | else: 199 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 200 | elif sel == 42: 201 | # --- 从 PDF 提取图片 (MuPDF) --- 202 | p = input("请输入 PDF 文件路径: ").strip('"').rstrip('\\/') 203 | if os.path.isfile(p) and p.lower().endswith('.pdf'): 204 | fname = os.path.split(p)[1] 205 | out_dir = os.path.join(os.path.split(p)[0], fname.split('.')[0]) 206 | self.utils.extract_pdf_to_imgs(p, out_dir) 207 | else: 208 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 209 | elif sel == 43: 210 | # --- 将 PDF 转换成图片 (MuPDF) --- 211 | p = input("请输入 PDF 文件路径: ").strip('"').rstrip('\\/') 212 | if os.path.isfile(p) and p.lower().endswith('.pdf'): 213 | fname = os.path.split(p)[1] 214 | out_dir = os.path.join(os.path.split(p)[0], fname.split('.')[0]) 215 | dpi = input("请输入要生成图片的 DPI(回车则默认300): ") 216 | if re.match(r'\d+$', dpi): 217 | self.utils.convert_pdf_to_imgs(p, out_dir, int(dpi)) 218 | else: 219 | self.utils.convert_pdf_to_imgs(p, out_dir) 220 | else: 221 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 222 | elif sel == 44: 223 | # --- 将 图片 合成 PDF (MuPDF) --- 224 | p = input("请输入图片所在文件夹路径(不能包含空格): ").strip('"').rstrip('\\/') 225 | if os.path.isdir(p): 226 | out_file = p+'.pdf' 227 | self.utils.combine_img_to_pdf(p, out_file) 228 | else: 229 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 230 | elif sel == 45: 231 | # --- 将 图片 合成 PDF (FreePic2Pdf) --- 232 | p = input("请输入图片所在文件夹路径(不能包含空格): ").strip('"').rstrip('\\/') 233 | if os.path.isdir(p): 234 | out_file = p+'.pdf' 235 | self.utils.combine_img_to_pdf_fp2p(p, out_file) 236 | else: 237 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 238 | elif sel == 46: 239 | # --- PDF 书签导出/导入 (FreePic2Pdf) --- 240 | file_pdf = input("请输入 PDF 文件路径: ").strip('"').rstrip('\\/') 241 | dir_bkmk = input("请输入书签文件夹路径(导出则直接回车): ").strip('"').rstrip('\\/') 242 | if os.path.isdir(dir_bkmk): 243 | self.utils.eximport_bkmk_fp2p(file_pdf, dir_bkmk, False) 244 | elif dir_bkmk is None or 
len(dir_bkmk) == 0: 245 | fname = os.path.split(file_pdf)[1] 246 | dir_bkmk = os.path.join(os.path.split(file_pdf)[0], fname.split('.')[0]+'_bkmk') 247 | self.utils.eximport_bkmk_fp2p(file_pdf, dir_bkmk) 248 | else: 249 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 250 | else: 251 | pass 252 | 253 | def _build_mdict(self): 254 | done_flg = False 255 | if self.settings.templ_choice in ('a', 'A'): 256 | """ 制作图像词典 (模板A) """ 257 | # 生成 txt 源文本 258 | make_result = ImgDictAtmpl(self).make_source_file() 259 | if make_result: 260 | file_final_txt, dir_imgs, file_dict_info = make_result 261 | # 创建输出文件夹 262 | if not os.path.exists(self.settings.dir_output): 263 | os.makedirs(self.settings.dir_output) 264 | # 生成 css 文件 265 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_atmpl) 266 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 267 | if self.settings.split_columns == 2: 268 | with open(os.path.join(self.settings.dir_lib, self.settings.css_split_2), 'r', encoding='utf-8') as fr: 269 | s = fr.read() 270 | with open(file_css, 'w', encoding='utf-8') as fw: 271 | with open(file_css_tmpl, 'r', encoding='utf-8') as fr: 272 | fw.write(fr.read().replace('/**/', s)) 273 | else: 274 | shutil.copy(file_css_tmpl, file_css) 275 | # 开始打包 276 | print('\n------------------\n开始打包……\n') 277 | done_flg = self.utils.pack_to_mdict(self.settings.dir_output, file_final_txt, file_dict_info, dir_imgs) 278 | elif self.settings.templ_choice in ('b', 'B'): 279 | """ 制作图像词典 (模板B) """ 280 | # 生成 txt 源文本 281 | make_result = ImgDictBtmpl(self).make_source_file() 282 | if make_result: 283 | file_final_txt, dir_imgs, file_dict_info = make_result 284 | # 创建输出文件夹 285 | if not os.path.exists(self.settings.dir_output): 286 | os.makedirs(self.settings.dir_output) 287 | # 生成 css 文件 288 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_btmpl) 289 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 290 | if self.settings.split_columns == 2: 291 | with open(os.path.join(self.settings.dir_lib, self.settings.css_split_2), 'r', encoding='utf-8') as fr: 292 | s = fr.read() 293 | with open(file_css, 'w', encoding='utf-8') as fw: 294 | with open(file_css_tmpl, 'r', encoding='utf-8') as fr: 295 | fw.write(fr.read().replace('/**/', s)) 296 | else: 297 | shutil.copy(file_css_tmpl, file_css) 298 | # 开始打包 299 | print('\n------------------\n开始打包……\n') 300 | done_flg = self.utils.pack_to_mdict(self.settings.dir_output, file_final_txt, file_dict_info, dir_imgs) 301 | elif self.settings.templ_choice in ('c', 'C'): 302 | """ 制作文本词典 (模板C) """ 303 | # 生成 txt 源文本 304 | make_result = TextDictCtmpl(self).make_source_file() 305 | if make_result: 306 | file_final_txt, dir_data, file_dict_info = make_result 307 | # 创建输出文件夹 308 | if not os.path.exists(self.settings.dir_output): 309 | os.makedirs(self.settings.dir_output) 310 | # 生成 css 文件 311 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_ctmpl) 312 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 313 | shutil.copy(file_css_tmpl, file_css) 314 | # 开始打包 315 | print('\n------------------\n开始打包……\n') 316 | done_flg = self.utils.pack_to_mdict(self.settings.dir_output, file_final_txt, file_dict_info, dir_data) 317 | elif self.settings.templ_choice in ('d', 'D'): 318 | """ 制作文本词典 (模板D) """ 319 | # 生成 txt 源文本 320 | make_result = TextDictDtmpl(self).make_source_file() 321 | if make_result: 322 | file_final_txt, dir_data, file_dict_info = make_result 323 | # 创建输出文件夹 324 | if 
not os.path.exists(self.settings.dir_output): 325 | os.makedirs(self.settings.dir_output) 326 | # 生成 css 文件 327 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_dtmpl) 328 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 329 | shutil.copy(file_css_tmpl, file_css) 330 | # 开始打包 331 | print('\n------------------\n开始打包……\n') 332 | done_flg = self.utils.pack_to_mdict(self.settings.dir_output, file_final_txt, file_dict_info, dir_data) 333 | if done_flg: 334 | print("\n打包完毕。" + Fore.GREEN + "\n\n恭喜, 词典已生成!" + Fore.RESET) 335 | 336 | def _restore_raw(self, xfile, outside_flg): 337 | """ 将词典还原为原材料 """ 338 | # 1.准备参数 339 | extract_flg = False 340 | dict_name = None 341 | templ_choice = None 342 | dir_input, fname = os.path.split(xfile) 343 | # 2.分析 mdx 文件 344 | tmp_restore = os.path.join(self.settings.dir_output_tmp, 'restore') 345 | if not os.path.exists(tmp_restore): 346 | os.makedirs(tmp_restore) 347 | tmp_xfile = os.path.join(tmp_restore, fname) 348 | tmp_xdir = os.path.splitext(tmp_xfile)[0] 349 | if os.path.exists(tmp_xdir): 350 | shutil.rmtree(tmp_xdir) 351 | shutil.copy(xfile, tmp_xfile) 352 | if self.utils.export_mdx(tmp_xfile): 353 | tmp_final_txt = os.path.join(tmp_xdir, fname.split('.')[0]+'.txt') 354 | else: 355 | tmp_final_txt = None 356 | # 分析 info 信息, 确定是否支持还原 357 | for f in os.listdir(tmp_xdir): 358 | fp = os.path.join(tmp_xdir, f) 359 | text = '' 360 | if fp.endswith('.info.html'): 361 | with open(fp, 'r', encoding='utf-8') as fr: 362 | pat = re.compile(r'

([^><]*?), built with AutoMdxBuilder[^><]*?based on template ([A-D])\.', flags=re.I) 363 | pat_multi = re.compile(r'([^><]*?), built with AutoMdxBuilder[^><]*?based on template ([ABD]) in (\d+) volumes\.
', flags=re.I) 364 | text = fr.read() 365 | if pat.search(text): 366 | # 符合条件, 支持还原 367 | dict_name = pat.search(text).group(1) 368 | templ_choice = pat.search(text).group(2) 369 | multi_vols_flg = False 370 | volume_num = 1 371 | text = pat.sub('', text) 372 | extract_flg = True 373 | break 374 | elif pat_multi.search(text): 375 | # (多卷)符合条件, 支持还原 376 | dict_name = pat_multi.search(text).group(1) 377 | templ_choice = pat_multi.search(text).group(2) 378 | multi_vols_flg = True 379 | volume_num = int(pat_multi.search(text).group(3)) 380 | text = pat_multi.sub('', text) 381 | extract_flg = True 382 | break 383 | # 3.开始提取 384 | if extract_flg: 385 | # 创建目标文件夹 386 | if outside_flg: 387 | out_dir = os.path.join(os.path.split(dir_input)[0], fname.split('.')[0]) + '_amb' 388 | else: 389 | out_dir = os.path.splitext(xfile)[0] + '_amb' 390 | if not os.path.exists(out_dir): 391 | os.makedirs(out_dir) 392 | # 提取 info.html 393 | if not re.match(r'\s*$', text): 394 | with open(os.path.join(out_dir, 'info.html'), 'w', encoding='utf-8') as fw: 395 | fw.write(text) 396 | # 提取 index, index_all, syns 等信息 397 | file_css = None 398 | for f in os.listdir(dir_input): 399 | if os.path.splitext(f)[1].lower() == '.css': 400 | file_css = os.path.join(dir_input, f) 401 | if tmp_final_txt: 402 | # 选择函数进行处理 403 | if templ_choice == 'A': 404 | # 模板A无备份索引, 故不保证索引顺序的精准还原 405 | ImgDictAtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name, file_css, multi_vols_flg, volume_num) 406 | elif templ_choice == 'B': 407 | ImgDictBtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name, file_css, multi_vols_flg, volume_num) 408 | elif templ_choice == 'C': 409 | TextDictCtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name) 410 | elif templ_choice == 'D': 411 | TextDictDtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name, multi_vols_flg, volume_num) 412 | else: 413 | print(Fore.RED + "ERROR: " + Fore.RESET + "还原失败") 414 | # 处理 mdd 415 | file_mdd = os.path.splitext(xfile)[0] + '.mdd' 416 | if os.path.isfile(file_mdd) and templ_choice in ('A', 'B'): 417 | dir_data = os.path.join(out_dir, "imgs") 418 | if os.path.exists(dir_data): 419 | shutil.rmtree(dir_data) 420 | self.utils.mdict(['-x', file_mdd, '-d', dir_data]) 421 | elif os.path.isfile(file_mdd) and templ_choice in ('C', 'D'): 422 | dir_data = os.path.join(out_dir, "data") 423 | if os.path.exists(dir_data): 424 | shutil.rmtree(dir_data) 425 | self.utils.mdict(['-x', file_mdd, '-d', dir_data]) 426 | else: 427 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "同路径下未找到相应的 mdd 文件, 将不会生成 imgs/data 文件夹") 428 | print(Fore.GREEN + "\n已提取原材料至目录: " + Fore.RESET + out_dir) 429 | else: 430 | print(Fore.RED + "ERROR: " + Fore.RESET + "词典并非由 AutoMdxBuilder 制作, 不支持还原") 431 | shutil.rmtree(tmp_restore) 432 | 433 | def pdf_to_amb(self, input_path, pdf_flg=True): 434 | """ 从 PDF文件/pdg文件夹 生成 amb 文件夹 """ 435 | # 0.准备路径相关 436 | dir_bkmk = os.path.join(self.settings.dir_output_tmp, 'bkmk') 437 | if os.path.exists(dir_bkmk): 438 | shutil.rmtree(dir_bkmk) 439 | os.makedirs(dir_bkmk) 440 | # 开始处理 441 | if pdf_flg: 442 | fname = os.path.split(input_path)[1] 443 | out_dir = os.path.join(os.path.split(input_path)[0], fname.split('.')[0]+'_amb') 444 | if not os.path.exists(out_dir): 445 | os.makedirs(out_dir) 446 | # 1.导出书签 447 | cur_path = os.getcwd() 448 | self.utils.eximport_bkmk_fp2p(input_path, os.path.join(cur_path, dir_bkmk)) 449 | try: 450 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'r', encoding='utf-16le') as fr: 451 | text = fr.read() 452 | 
line_num = len(re.findall(r'^', text, flags=re.M)) 453 | if line_num <= 3: 454 | print(Fore.YELLOW + "INFO: " + Fore.RESET + "未识别到目录, 将不会生成 toc.txt") 455 | else: 456 | with open(os.path.join(out_dir, 'toc.txt'), 'w', encoding='utf-8') as fw: 457 | fw.write(text) 458 | if line_num > 500: 459 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "书签超过 500 行, 请后续确认是否包含索引, 是的话建议改名为 toc_all.txt") 460 | except UnicodeDecodeError: 461 | shutil.copy(os.path.join(dir_bkmk, "FreePic2Pdf_bkmk.txt"), os.path.join(out_dir, "[utf-16]toc.txt")) 462 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "书签中存在无法识别的字符, 已输出为 utf-16 编码") 463 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'r', encoding='utf-16le') as fr: 464 | mt = re.search(r'(?<=BasePage=)(\d+)', fr.read()) 465 | if mt: 466 | body_start = mt.group(0) 467 | else: 468 | body_start = 1 469 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未识别到正文起始页码, 已设置默认值 1") 470 | # 2.生成 build.toml 471 | shutil.copy(os.path.join(self.settings.dir_lib, "build.toml"), os.path.join(out_dir, "build.toml")) 472 | with open(os.path.join(out_dir, "build.toml"), 'r+', encoding='utf-8') as fr: 473 | text = fr.read() 474 | text = re.sub(r'^templ_choice = "\w"', 'templ_choice = "A"', text, flags=re.I+re.M) 475 | text = re.sub(r'^name = "[^"]+?"', f'name = "{fname.split(".")[0]}"', text, flags=re.I+re.M) 476 | text = re.sub(r'^name_abbr = "[^"]+?"', 'name_abbr = "XXXXXX"', text, flags=re.I+re.M) 477 | text = re.sub(r'^body_start = \d+', f'body_start = {str(body_start)}', text, flags=re.I+re.M) 478 | fr.seek(0) 479 | fr.truncate() 480 | fr.write(text) 481 | # 3.导出图片 482 | if not os.path.exists(os.path.join(out_dir, 'imgs')): 483 | os.makedirs(os.path.join(out_dir, 'imgs')) 484 | self.utils.pdf_to_imgs(input_path, os.path.join(out_dir, 'imgs')) 485 | else: 486 | out_dir = input_path+'_amb' 487 | if not os.path.exists(out_dir): 488 | os.makedirs(out_dir) 489 | # 1.pdg 转 img 490 | if not os.path.exists(os.path.join(out_dir, 'imgs')): 491 | os.makedirs(os.path.join(out_dir, 'imgs')) 492 | print(os.path.join(out_dir, 'imgs')) 493 | self.utils.convert_pdg_to_img(input_path, os.path.join(out_dir, 'imgs')) 494 | # 2.识别词典信息 495 | bkmk_itf = os.path.join(os.path.join(out_dir, 'imgs'), 'FreePic2Pdf.itf') 496 | if os.path.isfile(bkmk_itf): 497 | with open(bkmk_itf, 'r', encoding='utf-16le') as fr: 498 | text = fr.read() 499 | mt_body_start = re.search(r'(?<=TextPage=)(\d+)', text) 500 | mt_name = re.search(r'(?<=Title=)(.+)', text) 501 | if mt_body_start: 502 | body_start = mt_body_start.group(0) 503 | else: 504 | body_start = 1 505 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未识别到正文起始页码, 已设置默认值 1") 506 | if mt_name: 507 | name = mt_name.group(0) 508 | else: 509 | name = os.path.split(input_path)[1] 510 | os.remove(bkmk_itf) 511 | else: 512 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未识别到书籍信息") 513 | # 3.生成 build.toml 514 | shutil.copy(os.path.join(self.settings.dir_lib, "build.toml"), os.path.join(out_dir, "build.toml")) 515 | with open(os.path.join(out_dir, "build.toml"), 'r+', encoding='utf-8') as fr: 516 | text = fr.read() 517 | text = re.sub(r'^templ_choice = "\w"', 'templ_choice = "A"', text, flags=re.I+re.M) 518 | text = re.sub(r'^name = "[^"]+?"', f'name = "{name}"', text, flags=re.I+re.M) 519 | text = re.sub(r'^name_abbr = "[^"]+?"', 'name_abbr = "XXXXXX"', text, flags=re.I+re.M) 520 | text = re.sub(r'^body_start = \d+', f'body_start = {str(body_start)}', text, flags=re.I+re.M) 521 | fr.seek(0) 522 | fr.truncate() 523 | fr.write(text) 524 | shutil.rmtree(dir_bkmk) 525 | 
print(Fore.GREEN + "\n\n预备原材料生成完毕!" + Fore.RESET) 526 | 527 | def amb_to_pdf(self, file_toml, outside_flg): 528 | """ 从 amb 文件夹合成 PDF 文件 """ 529 | # 0.准备路径相关 530 | dir_amb = os.path.split(file_toml)[0] 531 | if outside_flg: 532 | out_file = os.path.join(os.path.split(dir_amb)[0], self.settings.name+'.pdf') 533 | else: 534 | out_file = os.path.join(dir_amb, self.settings.name+'.pdf') 535 | # 准备临时书签文件夹 536 | dir_bkmk_bk = os.path.join(self.settings.dir_lib, 'bkmk') 537 | dir_bkmk = os.path.join(self.settings.dir_output_tmp, 'bkmk') 538 | if os.path.exists(dir_bkmk): 539 | shutil.rmtree(dir_bkmk) 540 | os.makedirs(dir_bkmk) 541 | # 1.生成临时书签 542 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'w', encoding='utf-8') as fw: 543 | with open(os.path.join(dir_bkmk_bk, 'FreePic2Pdf.itf'), 'r+', encoding='utf-8') as fr: 544 | text = re.sub(r'(?<=BasePage=|TextPage=)\d+', str(self.settings.body_start[0]), fr.read()) 545 | fw.write(text) 546 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'w', encoding='utf-8') as fw: 547 | fw.write('正文\t1\n') 548 | toc_flg = False 549 | for fname in os.listdir(dir_amb): 550 | if fname in ('toc.txt', 'toc_all.txt'): 551 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'w', encoding='utf-8') as fw: 552 | with open(os.path.join(dir_amb, fname), 'r', encoding='utf-8') as fr: 553 | text = fr.read() 554 | fw.write(text) 555 | toc_flg = True 556 | break 557 | elif fname == 'index_all.txt': 558 | toc_tmp = os.path.join(self.settings.dir_output_tmp, 'toc_all.txt') 559 | if self.func.index_all_to_toc(os.path.join(dir_amb, fname), toc_tmp): 560 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'w', encoding='utf-8') as fw: 561 | with open(toc_tmp, 'r', encoding='utf-8') as fr: 562 | text = fr.read() 563 | fw.write(text) 564 | toc_flg = True 565 | break 566 | else: 567 | pass 568 | if not toc_flg: 569 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未找到 toc.txt/toc_all.txt/index_all.txt, 生成的 PDF 将不带书签") 570 | # 2.将图片合成PDF 571 | if os.path.isdir(os.path.join(dir_amb, 'imgs')): 572 | self.utils.combine_img_to_pdf_fp2p(os.path.join(dir_amb, 'imgs'), out_file) 573 | # 3.给PDF挂书签 574 | cur_path = os.getcwd() 575 | self.utils.eximport_bkmk_fp2p(out_file, os.path.join(cur_path, dir_bkmk), False) 576 | shutil.rmtree(dir_bkmk) 577 | print(Fore.GREEN + "\n\nPDF生成完毕!" 
+ Fore.RESET) 578 | else: 579 | print(Fore.RED + "ERROR: " + Fore.RESET + "未找到 imgs 文件夹") 580 | 581 | 582 | def print_menu(): 583 | """ 打印选单 """ 584 | # 功能选单 585 | print("\n(〇) 打包/解包") 586 | print(Fore.CYAN + " 1" + Fore.RESET + ".解包 mdx/mdd 文件") 587 | print(Fore.CYAN + " 2" + Fore.RESET + ".将源 txt 文件打包成 mdx 文件") 588 | print(Fore.CYAN + " 3" + Fore.RESET + ".将资料包文件夹打包成 mdd 文件") 589 | print("\n(一) 准备原材料") 590 | print(Fore.CYAN + " 10" + Fore.RESET + ".从 PDF文件/pdg文件夹 生成预备原材料" + Fore.YELLOW + " (还需手动检查完善)" + Fore.RESET) 591 | print(Fore.CYAN + " 11" + Fore.RESET + ".toc_all 和 index_all 互转") 592 | print(Fore.CYAN + " 12" + Fore.RESET + ".合并 toc 和 index 为 index_all") 593 | print(Fore.CYAN + " 13" + Fore.RESET + ".索引扩充(通过标点符号等分词), 提升查得率") 594 | print(Fore.CYAN + " 14" + Fore.RESET + ".繁体简体 txt 文本文件互转") 595 | print("\n(二) 制作词典") 596 | print(Fore.CYAN + " 20" + Fore.RESET + ".生成词典" + Fore.YELLOW + " (需准备好原材料)" + Fore.RESET) 597 | print("\n(三) 还原词典") 598 | print(Fore.CYAN + " 30" + Fore.RESET + ".从词典还原原材料" + Fore.YELLOW + " (仅支持 AMB 1.4 以上版本)" + Fore.RESET) 599 | print(Fore.CYAN + " 31" + Fore.RESET + ".从原材料还原 PDF") 600 | print("\n(四) 其他工具") 601 | print(Fore.CYAN + " 41" + Fore.RESET + ".从 PDF 提取图片 (PDF补丁丁)") 602 | print(Fore.CYAN + " 42" + Fore.RESET + ".从 PDF 提取图片 (MuPDF)") 603 | print(Fore.CYAN + " 43" + Fore.RESET + ".将 PDF 转换成图片 (MuPDF)") 604 | print(Fore.CYAN + " 44" + Fore.RESET + ".将 图片 合成PDF (MuPDF)") 605 | print(Fore.CYAN + " 45" + Fore.RESET + ".将 图片 合成PDF (FreePic2Pdf)") 606 | print(Fore.CYAN + " 46" + Fore.RESET + ".PDF书签导出/导入 (FreePic2Pdf)") 607 | 608 | 609 | def main(): 610 | # 程序开始 611 | amb = AutoMdxBuilder() 612 | print(Fore.CYAN + f"欢迎使用 AutoMdxBuilder {amb.settings.version}, 下面是功能选单:" + Fore.RESET) 613 | while True: 614 | print_menu() 615 | sel = input('\n请输入数字(回车或“0”退出程序): ') 616 | # 执行选择 617 | if re.match(r'\d+$', sel) and int(sel) in range(1, 50): 618 | print('\n------------------') 619 | amb.auto_processing(int(sel)) 620 | print('\n\n------------------------------------') 621 | # 判断是否继续 622 | ctn = input(Fore.CYAN + "回车退出程序, 或输入 Y/y 继续使用 AMB: " + Fore.RESET) 623 | if ctn not in ['Y', 'y']: 624 | break 625 | else: 626 | break 627 | 628 | 629 | if __name__ == '__main__': 630 | just_fix_windows_console() 631 | # logging.basicConfig(format='%(asctime)s | %(message)s', filename=tmp_set.file_log, filemode='w', level=logging.INFO) 632 | try: 633 | main() 634 | # logging.info('The program worked fine.') 635 | except: 636 | # logging.error(traceback.format_exc()) 637 | print(traceback.format_exc()) 638 | print(Fore.RED + "ERROR: " + Fore.RESET + "由于上述原因, 程序已中止运行") 639 | print('\n\n------------------------------------') 640 | input("回车退出程序:") 641 | -------------------------------------------------------------------------------- /ebook_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-15 18:43:07 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | import os 9 | import re 10 | import shutil 11 | import time 12 | from colorama import Fore 13 | # import codecs 14 | from pywinauto.application import Application 15 | from pywinauto.keyboard import send_keys 16 | from pywinauto.timings import Timings 17 | from PIL import Image 18 | import sys 19 | from mdict_utils.__main__ import run as mdict_cmd 20 | # import fitz 21 | # from fitz.__main__ import main as fitz_command 22 | 23 | 24 | class EbookUtils: 25 | """ 
电子书(PDF等)实用工具 """ 26 | def __init__(self, amb): 27 | self.settings = amb.settings 28 | 29 | # ========== (〇) mdict-utils ========== 30 | def mdict(self, parms): 31 | """ 执行 mdict-utils 程序 """ 32 | saved_parms = sys.argv[1:] 33 | sys.argv[1:] = parms 34 | mdict_cmd() 35 | sys.argv[1:] = saved_parms 36 | 37 | def export_mdx(self, mfile): 38 | """ 解包 mdx/mdd (取代 MdxExport.exe) """ 39 | done_flg = True 40 | if os.path.isfile(mfile) and mfile.endswith('.mdx'): 41 | out_dir = os.path.splitext(mfile)[0] 42 | self.mdict(['-x', mfile, '-d', out_dir]) 43 | for fname in os.listdir(out_dir): 44 | fp = os.path.join(out_dir, fname) 45 | if os.path.isfile(fp) and ('description' in fname.split('.')): 46 | fp_new = fp.replace('.description', '.info').replace('.mdx', '') 47 | os.rename(fp, fp_new) 48 | elif os.path.isfile(fp): 49 | fp_new = fp.replace('.mdx', '') 50 | os.rename(fp, fp_new) 51 | # 分析 info 信息, 确定是否支持词条顺序的还原 52 | order_flg = False 53 | for f in os.listdir(out_dir): 54 | fp = os.path.join(out_dir, f) 55 | text = '' 56 | if fp.endswith('.info.html'): 57 | with open(fp, 'r', encoding='utf-8') as fr: 58 | if re.search(r'

[^><]*?, (packed|built) with AutoMdxBuilder[^><]*?\.
', fr.read(), flags=re.I): 59 | # 符合条件, 支持词条顺序的还原 60 | order_flg = True 61 | break 62 | if order_flg: 63 | # 按编号精准还原源 txt 64 | xname = os.path.split(mfile)[1] 65 | file_final_txt = os.path.join(out_dir, xname.split('.')[0]+'.txt') 66 | entries = [] 67 | eid = '99999999' 68 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 69 | text = '' 70 | for line in fr: 71 | if re.match(r'', line): 72 | eid = re.match(r'', line).group(1) 73 | elif not re.match(r'\s*$', line): 74 | text += line 75 | else: 76 | text += line 77 | entries.append({"eid": eid, "text": text}) 78 | eid = '99999999' 79 | text = '' 80 | if eid != '': 81 | entries.sort(key=lambda x: x["eid"], reverse=False) 82 | with open(file_final_txt, 'w', encoding='utf-8') as fw: 83 | for entry in entries: 84 | fw.write(entry["text"]) 85 | else: 86 | print(Fore.YELLOW + "INFO: " + Fore.RESET + "检测到词典并非由 AMB 生成, 不保证词条顺序的准确还原") 87 | elif os.path.isfile(mfile) and mfile.endswith('.mdd'): 88 | cur_dir, mname = os.path.split(mfile) 89 | out_dir = os.path.join(os.path.splitext(mfile)[0], 'data') 90 | if os.path.exists(out_dir): 91 | shutil.rmtree(out_dir) 92 | # 检查是否存在 mdd 分包 93 | multi_mdd_flg = False 94 | mdd_names = [mname] 95 | for fname in os.listdir(cur_dir): 96 | if re.search(r'\.\d+\.mdd$', fname.lower()): 97 | multi_mdd_flg = True 98 | mdd_names.append(fname) 99 | # 按检查结果区分处理 100 | if multi_mdd_flg and input('检查到目录下存在 mdd 分包, 是否全部解包 (Y/N): ') in ('Y', 'y'): 101 | mdd_names = list(set(mdd_names)) 102 | mdd_names.sort() 103 | for mdd_name in mdd_names: 104 | print(f"开始解压 '{mdd_name}' :\n") 105 | self.mdict(['-x', os.path.join(cur_dir, mdd_name), '-d', out_dir]) 106 | else: 107 | self.mdict(['-x', mfile, '-d', out_dir]) 108 | else: 109 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 110 | done_flg = False 111 | return done_flg 112 | 113 | def pack_to_mdict(self, dir_output, file_final_txt, file_dict_info, dir_data): 114 | """ 打包 mdx/mdd (取代 MdxBuilder.exe) """ 115 | mdx_flg = True 116 | mdd_flg = True 117 | # 打包 mdx 118 | print('正在生成 mdx 文件……\n') 119 | ftitle = os.path.join(dir_output, os.path.splitext(os.path.split(file_final_txt)[1])[0]) 120 | if os.path.exists(file_final_txt) and os.path.exists(file_dict_info): 121 | # 给词条添加编号信息 122 | tmp_final_txt = os.path.join(os.path.join(self.settings.dir_bundle, '_tmp'), 'tmp_final.txt') 123 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 124 | with open(tmp_final_txt, 'w', encoding='utf-8') as fw: 125 | n = 0 126 | link_flg = False 127 | for line in fr: 128 | if re.match(r'@@@LINK=', line, flags=re.I): 129 | link_flg = True 130 | if (not link_flg) and re.match(r'\s*$', line): 131 | n += 1 132 | fw.write(f'\n') 133 | link_flg = False 134 | fw.write(line) 135 | self.mdict(['--description', file_dict_info, '--encoding', 'utf-8', '-a', tmp_final_txt, ftitle+'.mdx']) 136 | else: 137 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件 {file_final_txt} 或 {file_dict_info} 不存在") 138 | mdx_flg = False 139 | # 打包 mdd 140 | if dir_data is not None: 141 | mdd_flg = self.pack_to_mdd(dir_data, ftitle) 142 | if mdx_flg and mdd_flg: 143 | return True 144 | else: 145 | return False 146 | 147 | def pack_to_mdd(self, dir_data, ftitle): 148 | """ 仅打包 mdd (取代 MdxBuilder.exe) """ 149 | done_flg = True 150 | pack_flg = True 151 | if ftitle is None: 152 | ftitle = dir_data 153 | # 判断是否打包 154 | if os.path.exists(dir_data) and len(os.listdir(dir_data)) > 0: 155 | if os.path.exists(ftitle+'.mdd'): 156 | a = input(f'文件 "{ftitle}.mdd" 已存在, 是否重新打包 mdd (Y/N): ') 157 | if a not in ('Y', 'y'): 158 | pack_flg = 
False 159 | else: 160 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件夹 {dir_data} 不存在或为空") 161 | pack_flg = False 162 | done_flg = False 163 | # 开始打包 164 | if pack_flg: 165 | print('正在生成 mdd 文件……\n') 166 | # 检查子文件夹的数量 167 | sub_dirs = [] 168 | for item in os.listdir(dir_data): 169 | if os.path.isdir(os.path.join(dir_data, item)): 170 | sub_dirs.append(os.path.join(dir_data, item)) 171 | # 如果有2个子文件夹以上, 再计算子文件夹大小, 如果大小超过 1.5G, 将分包 172 | split_flg = False 173 | size_sum = 0 174 | if len(sub_dirs) > 1: 175 | # 判断子文件夹大小 176 | for sub_dir in sub_dirs: 177 | for fname in os.listdir(sub_dir): 178 | if os.path.isfile(os.path.join(sub_dir, fname)): 179 | size_sum += os.path.getsize(os.path.join(sub_dir, fname)) 180 | if size_sum > 1536000000: 181 | split_flg = True 182 | break 183 | # 按检查结果开始处理 184 | if split_flg: 185 | size_sum = 0 186 | print(Fore.YELLOW + "INFO: " + Fore.RESET + "资料文件夹超过 1.5G, 将自动分包") 187 | # 创建临时文件夹 188 | tmp_dir = os.path.join(os.path.split(dir_data)[0], '_packing') 189 | if not os.path.exists(tmp_dir): 190 | os.makedirs(tmp_dir) 191 | pack_list = [] 192 | pack = [] 193 | n = 0 194 | # 对每个子文件夹作判断 195 | for i in range(len(sub_dirs)): 196 | for fname in os.listdir(sub_dirs[i]): 197 | if os.path.isfile(os.path.join(sub_dirs[i], fname)): 198 | size_sum += os.path.getsize(os.path.join(sub_dirs[i], fname)) 199 | if size_sum > 1024000000: 200 | size_sum = 0 201 | pack.append(sub_dirs[i]) 202 | pack_list.append(pack) 203 | pack = [] 204 | break 205 | pack.append(sub_dirs[i]) 206 | n = i 207 | # 1.打包子文件夹 208 | mdd_rk = 0 209 | for sds in pack_list: 210 | for sd in sds: 211 | # 移动到临时文件夹中 212 | os.rename(sd, os.path.join(tmp_dir, os.path.split(sd)[1])) 213 | # 移完之后打包 214 | if mdd_rk == 0: 215 | self.mdict(['-a', tmp_dir, ftitle+'.mdd']) 216 | else: 217 | self.mdict(['-a', tmp_dir, f'{ftitle}.{str(mdd_rk)}.mdd']) 218 | # 打包完再移回去 219 | for fname in os.listdir(tmp_dir): 220 | os.rename(os.path.join(tmp_dir, fname), os.path.join(dir_data, fname)) 221 | mdd_rk += 1 222 | # 1.打包剩余部分 223 | # 移动文件夹部分(如果有) 224 | if n == len(sub_dirs) - 1: 225 | for sd in pack: 226 | os.rename(sd, os.path.join(tmp_dir, os.path.split(sd)[1])) 227 | # 移动文件部分(如果有) 228 | for item in os.listdir(dir_data): 229 | if not os.path.isdir(os.path.join(dir_data, item)): 230 | os.rename(os.path.join(dir_data, item), os.path.join(tmp_dir, item)) 231 | # 打包 232 | if len(os.listdir(tmp_dir)) == 0: 233 | pass 234 | else: 235 | self.mdict(['-a', tmp_dir, f'{ftitle}.{str(mdd_rk)}.mdd']) 236 | # 移回去 237 | for fname in os.listdir(tmp_dir): 238 | os.rename(os.path.join(tmp_dir, fname), os.path.join(dir_data, fname)) 239 | # 删除临时文件夹 240 | if os.path.exists(tmp_dir): 241 | os.rmdir(tmp_dir) 242 | else: 243 | self.mdict(['-a', dir_data, ftitle+'.mdd']) 244 | return done_flg 245 | 246 | # ========== (一) From PDF to Images ========== 247 | def pdf_to_imgs(self, file_pdf, dir_out): 248 | """ 自动判断文字版/图片版PDF, 并选择最优方法导出图像 """ 249 | # 准备环境 250 | file_exe = os.path.join(os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'MuPDF'), 'mutool.exe') 251 | dir_tmp = os.path.join(self.settings.dir_bundle, '_tmp') 252 | if not os.path.exists(dir_tmp): 253 | os.makedirs(dir_tmp) 254 | dir_tmp_mp = os.path.join(dir_tmp, 'MuPDF_tmp') 255 | if not os.path.exists(dir_tmp_mp): 256 | os.makedirs(dir_tmp_mp) 257 | tmp_txt = os.path.join(dir_tmp_mp, 'text.txt') 258 | # 判断是文字版还是图片版PDF 259 | img_pdf_flg = True 260 | os.system(f'{file_exe} draw -o {tmp_txt} -F text "{file_pdf}" 2-11') 261 | with open(tmp_txt, 'r', encoding='utf-8') as fr: 262 | word = 
re.sub(r'[\r\n\s]', '', fr.read()) 263 | if len(word) > 50: 264 | img_pdf_flg = False 265 | # 开始处理 266 | if img_pdf_flg: 267 | self.extract_pdf_to_imgs_pdfpatcher(file_pdf, dir_out) 268 | else: 269 | self.convert_pdf_to_imgs(file_pdf, dir_out) 270 | shutil.rmtree(dir_tmp_mp) 271 | 272 | def convert_pdf_to_imgs(self, file_pdf, dir_out, dpi=300): 273 | """ 使用 mutool.exe 按 DPI 参数转换成图片 (推荐用于文字版PDF) """ 274 | # 准备文件夹 275 | file_exe = os.path.join(os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'MuPDF'), 'mutool.exe') 276 | if not os.path.exists(dir_out): 277 | os.makedirs(dir_out) 278 | file_png = os.path.join(dir_out, '%06d.png') 279 | # 开始转换 280 | os.system(f'{file_exe} draw -o "{file_png}" -F png -r {str(dpi)} "{file_pdf}"') 281 | print('转换完成!') 282 | 283 | # def convert_pdf_to_imgs_fitz(self, file_pdf, dir_out, dpi=300): 284 | # """ 使用 fitz(mupdf), 按 DPI 等参数转换成图片 """ 285 | # # 读取 pdf 286 | # doc = fitz.open(file_pdf) 287 | # mat = fitz.Matrix(1, 1) 288 | # count = 0 289 | # for p in doc: 290 | # count += 1 291 | # # 开始导出 292 | # if not os.path.exists(dir_out): 293 | # os.makedirs(dir_out) 294 | # print('转换中……') 295 | # for i in range(count): 296 | # fname = f"{str(i+1).zfill(8)}.png" 297 | # page = doc.load_page(i) 298 | # pix = page.get_pixmap(matrix=mat, dpi=dpi, colorspace=fitz.csGRAY, alpha=False) 299 | # pix.save(os.path.join(dir_out, fname)) 300 | # doc.close() 301 | # print('转换完成!') 302 | 303 | def extract_pdf_to_imgs(self, file_pdf, dir_out): 304 | """ Extracting images with mutool.exe (Windows only) """ 305 | # 1.extract to tmp folder 306 | file_exe = os.path.join(os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'MuPDF'), 'mutool.exe') 307 | dir_tmp = os.path.join(self.settings.dir_bundle, '_tmp') 308 | if not os.path.exists(dir_tmp): 309 | os.makedirs(dir_tmp) 310 | dir_tmp_me = os.path.join(dir_tmp, 'MuPDF_extract') 311 | if not os.path.exists(dir_tmp_me): 312 | os.makedirs(dir_tmp_me) 313 | os.chdir(dir_tmp_me) 314 | os.system(f'{file_exe} extract "{file_pdf}"') 315 | os.chdir(self.settings.dir_bundle) 316 | # 2.remove to destination 317 | imgs = [] 318 | for fname in os.listdir(dir_tmp_me): 319 | ext = os.path.splitext(fname)[1].lower() 320 | if ext in self.settings.img_exts: 321 | imgs.append({"path": os.path.join(dir_tmp_me, fname), "ext": ext}) 322 | if not os.path.exists(dir_out): 323 | os.makedirs(dir_out) 324 | imgs.sort(key=lambda x: x["path"], reverse=False) 325 | n = 0 326 | for img in imgs: 327 | n += 1 328 | os.rename(img["path"], os.path.join(dir_out, str(n).zfill(6)+img["ext"])) 329 | shutil.rmtree(dir_tmp_me) 330 | print('提取完成!') 331 | 332 | def extract_pdf_to_imgs_pdfpatcher(self, file_pdf, dir_out): 333 | """ Extracting images with PDFPatcher.exe (Windows only) """ 334 | # 0.配置程序选项 335 | dir_program = os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'PDFPatcher') 336 | file_conf_bak = os.path.join(self.settings.dir_lib, 'PDFPatcher_AppConfig.json') 337 | file_conf = os.path.join(dir_program, 'AppConfig.json') 338 | shutil.copy(file_conf_bak, file_conf) 339 | # 1.启动 PDFPatcher 程序, 配置提取选项 340 | Timings.fast() 341 | app = Application(backend='win32').start(os.path.join(dir_program, 'PDFPatcher.exe')) 342 | dlg_main = app.window(title_re='.*PDF.*补丁丁') 343 | dlg_main.wait('ready', timeout=10) 344 | send_keys('%{g}tt') 345 | dlg_extract = dlg_main 346 | dlg_extract.wait('ready', timeout=2).children()[38].set_text(file_pdf) 347 | dlg_extract.wait('ready', timeout=2).children()[33].set_text(dir_out) 348 | # 2.开始提取 349 | 
dlg_extract.wait('ready', timeout=2).children()[6].click() 350 | time.sleep(0.2) 351 | # print(dlg_extract.children()[52].GetProperties()) 352 | while True: 353 | if '返回' in dlg_extract.children()[52].texts(): 354 | dlg_extract.children()[52].click() 355 | app.kill() 356 | break 357 | else: 358 | time.sleep(0.2) 359 | print('提取完成!') 360 | 361 | # def extract_pdf_to_imgs_fitz(self, file_pdf, dir_out): 362 | # """ 使用 fitz(mupdf), 如果生成了JBIG2加密的 jb2,则还需要使用 jbig2dec 解密成 png """ 363 | # # 准备参数 364 | # cmd = ['extract', str(file_pdf), '-images', '-output', str(dir_out)] 365 | # saved_parms = sys.argv[1:] 366 | # sys.argv[1:] = cmd 367 | # # 开始导出 368 | # if not os.path.exists(dir_out): 369 | # os.makedirs(dir_out) 370 | # print('提取中……') 371 | # fitz_command() 372 | # sys.argv[1:] = saved_parms 373 | # print('提取完成!') 374 | 375 | # ========== (二) From Images to PDF ========== 376 | def combine_img_to_pdf(self, dir_imgs, file_pdf): 377 | """ use mutool.exe to combine images to pdf file (Windows only) """ 378 | # prepare paths 379 | file_exe = os.path.join(os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'MuPDF'), 'mutool.exe') 380 | dir_tmp = os.path.join(self.settings.dir_bundle, '_tmp') 381 | if not os.path.exists(dir_tmp): 382 | os.makedirs(dir_tmp) 383 | dir_pcs = os.path.join(dir_tmp, 'MuPDF_pcs') 384 | dir_pdf_frag = os.path.join(dir_tmp, 'MuPDF_pdf_frag') 385 | dir_pdf_merge = os.path.join(dir_tmp, 'MuPDF_pdf_merge') 386 | if not os.path.exists(dir_pcs): 387 | os.makedirs(dir_pcs) 388 | if not os.path.exists(dir_pdf_frag): 389 | os.makedirs(dir_pdf_frag) 390 | if not os.path.exists(dir_pdf_merge): 391 | os.makedirs(dir_pdf_merge) 392 | file_pcs = os.path.join(self.settings.dir_lib, 'MuPDF_pcs.txt') 393 | # read image files to get sizes 394 | imgs = [] 395 | for fname in os.listdir(dir_imgs): 396 | fp = os.path.join(dir_imgs, fname) 397 | if os.path.splitext(fp)[1].lower() in self.settings.img_exts: 398 | img = { 399 | "fname": fname, 400 | "path": fp, 401 | "size": Image.open(fp).size 402 | } 403 | imgs.append(img) 404 | imgs.sort(key=lambda x: x["fname"], reverse=False) 405 | # generate pcs(Page content streams) txt file 406 | with open(file_pcs, 'r', encoding='utf-8') as fr: 407 | text = fr.read() 408 | page_num = 0 409 | txts = [] 410 | for img in imgs: 411 | page_num += 1 412 | pcs = text.replace('', str(page_num).zfill(6)) 413 | pcs = pcs.replace('', img["path"]) 414 | pcs = pcs.replace('', str(img["size"][0])) 415 | pcs = pcs.replace('', str(img["size"][1])) 416 | txt = os.path.join(dir_pcs, str(page_num).zfill(6)+'.txt') 417 | with open(txt, 'w', encoding='utf-8') as fw: 418 | fw.write(pcs) 419 | txts.append(txt) 420 | # start to create pdf fragments 421 | pdfs = [] 422 | n, k, step = 1, 1, 20 423 | total_step = int(page_num/step + 1) 424 | while k <= total_step: 425 | pcs_str = '' 426 | bound = k*step 427 | while n <= min(bound, page_num): 428 | pcs_str = pcs_str + ' ' + txts[n-1] 429 | n += 1 430 | tmp_pdf = os.path.join(dir_pdf_frag, str(k).zfill(3)+'.pdf') 431 | os.system(f'{file_exe} create -o {tmp_pdf} -O compress-images {pcs_str}') 432 | print(f'[{str(min(n,page_num))}/{str(page_num)}]PDF合成中') 433 | pdfs.append(tmp_pdf) 434 | k += 1 435 | # merge fragments 436 | pdf_str = '' 437 | file_num = len(pdfs) 438 | n, k, step = 1, 1, 10 439 | total_step = int(file_num/step + 1) 440 | while k <= total_step: 441 | merge_str = '' 442 | bound = k*step 443 | while n <= min(bound, file_num): 444 | merge_str = merge_str + ' ' + pdfs[n-1] 445 | n += 1 446 | tmp_pdf = 
os.path.join(dir_pdf_merge, str(k).zfill(2)+'.pdf') 447 | os.system(f'{file_exe} merge -o {tmp_pdf} {merge_str}') 448 | pdf_str = pdf_str + ' ' + tmp_pdf 449 | k += 1 450 | # output final single file 451 | os.system(f'{file_exe} merge -o "{file_pdf}" {pdf_str}') 452 | shutil.rmtree(dir_pcs) 453 | shutil.rmtree(dir_pdf_frag) 454 | shutil.rmtree(dir_pdf_merge) 455 | print('合成完成!') 456 | 457 | def combine_img_to_pdf_fp2p(self, dir_imgs, file_pdf): 458 | """ 使用 FreePic2Pdf.exe 图像合成 pdf """ 459 | # 0.配置转换选项, 设定图像文件夹 460 | dir_program = os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'FreePic2Pdf') 461 | file_ini_bak = os.path.join(self.settings.dir_lib, 'FreePic2Pdf.ini') 462 | file_ini = os.path.join(dir_program, 'FreePic2Pdf.ini') 463 | with open(file_ini_bak, 'r', encoding='utf-16le') as fr: 464 | para_item = 'PARA_DIR_SRC='+dir_imgs.replace('\\', '\\\\') 465 | text = re.sub(r'^PARA_DIR_SRC=.+$', para_item, fr.read(), flags=re.M) 466 | with open(file_ini, 'w', encoding='utf-16le') as fw: 467 | fw.write(text) 468 | # 1.启动 FreePic2Pdf 程序 469 | Timings.fast() 470 | app = Application(backend='win32').start(os.path.join(dir_program, 'FreePic2Pdf.exe')) 471 | dlg_main = app.FreePic2Pdf 472 | # 2.设定输出 pdf 文件路径 473 | dlg_main.wait('ready', timeout=10).children()[32].set_edit_text(file_pdf) 474 | # 3.开始合成 pdf 475 | dlg_main.children()[20].click() # 点击执行 476 | while True: 477 | if app.window(title='FreePic2Pdf', predicate_func=lambda dlg: len(dlg.children()) == 3).exists(): 478 | app.window( 479 | title='FreePic2Pdf', predicate_func=lambda dlg: len(dlg.children()) == 3 480 | ).wait('ready', timeout=2).children()[0].click() 481 | app.kill() 482 | break 483 | else: 484 | time.sleep(0.2) 485 | print('PDF 生成完毕!') 486 | 487 | # ========== (三) From Other Formats to Images ========== 488 | def convert_pdg_to_img(self, dir_pdg, dir_out): 489 | """ 使用 Pdg2Pic.exe 转换 pdgs 为 imgs """ 490 | # 0.配置转换选项, 设定输出文件夹 491 | dir_program = os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'Pdg2Pic') 492 | file_ini_bak = os.path.join(self.settings.dir_lib, 'Pdg2Pic.ini') 493 | file_ini = os.path.join(dir_program, 'Pdg2Pic.ini') 494 | with open(file_ini_bak, 'r', encoding='utf-16le') as fr: 495 | para_item = 'PARA_DIR_TGT='+dir_out.replace('\\', '\\\\') 496 | text = re.sub(r'^PARA_DIR_TGT=.+$', para_item, fr.read(), flags=re.M) 497 | with open(file_ini, 'w', encoding='utf-16le') as fw: 498 | fw.write(text) 499 | # 1.启动 Pdg2Pic 程序 500 | Timings.fast() 501 | app = Application(backend='win32').start(os.path.join(dir_program, 'Pdg2Pic.exe')) 502 | dlg_main = app.Pdg2Pic 503 | # 2.读取输入的 PDG 文件夹 504 | dlg_main.wait('ready', timeout=10).children()[3].click() # 打开文件夹选择框 505 | dlg_sel = app.window(title=u'选择存放PDG文件的文件夹') 506 | dlg_sel.wait('ready', timeout=5).children()[6].set_text(dir_pdg) 507 | dlg_sel.children()[9].click() 508 | app.window(title=u'格式统计').wait('ready', timeout=3).children()[0].click() 509 | # dlg_sum = app.window(title=u'格式统计').wait('ready', timeout=3) 510 | # while True: 511 | # if 'OK' in dlg_sum.children()[0].texts(): 512 | # dlg_sum.children()[0].click() 513 | # break 514 | # else: 515 | # time.sleep(0.05) 516 | # 3.开始转换 517 | while True: 518 | if not app.window(title=u'格式统计').exists(): 519 | dlg_main.children()[0].click() # 点击执行 520 | break 521 | else: 522 | time.sleep(0.05) 523 | while True: 524 | if app.window(title='Pdg2Pic', predicate_func=lambda dlg: len(dlg.children()) == 3).exists(): 525 | app.window( 526 | title='Pdg2Pic', predicate_func=lambda dlg: len(dlg.children()) == 3 
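                    # Note: the finished prompt is told apart from the Pdg2Pic main window purely by its
                    # child-control count of 3 (presumably an icon, the message text and an OK button;
                    # this is inferred from the predicate above rather than from any Pdg2Pic documentation),
                    # and children()[0] on the following line is assumed to be that OK button.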
527 | ).wait('ready', timeout=2).children()[0].click() 528 | app.kill() 529 | break 530 | else: 531 | time.sleep(0.2) 532 | print('转换完成!') 533 | 534 | # ========== (四) PDF Bookmark Management ========== 535 | def eximport_bkmk_fp2p(self, file_pdf, dir_bkmk, export_flg=True): 536 | """ 使用 FreePic2Pdf.exe 向/从 pdf 文件中导入/导出书签 """ 537 | dir_program = os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'FreePic2Pdf') 538 | # 1.启动 FreePic2Pdf 程序 539 | Timings.fast() 540 | app = Application(backend='win32').start(os.path.join(dir_program, 'FreePic2Pdf.exe')) 541 | dlg_main = app.FreePic2Pdf 542 | dlg_main.wait('ready', timeout=10).children()[30].click() # 点击进入书签导入/导出窗口 543 | dlg_iebkmk = app.window(title=u'Import/Export PDF Bookmark') 544 | if export_flg: 545 | dlg_iebkmk.wait('ready', timeout=5).children()[26].select(1) # 切换到书签导出栏 546 | # 2.选定 pdf 文件 547 | time.sleep(0.1) 548 | dlg_iebkmk.children()[4].click() # 打开文件选择框 549 | dlg_sel_pdf = app.window(title=u'Select File') 550 | dlg_sel_pdf.wait('ready', timeout=5).children()[12].set_text(file_pdf) 551 | dlg_sel_pdf.children()[16].click() # 选中待处理的 pdf 文件 552 | # 3.选定书签文件夹 553 | if not os.path.exists(dir_bkmk): 554 | os.makedirs(dir_bkmk) 555 | while True: 556 | if not app.window(title=u'Select File').exists(): 557 | break 558 | else: 559 | time.sleep(0.05) 560 | dlg_iebkmk.children()[9].click() # 打开文件夹选择框 561 | dlg_sel_folder = app.window(title=u'Source Folder') 562 | dlg_sel_folder.wait('ready', timeout=5).children()[6].set_edit_text(dir_bkmk) 563 | dlg_sel_folder.children()[9].click() 564 | # 3.开始导入/导出 565 | while True: 566 | if not app.window(title=u'Source Folder').exists(): 567 | dlg_iebkmk.children()[0].click() # 点击执行 568 | break 569 | else: 570 | time.sleep(0.05) 571 | while True: 572 | if app.window(title='FreePic2Pdf', predicate_func=lambda dlg: len(dlg.children()) == 3).exists(): 573 | app.window( 574 | title='FreePic2Pdf', predicate_func=lambda dlg: len(dlg.children()) == 3 575 | ).wait('ready', timeout=2).children()[0].click() 576 | app.kill() 577 | break 578 | else: 579 | time.sleep(0.2) 580 | if export_flg: 581 | # [备用1]utf-16判断有无BOM 582 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'rb') as frb: 583 | # encoded_text = frb.read() 584 | # bom = codecs.BOM_UTF16_LE 585 | # if encoded_text.startswith(bom): 586 | # bkmk_itf = encoded_text[len(bom):].decode('utf-16le') 587 | # else: 588 | # bkmk_itf = encoded_text.decode('utf-16le') 589 | # base_page = re.search(r'(?<=BasePage=)(\d+)', bkmk_itf) 590 | # if base_page: 591 | # bkmk_itf = re.sub(r'^TextPage=$', 'TextPage='+base_page.group(0), bkmk_itf, flags=re.M) 592 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'rb') as frb: 593 | # encoded_text = frb.read() 594 | # bom = codecs.BOM_UTF16_LE 595 | # if encoded_text.startswith(bom): 596 | # bkmk_text = encoded_text[len(bom):].decode('utf-16le') 597 | # else: 598 | # bkmk_text = encoded_text.decode('utf-16le') 599 | # [备用2]考虑是否一律转utf-8 600 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'r', encoding='utf-16') as fr: 601 | # bkmk_itf = fr.read() 602 | # base_page = re.search(r'(?<=BasePage=)(\d+)', bkmk_itf) 603 | # if base_page: 604 | # bkmk_itf = re.sub(r'^TextPage=$', 'TextPage='+base_page.group(0), bkmk_itf, flags=re.M) 605 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'r', encoding='utf-16') as fr: 606 | # bkmk_text = fr.read() 607 | # dir_bkmk_bk = os.path.join(self.settings.dir_lib, 'bkmk') 608 | # shutil.copy(os.path.join(dir_bkmk_bk, "FreePic2Pdf.itf"), 
os.path.join(dir_bkmk, "FreePic2Pdf.itf")) 609 | # shutil.copy(os.path.join(dir_bkmk_bk, "FreePic2Pdf_bkmk.txt"), os.path.join(dir_bkmk, "FreePic2Pdf_bkmk.txt")) 610 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'w', encoding='utf-8') as fw: 611 | # fw.write(bkmk_itf) 612 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'w', encoding='utf-8') as fw: 613 | # fw.write(bkmk_text) 614 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'r+', encoding='utf-16le') as fr: 615 | bkmk_itf = fr.read() 616 | base_page = re.search(r'(?<=BasePage=)(\d+)', bkmk_itf) 617 | if base_page: 618 | bkmk_itf = re.sub(r'^TextPage=$', 'TextPage='+base_page.group(0), bkmk_itf, flags=re.M) 619 | fr.seek(0) 620 | fr.truncate() 621 | fr.write(bkmk_itf) 622 | print('书签导出完成!') 623 | else: 624 | print('书签导入完成!') 625 | -------------------------------------------------------------------------------- /func_lib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:53 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | import os 9 | import re 10 | import shutil 11 | from copy import copy 12 | from datetime import datetime 13 | # import chardet 14 | from colorama import Fore 15 | from opencc import OpenCC 16 | 17 | 18 | class FuncLib(): 19 | """ functions for invoking """ 20 | def __init__(self, amb): 21 | self.settings = amb.settings 22 | 23 | def index_all_to_toc(self, file_index_all, file_toc_all, vol_i=0, fill_flg=False): 24 | """ index_all 文件转 toc_all 文件 """ 25 | done_flg = True 26 | if self.text_file_check(file_index_all) == 2: 27 | # 读取 28 | dcts = [] 29 | with open(file_index_all, 'r', encoding='utf-8') as fr: 30 | level = 0 31 | i = 0 32 | for line in fr: 33 | i += 1 34 | # 要先扫描章节再扫描词条 35 | mth_stem = self.settings.pat_stem.match(line) 36 | if mth_stem: 37 | # 无卷标章节 38 | level = int(mth_stem.group(1)) 39 | if mth_stem.group(3) == '': 40 | dcts.append({"level": level, "name": mth_stem.group(2), "page": 0, "vol_n": vol_i+1}) 41 | else: 42 | dcts.append({"level": level, "name": mth_stem.group(2), "page": int(mth_stem.group(3)), "vol_n": vol_i+1}) 43 | elif self.settings.pat_stem_vol.match(line): 44 | # 有卷标章节 45 | mth_vol_stem = self.settings.pat_stem_vol.match(line) 46 | level = int(mth_vol_stem.group(1)) 47 | if mth_vol_stem.group(4) == '': 48 | dcts.append({"level": level, "name": mth_vol_stem.group(2), "page": 0, "vol_n": int(mth_vol_stem.group(3))}) 49 | else: 50 | dcts.append({"level": level, "name": mth_vol_stem.group(2), "page": int(mth_vol_stem.group(4)), "vol_n": int(mth_vol_stem.group(3))}) 51 | elif self.settings.pat_index.match(line): 52 | # 无卷标词条 53 | mth = self.settings.pat_index.match(line) 54 | dcts.append({"level": level+1, "name": mth.group(1), "page": int(mth.group(2)), "vol_n": vol_i+1}) 55 | elif self.settings.pat_index_vol.match(line): 56 | # 有卷标词条 57 | mth_vol = self.settings.pat_index_vol.match(line) 58 | dcts.append({"level": level+1, "name": mth_vol.group(1), "page": int(mth_vol.group(3)), "vol_n": int(mth_vol.group(2))}) 59 | else: 60 | print(Fore.RED + "ERROR: " + Fore.RESET + f"第 {i} 行格式有误, 请检查") 61 | done_flg = False 62 | break 63 | # 输出 64 | if done_flg: 65 | with open(file_toc_all, 'w', encoding='utf-8') as fw: 66 | p_fill = 1 67 | for x in range(len(dcts)): 68 | dct = dcts[x] 69 | # 判断是否要加卷标 70 | if dct["vol_n"] == 1: 71 | s_vol = '' 72 | else: 73 | s_vol = '['+str(dct["vol_n"])+']' 74 | 
# 开始写入 75 | if dct["page"] != 0: 76 | fw.write('\t'*dct["level"] + f'{dct["name"]}\t{s_vol}{str(dct["page"])}\n') 77 | elif fill_flg: 78 | # 向后检索页码来填充 79 | for d in dcts[x+1:]: 80 | if d["page"] != 0: 81 | p_fill = d["page"] 82 | break 83 | fw.write('\t'*dct["level"] + f'{dct["name"]}\t{s_vol}{str(p_fill)}\n') 84 | # 如果向后仍未检索到页码(待补充) 85 | else: 86 | fw.write('\t'*dct["level"] + f'{dct["name"]}\n') 87 | else: 88 | done_flg = False 89 | return done_flg 90 | 91 | def toc_all_to_index(self, file_toc_all, file_index_all): 92 | """ toc_all 文件转 index_all 文件 """ 93 | if self.text_file_check(file_toc_all) == 2: 94 | # 读取 toc_all.txt 95 | pairs = self.read_toc_file(file_toc_all) 96 | # 识别收集非章节的词条索引 97 | index, entries_tmp = [], [] 98 | child_flg = False 99 | for i in range(1, len(pairs)): 100 | if pairs[i]["level"] == pairs[i-1]["level"]: 101 | if child_flg: 102 | # 满足条件, 继续收集 103 | entries_tmp.append(i) 104 | elif pairs[i]["level"] > pairs[i-1]["level"]: 105 | # 是展开节点, 开启收集 106 | entries_tmp = [] 107 | child_flg = True 108 | entries_tmp.append(i) 109 | else: 110 | # 展开结束, 归档, 清空篮子 111 | index += entries_tmp 112 | entries_tmp = [] 113 | child_flg = False 114 | # 补漏(因为最末一次收集可能未归档) 115 | if len(entries_tmp) > 0: 116 | index += entries_tmp 117 | # 生成 index_all.txt 118 | with open(file_index_all, 'w', encoding='utf-8') as fw: 119 | for i in range(len(pairs)): 120 | vol_n = pairs[i]["vol_n"] # 若 vol_n 大于 1 则标示分卷号 121 | if i in index: 122 | # 检查是否存在索引条无页码 123 | if pairs[i]["page"] == 0: 124 | str_p = '1' 125 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"第 {i+1} 行普通索引条无页码, 已设置为默认值 1") 126 | else: 127 | str_p = str(pairs[i]["page"]) 128 | # 写入索引条 129 | if vol_n > 1: 130 | fw.write(f'{pairs[i]["title"]}\t[{str(vol_n)}]{str_p}\n') 131 | else: 132 | fw.write(f'{pairs[i]["title"]}\t{str_p}\n') 133 | elif pairs[i]["page"] == 0: 134 | fw.write(f'【L{str(pairs[i]["level"])}】{pairs[i]["title"]}\t\n') 135 | else: 136 | if vol_n > 1: 137 | fw.write(f'【L{str(pairs[i]["level"])}】{pairs[i]["title"]}\t[{str(vol_n)}]{str(pairs[i]["page"])}\n') 138 | else: 139 | fw.write(f'【L{str(pairs[i]["level"])}】{pairs[i]["title"]}\t{str(pairs[i]["page"])}\n') 140 | return True 141 | else: 142 | return False 143 | 144 | def read_toc_file(self, file_toc, vol_i=0): 145 | """ 读取 toc/toc_all 文件 """ 146 | pairs = [] 147 | with open(file_toc, 'r', encoding='utf-8') as fr: 148 | i = 1 149 | for line in fr: 150 | mth = self.settings.pat_toc.match(line) 151 | if mth: 152 | pair = { 153 | "level": len(mth.group(1)), 154 | "title": mth.group(2), 155 | "page": int(mth.group(3)), 156 | "vol_n": vol_i+1 157 | } 158 | pairs.append(pair) 159 | elif self.settings.pat_toc_blank.match(line): 160 | mth_blank = self.settings.pat_toc_blank.match(line) 161 | pair = { 162 | "level": len(mth_blank.group(1)), 163 | "title": mth_blank.group(2), 164 | "page": 0, 165 | "vol_n": vol_i+1 166 | } 167 | pairs.append(pair) 168 | elif self.settings.pat_toc_vol.match(line): 169 | mth_vol = self.settings.pat_toc_vol.match(line) 170 | pair = { 171 | "level": len(mth_vol.group(1)), 172 | "title": mth_vol.group(2), 173 | "page": int(mth_vol.group(4)), 174 | "vol_n": int(mth_vol.group(3)) 175 | } 176 | pairs.append(pair) 177 | else: 178 | print(Fore.RED + "ERROR: " + Fore.RESET + f"第 {i} 行未匹配, 已忽略") 179 | pairs = [] 180 | break 181 | i += 1 182 | return pairs 183 | 184 | def read_index_file(self, file_index, vol_i=0): 185 | """ 读取 index 文件 """ 186 | pairs = [] 187 | with open(file_index, 'r', encoding='utf-8') as fr: 188 | i = 1 189 | for line in fr: 190 | mth = 
self.settings.pat_index.match(line) 191 | if mth: 192 | pair = { 193 | "title": mth.group(1), 194 | "page": int(mth.group(2)), 195 | "vol_n": vol_i+1 196 | } 197 | pairs.append(pair) 198 | elif self.settings.pat_index_vol.match(line): 199 | mth_vol = self.settings.pat_index_vol.match(line) 200 | pair = { 201 | "title": mth_vol.group(1), 202 | "page": int(mth_vol.group(3)), 203 | "vol_n": int(mth_vol.group(2)) 204 | } 205 | pairs.append(pair) 206 | else: 207 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"第 {i} 行未匹配, 已忽略") 208 | i += 1 209 | return pairs 210 | 211 | def merge_to_index_all(self, file_toc, file_index, file_index_all): 212 | """ 将 toc 和 index 文件合并成 index_all 文件 """ 213 | # 思路: 先合并为 toc_all, 再转换成 index_all 214 | if self.text_file_check(file_toc) == 2: 215 | done_flg = True 216 | # 1.读取 toc 217 | toc_pairs = self.read_toc_file(file_toc) 218 | if toc_pairs: 219 | # 判断 toc 是否是有序的(无序则不支持合并), 同时生成新页码(填充无页码章节) 220 | rank_last = -100000 221 | p_fill = 1 222 | for i in range(len(toc_pairs)): 223 | toc_pairs[i]["page_new"] = copy(toc_pairs[i]["page"]) 224 | dct = toc_pairs[i] 225 | if dct["page"] == 0: 226 | # 向后检索页码来填充 227 | for d in toc_pairs[i+1:]: 228 | if d["page"] != 0: 229 | p_fill = d["page"] 230 | break 231 | toc_pairs[i]["page_new"] = copy(p_fill) 232 | elif dct["vol_n"]*100000+dct["page"] < rank_last: 233 | print(Fore.RED + "ERROR: " + Fore.RESET + f"目录文件第 {i} 行页码乱序, 不支持合并") 234 | done_flg = False 235 | break 236 | else: 237 | rank_last = dct["vol_n"]*100000+dct["page"] 238 | else: 239 | done_flg = False 240 | # 2.读取 index 241 | index_pairs = self.read_index_file(file_index) 242 | if index_pairs: 243 | # 排序确保是有序的 244 | index_pairs.sort(key=lambda x: x["vol_n"]*100000+x["page"], reverse=False) 245 | else: 246 | print(Fore.RED + "ERROR: " + Fore.RESET + "读取索引文件失败") 247 | done_flg = False 248 | # 3.排序合并 toc 和 index 249 | if done_flg: 250 | toc_unsure = [] 251 | toc_wrong = [] 252 | file_tmp = os.path.join(self.settings.dir_output_tmp, self.settings.fname_toc_all) 253 | with open(file_tmp, 'w', encoding='utf-8') as fw: 254 | i = 0 255 | j = 0 256 | # toc 除最后一行 257 | for i in range(len(toc_pairs)-1): 258 | level = toc_pairs[i]["level"] 259 | # 先写入目录条 260 | if toc_pairs[i]["vol_n"] > 1: 261 | vol_toc = '['+str(toc_pairs[i]["vol_n"])+']' 262 | else: 263 | vol_toc = '' 264 | if toc_pairs[i]["page"] != 0: 265 | fw.write('\t'*level + f'{toc_pairs[i]["title"]}\t{vol_toc}{str(toc_pairs[i]["page"])}\n') 266 | else: 267 | fw.write('\t'*level + f'{toc_pairs[i]["title"]}\n') 268 | # 2.写入符合的索引行 269 | for x in range(j, len(index_pairs)): 270 | if index_pairs[x]["vol_n"] > 1: 271 | vol_index = '['+str(index_pairs[x]["vol_n"])+']' 272 | else: 273 | vol_index = '' 274 | rk = index_pairs[x]["vol_n"]*100000+index_pairs[x]["page"] 275 | # a.小于当前章节: 写入(排序错误) 276 | if (rk < toc_pairs[i]["vol_n"]*100000+toc_pairs[i]["page_new"]): 277 | fw.write('\t'*(level+1) + f'{index_pairs[x]["title"]}\t{vol_index}{str(index_pairs[x]["page"])}\n') 278 | j = x + 1 279 | if toc_pairs[i] not in toc_wrong: 280 | toc_wrong.append(toc_pairs[i]) 281 | elif (rk == toc_pairs[i]["vol_n"]*100000+toc_pairs[i]["page_new"]) and (rk == toc_pairs[i+1]["vol_n"]*100000+toc_pairs[i+1]["page_new"]): 282 | j = x 283 | break 284 | # b.等于当前章节, 小于后一章节: 写入(词条和章节孰前孰后存疑,故记录) 285 | elif (rk == toc_pairs[i]["vol_n"]*100000+toc_pairs[i]["page_new"]) and (rk < toc_pairs[i+1]["vol_n"]*100000+toc_pairs[i+1]["page_new"]): 286 | fw.write('\t'*(level+1) + f'{index_pairs[x]["title"]}\t{vol_index}{str(index_pairs[x]["page"])}\n') 287 | j = x + 1 288 | 
if toc_pairs[i] not in toc_unsure: 289 | toc_unsure.append(toc_pairs[i]) 290 | # c.大于当前章节, 小于后一章节: 写入 291 | elif (rk > toc_pairs[i]["vol_n"]*100000+toc_pairs[i]["page_new"]) and (rk < toc_pairs[i+1]["vol_n"]*100000+toc_pairs[i+1]["page_new"]): 292 | fw.write('\t'*(level+1) + f'{index_pairs[x]["title"]}\t{vol_index}{str(index_pairs[x]["page"])}\n') 293 | j = x + 1 294 | # d.剩余情况: 大于当前章节,大于等于后一章节 295 | else: 296 | j = x 297 | break 298 | # 补 toc 的最后一行 299 | level = toc_pairs[-1]["level"] 300 | if toc_pairs[-1]["vol_n"] > 1: 301 | vol_toc = '['+str(toc_pairs[-1]["vol_n"])+']' 302 | else: 303 | vol_toc = '' 304 | if toc_pairs[-1]["page"] != 0: 305 | fw.write('\t'*level + f'{toc_pairs[-1]["title"]}\t{vol_toc}{str(toc_pairs[-1]["page"])}\n') 306 | else: 307 | fw.write('\t'*level + f'{toc_pairs[-1]["title"]}\n') 308 | # 写入剩余的索引行 309 | for x in range(j, len(index_pairs)): 310 | if index_pairs[x]["vol_n"] > 1: 311 | vol_index = '['+str(index_pairs[x]["vol_n"])+']' 312 | else: 313 | vol_index = '' 314 | fw.write('\t'*(level+1) + f'{index_pairs[x]["title"]}\t{vol_index}{str(index_pairs[x]["page"])}\n') 315 | if index_pairs[x]["vol_n"]*100000+index_pairs[x]["page"] < toc_pairs[-1]["vol_n"]*100000+toc_pairs[-1]["page_new"]: 316 | if toc_pairs[-1] not in toc_wrong: 317 | toc_wrong.append(toc_pairs[-1]) 318 | elif index_pairs[x]["vol_n"]*100000+index_pairs[x]["page"] == toc_pairs[-1]["vol_n"]*100000+toc_pairs[-1]["page_new"]: 319 | if toc_pairs[-1] not in toc_unsure: 320 | toc_unsure.append(toc_pairs[-1]) 321 | if self.toc_all_to_index(file_tmp, file_index_all): 322 | print(Fore.GREEN + "\n处理完成, 生成在同 index.txt 目录下" + Fore.RESET) 323 | # 输出错误和存疑的 toc 部分以便检查 324 | if toc_wrong or toc_unsure: 325 | fp = os.path.join(os.path.split(file_index_all)[0], '_need_checking.log') 326 | with open(fp, 'w', encoding='utf-8') as fw: 327 | if toc_wrong: 328 | fw.write('========= 排序错误 ==========\n') 329 | for t in toc_wrong: 330 | if t["vol_n"] > 1: 331 | vol_toc = '['+str(t["vol_n"])+']' 332 | else: 333 | vol_toc = '' 334 | if t["page"] == 0: 335 | fw.write(f'【L{str(t["level"])}】{t["title"]}\t\n') 336 | else: 337 | fw.write(f'【L{str(t["level"])}】{t["title"]}\t{vol_toc}{str(t["page"])}\n') 338 | if toc_unsure: 339 | fw.write('========= 排序存疑 ==========\n') 340 | for t in toc_unsure: 341 | if t["vol_n"] > 1: 342 | vol_toc = '['+str(t["vol_n"])+']' 343 | else: 344 | vol_toc = '' 345 | if t["page"] == 0: 346 | fw.write(f'【L{str(t["level"])}】{t["title"]}\t\n') 347 | else: 348 | fw.write(f'【L{str(t["level"])}】{t["title"]}\t{vol_toc}{str(t["page"])}\n') 349 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "存在排序存疑的条目, 已记录在日志 _need_checking.log 中,需手动调整完善") 350 | else: 351 | print(Fore.RED + "ERROR: " + Fore.RESET + "读取目录文件失败") 352 | 353 | def read_index_all_file(self, file_index_all, img_dict_flg=True, vol_i=0, navi_flg=False): 354 | done_flg = True 355 | dcts = [] 356 | dct_chaps = [] 357 | tail_ids = [] 358 | # 用于收集末章节的子词条 359 | tail_list = [] 360 | tail = {"id": 0, "children": []} 361 | with open(file_index_all, 'r', encoding='utf-8') as fr: 362 | if img_dict_flg: 363 | pat1 = re.compile(r'【L(\d+)】([^\t]+)\t([\[\d\]]*\-\d+|[\[\d\]]*\d*)[\r\n]*$') # 匹配章节词头(有/无卷标) 364 | pat2 = re.compile(r'([^\t]+)\t([\[\d\]]*\-?\d+)[\r\n]*$') # 匹配词条词头(有/无卷标) 365 | else: 366 | pat1 = self.settings.pat_stem_text # 匹配章节词头 367 | pat2 = self.settings.pat_tab # 匹配词条词头 368 | pat3 = self.settings.pat_index_blank # 匹配仅导航 369 | i = 0 370 | navi_bar = [None for i in range(10)] 371 | navi_bar_tmp = [] 372 | for line in fr: 373 | i += 1 374 | checked_flg = 
False 375 | vol_n = vol_i+1 376 | # 匹配章节 377 | if pat1.match(line): 378 | mth = pat1.match(line) 379 | # 读取页码/词条内容, 分卷号 380 | if img_dict_flg and mth.group(3) == '': 381 | body = 0 382 | elif img_dict_flg and re.match(r'\-?\d+$', mth.group(3)): 383 | body = int(mth.group(3)) 384 | elif img_dict_flg and re.match(r'\[(\d+)\](\-?\d+)$', mth.group(3)): 385 | mth_mth1 = re.match(r'\[(\d+)\](\-?\d+)$', mth.group(3)) 386 | vol_n, body = int(mth_mth1.group(1)), int(mth_mth1.group(2)) 387 | elif img_dict_flg: 388 | print(Fore.RED + "ERROR: " + Fore.RESET + f"第 {i} 行未匹配, 请检查") 389 | done_flg = False 390 | break 391 | else: 392 | body = mth.group(3) 393 | dct = { 394 | "id": i, 395 | "level": int(mth.group(1)), 396 | "title": mth.group(2), 397 | "body": body, 398 | "vol_n": vol_n 399 | } 400 | # navi_bar 构造 401 | navi_bar[int(mth.group(1))] = mth.group(2) 402 | navi_bar_tmp = navi_bar[:int(mth.group(1))+1] 403 | dct["navi_bar"] = copy(navi_bar_tmp) 404 | dct_chaps.append(dct) 405 | # 子词条清“篮子” 406 | if len(tail["children"]) != 0: 407 | tail_list.append({"id": tail["id"], "children": tail["children"]}) 408 | tail_ids.append(tail["id"]) 409 | checked_flg = True 410 | tail["id"] = i 411 | tail["children"] = [] 412 | # 匹配词条 413 | elif pat2.match(line): 414 | mth = pat2.match(line) 415 | if img_dict_flg and re.match(r'\-?\d+$', mth.group(2)): 416 | body = int(mth.group(2)) 417 | elif img_dict_flg and re.match(r'\[(\d+)\](\-?\d+)$', mth.group(2)): 418 | mth_mth1 = re.match(r'\[(\d+)\](\-?\d+)$', mth.group(2)) 419 | vol_n, body = int(mth_mth1.group(1)), int(mth_mth1.group(2)) 420 | elif img_dict_flg: 421 | print(Fore.RED + "ERROR: " + Fore.RESET + f"第 {i} 行未匹配, 请检查") 422 | done_flg = False 423 | break 424 | else: 425 | body = mth.group(2) 426 | dct = { 427 | "id": i, 428 | "level": -1, 429 | "title": mth.group(1), 430 | "body": body, 431 | "vol_n": vol_n 432 | } 433 | dct["navi_bar"] = navi_bar_tmp + [mth.group(1)] 434 | # 收集子词条 435 | tail["children"].append(mth.group(1)) 436 | # 匹配仅导航 437 | elif navi_flg and pat3.match(line): 438 | mth = pat3.match(line) 439 | dct = { 440 | "id": i, 441 | "level": -1, 442 | "title": mth.group(1), 443 | "body": '', 444 | "vol_n": vol_n 445 | } 446 | dct["navi_bar"] = navi_bar_tmp + [mth.group(1)] 447 | # 收集子词条 448 | tail["children"].append(mth.group(1)) 449 | else: 450 | print(Fore.RED + "ERROR: " + Fore.RESET + f"第 {i} 行未匹配, 请检查") 451 | done_flg = False 452 | break 453 | dcts.append(dct) 454 | # 遍历完成后补漏 455 | if not checked_flg and len(tail["children"]) != 0: 456 | tail_list.append({"id": tail["id"], "children": tail["children"]}) 457 | tail_ids.append(tail["id"]) 458 | # 用于收集大章节的子章节 459 | stem_ids = [] 460 | stem_list = [] 461 | stem = {"id": 0, "children": []} 462 | for i in range(len(dct_chaps)-1): 463 | dct_obj = dct_chaps[i] 464 | stem["id"] = dct_obj["id"] 465 | stem["children"] = [] 466 | checked_flg = False 467 | for dct in dct_chaps[i+1:]: 468 | if dct["level"] == dct_obj["level"]+1: 469 | stem["children"].append(dct["title"]) 470 | elif dct["level"] <= dct_obj["level"]: 471 | # 收集子章节 472 | if len(stem["children"]) != 0: 473 | stem_list.append({"id": stem["id"], "children": stem["children"]}) 474 | stem_ids.append(stem["id"]) 475 | checked_flg = True 476 | break 477 | # 补漏收 478 | if not checked_flg and len(stem["children"]) != 0: 479 | stem_list.append({"id": stem["id"], "children": stem["children"]}) 480 | stem_ids.append(stem["id"]) 481 | # 检查 482 | if len(tail_ids+stem_ids) != len(set(tail_ids+stem_ids)): 483 | done_flg = False 484 | set_tail_ids = set(tail_ids) 
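            # Note: tail_ids holds chapters whose children are plain entry lines, stem_ids holds chapters
            # whose children are sub-chapters; a well-formed index_all.txt never gives one line both roles,
            # so the intersection printed below pinpoints the conflicting line numbers. Hypothetical
            # example: tail_ids = {12, 30} and stem_ids = {12, 45} would report {12}, meaning the
            # levels/indentation around line 12 of index_all.txt need manual fixing.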
485 | set_stem_ids = set(stem_ids) 486 | print("层级矛盾行: ", set_tail_ids.intersection(set_stem_ids)) 487 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件 {file_index_all} 解析出现矛盾, 请检查索引顺序") 488 | else: 489 | # 整合所有信息 490 | for dct in dcts: 491 | if dct["level"] == -1: 492 | dct["children"] = [] 493 | dct["entry_list"] = False 494 | elif dct["id"] in tail_ids: 495 | for item in tail_list: 496 | if dct["id"] == item["id"]: 497 | dct["children"] = item["children"] 498 | dct["entry_list"] = True 499 | break 500 | elif dct["id"] in stem_ids: 501 | for item in stem_list: 502 | if dct["id"] == item["id"]: 503 | dct["children"] = item["children"] 504 | dct["entry_list"] = False 505 | break 506 | else: 507 | dct["children"] = [] 508 | dct["entry_list"] = False 509 | if done_flg: 510 | return dcts 511 | else: 512 | print(Fore.RED + "全索引文件读取失败: " + Fore.RESET + file_index_all) 513 | return None 514 | 515 | def make_relinks_syn(self, file_syns, file_out): 516 | """ 生成同义词重定向 """ 517 | words = [] 518 | # 1.读取重定向索引 519 | syns = [] 520 | fname = os.path.split(file_syns)[1] 521 | with open(file_syns, 'r', encoding='utf-8') as fr: 522 | i = 1 523 | for line in fr: 524 | mth = self.settings.pat_tab.match(line) 525 | if mth: 526 | syns.append({"syn": mth.group(1), "origin": mth.group(2)}) 527 | else: 528 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"{fname} 第 {i} 行未匹配, 已忽略") 529 | i += 1 530 | # 2.生成重定向 531 | with open(file_out, 'w', encoding='utf-8') as fw: 532 | for syn in syns: 533 | fw.write(f'{syn["syn"]}\n@@@LINK={syn["origin"]}\n\n') 534 | words.append(syn["syn"]) 535 | print("重定向(同义词)词条已生成") 536 | return words 537 | 538 | def make_relinks_st(self, words, file_out): 539 | converter_s2t = OpenCC('s2t.json') 540 | converter_t2s = OpenCC('t2s.json') 541 | to_words = [] 542 | # 生成繁简通搜重定向 543 | with open(file_out, 'w', encoding='utf-8') as fw: 544 | for word in words: 545 | # 简转繁 546 | to_word = converter_s2t.convert(word) 547 | if to_word != word and to_word not in to_words: 548 | fw.write(f'{to_word}\n@@@LINK={word}\n\n') 549 | to_words.append(to_word) 550 | # 繁转简 551 | to_word = converter_t2s.convert(word) 552 | if to_word != word and to_word not in to_words: 553 | fw.write(f'{to_word}\n@@@LINK={word}\n\n') 554 | to_words.append(to_word) 555 | print("重定向(繁简)词条已生成") 556 | 557 | def make_relinks_split(self, file_in, file_out, n_chars=2): 558 | relinks = [] 559 | pat = re.compile(r'[;,。\?\!\,\.\;]+') 560 | with open(file_in, 'r', encoding='utf-8') as fr: 561 | for line in fr: 562 | headword = line.rstrip() 563 | for s in pat.split(headword): 564 | if (s != headword) and (not pat.match(s)) and (len(s) >= n_chars): 565 | relink = s + '\t' + headword + '\n' 566 | if relink not in relinks: 567 | relinks.append(relink) 568 | if relinks: 569 | with open(file_out, 'w', encoding='utf-8') as fw: 570 | for relink in relinks: 571 | fw.write(relink) 572 | return True 573 | else: 574 | print(Fore.RED + "ERROR: " + Fore.RESET + "分词结果为空") 575 | return False 576 | 577 | def simp_trad_trans(self, file_in, file_out, trans_type): 578 | """ 繁简转换 """ 579 | if trans_type == 'T': 580 | converter_s2t = OpenCC('s2t.json') 581 | with open(file_out, 'w', encoding='utf-8') as fw: 582 | with open(file_in, 'r', encoding='utf-8') as fr: 583 | for line in fr: 584 | # 简转繁 585 | fw.write(converter_s2t.convert(line)) 586 | else: 587 | converter_t2s = OpenCC('t2s.json') 588 | with open(file_out, 'w', encoding='utf-8') as fw: 589 | with open(file_in, 'r', encoding='utf-8') as fr: 590 | for line in fr: 591 | # 繁转简 592 | 
fw.write(converter_t2s.convert(line)) 593 | print(f"\n转换结果已生成: {file_out}") 594 | 595 | def text_file_check(self, text_file): 596 | if not os.path.exists(text_file) or not os.path.isfile(text_file): 597 | print(Fore.YELLOW + "INFO: " + Fore.RESET + f"文件 {text_file} 不存在") 598 | return 0 599 | else: 600 | text = '' 601 | with open(text_file, 'r', encoding='utf-8') as fr: 602 | i = 0 603 | for line in fr: 604 | i += 1 605 | if i < 6: 606 | text += line 607 | else: 608 | break 609 | if re.match(r'\s*$', text): 610 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件 {text_file} 内容为空") 611 | return 1 612 | else: 613 | return 2 614 | 615 | def merge_and_count(self, file_list, file_final): 616 | # 筛选出有效文件 617 | parts = [] 618 | for f in file_list: 619 | if os.path.exists(f): 620 | parts.append(f) 621 | # 开始计数和合并 622 | entry_total = 0 623 | if len(parts) == 1 and file_final in parts: 624 | # 只有单个文件自身, 则不需要写 625 | with open(file_final, 'r', encoding='utf-8') as fr: 626 | for line in fr: 627 | if line == '\n': 628 | entry_total += 1 629 | else: 630 | # 用临时文件存储, 完了再重命名 631 | file_tmp = os.path.join(self.settings.dir_output_tmp, 'tmp.xxx') 632 | with open(file_tmp, 'a', encoding='utf-8') as fa: 633 | for part in parts: 634 | with open(part, 'r', encoding='utf-8') as fr: 635 | for line in fr: 636 | if line == '\n': 637 | entry_total += 1 638 | fa.write(line) 639 | if os.path.isfile(file_final): 640 | os.remove(file_final) 641 | os.rename(file_tmp, file_final) 642 | return entry_total 643 | 644 | def generate_info_html(self, file_info_raw, file_out, dict_name, templ_choice=None, volume_num=None): 645 | with open(file_out, 'w', encoding='utf-8') as fw: 646 | # 读取 info.html 647 | if file_info_raw and os.path.isfile(file_info_raw): 648 | with open(file_info_raw, 'r', encoding='utf-8') as fr: 649 | fw.write(fr.read().rstrip()) 650 | # 打上 AMB 标志 (有模板则是制作, 没有则认为是打包) 651 | if templ_choice and volume_num: 652 | fw.write(f"\n

{dict_name}, built with AutoMdxBuilder {self.settings.version} on {datetime.now().strftime('%Y/%m/%d')}, based on template {templ_choice.upper()} in {volume_num} volumes.
\n") 653 | elif templ_choice: 654 | fw.write(f"\n

{dict_name}, built with AutoMdxBuilder {self.settings.version} on {datetime.now().strftime('%Y/%m/%d')}, based on template {templ_choice.upper()}.
\n") 655 | else: 656 | fw.write(f"\n

{dict_name}, packed with AutoMdxBuilder {self.settings.version} on {datetime.now().strftime('%Y/%m/%d')}.
\n") 657 | return True 658 | 659 | def get_item_list(self, dct): 660 | html = '' 661 | if dct["level"] == -1: 662 | pass 663 | elif dct["entry_list"]: 664 | html += '

' 665 | i = 0 666 | for item in dct["children"]: 667 | i += 1 668 | if i == 1: 669 | html += f'{item}' 670 | else: 671 | html += f'{item}' 672 | html += '

\n' 673 | elif len(dct["children"]) != 0: 674 | html += '
    ' 675 | for item in dct["children"]: 676 | html += f'
  • {item}
  • ' 677 | html += '
\n' 678 | else: 679 | pass 680 | return html 681 | 682 | # def _detect_code(self, text_file): 683 | # with open(text_file, 'rb') as frb: 684 | # data = frb.read() 685 | # dcts = chardet.detect(data) 686 | # return dcts["encoding"] 687 | 688 | def prepare_imgs(self, dir_imgs_in, dir_imgs_out, volume_num=None): 689 | print('开始处理图像...') 690 | imgs = [] 691 | img_lens = [] 692 | if volume_num: 693 | # 整理图像 694 | lst_dir_imgs = [] 695 | for i in range(volume_num): 696 | dir_tmp = os.path.join(dir_imgs_out, os.path.split(dir_imgs_in["main"][i])[1]) 697 | imgs_tmp = self._proc_img_vol(dir_imgs_in["main"][i], dir_tmp, True, i) 698 | imgs += imgs_tmp 699 | img_lens.append(len(imgs_tmp)) 700 | print(f"第 {i+1} 卷已完成") 701 | lst_dir_imgs.append(dir_tmp) 702 | for fp in dir_imgs_in["others"]: 703 | dir_tmp = os.path.join(dir_imgs_out, os.path.split(fp)[1]) 704 | if os.path.exists(dir_tmp): 705 | size_in = sum(os.path.getsize(os.path.join(fp, f)) for f in os.listdir(fp) if os.path.isfile(os.path.join(fp, f))) 706 | size_out = sum(os.path.getsize(os.path.join(dir_tmp, f)) for f in os.listdir(dir_tmp) if os.path.isfile(os.path.join(dir_tmp, f))) 707 | if size_out == 0 or size_out != size_in: 708 | shutil.rmtree(dir_tmp) 709 | shutil.copytree(fp, dir_tmp) 710 | else: 711 | shutil.copytree(fp, dir_tmp) 712 | lst_dir_imgs.append(dir_tmp) 713 | # 清除 _tmp/imgs 中无关的文件,文件夹 714 | for fname in os.listdir(dir_imgs_out): 715 | fp = os.path.join(dir_imgs_out, fname) 716 | if os.path.isfile(fp): 717 | os.remove(fp) 718 | elif os.path.isdir(fp) and fp not in lst_dir_imgs: 719 | shutil.rmtree(fp) 720 | else: 721 | imgs = self._proc_img_vol(dir_imgs_in, dir_imgs_out) 722 | img_lens.append(len(imgs)) 723 | print('\n图像处理完毕。') 724 | return imgs, img_lens 725 | 726 | def _proc_img_vol(self, dir_imgs_in, dir_imgs_out, multi_vols_flg=False, vol_i=0): 727 | """ 图像预处理(重命名等) """ 728 | # 0.图像拷贝判断 729 | copy_flg = True 730 | if os.path.exists(dir_imgs_out): 731 | size_in = sum(os.path.getsize(os.path.join(dir_imgs_in, f)) for f in os.listdir(dir_imgs_in) if os.path.isfile(os.path.join(dir_imgs_in, f))) 732 | size_out = sum(os.path.getsize(os.path.join(dir_imgs_out, f)) for f in os.listdir(dir_imgs_out) if os.path.isfile(os.path.join(dir_imgs_out, f))) 733 | # 为空或不一样, 则重新处理 734 | if size_out == 0 or size_out != size_in: 735 | shutil.rmtree(dir_imgs_out) 736 | os.makedirs(dir_imgs_out) 737 | else: 738 | copy_flg = False 739 | else: 740 | os.makedirs(dir_imgs_out) 741 | # 1.获取图像文件列表 742 | num_flg = True # 图像文件名是否纯数字 743 | img_files = [] 744 | for fname in os.listdir(dir_imgs_in): 745 | fpath = os.path.join(dir_imgs_in, fname) 746 | if os.path.isfile(fpath) and fpath.endswith(tuple(self.settings.img_exts)): 747 | img_files.append(fpath) 748 | if not re.match(r'\d+', fname.split('.')[0]): 749 | num_flg = False 750 | # 按旧文件名排序 751 | if num_flg: 752 | img_files.sort(key=lambda x: int(os.path.split(x)[1].split('.')[0]), reverse=False) # 按数字排 753 | else: 754 | img_files.sort(reverse=False) # 按字符串排 755 | # 2.重命名 756 | dname = os.path.split(dir_imgs_out)[1].strip('\\/') 757 | imgs = [] 758 | n = 0 759 | len_digit = self.settings.len_digit # 获取序号位数 760 | for img_file in img_files: 761 | n += 1 762 | f_dir, f_name = os.path.split(img_file) 763 | f_ext = os.path.splitext(f_name)[1] 764 | # 区分正文和辅页, 辅页前缀'A', 正文前缀'B' 765 | if multi_vols_flg: 766 | # 分卷 767 | if n < self.settings.body_start[vol_i]: 768 | i_str = str(n).zfill(len_digit) 769 | f_title_new = f'{self.settings.name_abbr}[{str(vol_i+1).zfill(2)}]_A{i_str}' 770 | else: 771 | i_str = 
str(n-self.settings.body_start[vol_i]+1).zfill(len_digit) 772 | f_title_new = f'{self.settings.name_abbr}[{str(vol_i+1).zfill(2)}]_B{i_str}' 773 | imgs.append({'vol_n': vol_i+1, 'title': f_title_new, 'path': dname+'/'+f_title_new+f_ext, 'i_in_vol': n-1}) 774 | else: 775 | # 非分卷 776 | if n < self.settings.body_start[vol_i]: 777 | i_str = str(n).zfill(len_digit) 778 | f_title_new = f'{self.settings.name_abbr}_A{i_str}' 779 | else: 780 | i_str = str(n-self.settings.body_start[vol_i]+1).zfill(len_digit) 781 | f_title_new = f'{self.settings.name_abbr}_B{i_str}' 782 | imgs.append({'vol_n': vol_i+1, 'title': f_title_new, 'path': f_title_new+f_ext, 'i_in_vol': n-1}) 783 | # 复制新文件到输出文件夹 784 | if copy_flg: 785 | shutil.copy(img_file, os.path.join(dir_imgs_out, f_title_new+f_ext)) 786 | return imgs 787 | -------------------------------------------------------------------------------- /images/amb_folder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/amb_folder.png -------------------------------------------------------------------------------- /images/auto_split.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/auto_split.png -------------------------------------------------------------------------------- /images/img_dict_atmpl.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/img_dict_atmpl.gif -------------------------------------------------------------------------------- /images/img_dict_btmpl.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/img_dict_btmpl.gif -------------------------------------------------------------------------------- /images/imgs_order.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/imgs_order.png -------------------------------------------------------------------------------- /images/index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/index.png -------------------------------------------------------------------------------- /images/index_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/index_all.png -------------------------------------------------------------------------------- /images/settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/settings.png -------------------------------------------------------------------------------- /images/syns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/syns.png 
-------------------------------------------------------------------------------- /images/text_dict_ctmpl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/text_dict_ctmpl.png -------------------------------------------------------------------------------- /images/text_dict_dtmpl.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/text_dict_dtmpl.gif -------------------------------------------------------------------------------- /images/toc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/toc.png -------------------------------------------------------------------------------- /images/work_dir_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/work_dir_tree.png -------------------------------------------------------------------------------- /lib/FreePic2Pdf.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/lib/FreePic2Pdf.ini -------------------------------------------------------------------------------- /lib/MuPDF_pcs.txt: -------------------------------------------------------------------------------- 1 | %%MediaBox 0 0 2 | %%Rotate 0 3 | %%Image Im 4 | 5 | % Draw an image. 
6 | q 7 | 0 0 0 0 cm 8 | /Im Do 9 | Q 10 | -------------------------------------------------------------------------------- /lib/PDFPatcher_AppConfig.json: -------------------------------------------------------------------------------- 1 | {"检查更新时间":"2023-11-18T11:27:03","检查更新间隔":14,"保存程序设置":true,"文档加载模式":"优化处理效率","编码设置":{},"信息文件导出设置":{"导出文档属性":true,"导出文档书签":true,"导出页面链接":true,"导出阅读器设置":true,"导出页面设置":true,"导出编录信息":false,"导出页面内容":false,"导出页面字典":false,"导出图片":false,"导出解码文本":false,"导出命令操作符":false,"导出二进制流":false,"解析命名位置":false,"导出尺寸单位":{"单位":"厘米"},"文本编码":"系统默认"},"信息文件导入设置":{"导入文档属性":true,"导入文档书签":true,"导入页面链接":true,"保留页面链接":false,"导入阅读器设置":true,"导入页面设置":true},"PDF文件处理设置":{"页面布局":{"边框调整值":{"上":0,"右":0,"左":0,"下":0,"比例":false},"指定尺寸":{"名称":"等同原始内容尺寸","高度":0,"宽度":0},"页面筛选":"NotSpecified","旋转角度":0,"拉伸内容":false,"水平对齐":"Center","垂直对齐":"Middle","基准页面":0},"水平DPI":0,"垂直DPI":0,"校正图片旋转角度":false,"优化黑白图片压缩算法":false,"黑白图片自动透明":true,"指定元数据":{"指定文档元数据属性":false,"重写XML元数据属性":false},"阅读器设置":{"删除缩放比例":false,"强制内部链接":false,"书签状态":"保持不变","指定阅读器设置":false,"隐藏菜单":false,"隐藏工具栏":false,"隐藏程序界面":false,"适合窗口":false,"窗口居中":false,"显示文档标题":false},"压缩索引表和书签":true,"统一页面方向":false,"旋转源页面方向":false,"旋转方向":false},"PDF文档设置":{"嵌入字库":false,"删除文本尾随空白":false,"允许替换字库":false,"修复内容流":false,"删除批注":false,"删除导航书签":false,"删除使用限制":false,"删除文档自动动作":false,"删除页面自动动作":false,"删除页面表单":false,"删除链接批注":false,"删除页面元数据":false,"删除页面文本":false,"删除页面缩略图":false,"删除XML元数据":false,"优化黑白图片压缩算法":false,"页面布局":{"边框调整值":{"上":0,"右":0,"左":0,"下":0,"比例":false},"指定尺寸":{"名称":"等同原始内容尺寸","高度":0,"宽度":0},"页面筛选":"NotSpecified","旋转角度":0,"拉伸内容":false,"水平对齐":"Center","垂直对齐":"Middle","基准页面":0},"指定元数据":{"指定文档元数据属性":false,"重写XML元数据属性":false},"阅读器设置":{"删除缩放比例":false,"强制内部链接":false,"书签状态":"保持不变","指定阅读器设置":false,"隐藏菜单":false,"隐藏工具栏":false,"隐藏程序界面":false,"适合窗口":false,"窗口居中":false,"显示文档标题":false},"压缩索引表和书签":true,"统一页面方向":false,"旋转源页面方向":false,"旋转方向":false},"PDF编辑器设置":{"嵌入字库":false,"删除文本尾随空白":false,"允许替换字库":false,"修复内容流":false,"删除批注":false,"删除导航书签":false,"删除使用限制":false,"删除文档自动动作":false,"删除页面自动动作":false,"删除页面表单":false,"删除链接批注":false,"删除页面元数据":false,"删除页面文本":false,"删除页面缩略图":false,"删除XML元数据":false,"优化黑白图片压缩算法":false,"页面布局":{"边框调整值":{"上":0,"右":0,"左":0,"下":0,"比例":false},"指定尺寸":{"名称":"等同原始内容尺寸","高度":0,"宽度":0},"页面筛选":"NotSpecified","旋转角度":0,"拉伸内容":false,"水平对齐":"Center","垂直对齐":"Middle","基准页面":0},"指定元数据":{"指定文档元数据属性":false,"重写XML元数据属性":false},"阅读器设置":{"删除缩放比例":false,"强制内部链接":false,"书签状态":"保持不变","指定阅读器设置":false,"隐藏菜单":false,"隐藏工具栏":false,"隐藏程序界面":false,"适合窗口":false,"窗口居中":false,"显示文档标题":false},"压缩索引表和书签":true,"统一页面方向":false,"旋转源页面方向":false,"旋转方向":false},"自动生成书签设置":{"最小标题尺寸":13,"第一行为标题":false,"忽略单字符标题":false,"忽略数字标题":false,"合并相邻标题":true,"合并不同尺寸标题":false,"合并不同字体标题":true,"忽略重叠文本":false,"自动组织标题层次":true,"列出字体统计信息":true,"列出所有字体":false,"排版":"混合","最大合并行距":1.5,"识别分栏":true,"为首页生成书签":true,"Y轴偏移":1,"定位到页面顶端":0,"导出文本位置信息":false},"导出图像设置":{"自动指定输出位置":true,"避免重复导出图片":false,"合并图片":false,"合并JPG图片为PNG":true,"垂直翻转图片":false,"反转黑白图片颜色":false,"黑白图片导出为PNG":true,"导出批注图片":false,"最小高度":0,"最小宽度":0,"导出路径":"","文件名称掩码":"000000","导出掩模":false,"取反掩模":false},"转为图片设置":{"自动指定输出位置":true,"图片格式":"Png","垂直翻转图片":false,"水平翻转图片":false,"图片颜色":"Rgb","反转图片颜色":false,"JPEG质量":0,"旋转角度":0,"图片宽度":0,"图片比例":1,"分辨率":72,"尺寸模式":false,"文件名称掩码":"0000","适合区域":false,"隐藏批注":false,"减少颜色":false,"伽马校正":1,"染色":16777215},"提取页面设置":{"压缩文档":true,"保留文档属性":true,"保留文档书签":true,"删除无效书签":true,"解除文档限制":true,"添加编号":true,"拆分方式":0,"按页数拆分":1},"文本识别设置":{"识别语言":2052,"旋转校正":false,"拉伸校正":false,"排版":"混合","识别分栏":true,"目录识别模式":false,"压缩空白":false,"删除汉字间空白":false,"识别前保留图像颜色":false,"导出原始识别结果
":false,"在屏幕输出识别文本":false},"工具栏设置":{"Buttons":[{"ID":"Editor","按钮名称":"编辑器","显示按钮文字":true,"显示按钮":true},{"ID":"Patcher","按钮名称":"批量修改文档","显示按钮文字":true,"显示按钮":true},{"ID":"Merger","按钮名称":"合并文档","显示按钮文字":true,"显示按钮":true},{"ID":"Ocr","按钮名称":"识别文本","显示按钮文字":false,"显示按钮":true},{"ID":"BookmarkGenerator","按钮名称":"自动书签","显示按钮文字":false,"显示按钮":true},{"ID":"Rename","按钮名称":"批量重命名","显示按钮文字":false,"显示按钮":false},{"ID":"ExtractPages","按钮名称":"提取页面或拆分文档","显示按钮文字":false,"显示按钮":true},{"ID":"ExtractImages","按钮名称":"提取图片","显示按钮文字":true,"显示按钮":true},{"ID":"RenderPages","按钮名称":"转换页面为图片","显示按钮文字":false,"显示按钮":true},{"ID":"Inspector","按钮名称":"结构探查器","显示按钮文字":false,"显示按钮":false},{"ID":"InfoExchanger","按钮名称":"导出导入信息文件","显示按钮文字":false,"显示按钮":true},{"ID":"Options","按钮名称":"程序配置","显示按钮文字":false,"显示按钮":false}]},"窗口设置":{"状态":"Normal","左":58,"上":33,"宽":928,"高":678},"最近使用的文档":{"源文件":["D:\\汉语方言词汇.pdf"],"文件夹":["D:\\汉语方言词汇"]}} -------------------------------------------------------------------------------- /lib/Pdg2Pic.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/lib/Pdg2Pic.ini -------------------------------------------------------------------------------- /lib/atmpl.css: -------------------------------------------------------------------------------- 1 | /*预定义*/ 2 | ul { 3 | margin-right: 0px; 4 | margin-left: 32px; 5 | margin-top: 4px; 6 | margin-bottom: 4px; 7 | padding: 0px; 8 | } 9 | p { 10 | text-indent: 2em; 11 | margin:4px auto; 12 | } 13 | a {text-decoration:none} 14 | 15 | /*----------导航------------*/ 16 | /*导航框*/ 17 | div.top-navi{ 18 | margin: 0px 0px 0px 0px; 19 | padding: 3px 0px; 20 | border-top: 3px solid #3b264d; 21 | background-color: #CC9933; /* CD284C */ 22 | font-weight: bold; 23 | text-align: center; 24 | } 25 | div.bottom-navi{ 26 | margin: 0px 0px 0px 0px; 27 | padding: 3px 0px; 28 | border-bottom: 1px solid #3b264d;/*#ef8b14; #8470FF*/ 29 | background-color: #EAEAEA; 30 | text-align: center; 31 | } 32 | /*导航链接*/ 33 | div.top-navi a:link {color: #ffffff;} 34 | div.top-navi a:hover {background:green;} 35 | div.top-navi a:visited {color: #ffffff} /* FFFF99 */ 36 | div.bottom-navi a:link {color: blue;} 37 | div.bottom-navi a:hover {background:yellow;} 38 | span.navi-item{ 39 | margin: 0px 0px 0px 0px; 40 | } 41 | span.navi-item-left{ 42 | margin: 0px 0px 0px 4px; 43 | float: left; 44 | } 45 | span.navi-item-middle span.navi-item{ 46 | margin: 0px 6px 0px 6px; 47 | } 48 | span.navi-item-right{ 49 | margin: 0px 4px 0px 0px; 50 | float: right; 51 | } 52 | 53 | /*----------词条------------*/ 54 | /*---图片---*/ 55 | div.main-img img{ 56 | text-align: center; 57 | width: 100%; 58 | } 59 | /**/ 60 | 61 | /*---目录标题---*/ 62 | div.toc-title { 63 | text-align: center; 64 | margin-bottom: 8px; 65 | font-weight: bold; 66 | font-size: 120%; 67 | } 68 | /*---目录链接---*/ 69 | div.toc-text a:link {color: blue;} 70 | div.toc-text a:hover {background:yellow;} 71 | -------------------------------------------------------------------------------- /lib/auto_split_2.css: -------------------------------------------------------------------------------- 1 | /*图片自适应单双栏 (如不需要则注释下面两段, 并开启 display:none 那行)*/ 2 | @media screen and (max-width:720px) { 3 | div.main-img {width: 100%;overflow:hidden;} 4 | div.main-img div.left{position:relative;width: 200%;z-index:2;} 5 | div.main-img div.right{position:relative;width: 200%;margin-left:-100%;z-index:1;} 6 | } /*【额外pic设置】div.main-img div.pic{clip-path: polygon(0 0, 
100% 0, 100% 94%, 0 95%);margin-top: 0.5em;}*/ /* margin-bottom: -4.5em; */ 7 | @media screen and (min-width:721px) { 8 | div.main-img{width:100%;overflow:hidden;margin-top: 0.5em;} 9 | div.main-img div.left{position:relative;display:hidden;margin:0;padding:0;} 10 | div.main-img div.right{position:relative;width:100%; display: none;} 11 | } /* 【right部分】高度H÷宽度W margin-top: -147.3235%; */ 12 | /*div.main-img div.right {display:none;}*/ 13 | -------------------------------------------------------------------------------- /lib/bkmk/FreePic2Pdf.itf: -------------------------------------------------------------------------------- 1 | [Images] 2 | 3 | [Font] 4 | Language=GBK 5 | FontSize=7 6 | Margin=0.5 7 | 8 | [Bkmk] 9 | File=FreePic2Pdf_bkmk.txt 10 | AddAsText=0 11 | ShowBkmk=1 12 | ShowAll=0 13 | BasePage=63 14 | 15 | [Main] 16 | ContentsPage= 17 | TextPage=63 18 | -------------------------------------------------------------------------------- /lib/bkmk/FreePic2Pdf_bkmk.txt: -------------------------------------------------------------------------------- 1 | 封面 -62 2 | 前言 -59 3 | 凡例 -57 4 | 方言音系简介 -54 5 | 一、北京话声韵调 -54 6 | 二、济南话声韵调 -52 7 | 三、西安话声韵调 -50 8 | 四、太原话声韵调 -48 9 | 五、武汉话声韵调 -46 10 | 六、成都话声韵调 -45 11 | 七、合肥话声韵调 -44 12 | 八、扬州话声韵调 -42 13 | 九、苏州话声韵调 -41 14 | 十、温州话声韵调 -38 15 | 十一、长沙话声韵调 -35 16 | 十二、双峰话声韵调 -34 17 | 十三、南昌话声韵调 -32 18 | 十四、梅县话声韵调 -30 19 | 十五、广州话声韵调 -28 20 | 十六、阳江话声韵调 -26 21 | 十七、厦门话声韵调 -23 22 | 十八、潮州话声韵调 -21 23 | 十九、福州话声韵调 -19 24 | 二十、建瓯话声韵调 -15 25 | 分类词目 -12 26 | 正文 1 27 | 普通话音序索引 617 28 | 封底 629 29 | -------------------------------------------------------------------------------- /lib/bkmk_utf16le/FreePic2Pdf.itf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/lib/bkmk_utf16le/FreePic2Pdf.itf -------------------------------------------------------------------------------- /lib/bkmk_utf16le/FreePic2Pdf_bkmk.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/lib/bkmk_utf16le/FreePic2Pdf_bkmk.txt -------------------------------------------------------------------------------- /lib/btmpl.css: -------------------------------------------------------------------------------- 1 | /*预定义*/ 2 | ul { 3 | margin-right: 0px; 4 | margin-left: 32px; 5 | margin-top: 4px; 6 | margin-bottom: 4px; 7 | padding: 0px; 8 | } 9 | p { 10 | text-indent: 2em; 11 | margin:4px auto; 12 | } 13 | a {text-decoration:none} 14 | 15 | /*----------导航------------*/ 16 | /*原索引 (不展示)*/ 17 | div.index-all {display: none;} 18 | /*导航框*/ 19 | div.top-navi-level{ 20 | margin: 0px 0px 0px 0px; 21 | padding: 3px 0px 3px 4px; 22 | border-top: 3px solid #3b264d; 23 | background-color: #CC9933; /* CD284C */ 24 | font-weight: bold; 25 | text-align: left; 26 | } 27 | span.sep-navi {margin-left: 4px; margin-right: 4px; color: black; font-size: 90%;} 28 | div.top-navi{ 29 | margin: 0px 0px 0px 0px; 30 | padding: 3px 0px; 31 | border-top: 3px solid #3b264d; 32 | background-color: #CC9933; /* CD284C */ 33 | font-weight: bold; 34 | text-align: center; 35 | } 36 | div.bottom-navi{ 37 | margin: 0px 0px 0px 0px; 38 | padding: 3px 0px; 39 | border-bottom: 1px solid #3b264d;/*#ef8b14; #8470FF*/ 40 | background-color: #EAEAEA; 41 | text-align: center; 42 | } 43 | /*导航链接*/ 44 | div.top-navi a:link {color: #ffffff;} 45 | div.top-navi a:hover {background:green;} 46 | div.top-navi 
a:visited {color: #ffffff} /* FFFF99 */ 47 | div.bottom-navi a:link {color: blue;} 48 | div.bottom-navi a:hover {background:yellow;} 49 | /*导航链接 (多级) */ 50 | div.top-navi-level a:link {color: #ffffff;} 51 | div.top-navi-level a:hover {background:green;} 52 | div.top-navi-level a:visited {color: #ffffff} /* FFFF99 */ 53 | div.top-navi-level span.navi-item-entry a:link {color: #FFFF99;} 54 | div.top-navi-level span.navi-item-entry a:hover {background:green;} 55 | div.top-navi-level span.navi-item-entry a:visited {color: #FFFF99} /* FFFF99 */ 56 | span.navi-item{ 57 | margin: 0px 0px 0px 0px; 58 | } 59 | span.navi-item-entry{ 60 | margin: 0px 0px 0px 0px; 61 | } 62 | span.navi-item-left{ 63 | margin: 0px 0px 0px 4px; 64 | float: left; 65 | } 66 | span.navi-item-middle span.navi-item{ 67 | margin: 0px 6px 0px 6px; 68 | } 69 | span.navi-item-right{ 70 | margin: 0px 4px 0px 0px; 71 | float: right; 72 | } 73 | 74 | /*----------词条------------*/ 75 | /*---图片---*/ 76 | div.main-img img{ 77 | text-align: center; 78 | width: 100%; 79 | } 80 | /**/ 81 | 82 | /*---目录标题---*/ 83 | div.toc-title { 84 | text-align: center; 85 | margin-bottom: 8px; 86 | font-weight: bold; 87 | font-size: 120%; 88 | } 89 | /*---目录链接---*/ 90 | div.toc-text a:link {color: blue;} 91 | div.toc-text a:hover {background:yellow;} 92 | span.sep-list {color: grey; font-size: 90%;} 93 | -------------------------------------------------------------------------------- /lib/build.toml: -------------------------------------------------------------------------------- 1 | # 词典制作的配置文件(用于 AutoMdxBuilder 1.4 版本及以上) 2 | 3 | [global] 4 | templ_choice = "B" # 【重要】选择要应用的模板, 同时需完成下方对应模板的具体配置(如果有的话) 5 | name = "文史工具书词典" # 书名 6 | name_abbr = "WSGJSCD" # 书名首字母缩写 7 | simp_trad_flg = false # 是否需要繁简通搜 8 | add_extra_navis = false # 是否需要添加额外的导航栏(index_all_navi_\d+.txt) 9 | multi_volume = false # 是否是多卷的 10 | # 多卷模式下可以标示每个分卷名 (作用于 toc, index_all/toc_all) 11 | #vol_names = [ 12 | # "政治斗争卷", 13 | # "政治人物卷", 14 | # "军事卷" 15 | #] 16 | 17 | 18 | [template] 19 | [template.a] 20 | # 图像词典 (模板A) 21 | # 必需材料: imgs(文件夹), index/toc 22 | # 可选材料: syns, info 23 | body_start = 1 # 正文起始页为第几张图(>=1) 24 | auto_split_columns = 1 # (可选)自适应分栏数 (默认1表示不分栏) 25 | body_end_page = 99999 # (可选)最大正文页码 (用于自适应分栏范围的判断, 默认到最后一页) 26 | 27 | # (可选)导航栏链接, 有目录 (toc) 就可以设置 28 | #navi_items = [ 29 | # {a = "凡例",ref = "凡例"}, 30 | # {a = "北京",ref = "一、北京话声韵调"}, 31 | # {a = "苏州",ref = "九、苏州话声韵调"}, 32 | # {a = "武汉",ref = "五、武汉话声韵调"}, 33 | # {a = "成都",ref = "六、成都话声韵调"} 34 | #] 35 | 36 | 37 | [template.b] 38 | # 图像词典 (模板B) 39 | # 必需材料: imgs(文件夹), index_all/toc_all 40 | # 可选材料: syns, info 41 | body_start = 1 # 正文起始页为第几张图(>=1) 42 | auto_split_columns = 1 # (可选)自适应分栏数 (默认1表示不分栏) 43 | body_end_page = 99999 # (可选)最大正文词条页码 (用于自适应分栏范围的判断, 默认到最后一页) 44 | add_extra_index = false # 添加额外的 index.txt 文件 45 | 46 | 47 | [template.c] 48 | # 文本词典 (模板C) 49 | # 必需材料: index 50 | # 可选材料: data(文件夹), syns, info 51 | add_headwords = true 52 | 53 | [template.d] 54 | # 文本词典 (模板D) 55 | # 必需材料: index_all 56 | # 可选材料: data(文件夹), syns, info 57 | add_headwords = true 58 | -------------------------------------------------------------------------------- /lib/ctmpl.css: -------------------------------------------------------------------------------- 1 | /*预定义*/ 2 | h1 {text-align:center; font-size: 150%;} 3 | ul { 4 | margin-right: 0px; 5 | margin-left: 32px; 6 | margin-top: 4px; 7 | margin-bottom: 4px; 8 | padding: 0px; 9 | } 10 | p { 11 | text-indent: 2em; 12 | margin:4px auto; 13 | } 14 | a {text-decoration:none} 15 | 16 | 
/*----------词条------------*/ 17 | div.readertdetaillink {display: none;} 18 | /*词目*/ 19 | div.entry-headword { 20 | text-align: left; 21 | font-weight: bold; 22 | font-size: 110%; 23 | color: #2F49A5; 24 | } 25 | /*目录标题*/ 26 | div.toc-title { 27 | text-align: center; 28 | margin-bottom: 8px; 29 | font-weight: bold; 30 | font-size: 120%; 31 | } 32 | /*正文*/ 33 | div.entry-body {} 34 | /*数据来源*/ 35 | p.source {text-align: right; color: grey; font-size: 90%;} 36 | 37 | /*----------三方补充------------*/ 38 | div.tocTitle {text-align: center; margin-bottom: 8px; font-weight: bold; font-size: 120%;} /*目录标题*/ 39 | div.tocText {} /*词条内容*/ 40 | div.entryTitle {color: #00578E; font-weight: bold; font-size: 110%;} /*词目*/ 41 | div.entryText {} /*词条内容*/ 42 | div.title {} 43 | span#titleText {color: #00578E; font-weight: bold; font-size: 110%;} /*词目*/ 44 | span#titlePY {margin-left: 6px; color: yellow; font-weight: normal; font-size: 110%;} /*拼音*/ 45 | span#titleTextEn {margin-left: 6px; color: #134f28; font-weight: normal; font-size: 95%;} /*英文*/ 46 | span#titleOtherText {margin-left: 4px; color: grey; font-weight: normal; font-size: 95%;} /*其他*/ 47 | div.contentText {} 48 | -------------------------------------------------------------------------------- /lib/dtmpl.css: -------------------------------------------------------------------------------- 1 | /*预定义*/ 2 | h1 {text-align:center; font-size: 150%;} 3 | ul { 4 | margin-right: 0px; 5 | margin-left: 32px; 6 | margin-top: 4px; 7 | margin-bottom: 4px; 8 | padding: 0px; 9 | } 10 | p { 11 | text-indent: 2em; 12 | margin:4px auto; 13 | } 14 | a {text-decoration:none} 15 | 16 | /*----------导航------------*/ 17 | /*原索引 (不展示)*/ 18 | div.index-all {display: none;} 19 | div.readertdetaillink {display: none;} 20 | /*导航框*/ 21 | div.top-navi-level { 22 | margin: 12px 0px 6px 0px; 23 | padding: 3px 0px; 24 | border-top: 2px solid #ef8b14; 25 | border-bottom: 1px solid #ef8b14;/*#ef8b14; #8470FF*/ 26 | } 27 | span.sep-navi {margin-left: 4px; margin-right: 4px; color: black; font-size: 90%;} 28 | div.bottom-navi { 29 | margin: 12px 0px 6px 0px; 30 | padding: 3px 0px; 31 | border-top: 1px dotted #3c4457; 32 | border-bottom: 1px dotted #3c4457;/*#ef8b14; #8470FF #ef8b14*/ 33 | text-align:center; 34 | color:#3c4457; 35 | } 36 | /*导航链接*/ 37 | div.bottom-navi a:link {color: grey;} 38 | div.bottom-navi a:hover {background:yellow;} 39 | div.bottom-navi a:visited {color: grey} 40 | /*导航链接 (多级) */ 41 | div.top-navi-level a:link {color: #7c0000;} 42 | div.top-navi-level a:hover {background: yellow;} 43 | div.top-navi-level a:visited {color: #7c0000} 44 | div.top-navi-level span.navi-item-entry a:link {color: #C30A50;} 45 | div.top-navi-level span.navi-item-entry a:hover {background: yellow;} 46 | div.top-navi-level span.navi-item-entry a:visited {color: #C30A50} 47 | span.navi-item{ 48 | margin: 0px 0px 0px 0px; 49 | } 50 | span.navi-item-entry{ 51 | margin: 0px 0px 0px 0px; 52 | } 53 | span.navi-item-left{ 54 | margin: 0px 0px 0px 4px; 55 | float: left; 56 | } 57 | span.navi-item-middle span.navi-item{ 58 | margin: 0px 6px 0px 6px; 59 | } 60 | span.navi-item-right{ 61 | margin: 0px 4px 0px 0px; 62 | float: right; 63 | } 64 | 65 | /*----------词条------------*/ 66 | /*目录标题*/ 67 | div.toc-title { 68 | text-align: center; 69 | margin-bottom: 8px; 70 | font-weight: bold; 71 | font-size: 120%; 72 | } 73 | /*目录链接*/ 74 | div.toc-text a:link {color: blue;} 75 | div.toc-text a:hover {background:yellow;} 76 | span.sep-list {color: grey; font-size: 90%;} 77 | /*词目*/ 78 | div.entry-headword { 
79 | text-align: left; 80 | font-weight: bold; 81 | font-size: 110%; 82 | color: #a50000; 83 | } 84 | /*正文*/ 85 | div.entry-body {} 86 | /*数据来源*/ 87 | p.source {text-align: right; color: grey; font-size: 90%;} 88 | 89 | /*----------三方补充------------*/ 90 | div.tocTitle {text-align: center; margin-bottom: 8px; font-weight: bold; font-size: 120%;} /*目录标题*/ 91 | div.tocText {} /*词条内容*/ 92 | div.entryTitle {color: #00578E; font-weight: bold; font-size: 110%;} /*词目*/ 93 | div.entryText {} /*词条内容*/ 94 | div.title {} 95 | span#titleText {color: #00578E; font-weight: bold; font-size: 110%;} /*词目*/ 96 | span#titlePY {margin-left: 6px; color: yellow; font-weight: normal; font-size: 110%;} /*拼音*/ 97 | span#titleTextEn {margin-left: 6px; color: #134f28; font-weight: normal; font-size: 95%;} /*英文*/ 98 | span#titleOtherText {margin-left: 4px; color: grey; font-weight: normal; font-size: 95%;} /*其他*/ 99 | div.contentText {} 100 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | colorama==0.4.6 2 | tomlkit==0.12.2 3 | tomli==2.0.1 4 | mdict-utils==1.3.12 5 | OpenCC==1.1.1 6 | Pillow==10.1.0 7 | PyMuPDF==1.23.6 8 | pywinauto==0.6.8 9 | pywin32==306 10 | pywin32-ctypes==0.2.2 11 | -------------------------------------------------------------------------------- /settings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:58 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | import os 9 | import sys 10 | import re 11 | from tomli import load 12 | from tomlkit import loads 13 | from colorama import Fore 14 | 15 | 16 | class Settings: 17 | """ 词典设置 """ 18 | # 【提示】 AMB 1.4 及以后的版本已不在此处配置词典, 请移步 build.toml 文件 19 | def __init__(self): 20 | # 程序版本 21 | self.version = '1.6' 22 | 23 | # 输入文件 24 | self.dname_imgs = 'imgs' 25 | self.img_exts = ['.jpg', 'jpeg', '.jp2', '.png', '.gif', '.bmp', '.tif', '.tiff'] 26 | self.len_digit = 6 27 | self.dname_data = 'data' 28 | self.fname_index = 'index.txt' 29 | self.fname_index_all = 'index_all.txt' 30 | self.fname_toc_all = 'toc_all.txt' 31 | self.fname_toc = 'toc.txt' 32 | self.fname_syns = 'syns.txt' 33 | self.fname_dict_info = 'info.html' 34 | 35 | # 输出文件 36 | if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'): 37 | self.dir_bundle = sys._MEIPASS 38 | else: 39 | self.dir_bundle = os.getcwd() 40 | self.dir_output_tmp = os.path.join(self.dir_bundle, '_tmp') 41 | if not os.path.exists(self.dir_output_tmp): 42 | os.makedirs(self.dir_output_tmp) 43 | self.dir_index = os.path.join(self.dir_output_tmp, 'index') 44 | self.dir_toc = os.path.join(self.dir_output_tmp, 'toc') 45 | self.dir_index_all = os.path.join(self.dir_output_tmp, 'index_all') 46 | self.fname_entries_text = 'entries_text.txt' 47 | self.fname_entries_img = 'entries_img.txt' 48 | self.fname_entries_toc = 'entries_toc.txt' 49 | self.fname_entries_with_navi = 'entries_with_navi.txt' 50 | self.fname_entries_with_navi_text = 'entries_with_navi_text.txt' 51 | self.fname_relinks_syn = 'relinks_syn.txt' 52 | self.fname_relinks_st = 'relinks_st.txt' 53 | self.fname_relinks_index = 'relinks_index.txt' # template B 54 | self.fname_relinks_headword = 'relinks_headword.txt' 55 | self.file_log = os.path.join(self.dir_bundle, '_log.log') 56 | 57 | # 文本格式 58 | # index/index_all 59 | self.pat_stem = 
re.compile(r'【L(\d+)】([^\t]+)\t(\-\d+|\d*)[\r\n]*$') # 匹配图像词典全索引的主干条目 (有页码/无页码) 60 | self.pat_stem_vol = re.compile(r'【L(\d+)】([^\t]+)\t\[(\d+)\](\-\d+|\d*)[\r\n]*$') # [有卷标]匹配图像词典全索引的主干条目 (有页码/无页码) 61 | self.pat_stem_text = re.compile(r'【L(\d+)】([^\t]+)\t([^\t\r\n]*)[\r\n]*$') # 匹配文本词典全索引的主干条目 (有内容/无内容) 62 | self.pat_index = re.compile(r'([^\t]+)\t(\-?\d+)[\r\n]*$') # 匹配图像词典索引 (有页码) 63 | self.pat_index_vol = re.compile(r'([^\t]+)\t\[(\d+)\](\-?\d+)[\r\n]*$') # [有卷标]匹配图像词典索引 (有页码) 64 | self.pat_index_blank = re.compile(r'([^\t\r\n]+)[\t\r\n]*$') # 匹配导航 index_all, 无内容 65 | # toc 66 | self.pat_toc = re.compile(r'(\t*)([^\t]+)\t(\-?\d+)[\r\n]*$') # 匹配图像词典目录 (有页码) 67 | self.pat_toc_vol = re.compile(r'(\t*)([^\t]+)\t\[(\d+)\](\-?\d+)[\r\n]*$') # [有卷标]匹配图像词典目录 (有页码) 68 | self.pat_toc_blank = re.compile(r'(\t*)([^\t\r\n]+)[\t\r\n]*$') # 匹配图像词典目录 (无页码) 69 | # TAB分隔(通用) 70 | self.pat_tab = re.compile(r'([^\t]+)\t([^\t\r\n]+)[\r\n]*$') 71 | # 提取 72 | self.pat_relink = re.compile(r'^([^\r\n]+)[\r\n]+@@@LINK=([^\r\n]+)[\r\n]+[\r\n]*$', flags=re.M) 73 | 74 | # 预设样式/模板 75 | self.dir_lib = os.path.join(self.dir_bundle, 'lib') 76 | self.build_tmpl = 'build.toml' 77 | self.css_atmpl = 'atmpl.css' 78 | self.css_btmpl = 'btmpl.css' 79 | self.css_ctmpl = 'ctmpl.css' 80 | self.css_dtmpl = 'dtmpl.css' 81 | self.css_split_2 = 'auto_split_2.css' 82 | 83 | # 预设值 84 | self.body_start = [1] 85 | self.split_columns = 1 86 | self.body_end_page = [99999] 87 | self.add_headwords = True 88 | self.multi_volume = False 89 | self.volume_num = 1 90 | self.vol_names = [None] 91 | self.add_extra_index = False # template B 92 | 93 | def load_build_toml(self, file_toml, pdf_flg=False, outside_flg=True): 94 | build_flg = True 95 | # 输入文件夹 96 | self.dir_input = os.path.split(file_toml)[0] 97 | self.dir_input_tmp = os.path.join(self.dir_input, '_tmp') 98 | with open(file_toml, 'rb') as fr: 99 | try: 100 | build = load(fr) 101 | # --- 通用设置 --- 102 | self.name = build["global"]["name"] # 书名 103 | self.name_abbr = build["global"]["name_abbr"].upper() # 书名首字母缩写 104 | self.simp_trad_flg = build["global"].get("simp_trad_flg", False) # 是否要繁简通搜 105 | self.add_extra_navis = build["global"].get("add_extra_navis", False) # 是否要额外导航栏 106 | # --- 区别设置 --- 107 | self.templ_choice = build["global"]["templ_choice"].upper() # 模板选择 108 | self.multi_volume = build["global"].get("multi_volume", False) 109 | # 模板 A, B 110 | if self.templ_choice in ('A', 'B'): 111 | # --- 1.独有部分 ---- 112 | if self.templ_choice == 'A': 113 | label = 'a' 114 | self.navi_items = build["template"][label].get("navi_items", []) 115 | for item in self.navi_items: 116 | if item["ref"] == "": 117 | item["ref"] = item["a"] 118 | else: 119 | label = 'b' 120 | self.add_extra_index = build["template"][label].get("add_extra_index", False) 121 | # --- 2.共有部分 ---- 122 | # body_start 123 | self.body_start = build["template"][label]["body_start"] # 正文起始页为第几张图(>=1) 124 | if isinstance(self.body_start, int): 125 | self.body_start = [self.body_start] 126 | # 卷数, 卷名(默认全 None) 127 | self.volume_num = len(self.body_start) 128 | if self.multi_volume: 129 | get_vol_names = build["global"].get("vol_names", self.vol_names[0]) 130 | if not get_vol_names: 131 | self.vol_names = [None for i in range(self.volume_num)] 132 | elif isinstance(get_vol_names, list) and len(get_vol_names) == self.volume_num: 133 | self.vol_names = get_vol_names 134 | elif isinstance(get_vol_names, list) and len(get_vol_names) != self.volume_num: 135 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 body_start 和 
vol_names 数目不匹配") 136 | build_flg = False 137 | else: 138 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 vol_names 设置有误") 139 | build_flg = False 140 | # 分栏 (可选) 141 | self.split_columns = build["template"][label].get("auto_split_columns", 1) 142 | get_body_end_page = build["template"][label].get("body_end_page", self.body_end_page[0]) 143 | if self.multi_volume: 144 | self.body_end_page = [self.body_end_page[0] for i in range(self.volume_num)] 145 | if isinstance(get_body_end_page, int): 146 | self.body_end_page = [get_body_end_page for i in range(self.volume_num)] 147 | elif isinstance(get_body_end_page, list): 148 | if len(get_body_end_page) > self.volume_num: 149 | build_flg = False 150 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 body_end_page 数目超过了分卷数") 151 | else: 152 | for i in range(len(get_body_end_page)): 153 | self.body_end_page[i] = get_body_end_page[i] 154 | else: 155 | build_flg = False 156 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 body_end_page 格式有误") 157 | else: 158 | if isinstance(get_body_end_page, int): 159 | self.body_end_page[0] = get_body_end_page 160 | elif isinstance(get_body_end_page, list): 161 | self.body_end_page[0] = get_body_end_page[0] 162 | else: 163 | build_flg = False 164 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 body_end_page 格式有误") 165 | # 模板 C 166 | elif self.templ_choice == 'C': 167 | self.add_headwords = build["template"]["c"].get("add_headwords", True) 168 | # 模板 D 169 | elif self.templ_choice == 'D': 170 | self.add_headwords = build["template"]["d"].get("add_headwords", True) 171 | self.vol_names = build["global"].get("vol_names", self.vol_names) 172 | if not isinstance(self.vol_names, list): 173 | build_flg = False 174 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 vol_names 格式有误") 175 | # 设定其他变量 176 | self.fname_final_txt = f"{self.name}.txt" 177 | self.fname_css = f"{self.name_abbr.lower()}.css" 178 | # 确定输出文件夹 179 | if pdf_flg: 180 | pass 181 | elif outside_flg: 182 | self.dir_output = os.path.join(os.path.split(self.dir_input)[0], self.name) + '_mdict' 183 | else: 184 | self.dir_output = os.path.join(self.dir_input, self.name) + '_mdict' 185 | except: 186 | build_flg = False 187 | print(Fore.RED + "ERROR: " + Fore.RESET + "读取 build.toml 文件失败, 请检查格式是否规范、选项是否遗漏") 188 | # 生成 TOML 对象 189 | if build_flg: 190 | with open(file_toml, 'r', encoding='utf-8') as fr: 191 | self.build = loads(fr.read()) 192 | return build_flg 193 | -------------------------------------------------------------------------------- /templates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/templates/__init__.py -------------------------------------------------------------------------------- /templates/text_dict_ctmpl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:41 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | import os 9 | import re 10 | from tomlkit import dumps 11 | from colorama import Fore 12 | 13 | 14 | class TextDictCtmpl: 15 | """ 文本词典(模板C) """ 16 | def __init__(self, amb): 17 | self.settings = amb.settings 18 | self.func = amb.func 19 | 20 | def make_source_file(self): 21 | """ 制作预备 txt 源文本 """ 22 | # 清空临时目录下所有文件 23 | for fname in os.listdir(self.settings.dir_output_tmp): 24 
| fpath = os.path.join(self.settings.dir_output_tmp, fname) 25 | if os.path.isfile(fpath): 26 | os.remove(fpath) 27 | # 初始化, 检查原材料: index, syns, info, data 28 | check_result = self._check_raw_files() 29 | # 开始制作 30 | if check_result: 31 | print('\n材料检查通过, 开始制作词典……\n') 32 | # 预定义输出文件名 33 | file_final_txt = os.path.join(self.settings.dir_output_tmp, self.settings.fname_final_txt) 34 | file_dict_info = os.path.join(self.settings.dir_output_tmp, self.settings.fname_dict_info) 35 | # 1.分步生成各部分源文本 36 | file_1 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_entries_text) # 文本词条 37 | file_2 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_relinks_syn) # 同义词重定向 38 | file_3 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_relinks_st) # 繁简重定向 39 | # 判断是否要生成额外导航栏 40 | # if check_result[4]: 41 | # file_0, navi_bars = self._gen_extra_navi_bars(check_result[4]) 42 | # (1) 生成文本(主)词条 43 | headwords = self._make_entries_text(check_result[0], file_1) 44 | # (2) 生成同义词重定向 45 | if check_result[1]: 46 | headwords += self.func.make_relinks_syn(check_result[1], file_2) 47 | # (3) 生成繁简通搜重定向 48 | if self.settings.simp_trad_flg: 49 | self.func.make_relinks_st(headwords, file_3) 50 | # 2.合并成最终 txt 源文本 51 | entry_total = self.func.merge_and_count([file_1, file_2, file_3], file_final_txt) 52 | print(f'\n源文本 "{self.settings.fname_final_txt}"(共 {entry_total} 词条)生成完毕!') 53 | # 3.生成 info.html 54 | self.func.generate_info_html(check_result[2], file_dict_info, self.settings.name, 'C') 55 | # 返回制作结果 56 | return [file_final_txt, check_result[3], file_dict_info] 57 | else: 58 | print(Fore.RED + "\n材料检查不通过, 请确保材料准备无误再执行程序" + Fore.RESET) 59 | return None 60 | 61 | def _gen_extra_navi_bars(self, lst_file_index_all): 62 | """ 生成额外导航栏 """ 63 | # for file_index_all in lst_file_index_all: 64 | # dcts = self.func.read_index_all_file(file_index_all) 65 | # self.gen_top_navi_bar(dcts) 66 | 67 | def extract_final_txt(self, file_final_txt, out_dir, dict_name): 68 | """ 从模板C词典的源 txt 文本中提取 index, syns 信息 """ 69 | dcts = [] 70 | syns = [] 71 | # (一) 分析提取源 txt 文本 72 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 73 | text = fr.read() 74 | # 1.提取 index_all 75 | pat_index = re.compile(r'^.+?
(.+?)
$', flags=re.M+re.S) 76 | for t in pat_index.findall(text): 77 | dct = { 78 | "id": t[0], 79 | "name": t[1], 80 | "body": t[2] 81 | } 82 | dcts.append(dct) 83 | # 2.识别 name_abbr 84 | mth = re.search(r'^$', text, flags=re.M) 85 | if mth: 86 | name_abbr = mth.group(1).upper() 87 | else: 88 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未识别到词典缩略字母, 已设置默认值") 89 | name_abbr = 'XXXXCD' 90 | # 3.提取 syns 91 | for t in self.settings.pat_relink.findall(text): 92 | syns.append((t[0], t[1])) 93 | # (二) 整理输出提取结果 94 | # 1.index.txt 95 | dcts.sort(key=lambda dct: dct["id"], reverse=False) 96 | with open(os.path.join(out_dir, 'index.txt'), 'w', encoding='utf-8') as fw: 97 | for dct in dcts: 98 | fw.write(f'{dct["name"]}\t{dct["body"]}\n') 99 | # 2.syns.txt 100 | if syns: 101 | with open(os.path.join(out_dir, 'syns.txt'), 'w', encoding='utf-8') as fw: 102 | for s in syns: 103 | fw.write(f'{s[0]}\t{s[1]}\n') 104 | # 3.build.toml 文件 105 | self.settings.load_build_toml(os.path.join(self.settings.dir_lib, self.settings.build_tmpl), False) 106 | self.settings.build["global"]["templ_choice"] = "C" 107 | self.settings.build["global"]["name"] = dict_name 108 | self.settings.build["global"]["name_abbr"] = name_abbr 109 | # 判断 add_headwords 110 | if not re.search(r'^
[^<]+
$', text, flags=re.M): 111 | self.settings.build["template"]["c"]["add_headwords"] = False 112 | with open(os.path.join(out_dir, 'build.toml'), 'w', encoding='utf-8') as fw: 113 | fw.write(dumps(self.settings.build)) 114 | 115 | def _make_entries_text(self, file_index, file_out): 116 | headwords = [] 117 | """ (一) 生成文本(主)词条 """ 118 | with open(file_out, 'a', encoding='utf-8') as fa: 119 | with open(file_index, 'r', encoding='utf-8') as fr: 120 | i = 0 121 | for line in fr: 122 | i += 1 123 | if self.settings.pat_tab.match(line): 124 | mth = self.settings.pat_tab.match(line) 125 | part_title = f'{mth.group(1)}\n' 126 | part_css = f'\n' 127 | part_index = f'\n' 128 | if not self.settings.add_headwords: 129 | part_headword = '' 130 | else: 131 | part_headword = f'
{mth.group(1)}
\n' 132 | if re.match(r'<(p|div|html|body|title|head)', mth.group(2), flags=re.I): 133 | part_body = f'
{mth.group(2)}
\n' 134 | else: 135 | part_body = f'

{mth.group(2)}

\n' 136 | # 将完整词条写入文件 137 | fa.write(part_title+part_css+part_index+part_headword+part_body+'\n') 138 | headwords.append(mth.group(1)) 139 | else: 140 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"第 {i} 行未匹配, 已忽略") 141 | print("文本词条已生成") 142 | return headwords 143 | 144 | def _check_raw_files(self): 145 | """ 检查原材料 146 | * 必要文本存在(文本编码均要是 utf-8 无 bom) 147 | * 检查 info.html 的编码 148 | """ 149 | check_result = [] 150 | # 预定义输入文件路径 151 | file_index = os.path.join(self.settings.dir_input, self.settings.fname_index) 152 | file_syns = os.path.join(self.settings.dir_input, self.settings.fname_syns) 153 | file_dict_info = os.path.join(self.settings.dir_input, self.settings.fname_dict_info) 154 | dir_data = os.path.join(self.settings.dir_input, self.settings.dname_data) 155 | # 1.扫描识别 index 文件 156 | pass_flg = True 157 | index_check_num = self.func.text_file_check(file_index) 158 | if index_check_num == 2: 159 | with open(file_index, 'r', encoding='utf-8') as fr: 160 | i = 0 161 | for line in fr: 162 | i += 1 163 | if not self.settings.pat_tab.match(line): 164 | print(Fore.RED + "ERROR: " + Fore.RESET + f"index.txt 第 {i} 行未匹配, 请检查") 165 | pass_flg = False 166 | break 167 | elif index_check_num == 1: 168 | pass_flg = False 169 | else: 170 | pass_flg = False 171 | print(Fore.RED + "ERROR: " + Fore.RESET + "未读取到 index 文件") 172 | if pass_flg: 173 | check_result.append(file_index) 174 | # 2.检查同义词文件: 若存在就要合格 175 | syns_check_num = self.func.text_file_check(file_syns) 176 | if syns_check_num == 0: 177 | check_result.append(None) 178 | elif syns_check_num == 2: 179 | check_result.append(file_syns) 180 | # 3.检查 info.html: 若存在就要合格 181 | info_check_num = self.func.text_file_check(file_dict_info) 182 | if info_check_num == 0: 183 | check_result.append(None) 184 | elif info_check_num == 2: 185 | check_result.append(file_dict_info) 186 | # 4.检查 data 文件夹 187 | if os.path.isdir(dir_data) and len(os.listdir(dir_data)) != 0: 188 | check_result.append(dir_data) 189 | elif os.path.isdir(dir_data): 190 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "data 文件夹为空, 已忽略将不打包") 191 | check_result.append(None) 192 | else: 193 | check_result.append(None) 194 | # 返回最终检查结果 195 | if len(check_result) == 4: 196 | return check_result 197 | else: 198 | return None 199 | -------------------------------------------------------------------------------- /templates/text_dict_dtmpl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:48 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | import os 9 | import re 10 | import shutil 11 | from tomlkit import dumps 12 | from colorama import Fore 13 | 14 | 15 | class TextDictDtmpl: 16 | """ 文本词典(模板D) """ 17 | def __init__(self, amb): 18 | self.settings = amb.settings 19 | self.func = amb.func 20 | 21 | def make_source_file(self): 22 | """ 制作预备 txt 源文本 """ 23 | # 清空临时目录下所有文件 24 | for fname in os.listdir(self.settings.dir_output_tmp): 25 | fpath = os.path.join(self.settings.dir_output_tmp, fname) 26 | if os.path.isfile(fpath): 27 | os.remove(fpath) 28 | # 初始化, 检查原材料: index_all, syns, info, data 29 | check_result = self._check_raw_files() 30 | # 开始制作 31 | if check_result: 32 | print('\n材料检查通过, 开始制作词典……\n') 33 | # 预定义输出文件名 34 | file_final_txt = os.path.join(self.settings.dir_output_tmp, self.settings.fname_final_txt) 35 | file_dict_info = os.path.join(self.settings.dir_output_tmp, self.settings.fname_dict_info) 36 | # 
1.分步生成各部分源文本 37 | file_1 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_entries_with_navi_text) # 文本(有导航栏)词条 38 | file_2 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_relinks_syn) # 同义词重定向 39 | file_3 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_relinks_st) # 繁简重定向 40 | # (1) 生成文本(主)词条, 带层级导航 41 | headwords = self._make_entries_with_navi(check_result[0], file_1) 42 | if headwords: 43 | # (2) 生成近义词重定向 44 | if check_result[1]: 45 | headwords += self.func.make_relinks_syn(check_result[1], file_2) 46 | # (3) 生成繁简通搜重定向 47 | if self.settings.simp_trad_flg: 48 | self.func.make_relinks_st(headwords, file_3) 49 | # 2.合并成最终 txt 源文本 50 | entry_total = self.func.merge_and_count([file_1, file_2, file_3], file_final_txt) 51 | print(f'\n源文本 "{self.settings.fname_final_txt}"(共 {entry_total} 词条)生成完毕!') 52 | # 3.生成 info.html 53 | if self.settings.multi_volume: 54 | self.func.generate_info_html(check_result[2], file_dict_info, self.settings.name, 'D', self.settings.volume_num) 55 | else: 56 | self.func.generate_info_html(check_result[2], file_dict_info, self.settings.name, 'D') 57 | # 返回制作结果 58 | return [file_final_txt, check_result[3], file_dict_info] 59 | else: 60 | return None 61 | else: 62 | print(Fore.RED + "\n材料检查不通过, 请确保材料准备无误再执行程序" + Fore.RESET) 63 | return None 64 | 65 | def extract_final_txt(self, file_final_txt, out_dir, dict_name, multi_vols_flg=False, volume_num=1): 66 | """ 从模板D词典的源 txt 文本中提取 index, syns 信息 """ 67 | dcts = [] 68 | syns = [] 69 | # (一) 分析提取源 txt 文本 70 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 71 | text = fr.read() 72 | # 1.提取 index_all 73 | pat_index = re.compile(r'^.+?(
[^\r\n]+
)$', flags=re.M+re.S) 74 | for t in pat_index.findall(text): 75 | if t[2].startswith('
'): 76 | body = re.search(r'
(.+?)
$', t[2], flags=re.M).group(1) 77 | else: 78 | body = '' 79 | dct = { 80 | "id": t[0], 81 | "name": t[1], 82 | "body": body 83 | } 84 | dcts.append(dct) 85 | # 2.识别 name_abbr 86 | mth = re.search(r'^$', text, flags=re.M) 87 | if mth: 88 | name_abbr = mth.group(1).upper() 89 | else: 90 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未识别到词典缩略字母, 已设置默认值") 91 | name_abbr = 'XXXXCD' 92 | # 3.提取 syns 93 | for t in self.settings.pat_relink.findall(text): 94 | if not t[1].startswith(name_abbr): 95 | syns.append((t[0], t[1])) 96 | # (二) 整理输出提取结果 97 | # 1.index_all.txt 98 | dcts.sort(key=lambda dct: dct["id"], reverse=False) 99 | with open(os.path.join(out_dir, 'index_all.txt'), 'w', encoding='utf-8') as fw: 100 | for dct in dcts: 101 | if dct["body"] == '': 102 | fw.write(f'{dct["name"]}\t\n') 103 | else: 104 | fw.write(f'{dct["name"]}\t{dct["body"]}\n') 105 | # 2.syns.txt 106 | if syns: 107 | with open(os.path.join(out_dir, 'syns.txt'), 'w', encoding='utf-8') as fw: 108 | for s in syns: 109 | fw.write(f'{s[0]}\t{s[1]}\n') 110 | # 3. build.toml 文件 111 | self.settings.load_build_toml(os.path.join(self.settings.dir_lib, self.settings.build_tmpl), False) 112 | self.settings.build["global"]["templ_choice"] = "D" 113 | self.settings.build["global"]["name"] = dict_name 114 | self.settings.build["global"]["name_abbr"] = name_abbr 115 | # 判断 add_headwords 116 | if not re.search(r'^
[^<]+
$', text, flags=re.M): 117 | self.settings.build["template"]["d"]["add_headwords"] = False 118 | with open(os.path.join(out_dir, 'build.toml'), 'w', encoding='utf-8') as fw: 119 | fw.write(dumps(self.settings.build)) 120 | 121 | def _make_entries_with_navi(self, file_index_all, file_out): 122 | headwords = [] 123 | """ (一) 生成文本(主)词条, 带层级导航 """ 124 | # 1.读取全索引文件 125 | dcts = self.func.read_index_all_file(file_index_all, False) 126 | # 2.生成主体词条 127 | if dcts: 128 | with open(file_out, 'w', encoding='utf-8') as fw: 129 | tops = [] 130 | headwords_stem = [] 131 | i = 0 132 | len_dcts = len(dcts) 133 | for dct in dcts: 134 | part_css = f'\n' 135 | # 词头, 索引备份 136 | if dct["level"] == -1: 137 | part_title = f'{dct["title"]}\n' 138 | part_index = f'\n' 139 | else: 140 | part_title = f'{self.settings.name_abbr}_{dct["title"]}\n' 141 | part_index = f'\n' 142 | # top-navi-level 部分 143 | part_top = '
' 144 | part_top += f'🕮' 145 | for x in range(len(dct["navi_bar"])): 146 | cname = 'navi-item' 147 | link_name = f'{self.settings.name_abbr}_{dct["navi_bar"][x]}' 148 | if x == len(dct["navi_bar"])-1 and dct["level"] == -1: 149 | cname = 'navi-item-entry' 150 | link_name = dct["navi_bar"][x] 151 | aname = dct["navi_bar"][x] 152 | part_top += f'»{aname}' 153 | part_top += '
\n' 154 | # item-list 部分 155 | part_list = self.func.get_item_list(dct) 156 | # 词条部分 157 | if dct["level"] != -1 and dct["body"] == '': 158 | part_headword = '' 159 | part_body = '' 160 | elif dct["level"] != -1 and dct["body"] != '': 161 | part_headword = '' 162 | part_body = f'
{dct["body"]}
\n' 163 | elif not self.settings.add_headwords: 164 | part_headword = '' 165 | part_body = f'
{dct["body"]}
\n' 166 | elif re.match(r'<(p|div|html|body|title|head)', dct["body"], flags=re.I): 167 | part_headword = f'
{dct["title"]}
\n' 168 | part_body = f'
{dct["body"]}
\n' 169 | else: 170 | part_headword = f'
{dct["title"]}
\n' 171 | part_body = f'

{dct["body"]}

\n' 172 | # bottom-navi 部分 173 | part_left = '' 174 | part_right = '' 175 | if i == 0: 176 | # 只有右 177 | if dcts[i+1]["level"] != -1: 178 | part_right = f'{dcts[i+1]["title"]} ☛' 179 | else: 180 | part_right = f'{dcts[i+1]["title"]} ☛' 181 | elif i == len_dcts-1: 182 | # 只有左 183 | if dcts[i-1]["level"] != -1: 184 | part_left = f'☚ {dcts[i-1]["title"]}' 185 | else: 186 | part_left = f'☚ {dcts[i-1]["title"]}' 187 | else: 188 | if dcts[i-1]["level"] != -1: 189 | part_left = f'☚ {dcts[i-1]["title"]}' 190 | else: 191 | part_left = f'☚ {dcts[i-1]["title"]}' 192 | if dcts[i+1]["level"] != -1: 193 | part_right = f'{dcts[i+1]["title"]} ☛' 194 | else: 195 | part_right = f'{dcts[i+1]["title"]} ☛' 196 | part_bottom = '
' + part_left + '   ' + part_right + '
\n' 197 | # 合并写入 198 | fw.write(part_title+part_css+part_index+part_top+part_list+part_headword+part_body+part_bottom+'\n') 199 | headwords.append(dct["title"]) 200 | # 收集顶级章节 201 | if dct["level"] != -1: 202 | if dct["level"] == 0: 203 | tops.append(dct["title"]) 204 | elif dct["level"] == 1 and self.settings.multi_volume: 205 | pass 206 | else: 207 | headwords_stem.append(dct["title"]) 208 | i += 1 209 | # 3.写入总目词条 210 | toc_entry = f'TOC_{self.settings.name_abbr}\n' 211 | toc_entry += f'\n' 212 | toc_entry += f'\n' 213 | toc_entry += '
    ' 214 | for top in tops: 215 | toc_entry += f'
  • {top}
  • ' 216 | toc_entry += '
' + '   ' + '
\n' 217 | toc_entry += '
\n\n' 218 | fw.write(toc_entry) 219 | # 4.章节重定向 220 | for word in headwords_stem: 221 | fw.write(f'{word}\n@@@LINK={self.settings.name_abbr}_{word}\n\n') 222 | print("文本词条(有导航栏)已生成") 223 | else: 224 | pass 225 | return headwords 226 | 227 | def _check_index_alls(self, dir_input, dir_out): 228 | """ 检查 index_all 文本 """ 229 | pass_flg = True 230 | file_index_all = os.path.join(dir_input, self.settings.fname_index_all) 231 | # 1.扫描识别总 index_all 文件 232 | final_index_all = os.path.join(dir_out, self.settings.fname_index_all) 233 | index_check_num = self.func.text_file_check(file_index_all) 234 | if index_check_num == 2: 235 | shutil.copy(file_index_all, final_index_all) 236 | # 读取检查总 index_all 文件 237 | with open(final_index_all, 'r', encoding='utf-8') as fr: 238 | i = 0 239 | for line in fr: 240 | i += 1 241 | mth_stem = self.settings.pat_stem_text.match(line) 242 | if mth_stem: 243 | # 章节 244 | pass 245 | elif self.settings.pat_tab.match(line): 246 | # 词条 247 | pass 248 | else: 249 | print(Fore.RED + "ERROR: " + Fore.RESET + f"index_all.txt 第 {i} 行未匹配, 请检查") 250 | pass_flg = False 251 | break 252 | elif index_check_num == 1: 253 | pass_flg = False 254 | elif self.settings.multi_volume: 255 | # 2.扫描识别分 index_all 256 | lst_file_index_all = [] 257 | pat1 = re.compile(r'index_all_(\d+)', flags=re.I) 258 | lst_n = [] 259 | for fname in os.listdir(dir_input): 260 | if fname.endswith('.txt') and pat1.match(fname): 261 | vol_n = int(pat1.match(fname).group(1)) 262 | fp = os.path.join(dir_input, fname) 263 | if vol_n not in lst_n: 264 | index_check_num = self.func.text_file_check(fp) 265 | if index_check_num == 1: 266 | pass_flg = False 267 | break 268 | elif index_check_num == 2: 269 | lst_file_index_all.append({"vol_n": vol_n, "path": fp}) 270 | lst_n.append(vol_n) 271 | if pass_flg and not lst_file_index_all: 272 | print(Fore.RED + "ERROR: " + Fore.RESET + "未读取到 index_all 文件") 273 | pass_flg = False 274 | elif pass_flg: 275 | self.settings.volume_num = len(lst_file_index_all) 276 | # 3.合并各 index_all 文本, 顺便检查格式 277 | lst_file_index_all.sort(key=lambda dct: dct["vol_n"], reverse=False) 278 | pat_vname = re.compile(r'index_all_\d+_(.+?)\.txt', flags=re.I) 279 | with open(final_index_all, 'w', encoding='utf-8') as fw: 280 | break_flg = False 281 | for x in range(len(lst_file_index_all)): 282 | fname = os.path.split(lst_file_index_all[x]["path"])[1] 283 | with open(lst_file_index_all[x]["path"], 'r', encoding='utf-8') as fr: 284 | # 获取卷名, 写入卷标 285 | try: 286 | vname = self.settings.vol_names[x] 287 | except IndexError: 288 | vname = None 289 | if not vname: 290 | if pat_vname.match(fname): 291 | vname = pat_vname.match(fname).group(1) 292 | else: 293 | vname = '第'+str(lst_file_index_all[x]["vol_n"]).zfill(2)+'卷' 294 | fw.write('【L0】'+vname+'\t\n') 295 | # 整合开始 296 | i = 0 297 | for line in fr: 298 | i += 1 299 | mth_stem = self.settings.pat_stem_text.match(line) 300 | if mth_stem: 301 | # 章节 302 | if mth_stem.group(3) == '': 303 | fw.write(f'【L{str(int(mth_stem.group(1))+1)}】{mth_stem.group(2)}\t\n') 304 | else: 305 | fw.write(f'【L{str(int(mth_stem.group(1))+1)}】{mth_stem.group(2)}\t{mth_stem.group(3)}\n') 306 | elif self.settings.pat_tab.match(line): 307 | # 词条 308 | mth = self.settings.pat_tab.match(line) 309 | fw.write(f'{mth.group(1)}\t{mth.group(2)}\n') 310 | else: 311 | print(Fore.RED + "ERROR: " + Fore.RESET + f"{fname} 第 {i} 行未匹配, 请检查") 312 | pass_flg = False 313 | break_flg = True 314 | break 315 | if break_flg: 316 | break 317 | if pass_flg: 318 | return final_index_all 319 | else: 320 | return 
None 321 | 322 | def _check_raw_files(self): 323 | """ 检查原材料 324 | * 必要文本存在(文本编码均要是 utf-8 无 bom) 325 | * 检查 info.html 的编码 326 | """ 327 | check_result = [] 328 | # 预定义输入文件路径 329 | dir_input = self.settings.dir_input 330 | file_index_all = os.path.join(dir_input, self.settings.fname_index_all) 331 | file_syns = os.path.join(dir_input, self.settings.fname_syns) 332 | file_dict_info = os.path.join(dir_input, self.settings.fname_dict_info) 333 | dir_data = os.path.join(dir_input, self.settings.dname_data) 334 | # 准备临时文件夹 335 | dir_index_all = self.settings.dir_index_all 336 | if os.path.exists(dir_index_all): 337 | shutil.rmtree(dir_index_all) 338 | os.makedirs(dir_index_all) 339 | else: 340 | os.makedirs(dir_index_all) 341 | file_index_all = self._check_index_alls(dir_input, dir_index_all) 342 | # 1.检查索引文件: 必须存在且合格 343 | if file_index_all: 344 | check_result.append(file_index_all) 345 | # 2.检查同义词文件: 若存在就要合格 346 | syns_check_num = self.func.text_file_check(file_syns) 347 | if syns_check_num == 0: 348 | check_result.append(None) 349 | elif syns_check_num == 2: 350 | check_result.append(file_syns) 351 | # 3.检查 info.html: 若存在就要合格 352 | info_check_num = self.func.text_file_check(file_dict_info) 353 | if info_check_num == 0: 354 | check_result.append(None) 355 | elif info_check_num == 2: 356 | check_result.append(file_dict_info) 357 | # 4.检查 data 文件夹 358 | if os.path.isdir(dir_data) and len(os.listdir(dir_data)) != 0: 359 | check_result.append(dir_data) 360 | elif os.path.isdir(dir_data): 361 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "data 文件夹为空, 已忽略将不打包") 362 | check_result.append(None) 363 | else: 364 | check_result.append(None) 365 | # 返回最终检查结果 366 | if len(check_result) == 4: 367 | return check_result 368 | else: 369 | return None 370 | --------------------------------------------------------------------------------
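For quick reference, the following is a minimal, illustrative sketch of the input line formats implied by the regular expressions defined in settings.py and by the redirect writer make_relinks_syn / the entry counter merge_and_count in the library code above. The regex patterns are copied verbatim from settings.py; the sample headwords, volume labels and page numbers are assumed placeholders (loosely drawn from lib/build.toml and lib/bkmk/FreePic2Pdf_bkmk.txt), not data shipped with AutoMdxBuilder.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Illustrative sketch only: what index.txt / index_all.txt / syns.txt lines are
# expected to look like, per the patterns in settings.py. Sample values are assumptions.
import re

pat_index     = re.compile(r'([^\t]+)\t(\-?\d+)[\r\n]*$')               # index.txt: headword<TAB>page (negative = front matter)
pat_index_vol = re.compile(r'([^\t]+)\t\[(\d+)\](\-?\d+)[\r\n]*$')      # multi-volume index: headword<TAB>[volume]page
pat_stem      = re.compile(r'【L(\d+)】([^\t]+)\t(\-\d+|\d*)[\r\n]*$')   # index_all.txt chapter line: 【L<level>】chapter<TAB>page
pat_tab       = re.compile(r'([^\t]+)\t([^\t\r\n]+)[\r\n]*$')           # syns.txt: synonym<TAB>original headword

assert pat_index.match('凡例\t-57\n')                # front-matter pages use negative numbers
assert pat_index_vol.match('凡例\t[01]-57\n')        # volume number in square brackets
assert pat_stem.match('【L1】方言音系简介\t-54\n')     # chapter entry at level 1
assert pat_tab.match('北京\t一、北京话声韵调\n')       # synonym redirected to an existing headword

# In the packed .txt source, entries are separated by a blank line (merge_and_count
# counts those blank lines) and redirects are written with @@@LINK (make_relinks_syn):
sample_redirect = '北京\n@@@LINK=一、北京话声韵调\n\n'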
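In the same spirit, here is a small sketch of the image file-naming rule applied by _proc_img_vol: pages before body_start get an 'A' prefix (auxiliary/front-matter pages), body pages get a 'B' prefix with the counter restarted at 1, and multi-volume builds insert a zero-padded volume tag. The values name_abbr='WSGJSCD' and len_digit=6 come from lib/build.toml and settings.py; body_start=3 is an arbitrary example, and the helper below is an illustrative stand-in, not the function shipped in the repository.

# Illustrative reimplementation of the renaming rule in _proc_img_vol (assumed sample values).
def new_title(name_abbr, n, body_start, len_digit=6, vol_i=None):
    vol_tag = f'[{str(vol_i + 1).zfill(2)}]' if vol_i is not None else ''
    if n < body_start:                       # auxiliary (front-matter) page
        return f'{name_abbr}{vol_tag}_A{str(n).zfill(len_digit)}'
    return f'{name_abbr}{vol_tag}_B{str(n - body_start + 1).zfill(len_digit)}'

print(new_title('WSGJSCD', 1, 3))            # WSGJSCD_A000001     (1st image, front matter)
print(new_title('WSGJSCD', 3, 3))            # WSGJSCD_B000001     (body starts at image 3)
print(new_title('WSGJSCD', 4, 3, vol_i=0))   # WSGJSCD[01]_B000002 (volume 1, 2nd body page)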