├── .cross_platform ├── auto_mdx_builder.py └── ebook_utils.py ├── LICENSE ├── README.md ├── auto_mdx_builder.py ├── ebook_utils.py ├── func_lib.py ├── images ├── amb_folder.png ├── auto_split.png ├── img_dict_atmpl.gif ├── img_dict_btmpl.gif ├── imgs_order.png ├── index.png ├── index_all.png ├── settings.png ├── syns.png ├── text_dict_ctmpl.png ├── text_dict_dtmpl.gif ├── toc.png └── work_dir_tree.png ├── lib ├── FreePic2Pdf.ini ├── MuPDF_pcs.txt ├── PDFPatcher_AppConfig.json ├── Pdg2Pic.ini ├── atmpl.css ├── auto_split_2.css ├── bkmk │ ├── FreePic2Pdf.itf │ └── FreePic2Pdf_bkmk.txt ├── bkmk_utf16le │ ├── FreePic2Pdf.itf │ └── FreePic2Pdf_bkmk.txt ├── btmpl.css ├── build.toml ├── ctmpl.css └── dtmpl.css ├── requirements.txt ├── settings.py └── templates ├── __init__.py ├── img_dict_atmpl.py ├── img_dict_btmpl.py ├── text_dict_ctmpl.py └── text_dict_dtmpl.py /.cross_platform/auto_mdx_builder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:17 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.5 7 | 8 | import logging 9 | import traceback 10 | import os 11 | import re 12 | import shutil 13 | from colorama import Fore, just_fix_windows_console 14 | from settings import Settings 15 | from func_lib import FuncLib 16 | from img_dict_atmpl import ImgDictAtmpl 17 | from img_dict_btmpl import ImgDictBtmpl 18 | from text_dict_ctmpl import TextDictCtmpl 19 | from text_dict_dtmpl import TextDictDtmpl 20 | from ebook_utils import EbookUtils 21 | 22 | 23 | class AutoMdxBuilder: 24 | """图像词典制作程序""" 25 | def __init__(self): 26 | self.settings = Settings() 27 | self.func = FuncLib(self) 28 | self.utils = EbookUtils(self) 29 | 30 | def auto_processing(self, sel): 31 | """ 根据选择自动处理 """ 32 | if sel == 1: 33 | # --- 解包 mdx/mdd 文件 --- 34 | mfile = input("请输入要解包的 mdx/mdd 文件路径: ").strip('"') 35 | if self.utils.export_mdx(mfile): 36 | print(Fore.GREEN + "\n已输出在同目录下: " + Fore.RESET + os.path.splitext(mfile)[0]) 37 | elif sel == 2: 38 | # --- 将源 txt 文件打包成 mdx 文件 --- 39 | file_final_txt = input("请输入要打包的 txt 文件路径: ").strip('"') 40 | if self.func.text_file_check(file_final_txt) == 2: 41 | # 检查数据文件夹 42 | dir_curr, fname_txt = os.path.split(file_final_txt) 43 | dir_data = os.path.join(dir_curr, 'data') 44 | if not os.path.exists(dir_data): 45 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"文件夹 {dir_data} 不存在, 已默认不打包 mdd") 46 | dir_data = None 47 | elif os.path.exists(dir_data) and len(os.listdir(dir_data)) == 0: 48 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"文件夹 {dir_data} 为空, 已默认不打包 mdd") 49 | dir_data = None 50 | # 生成 info.html 51 | file_info_raw = None 52 | for fname in os.listdir(dir_curr): 53 | if fname == 'info.html': 54 | file_info_raw = os.path.join(dir_curr, fname) 55 | elif fname.endswith('.html') and fname.startswith(os.path.splitext(fname_txt)[0]): 56 | file_info_raw = os.path.join(dir_curr, fname) 57 | break 58 | file_dict_info = self.func.generate_info_html(os.path.splitext(fname_txt)[0], file_info_raw, None) 59 | # 打包 60 | print('\n------------------\n开始打包……\n') 61 | done_flg = self.utils.pack_to_mdict(file_final_txt, file_dict_info, dir_data, dir_curr) 62 | if done_flg: 63 | print(Fore.GREEN + "\n打包完毕。" + Fore.RESET) 64 | else: 65 | print(Fore.RED + "\n材料检查不通过, 请确保材料准备无误再执行程序" + Fore.RESET) 66 | elif sel == 3: 67 | # --- 将资料包文件夹打包成 mdd 文件 --- 68 | dir_data = input("请输入要打包的资料文件夹路径: ").strip('"\\').rstrip('/') 69 | dir_data = 
dir_data.rstrip('\\') 70 | dir_data = dir_data.rstrip('/') 71 | print('\n------------------\n开始打包……\n') 72 | done_flg = self.utils.pack_to_mdd(dir_data, None) 73 | if done_flg: 74 | print(Fore.GREEN + "\n打包完毕。" + Fore.RESET) 75 | # elif sel == 10: 76 | # # --- 从 PDF文件/pdg文件夹 生成预备原材料 --- 77 | # p = input("请输入 pdf文件/pdg文件夹 路径: ").strip('"\\').rstrip('/') 78 | # if os.path.isfile(p) and os.path.splitext(p)[1] == '.pdf': 79 | # self.pdf_to_amb(p) 80 | # elif os.path.isdir(p): 81 | # self.pdf_to_amb(p, False) 82 | # else: 83 | # print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 84 | elif sel == 11: 85 | # --- 从 toc_all.txt 生成 index_all.txt --- 86 | file_toc_all = input("请输入 toc_all.txt 的文件路径: ").strip('"') 87 | file_index_all = os.path.join(os.path.split(file_toc_all)[0], 'index_all.txt') 88 | if self.func.toc_all_to_index(file_toc_all, file_index_all): 89 | print(Fore.GREEN + "\n处理完成, 生成在同目录下" + Fore.RESET) 90 | else: 91 | print(Fore.RED + "\n文件检查不通过, 请确保文件准备无误再执行程序" + Fore.RESET) 92 | elif sel == 12: 93 | # --- 合并 toc.txt 和 index.txt 为 index_all.txt --- 94 | file_toc = input("(1) 请输入 toc.txt 的文件路径: ").strip('"') 95 | file_index = input("(2) 请输入 index.txt 的文件路径: ").strip('"') 96 | file_index_all = os.path.join(os.path.split(file_index)[0], 'index_all.txt') 97 | self.func.merge_to_index_all(file_toc, file_index, file_index_all) 98 | elif sel == 20: 99 | # --- 生成词典 --- 100 | p = input("请输入原材料文件夹路径或 build.toml 文件路径: ").strip('"\\').rstrip('/') 101 | if os.path.split(p)[1] == 'build.toml': 102 | if self.settings.load_build_toml(p, False, False): 103 | self._build_mdict() 104 | elif os.path.isdir(p): 105 | file_toml = os.path.join(p, 'build.toml') 106 | if os.path.isfile(file_toml): 107 | if self.settings.load_build_toml(file_toml, False, True): 108 | self._build_mdict() 109 | else: 110 | print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 build.toml 文件") 111 | else: 112 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 113 | elif sel == 30: 114 | # --- 从词典还原原材料 --- 115 | p = input("请输入词典的文件夹或 mdx/mdd 文件路径: ").strip('"\\').rstrip('/') 116 | if os.path.isfile(p) and os.path.splitext(p)[1] == '.mdx': 117 | self._restore_raw(p, False) 118 | elif os.path.isfile(p) and os.path.splitext(p)[1] == '.mdd': 119 | if os.path.isfile(p[:-1]+'x'): 120 | self._restore_raw(p[:-1]+'x', False) 121 | elif os.path.isdir(p): 122 | for m in os.listdir(p): 123 | if m.endswith('.mdx'): 124 | self._restore_raw(os.path.join(p, m), True) 125 | break 126 | else: 127 | print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 mdx 文件") 128 | else: 129 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 130 | # elif sel == 31: 131 | # # --- 从原材料还原 PDF --- 132 | # p = input("请输入原材料文件夹路径或 build.toml 文件路径: ").strip('"\\').rstrip('/') 133 | # if os.path.split(p)[1] == 'build.toml': 134 | # if self.settings.load_build_toml(p, True): 135 | # self.amb_to_pdf(file_toml, False) 136 | # elif os.path.isdir(p): 137 | # file_toml = os.path.join(p, 'build.toml') 138 | # if os.path.isfile(file_toml): 139 | # if self.settings.load_build_toml(file_toml, True): 140 | # self.amb_to_pdf(file_toml, True) 141 | # else: 142 | # print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 build.toml 文件") 143 | # else: 144 | # print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 145 | elif sel == 32: 146 | # --- 从 index_all.txt 还原 toc_all.txt --- 147 | file_index_all = input("请输入 index_all.txt 的文件路径: ").strip('"') 148 | file_toc_all = os.path.join(os.path.split(file_index_all)[0], 'toc_all.txt') 149 | if self.func.index_to_toc(file_index_all, file_toc_all): 150 | 
print(Fore.GREEN + "\n处理完成, 生成在同目录下" + Fore.RESET) 151 | else: 152 | print(Fore.RED + "\n文件检查不通过, 请确保所有词目都有对应页码" + Fore.RESET) 153 | elif sel == 41: 154 | # --- 从 PDF 提取图片 (MuPDF) --- 155 | p = input("请输入 PDF 文件路径: ").strip('"\\').rstrip('/') 156 | if os.path.isfile(p) and p.lower().endswith('.pdf'): 157 | fname = os.path.split(p)[1] 158 | out_dir = os.path.join(os.path.split(p)[0], fname.split('.')[0]) 159 | self.utils.extract_pdf_to_imgs_fitz(p, out_dir) 160 | else: 161 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 162 | elif sel == 42: 163 | # --- 将 PDF 转换成图片 (MuPDF) --- 164 | p = input("请输入 PDF 文件路径: ").strip('"\\').rstrip('/') 165 | if os.path.isfile(p) and p.lower().endswith('.pdf'): 166 | fname = os.path.split(p)[1] 167 | out_dir = os.path.join(os.path.split(p)[0], fname.split('.')[0]) 168 | dpi = input("请输入要生成图片的 DPI(回车则默认300): ") 169 | if re.match(r'^\d+$', dpi): 170 | self.utils.convert_pdf_to_imgs_fitz(p, out_dir, int(dpi)) 171 | else: 172 | self.utils.convert_pdf_to_imgs_fitz(p, out_dir) 173 | else: 174 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 175 | # elif sel == 43: 176 | # # --- 将 图片 合成 PDF (MuPDF) --- 177 | # p = input("请输入图片所在文件夹路径: ").strip('"\\').rstrip('/') 178 | # if os.path.isdir(p): 179 | # out_file = p+'.pdf' 180 | # self.utils.combine_img_to_pdf(p, out_file) 181 | # else: 182 | # print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 183 | # elif sel == 44: 184 | # # --- PDF 书签导出/导入(FreePic2Pdf) --- 185 | # file_pdf = input("请输入 PDF 文件路径: ").strip('"\\').rstrip('/') 186 | # dir_bkmk = input("请输入书签文件夹路径(导出则直接回车): ").strip('"\\').rstrip('/') 187 | # if os.path.isdir(dir_bkmk): 188 | # self.utils.eximport_bkmk_fp2p(file_pdf, dir_bkmk, False) 189 | # elif dir_bkmk is None or len(dir_bkmk) == 0: 190 | # fname = os.path.split(file_pdf)[1] 191 | # dir_bkmk = os.path.join(os.path.split(file_pdf)[0], fname.split('.')[0]+'_bkmk') 192 | # self.utils.eximport_bkmk_fp2p(file_pdf, dir_bkmk) 193 | # else: 194 | # print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 195 | else: 196 | pass 197 | 198 | def _build_mdict(self): 199 | done_flg = False 200 | if self.settings.templ_choice in ('a', 'A'): 201 | """ 制作图像词典 (模板A) """ 202 | # 生成 txt 源文本 203 | proc_flg, file_final_txt, dir_imgs_out, file_dict_info = ImgDictAtmpl(self).make_source_file() 204 | if proc_flg: 205 | # 创建输出文件夹 206 | if not os.path.exists(self.settings.dir_output): 207 | os.makedirs(self.settings.dir_output) 208 | # 拷贝模板 css 文件 209 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_atmpl) 210 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 211 | shutil.copy(file_css_tmpl, file_css) 212 | # 开始打包 213 | print('\n------------------\n开始打包……\n') 214 | done_flg = self.utils.pack_to_mdict(file_final_txt, file_dict_info, dir_imgs_out, self.settings.dir_output) 215 | elif self.settings.templ_choice in ('b', 'B'): 216 | """ 制作图像词典 (模板B) """ 217 | # 生成 txt 源文本 218 | proc_flg, file_final_txt, dir_imgs_out, file_dict_info = ImgDictBtmpl(self).make_source_file() 219 | if proc_flg: 220 | # 创建输出文件夹 221 | if not os.path.exists(self.settings.dir_output): 222 | os.makedirs(self.settings.dir_output) 223 | # 拷贝模板 css 文件 224 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_btmpl) 225 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 226 | shutil.copy(file_css_tmpl, file_css) 227 | # 开始打包 228 | print('\n------------------\n开始打包……\n') 229 | done_flg = self.utils.pack_to_mdict(file_final_txt, file_dict_info, dir_imgs_out, self.settings.dir_output) 230 | elif 
self.settings.templ_choice in ('c', 'C'): 231 | """ 制作文本词典 (模板C) """ 232 | # 生成 txt 源文本 233 | proc_flg, file_final_txt, file_dict_info = TextDictCtmpl(self).make_source_file() 234 | if proc_flg: 235 | # 创建输出文件夹 236 | if not os.path.exists(self.settings.dir_output): 237 | os.makedirs(self.settings.dir_output) 238 | # 拷贝模板 css 文件 239 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_ctmpl) 240 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 241 | shutil.copy(file_css_tmpl, file_css) 242 | # 开始打包 243 | print('\n------------------\n开始打包……\n') 244 | dir_data = os.path.join(self.settings.dir_input, self.settings.dname_data) 245 | if not os.path.exists(dir_data) or len(os.listdir(dir_data)) == 0: 246 | dir_data = None 247 | done_flg = self.utils.pack_to_mdict(file_final_txt, file_dict_info, dir_data, self.settings.dir_output) 248 | elif self.settings.templ_choice in ('d', 'D'): 249 | """ 制作文本词典 (模板D) """ 250 | # 生成 txt 源文本 251 | proc_flg, file_final_txt, file_dict_info = TextDictDtmpl(self).make_source_file() 252 | if proc_flg: 253 | # 创建输出文件夹 254 | if not os.path.exists(self.settings.dir_output): 255 | os.makedirs(self.settings.dir_output) 256 | # 拷贝模板 css 文件 257 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_dtmpl) 258 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 259 | shutil.copy(file_css_tmpl, file_css) 260 | # 开始打包 261 | print('\n------------------\n开始打包……\n') 262 | dir_data = os.path.join(self.settings.dir_input, self.settings.dname_data) 263 | if not os.path.exists(dir_data) or len(os.listdir(dir_data)) == 0: 264 | dir_data = None 265 | done_flg = self.utils.pack_to_mdict(file_final_txt, file_dict_info, dir_data, self.settings.dir_output) 266 | else: 267 | pass 268 | if done_flg: 269 | print("\n打包完毕。" + Fore.GREEN + "\n\n恭喜, 词典已生成!" + Fore.RESET) 270 | 271 | def _restore_raw(self, xfile, outside_flg): 272 | """ 将词典还原为原材料 """ 273 | # 1.准备参数 274 | extract_flg = False 275 | dict_name = None 276 | templ_choice = None 277 | dir_input, fname = os.path.split(xfile) 278 | # 2.分析 mdx 文件 279 | tmp_restore = os.path.join(self.settings.dir_output_tmp, 'restore') 280 | if not os.path.exists(tmp_restore): 281 | os.makedirs(tmp_restore) 282 | tmp_xfile = os.path.join(tmp_restore, fname) 283 | tmp_xdir = os.path.splitext(tmp_xfile)[0] 284 | if os.path.exists(tmp_xdir): 285 | shutil.rmtree(tmp_xdir) 286 | shutil.copy(xfile, tmp_xfile) 287 | if self.utils.export_mdx(tmp_xfile): 288 | tmp_final_txt = os.path.join(tmp_xdir, fname.split('.')[0]+'.txt') 289 | # 分析 info 信息, 确定是否支持还原 290 | for f in os.listdir(tmp_xdir): 291 | fp = os.path.join(tmp_xdir, f) 292 | text = '' 293 | if fp.endswith('.info.html'): 294 | with open(fp, 'r', encoding='utf-8') as fr: 295 | pat = re.compile(r'

([^><]*?), built with AutoMdxBuilder[^><]*?based on template ([A-D])\.
', flags=re.I) 296 | text = fr.read() 297 | if pat.search(text): 298 | # 符合条件, 支持还原 299 | dict_name = pat.search(text).group(1) 300 | templ_choice = pat.search(text).group(2) 301 | text = pat.sub('', text) 302 | extract_flg = True 303 | break 304 | # 3.开始提取 305 | if extract_flg: 306 | # 创建目标文件夹 307 | if outside_flg: 308 | out_dir = os.path.join(os.path.split(dir_input)[0], fname.split('.')[0]) + '_amb' 309 | else: 310 | out_dir = os.path.splitext(xfile)[0] + '_amb' 311 | if not os.path.exists(out_dir): 312 | os.makedirs(out_dir) 313 | # 提取 info.html 314 | if not re.match(r'^\s*$', text): 315 | with open(os.path.join(out_dir, 'info.html'), 'w', encoding='utf-8') as fw: 316 | fw.write(text) 317 | # 提取 index, index_all, syns 等信息 318 | if tmp_final_txt: 319 | # 选择函数进行处理 320 | if templ_choice == 'A': 321 | ImgDictAtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name) 322 | elif templ_choice == 'B': 323 | ImgDictBtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name) 324 | elif templ_choice == 'C': 325 | TextDictCtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name) 326 | elif templ_choice == 'D': 327 | TextDictDtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name) 328 | # 处理 mdd 329 | file_mdd = os.path.splitext(xfile)[0] + '.mdd' 330 | if os.path.isfile(file_mdd) and templ_choice in ('A', 'B'): 331 | dir_data = os.path.join(out_dir, "imgs") 332 | if os.path.exists(dir_data): 333 | shutil.rmtree(dir_data) 334 | self.utils.mdict(['-x', file_mdd, '-d', dir_data]) 335 | elif os.path.isfile(file_mdd) and templ_choice in ('C', 'D'): 336 | dir_data = os.path.join(out_dir, "data") 337 | if os.path.exists(dir_data): 338 | shutil.rmtree(dir_data) 339 | self.utils.mdict(['-x', file_mdd, '-d', dir_data]) 340 | else: 341 | print(Fore.YELLOW + "WARN: " + Fore.RESET + "同路径下未找到相应的 mdd 文件, 将不会生成 imgs/data 文件夹") 342 | print(Fore.GREEN + "\n已提取原材料至目录: " + Fore.RESET + out_dir) 343 | else: 344 | print(Fore.RED + "ERROR: " + Fore.RESET + "词典并非由 AutoMdxBuilder 制作, 不支持还原") 345 | shutil.rmtree(tmp_restore) 346 | 347 | # def pdf_to_amb(self, input_path, pdf_flg=True): 348 | # """ 从 PDF文件/pdg文件夹 生成 amb 文件夹 """ 349 | # # 0.准备路径相关 350 | # dir_bkmk = os.path.join(self.settings.dir_output_tmp, 'bkmk') 351 | # if not os.path.exists(dir_bkmk): 352 | # os.makedirs(dir_bkmk) 353 | # # 开始处理 354 | # if pdf_flg: 355 | # fname = os.path.split(input_path)[1] 356 | # out_dir = os.path.join(os.path.split(input_path)[0], fname.split('.')[0]+'_amb') 357 | # if not os.path.exists(out_dir): 358 | # os.makedirs(out_dir) 359 | # # 1.导出书签 360 | # cur_path = os.getcwd() 361 | # self.utils.eximport_bkmk_fp2p(input_path, os.path.join(cur_path, dir_bkmk)) 362 | # try: 363 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'r', encoding='utf-16le') as fr: 364 | # text = fr.read() 365 | # line_num = len(re.findall(r'^', text, flags=re.M)) 366 | # if line_num <= 3: 367 | # print(Fore.YELLOW + "INFO: " + Fore.RESET + "未识别到目录, 将不会生成 toc.txt") 368 | # else: 369 | # with open(os.path.join(out_dir, 'toc.txt'), 'w', encoding='utf-8') as fw: 370 | # fw.write(text) 371 | # if line_num > 500: 372 | # print(Fore.YELLOW + "INFO: " + Fore.RESET + "书签超过 500 行, 请后续确认是否包含索引, 是的话建议改名为 toc_all.txt") 373 | # except UnicodeDecodeError: 374 | # shutil.copy(os.path.join(dir_bkmk, "FreePic2Pdf_bkmk.txt"), os.path.join(out_dir, "[utf-16]toc.txt")) 375 | # print(Fore.YELLOW + "WARN: " + Fore.RESET + "书签中存在无法识别的字符, 已输出为 utf-16 编码") 376 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'r', encoding='utf-16le') as 
fr: 377 | # mt = re.search(r'(?<=BasePage=)(\d+)', fr.read()) 378 | # if mt: 379 | # body_start = mt.group(0) 380 | # else: 381 | # body_start = 1 382 | # print(Fore.YELLOW + "INFO: " + Fore.RESET + "未识别到正文起始页码, 已设置默认值 1") 383 | # # 2.生成 build.toml 384 | # shutil.copy(os.path.join(self.settings.dir_lib, "build.toml"), os.path.join(out_dir, "build.toml")) 385 | # with open(os.path.join(out_dir, "build.toml"), 'r+', encoding='utf-8') as fr: 386 | # text = fr.read() 387 | # text = re.sub(r'^templ_choice = "\w"', 'templ_choice = "A"', text, flags=re.I+re.M) 388 | # text = re.sub(r'^name = "[^"]+?"', f'name = "{fname.split(".")[0]}"', text, flags=re.I+re.M) 389 | # text = re.sub(r'^name_abbr = "[^"]+?"', 'name_abbr = "XXXXXX"', text, flags=re.I+re.M) 390 | # text = re.sub(r'^body_start = \d+', f'body_start = {str(body_start)}', text, flags=re.I+re.M) 391 | # fr.seek(0) 392 | # fr.truncate() 393 | # fr.write(text) 394 | # # 3.导出图片 395 | # if not os.path.exists(os.path.join(out_dir, 'imgs')): 396 | # os.makedirs(os.path.join(out_dir, 'imgs')) 397 | # self.utils.pdf_to_imgs(input_path, os.path.join(out_dir, 'imgs')) 398 | # else: 399 | # out_dir = input_path+'_amb' 400 | # if not os.path.exists(out_dir): 401 | # os.makedirs(out_dir) 402 | # # 1.pdg 转 img 403 | # if not os.path.exists(os.path.join(out_dir, 'imgs')): 404 | # os.makedirs(os.path.join(out_dir, 'imgs')) 405 | # print(os.path.join(out_dir, 'imgs')) 406 | # self.utils.convert_pdg_to_img(input_path, os.path.join(out_dir, 'imgs')) 407 | # # 2.识别词典信息 408 | # bkmk_itf = os.path.join(os.path.join(out_dir, 'imgs'), 'FreePic2Pdf.itf') 409 | # if os.path.isfile(bkmk_itf): 410 | # with open(bkmk_itf, 'r', encoding='utf-16le') as fr: 411 | # text = fr.read() 412 | # mt_body_start = re.search(r'(?<=TextPage=)(\d+)', text) 413 | # mt_name = re.search(r'(?<=Title=)(.+)', text) 414 | # if mt_body_start: 415 | # body_start = mt_body_start.group(0) 416 | # else: 417 | # body_start = 1 418 | # print(Fore.YELLOW + "INFO: " + Fore.RESET + "未识别到正文起始页码, 已设置默认值 1") 419 | # if mt_name: 420 | # name = mt_name.group(0) 421 | # else: 422 | # name = os.path.split(input_path)[1] 423 | # os.remove(bkmk_itf) 424 | # else: 425 | # print(Fore.YELLOW + "INFO: " + Fore.RESET + "未识别到书籍信息") 426 | # # 3.生成 build.toml 427 | # shutil.copy(os.path.join(self.settings.dir_lib, "build.toml"), os.path.join(out_dir, "build.toml")) 428 | # with open(os.path.join(out_dir, "build.toml"), 'r+', encoding='utf-8') as fr: 429 | # text = fr.read() 430 | # text = re.sub(r'^templ_choice = "\w"', 'templ_choice = "A"', text, flags=re.I+re.M) 431 | # text = re.sub(r'^name = "[^"]+?"', f'name = "{name}"', text, flags=re.I+re.M) 432 | # text = re.sub(r'^name_abbr = "[^"]+?"', 'name_abbr = "XXXXXX"', text, flags=re.I+re.M) 433 | # text = re.sub(r'^body_start = \d+', f'body_start = {str(body_start)}', text, flags=re.I+re.M) 434 | # fr.seek(0) 435 | # fr.truncate() 436 | # fr.write(text) 437 | # shutil.rmtree(dir_bkmk) 438 | # print(Fore.GREEN + "\n\n预备原材料生成完毕!" 
+ Fore.RESET) 439 | 440 | # def amb_to_pdf(self, file_toml, outside_flg): 441 | # """ 从 amb 文件夹合成 PDF 文件 """ 442 | # # 0.准备路径相关 443 | # dir_amb = os.path.split(file_toml)[0] 444 | # if outside_flg: 445 | # out_file = os.path.join(os.path.split(dir_amb)[0], self.settings.name+'.pdf') 446 | # else: 447 | # out_file = os.path.join(dir_amb, self.settings.name+'.pdf') 448 | # dir_bkmk_bk = os.path.join(self.settings.dir_lib, 'bkmk') 449 | # dir_bkmk = os.path.join(self.settings.dir_output_tmp, 'bkmk') 450 | # if not os.path.exists(dir_bkmk): 451 | # os.makedirs(dir_bkmk) 452 | # shutil.copy(os.path.join(dir_bkmk_bk, "FreePic2Pdf.itf"), os.path.join(dir_bkmk, "FreePic2Pdf.itf")) 453 | # shutil.copy(os.path.join(dir_bkmk_bk, "FreePic2Pdf_bkmk.txt"), os.path.join(dir_bkmk, "FreePic2Pdf_bkmk.txt")) 454 | # # 1.生成临时书签 455 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'r+', encoding='utf-8') as fr: 456 | # text = re.sub(r'(?<=BasePage=|TextPage=)\d+', str(self.settings.body_start), fr.read()) 457 | # fr.seek(0) 458 | # fr.truncate() 459 | # fr.write(text) 460 | # toc_flg = False 461 | # for fname in os.listdir(dir_amb): 462 | # if fname == 'toc.txt': 463 | # with open(os.path.join(dir_amb, fname), 'r', encoding='utf-8') as fr: 464 | # text = fr.read() 465 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'r+', encoding='utf-8') as fr: 466 | # fr.seek(0) 467 | # fr.truncate() 468 | # fr.write(text) 469 | # toc_flg = True 470 | # break 471 | # elif fname == 'index_all.txt': 472 | # toc_tmp = os.path.join(self.settings.dir_output_tmp, 'toc_all.txt') 473 | # if self.func.index_to_toc(os.path.join(dir_amb, fname), toc_tmp): 474 | # with open(toc_tmp, 'r', encoding='utf-8') as fr: 475 | # text = fr.read() 476 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'r+', encoding='utf-8') as fr: 477 | # fr.seek(0) 478 | # fr.truncate() 479 | # fr.write(text) 480 | # toc_flg = True 481 | # break 482 | # else: 483 | # pass 484 | # if not toc_flg: 485 | # print(Fore.YELLOW + "WARN: " + Fore.RESET + "未找到 toc.txt/index_all.txt, 生成的 PDF 将不带书签") 486 | # # 2.将图片合成PDF 487 | # if os.path.isdir(os.path.join(dir_amb, 'imgs')): 488 | # self.utils.combine_img_to_pdf_fp2p(os.path.join(dir_amb, 'imgs'), out_file) 489 | # # 3.给PDF挂书签 490 | # cur_path = os.getcwd() 491 | # self.utils.eximport_bkmk_fp2p(out_file, os.path.join(cur_path, dir_bkmk), False) 492 | # shutil.rmtree(dir_bkmk) 493 | # print(Fore.GREEN + "\n\nPDF生成完毕!" 
+ Fore.RESET) 494 | # else: 495 | # print(Fore.RED + "ERROR: " + Fore.RESET + "未找到 imgs 文件夹") 496 | 497 | 498 | def print_menu(): 499 | """ 打印选单 """ 500 | # 功能选单 501 | print("\n(〇) 打包/解包") 502 | print(Fore.CYAN + " 1" + Fore.RESET + ".解包 mdx/mdd 文件") 503 | print(Fore.CYAN + " 2" + Fore.RESET + ".将源 txt 文件打包成 mdx 文件") 504 | print(Fore.CYAN + " 3" + Fore.RESET + ".将资料包文件夹打包成 mdd 文件") 505 | print("\n(一) 准备原材料") 506 | # print(Fore.CYAN + " 10" + Fore.RESET + ".从 PDF文件/pdg文件夹 生成预备原材料" + Fore.YELLOW + " (还需手动检查完善)" + Fore.RESET) 507 | print(Fore.CYAN + " 11" + Fore.RESET + ".从 toc_all.txt 生成 index_all.txt") 508 | print(Fore.CYAN + " 12" + Fore.RESET + ".合并 toc.txt 和 index.txt 为 index_all.txt") 509 | print("\n(二) 制作词典") 510 | print(Fore.CYAN + " 20" + Fore.RESET + ".生成词典" + Fore.YELLOW + " (需准备好原材料)" + Fore.RESET) 511 | print("\n(三) 还原词典") 512 | print(Fore.CYAN + " 30" + Fore.RESET + ".从词典还原原材料" + Fore.YELLOW + " (仅支持 AMB 1.4 以上版本)" + Fore.RESET) 513 | # print(Fore.CYAN + " 31" + Fore.RESET + ".从原材料还原 PDF") 514 | print(Fore.CYAN + " 32" + Fore.RESET + ".从 index_all.txt 还原 toc_all.txt") 515 | print("\n(四) 其他工具") 516 | print(Fore.CYAN + " 41" + Fore.RESET + ".从 PDF 提取图片 (MuPDF)") 517 | print(Fore.CYAN + " 42" + Fore.RESET + ".将 PDF 转换成图片 (MuPDF)") 518 | # print(Fore.CYAN + " 43" + Fore.RESET + ".将 图片 合成 PDF (MuPDF)") 519 | # print(Fore.CYAN + " 44" + Fore.RESET + ".PDF书签导出/导入 (FreePic2Pdf)") 520 | 521 | 522 | def main(): 523 | just_fix_windows_console() 524 | # 程序开始 525 | print(Fore.CYAN + "欢迎使用 AutoMdxBuilder 1.5, 下面是功能选单:" + Fore.RESET) 526 | while True: 527 | print_menu() 528 | sel = input('\n请输入数字(回车或“0”退出程序): ') 529 | # 执行选择 530 | if re.match(r'^\d+$', sel) and int(sel) in range(1, 50): 531 | print('\n------------------') 532 | amb = AutoMdxBuilder() 533 | amb.auto_processing(int(sel)) 534 | print('\n\n------------------------------------') 535 | # 判断是否继续 536 | ctn = input(Fore.CYAN + "回车退出程序, 或输入 Y/y 继续使用 AMB: " + Fore.RESET) 537 | if ctn not in ['Y', 'y']: 538 | break 539 | else: 540 | break 541 | 542 | 543 | if __name__ == '__main__': 544 | logging.basicConfig(format='%(asctime)s | %(message)s', filename=Settings().file_log, filemode='w', level=logging.INFO) 545 | try: 546 | main() 547 | logging.info('The program worked fine.') 548 | except: 549 | logging.error(traceback.format_exc()) 550 | -------------------------------------------------------------------------------- /.cross_platform/ebook_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-15 18:43:07 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.5 7 | 8 | import os 9 | import re 10 | import shutil 11 | from colorama import Fore 12 | # from PIL import Image 13 | import sys 14 | from mdict_utils.__main__ import run as mdict_cmd 15 | import fitz 16 | from fitz.__main__ import main as fitz_command 17 | 18 | 19 | class EbookUtils: 20 | """ 电子书(PDF等)实用工具 """ 21 | def __init__(self, amb): 22 | self.settings = amb.settings 23 | 24 | # ========== (〇) mdict-utils ========== 25 | def mdict(self, parms): 26 | """ 执行 mdict-utils 程序 """ 27 | saved_parms = sys.argv[1:] 28 | sys.argv[1:] = parms 29 | mdict_cmd() 30 | sys.argv[1:] = saved_parms 31 | 32 | def export_mdx(self, mfile): 33 | """ 解包 mdx/mdd (取代 MdxExport.exe) """ 34 | done_flg = True 35 | if os.path.isfile(mfile) and mfile.endswith('.mdx'): 36 | out_dir = os.path.splitext(mfile)[0] 37 | self.mdict(['-x', mfile, '-d', 
out_dir]) 38 | for fname in os.listdir(out_dir): 39 | fp = os.path.join(out_dir, fname) 40 | if os.path.isfile(fp) and ('description' in fname.split('.')): 41 | fp_new = fp.replace('.description', '.info').replace('.mdx', '') 42 | os.rename(fp, fp_new) 43 | elif os.path.isfile(fp): 44 | fp_new = fp.replace('.mdx', '') 45 | os.rename(fp, fp_new) 46 | # 分析 info 信息, 确定是否支持词条顺序的还原 47 | order_flg = False 48 | for f in os.listdir(out_dir): 49 | fp = os.path.join(out_dir, f) 50 | text = '' 51 | if fp.endswith('.info.html'): 52 | with open(fp, 'r', encoding='utf-8') as fr: 53 | if re.search(r'

[^><]*?, (packed|built) with AutoMdxBuilder[^><]*?\.
', fr.read(), flags=re.I): 54 | # 符合条件, 支持词条顺序的还原 55 | order_flg = True 56 | break 57 | if order_flg: 58 | # 按编号精准还原源 txt 59 | xname = os.path.split(mfile)[1] 60 | file_final_txt = os.path.join(out_dir, xname.split('.')[0]+'.txt') 61 | entries = [] 62 | eid = '99999999' 63 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 64 | text = '' 65 | for line in fr: 66 | if re.match(r'^', line): 67 | eid = re.match(r'^', line).group(1) 68 | elif not re.match(r'^\s*$', line): 69 | text += line 70 | else: 71 | text += line 72 | entries.append({"eid": eid, "text": text}) 73 | eid = '99999999' 74 | text = '' 75 | if eid != '': 76 | entries.sort(key=lambda x: x["eid"], reverse=False) 77 | with open(file_final_txt, 'w', encoding='utf-8') as fw: 78 | for entry in entries: 79 | fw.write(entry["text"]) 80 | else: 81 | print(Fore.YELLOW + "WARN: " + Fore.RESET + "检测到词典并非由 AMB 生成, 不保证词条顺序的准确还原") 82 | elif os.path.isfile(mfile) and mfile.endswith('.mdd'): 83 | cur_dir, mname = os.path.split(mfile) 84 | out_dir = os.path.join(os.path.splitext(mfile)[0], 'data') 85 | if os.path.exists(out_dir): 86 | shutil.rmtree(out_dir) 87 | # 检查是否存在 mdd 分包 88 | multi_mdd_flg = False 89 | mdd_names = [mname] 90 | for fname in os.listdir(cur_dir): 91 | if re.search(r'\.\d+\.mdd$', fname.lower()): 92 | multi_mdd_flg = True 93 | mdd_names.append(fname) 94 | # 按检查结果区分处理 95 | if multi_mdd_flg and input('检查到目录下存在 mdd 分包, 是否全部解包 (Y/N): ') in ('Y', 'y'): 96 | mdd_names = list(set(mdd_names)) 97 | mdd_names.sort() 98 | for mdd_name in mdd_names: 99 | print(f"开始解压 '{mdd_name}' :\n") 100 | self.mdict(['-x', os.path.join(cur_dir, mdd_name), '-d', out_dir]) 101 | else: 102 | self.mdict(['-x', mfile, '-d', out_dir]) 103 | else: 104 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 105 | done_flg = False 106 | return done_flg 107 | 108 | def pack_to_mdict(self, file_final_txt, file_dict_info, dir_data, dir_output): 109 | """ 打包 mdx/mdd (取代 MdxBuilder.exe) """ 110 | mdx_flg = True 111 | mdd_flg = True 112 | # 打包 mdx 113 | print('正在生成 mdx 文件……\n') 114 | ftitle = os.path.join(dir_output, os.path.splitext(os.path.split(file_final_txt)[1])[0]) 115 | if os.path.exists(file_final_txt) and os.path.exists(file_dict_info): 116 | # 给词条添加编号信息 117 | tmp_final_txt = os.path.join(os.path.join(self.settings.dir_bundle, '_tmp'), 'tmp_final.txt') 118 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 119 | with open(tmp_final_txt, 'w', encoding='utf-8') as fw: 120 | n = 0 121 | link_flg = False 122 | for line in fr: 123 | if re.match(r'^@@@LINK=', line, flags=re.I): 124 | link_flg = True 125 | if (not link_flg) and re.match(r'^\s*$', line): 126 | n += 1 127 | fw.write(f'\n') 128 | link_flg = False 129 | fw.write(line) 130 | self.mdict(['--description', file_dict_info, '--encoding', 'utf-8', '-a', tmp_final_txt, ftitle+'.mdx']) 131 | else: 132 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件 {file_final_txt} 或 {file_dict_info} 不存在") 133 | mdx_flg = False 134 | # 打包 mdd 135 | if dir_data is not None: 136 | mdd_flg = self.pack_to_mdd(dir_data, ftitle) 137 | if mdx_flg and mdd_flg: 138 | return True 139 | else: 140 | return False 141 | 142 | def pack_to_mdd(self, dir_data, ftitle): 143 | """ 仅打包 mdd (取代 MdxBuilder.exe) """ 144 | done_flg = True 145 | pack_flg = True 146 | if ftitle is None: 147 | ftitle = dir_data 148 | # 判断是否打包 149 | if os.path.exists(dir_data) and len(os.listdir(dir_data)) > 0: 150 | if os.path.exists(ftitle+'.mdd'): 151 | a = input(f'文件 "{ftitle}.mdd" 已存在, 是否重新打包 mdd (Y/N): ') 152 | if a not in ('Y', 'y'): 153 | pack_flg = 
False 154 | else: 155 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件夹 {dir_data} 不存在或为空") 156 | pack_flg = False 157 | done_flg = False 158 | # 开始打包 159 | if pack_flg: 160 | print('正在生成 mdd 文件……\n') 161 | # 检查子文件夹的数量 162 | sub_dirs = [] 163 | for item in os.listdir(dir_data): 164 | if os.path.isdir(os.path.join(dir_data, item)): 165 | sub_dirs.append(os.path.join(dir_data, item)) 166 | # 如果有2个子文件夹以上, 再计算子文件夹大小, 如果大小超过 1.5G, 将分包 167 | split_flg = False 168 | size_sum = 0 169 | if len(sub_dirs) > 1: 170 | # 判断子文件夹大小 171 | for sub_dir in sub_dirs: 172 | for fname in os.listdir(sub_dir): 173 | if os.path.isfile(os.path.join(sub_dir, fname)): 174 | size_sum += os.path.getsize(os.path.join(sub_dir, fname)) 175 | if size_sum > 1536000000: 176 | split_flg = True 177 | break 178 | # 按检查结果开始处理 179 | if split_flg: 180 | size_sum = 0 181 | print(Fore.YELLOW + "INFO: " + Fore.RESET + "资料文件夹超过 1.5G, 将自动分包") 182 | # 创建临时文件夹 183 | tmp_dir = os.path.join(os.path.split(dir_data)[0], '_packing') 184 | if not os.path.exists(tmp_dir): 185 | os.makedirs(tmp_dir) 186 | pack_list = [] 187 | pack = [] 188 | n = 0 189 | # 对每个子文件夹作判断 190 | for i in range(len(sub_dirs)): 191 | for fname in os.listdir(sub_dirs[i]): 192 | if os.path.isfile(os.path.join(sub_dirs[i], fname)): 193 | size_sum += os.path.getsize(os.path.join(sub_dirs[i], fname)) 194 | if size_sum > 1024000000: 195 | size_sum = 0 196 | pack.append(sub_dirs[i]) 197 | pack_list.append(pack) 198 | pack = [] 199 | break 200 | pack.append(sub_dirs[i]) 201 | n = i 202 | # 1.打包子文件夹 203 | mdd_rk = 0 204 | for sds in pack_list: 205 | for sd in sds: 206 | # 移动到临时文件夹中 207 | os.rename(sd, os.path.join(tmp_dir, os.path.split(sd)[1])) 208 | # 移完之后打包 209 | if mdd_rk == 0: 210 | self.mdict(['-a', tmp_dir, ftitle+'.mdd']) 211 | else: 212 | self.mdict(['-a', tmp_dir, f'{ftitle}.{str(mdd_rk)}.mdd']) 213 | # 打包完再移回去 214 | for fname in os.listdir(tmp_dir): 215 | os.rename(os.path.join(tmp_dir, fname), os.path.join(dir_data, fname)) 216 | mdd_rk += 1 217 | # 1.打包剩余部分 218 | # 移动文件夹部分(如果有) 219 | if n == len(sub_dirs) - 1: 220 | for sd in pack: 221 | os.rename(sd, os.path.join(tmp_dir, os.path.split(sd)[1])) 222 | # 移动文件部分(如果有) 223 | for item in os.listdir(dir_data): 224 | if not os.path.isdir(os.path.join(dir_data, item)): 225 | os.rename(os.path.join(dir_data, item), os.path.join(tmp_dir, item)) 226 | # 打包 227 | if len(os.listdir(tmp_dir)) == 0: 228 | pass 229 | else: 230 | self.mdict(['-a', tmp_dir, f'{ftitle}.{str(mdd_rk)}.mdd']) 231 | # 移回去 232 | for fname in os.listdir(tmp_dir): 233 | os.rename(os.path.join(tmp_dir, fname), os.path.join(dir_data, fname)) 234 | # 删除临时文件夹 235 | if os.path.exists(tmp_dir): 236 | os.rmdir(tmp_dir) 237 | else: 238 | self.mdict(['-a', dir_data, ftitle+'.mdd']) 239 | return done_flg 240 | 241 | # ========== (一) From PDF to Images ========== 242 | # def pdf_to_imgs(self, file_pdf, dir_out): 243 | # """ 自动判断文字版/图片版PDF, 并选择最优方法导出图像 """ 244 | # # 准备环境 245 | # file_exe = os.path.join(os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'MuPDF'), 'mutool.exe') 246 | # dir_tmp = os.path.join(self.settings.dir_bundle, '_tmp') 247 | # if not os.path.exists(dir_tmp): 248 | # os.makedirs(dir_tmp) 249 | # dir_tmp_mp = os.path.join(dir_tmp, 'MuPDF_tmp') 250 | # if not os.path.exists(dir_tmp_mp): 251 | # os.makedirs(dir_tmp_mp) 252 | # tmp_txt = os.path.join(dir_tmp_mp, 'text.txt') 253 | # # 判断是文字版还是图片版PDF 254 | # img_pdf_flg = True 255 | # os.system(f'{file_exe} draw -o {tmp_txt} -F text {file_pdf} 2-11') 256 | # with open(tmp_txt, 'r', 
encoding='utf-8') as fr: 257 | # word = re.sub(r'[\r\n\s]', '', fr.read()) 258 | # if len(word) > 50: 259 | # img_pdf_flg = False 260 | # # 开始处理 261 | # if img_pdf_flg: 262 | # self.extract_pdf_to_imgs_pdfpatcher(file_pdf, dir_out) 263 | # else: 264 | # self.convert_pdf_to_imgs(file_pdf, dir_out) 265 | # shutil.rmtree(dir_tmp_mp) 266 | 267 | def convert_pdf_to_imgs_fitz(self, file_pdf, dir_out, dpi=300): 268 | """ 使用 fitz(mupdf), 按 DPI 等参数转换成图片 """ 269 | # 读取 pdf 270 | doc = fitz.open(file_pdf) 271 | mat = fitz.Matrix(1, 1) 272 | count = 0 273 | for p in doc: 274 | count += 1 275 | # 开始导出 276 | if not os.path.exists(dir_out): 277 | os.makedirs(dir_out) 278 | print('转换中……') 279 | for i in range(count): 280 | fname = f"{str(i+1).zfill(8)}.png" 281 | page = doc.load_page(i) 282 | pix = page.get_pixmap(matrix=mat, dpi=dpi, colorspace=fitz.csGRAY, alpha=False) 283 | pix.save(os.path.join(dir_out, fname)) 284 | doc.close() 285 | print('转换完成!') 286 | 287 | def extract_pdf_to_imgs_fitz(self, file_pdf, dir_out): 288 | """ 使用 fitz(mupdf), 如果生成了JBIG2加密的 jb2,则还需要使用 jbig2dec 解密成 png """ 289 | # 准备参数 290 | cmd = ['extract', str(file_pdf), '-images', '-output', str(dir_out)] 291 | saved_parms = sys.argv[1:] 292 | sys.argv[1:] = cmd 293 | # 开始导出 294 | if not os.path.exists(dir_out): 295 | os.makedirs(dir_out) 296 | print('提取中……') 297 | fitz_command() 298 | sys.argv[1:] = saved_parms 299 | print('提取完成!') 300 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Litles 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## AutoMdxBuilder 简介 2 | **自动化制作 mdx 词典工具,人人都可以制作电子词典**(支持 Windows/macOS/Linux) 3 | 4 | AutoMdxBuilder 是 [[Mdict]](https://www.mdict.cn/wp/?lang=en) 词典制作相关的工具,旨在自动化词典制作过程,同时降低制作门槛,该工具目前具备以下功能: 5 | 6 | **(一) 打包/解包** 7 | 8 | * 解包 mdx/mdd 文件。功能同 `MdxExport.exe`,支持自动解 mdd 分包,支持保留原始词条顺序。 9 | * 打包成 mdx/mdd 文件。功能同 `MdxBuilder.exe`,支持 mdd 自动分包,支持保留原始词条顺序。 10 | 11 | **(二) 制作词典** 12 | 13 | * 自动化制作词典 (目前有A-D四个可选模板, 均支持多卷/集合类型) 14 | * 一键从 PDF/pdg 等原料制作词典 15 | 16 | **(三) 还原词典** 17 | 18 | * 将 Mdict 词典逆向还原成原材料,方便词典的二次编辑 19 | * 将 Mdict 词典逆向还原成 PDF 20 | 21 | **(四) 其他实用工具** 22 | 23 | * PDF 与图片互转 24 | * PDF 书签管理 25 | 26 | ## 一、词典制作 27 | 28 | ### (〇) 成品预览 29 | #### 图像词典 (模板A,朴素版) 30 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/img_dict_atmpl.gif) 31 | 32 | #### 图像词典 (模板B,导航版) 33 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/img_dict_btmpl.gif) 34 | 35 | #### 文本词典 (模板C,朴素版) 36 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/text_dict_ctmpl.png) 37 | 38 | #### 文本词典 (模板D,导航版) 39 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/text_dict_dtmpl.gif) 40 | 41 | ### 词典制作概述 42 | 43 | 使用词典制作功能时,需要准备好原材料,将所需要的材料单独用一个文件夹收纳(不妨称它为 amb 文件夹)。词典制作的配置信息写在 build.toml 文件中,同样也放置在该文件夹中。下面是一个示例的 amb 文件夹结构: 44 | 45 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/amb_folder.png) 46 | 47 | ### (一) 原材料准备说明 48 | 49 | 制作不同模板的词典,所需的原材料也不尽相同,下面分模板列举: 50 | 51 | #### 1.图像词典 (模板A) 52 | 53 | * (必须) `imgs` 文件夹: 存放图像文件,不限定图片格式,png、jpg 等均可,也无特定的名称要求(顺序是对的就行); 54 | * (可选) `index.txt`: 索引文件 55 | * (可选) `toc.txt`: 目录文件 56 | 57 | > index 和 toc 二者中必须至少有一个, 如果你的 toc 目录文件比较全, 建议改名 toc_all 然后使用模板 B 58 | 59 | #### 2.图像词典 (模板B) 60 | 61 | * (必须)`imgs` 文件夹:存放图像文件,同模板A 62 | * (可选)`index_all.txt`: 全索引文件 63 | * (可选)`toc_all.txt`: 全目录文件 64 | * (可选)`index.txt`: 附加索引文件 65 | 66 | > index_all 与 toc_all 是等价的, 按偏好使用其中一种即可 67 | > 如果在 index_all 之外还有独立的词条, 可以设置 add_extra_index = true, 并将那些词条以 index.txt 文件的形式作为补充 68 | 69 | #### 3.文本词典 (模板C) 70 | 71 | * (必须)`index.txt`: 索引文件 72 | 73 | #### 4.文本词典 (模板D) 74 | 75 | * (必须)`index_all.txt`: 全索引文件 76 | 77 | **【通用可选】** 除上述各模板的材料准备之外,下面两个是通用材料,制作词典可按需添加: 78 | 79 | * (可选)`syns.txt` 文件:同义词文件; 80 | * (可选)`info.html` 文件:词典介绍等描述。 81 | 82 | **【注意事项】** 83 | 84 | * 凡涉及的文本文件(如`.txt`、`.html`),一律要求 **UTF-8 无 BOM** 的编码格式; 85 | * 原材料文件夹中只放置需要用到的文件/文件夹,**为避免误读取,不用到的不要出现在原材料文件夹内**; 86 | * 文件夹和文件的名称就按本说明所提的,不建议自定义名称。 87 | 88 | ### (二) 配置文件 `build.toml` 参数说明 89 | 90 | 可参见 lib/build.toml 中的初始配置,已有详细注释,制作词典时可直接拷贝修改, 也可以参考 demo 词典的配置情况。下面选取其中部分作为补充说明: 91 | 92 | * `simp_trad_flg`: 是否需要繁简通搜, 开启后将会把所有词头都添加繁体/简体跳转, 以确保 mdx 使用时能繁简通搜。 默认 false 不开启。 93 | * `multi_volume`: 是否是多卷的, true 则开启多卷模式(需要按多卷模式来准备原材料)。默认是 false 即单卷模式。 94 | * `body_start`: 正文起始图片序号, 比如正文第一页是 imgs 文件夹中的第 23 张图, 那么就设置为 `body_start = 23`。(多卷模式下该值是列表,比如 `body_start = [23, 19, 1, 1]`) 95 | * `auto_split_columns`: 是否开启自动分栏, 设置值 2 则自动分割成两栏,该功能是为方便手机等小屏移动设备的使用而设置。默认值 1 表示不开启自动分栏。 96 | * `body_end_page`: 当自动分栏开启时,该值确定了分栏的应用范围,分栏从正文第一页开启, 默认到辞书的最后一页。(多卷模式下该值是列表,比如 `body_end_page = [463, 501, 9999, 9999]`) 97 | 98 | 对于模板 A 的 `navi_items`,其中 `a` 的值是显示文字,`ref`的值是与 `toc.txt` 中词目对应的: 99 | 100 | ![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/settings.png) 101 | 102 | 对于文本词典模板 C,D 中的 `add_headwords` 选项, 词条内容如果已经带有标题,可以将该项设置为 false。 103 | 104 | 105 | ## 二、相关文件格式 106 | 107 | ### 索引文件 `index.txt` 108 | 109 | 
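为直观起见,这里先补一个 index.txt 的内容示意(每行一个词条,词目与页码之间以制表符分隔;以下词目和页码均为虚构,仅作格式演示,并非仓库自带的示例):

```
阿房宫	1
安禄山	15
白居易	103
```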
格式 `词目<Tab>页码`(`<Tab>` 为制表符;页码数是相对正文起始页的,而不是图片序号):

![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/index.png)

> 如果是多卷模式, 则页码需要带分卷号前缀 `[n]` 以标识分卷(第一卷 `[1]` 可以省略不写),比如词条『刘备』是在第4卷第3页, 那么索引应写作 `刘备<Tab>[4]3`;

如果是制作文本词典 (模板C),用到的文件也叫 `index.txt`,只不过其中的 **页码** 换成了 **词条正文**,格式为 `词目<Tab>词条正文` 。

### 目录文件 `toc.txt`

格式 `[*]词目<Tab>页码`,大概像这样(行首 TAB 缩进表层级):

![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/toc.png)

格式同程序 `FreePic2Pdf.exe` 的书签文件 `FreePic2Pdf_bkmk.txt`,因此也可以直接用 `FreePic2Pdf.exe` 程序从 pdf 文件中导出。

> 与索引文件一样,多卷模式下, 页码需要带分卷号前缀 `[n]` 以标识分卷(第一卷 `[1]` 可以省略不写)

### 全索引文件 `index_all.txt`

是 `index.txt` 的拓展,格式同样是 `词目<Tab>页码` ,只不过 `index_all.txt` 是把 `toc.txt` 也并入进来,并且是严格有序的。

其中目录(章节)的词目要加 `【L<层级>】` 前缀标识,比如顶级章节“正文”前缀就是 `【L0】正文` ,“正文”的下一级“史前篇”的前缀就是 `【L1】史前篇` 。

> 章节词目可以没有对应页码,但要保留词目后的 `<Tab>`

![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/index_all.png)

> 与索引文件一样,多卷模式下, 页码需要带分卷号前缀 `[n]` 以标识分卷(第一卷 `[1]` 可以省略不写)

如果是制作文本词典 (模板D),用到的文件也叫 `index_all.txt`,只不过其中的 **页码** 换成了 **词条正文**,格式为 `词目<Tab>词条正文` 。

### 同义词文件 `syns.txt`

或说重定向文件,格式 `同义词<Tab>词目`:

![img](https://github.com/Litles/AutoMdxBuilder/blob/main/images/syns.png)

## 三、多卷模式补充说明

当在 `build.toml` 中设置 `multi_volume = true` 时,将会按照多卷模式制作词典,这时原材料的命名相比一般模式会有些许不同,下面按模板列举:

图像词典模板 A,B 在多卷模式下, 首先图像文件夹结构将是 imgs/vol_01, imgs/vol_02, imgs/vol_03... 即分卷子文件夹名称需采用 vol_ 加两位序号的形式

* 模板 A: 除可以使用全局索引/目录文件 index.txt, toc.txt 外,也可以使用分卷文件 index_01.txt, index_02.txt ... 和 toc_01.txt, toc_02.txt ... (分卷文件中的页码无需加`[n]`前缀)
* 模板 B: 除可以使用全局全索引/全目录文件 index_all.txt/toc_all.txt 外,也可以使用分卷文件 index_all_01.txt, index_all_02.txt ... 或 toc_all_01.txt, toc_all_02.txt ... 
(分卷文件中的页码无需加`[n]`前缀) 155 | * 模板 D: 同模板 B, 不过因为没有页码, 所以分卷文件和全局文件无区别 156 | 157 | > 还可以在目录文件、全索引或全目录文件名上标识分卷名称(这样就不用在 `build.toml` 中设置 vol_names 项), 比如 toc_01_军事卷、 toc_all_01_军事卷.txt 或 index_all_01_军事卷.txt, 这样, 程序将会从文件名中读取卷名 158 | 159 | ## 四、其他功能简介 160 | 161 | ## 参考 162 | 163 | + https://github.com/liuyug/mdict-utils 164 | + https://github.com/VimWei/MdxSourceBuilder 165 | -------------------------------------------------------------------------------- /auto_mdx_builder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:17 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | # import logging 9 | import traceback 10 | import os 11 | import re 12 | import shutil 13 | from colorama import Fore, just_fix_windows_console 14 | from settings import Settings 15 | from func_lib import FuncLib 16 | from templates.img_dict_atmpl import ImgDictAtmpl 17 | from templates.img_dict_btmpl import ImgDictBtmpl 18 | from templates.text_dict_ctmpl import TextDictCtmpl 19 | from templates.text_dict_dtmpl import TextDictDtmpl 20 | from ebook_utils import EbookUtils 21 | 22 | 23 | class AutoMdxBuilder: 24 | """图像词典制作程序""" 25 | def __init__(self): 26 | self.settings = Settings() 27 | self.func = FuncLib(self) 28 | self.utils = EbookUtils(self) 29 | 30 | def auto_processing(self, sel): 31 | """ 根据选择自动处理 """ 32 | if sel == 1: 33 | # --- 解包 mdx/mdd 文件 --- 34 | mfile = input("请输入要解包的 mdx/mdd 文件路径: ").strip('"') 35 | if self.utils.export_mdx(mfile): 36 | print(Fore.GREEN + "\n已输出在同目录下: " + Fore.RESET + os.path.splitext(mfile)[0]) 37 | elif sel == 2: 38 | # --- 将源 txt 文件打包成 mdx 文件 --- 39 | file_final_txt = input("请输入要打包的 txt 文件路径: ").strip('"') 40 | if self.func.text_file_check(file_final_txt) == 2: 41 | # 检查数据文件夹 42 | dir_curr, fname_txt = os.path.split(file_final_txt) 43 | dir_data = os.path.join(dir_curr, 'data') 44 | if not os.path.exists(dir_data): 45 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"文件夹 {dir_data} 不存在, 已默认不打包 mdd") 46 | dir_data = None 47 | elif os.path.exists(dir_data) and len(os.listdir(dir_data)) == 0: 48 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"文件夹 {dir_data} 为空, 已默认不打包 mdd") 49 | dir_data = None 50 | # 生成 info.html 51 | file_info_raw = None 52 | for fname in os.listdir(dir_curr): 53 | if fname == 'info.html': 54 | file_info_raw = os.path.join(dir_curr, fname) 55 | elif fname.endswith('.html') and fname.startswith(os.path.splitext(fname_txt)[0]): 56 | file_info_raw = os.path.join(dir_curr, fname) 57 | break 58 | file_dict_info = os.path.join(self.settings.dir_output_tmp, self.settings.fname_dict_info) 59 | self.func.generate_info_html(file_info_raw, file_dict_info, os.path.splitext(fname_txt)[0], None) 60 | # 打包 61 | print('\n------------------\n开始打包……\n') 62 | done_flg = self.utils.pack_to_mdict(dir_curr, file_final_txt, file_dict_info, dir_data) 63 | if done_flg: 64 | print(Fore.GREEN + "\n打包完毕。" + Fore.RESET) 65 | else: 66 | print(Fore.RED + "\n材料检查不通过, 请确保材料准备无误再执行程序" + Fore.RESET) 67 | elif sel == 3: 68 | # --- 将资料包文件夹打包成 mdd 文件 --- 69 | dir_data = input("请输入要打包的资料文件夹路径: ").strip('"').rstrip('\\/') 70 | print('\n------------------\n开始打包……\n') 71 | done_flg = self.utils.pack_to_mdd(dir_data, None) 72 | if done_flg: 73 | print(Fore.GREEN + "\n打包完毕。" + Fore.RESET) 74 | elif sel == 10: 75 | # --- 从 PDF文件/pdg文件夹 生成预备原材料 --- 76 | p = input("请输入 pdf文件/pdg文件夹 路径: ").strip('"').rstrip('\\/') 77 | if os.path.isfile(p) 
and os.path.splitext(p)[1] == '.pdf': 78 | self.pdf_to_amb(p) 79 | elif os.path.isdir(p): 80 | self.pdf_to_amb(p, False) 81 | else: 82 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 83 | elif sel == 11: 84 | # --- index_all/toc_all 互转 --- 85 | file_input = input("请输入 toc_all/index_all 的文件路径: ").strip('"') 86 | dir_input, fname = os.path.split(file_input) 87 | done_flg = True 88 | if 'index_all' in fname: 89 | file_toc_all = os.path.join(dir_input, fname.replace('index_all', 'toc_all')) 90 | done_flg = self.func.index_all_to_toc(file_input, file_toc_all) 91 | elif 'toc_all' in fname: 92 | file_index_all = os.path.join(dir_input, fname.replace('toc_all', 'index_all')) 93 | done_flg = self.func.toc_all_to_index(file_input, file_index_all) 94 | else: 95 | it = input("该文本文本是 index_all 吗(Y/N): ") 96 | if it in ('Y', 'y'): 97 | file_toc_all = os.path.join(dir_input, 'toc_all.txt') 98 | done_flg = self.func.index_all_to_toc(file_input, file_toc_all) 99 | elif it in ('N', 'n'): 100 | file_index_all = os.path.join(dir_input, 'index_all.txt') 101 | done_flg = self.func.toc_all_to_index(file_input, file_index_all) 102 | else: 103 | done_flg = False 104 | if done_flg: 105 | print(Fore.GREEN + "\n转换完成, 生成在同目录下" + Fore.RESET) 106 | else: 107 | print(Fore.RED + "\n未完成转换" + Fore.RESET) 108 | elif sel == 12: 109 | # --- 合并 toc 和 index 为 index_all --- 110 | file_toc = input("(1) 请输入 toc 文件的路径: ").strip('"') 111 | file_index = input("(2) 请输入 index 文件的路径: ").strip('"') 112 | file_index_all = os.path.join(os.path.split(file_index)[0], 'index_all.txt') 113 | self.func.merge_to_index_all(file_toc, file_index, file_index_all) 114 | elif sel == 13: 115 | # --- 索引扩充(通过标点符号等分词), 提升查得率 --- 116 | p = input("请输入词头文件路径: ").strip('"') 117 | if os.path.isfile(p) and self.func.text_file_check(p) == 2: 118 | file_result = os.path.splitext(p)[0]+'_split'+os.path.splitext(p)[1] 119 | inp = input("输入分词最少字符数(大于0, 回车默认长度为2): ") 120 | n_chars = 2 121 | if re.match(r'\d+$', inp) and int(inp) > 0: 122 | n_chars = int(inp) 123 | else: 124 | print(Fore.YELLOW + "INFO: " + Fore.RESET + "输入未识别, 已使用默认长度2") 125 | if self.func.make_relinks_split(p, file_result, n_chars): 126 | print(Fore.GREEN + "\n转换完成, 生成在同目录下" + Fore.RESET) 127 | else: 128 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 129 | elif sel == 14: 130 | # --- 繁体简体 txt 文本文件互转 --- 131 | p = input("请输入要转换的文本文件路径: ").strip('"') 132 | if os.path.isfile(p) and self.func.text_file_check(p) == 2: 133 | ts = input("将该文本转成繁体(T/t)还是简体(S/s):") 134 | if ts in ('T', 't'): 135 | file_result = os.path.splitext(p)[0]+'_trad'+os.path.splitext(p)[1] 136 | self.func.simp_trad_trans(p, file_result, 'T') 137 | elif ts in ('S', 's'): 138 | file_result = os.path.splitext(p)[0]+'_simp'+os.path.splitext(p)[1] 139 | self.func.simp_trad_trans(p, file_result, 'S') 140 | else: 141 | print(Fore.RED + "ERROR: " + Fore.RESET + "输入有误") 142 | else: 143 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 144 | elif sel == 20: 145 | # --- 生成词典 --- 146 | p = input("请输入原材料文件夹路径或 build.toml 文件路径: ").strip('"').rstrip('\\/') 147 | if os.path.split(p)[1] == 'build.toml': 148 | if self.settings.load_build_toml(p, False, False): 149 | self._build_mdict() 150 | elif os.path.isdir(p): 151 | file_toml = os.path.join(p, 'build.toml') 152 | if os.path.isfile(file_toml): 153 | if self.settings.load_build_toml(file_toml, False, True): 154 | self._build_mdict() 155 | else: 156 | print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 build.toml 文件") 157 | else: 158 | print(Fore.RED + "ERROR: " + Fore.RESET + 
"路径输入有误") 159 | elif sel == 30: 160 | # --- 从词典还原原材料 --- 161 | p = input("请输入词典的文件夹或 mdx/mdd 文件路径: ").strip('"').rstrip('\\/') 162 | if os.path.isfile(p) and os.path.splitext(p)[1] == '.mdx': 163 | self._restore_raw(p, False) 164 | elif os.path.isfile(p) and os.path.splitext(p)[1] == '.mdd': 165 | if os.path.isfile(p[:-1]+'x'): 166 | self._restore_raw(p[:-1]+'x', False) 167 | elif os.path.isdir(p): 168 | for m in os.listdir(p): 169 | if m.endswith('.mdx'): 170 | self._restore_raw(os.path.join(p, m), True) 171 | break 172 | else: 173 | print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 mdx 文件") 174 | else: 175 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 176 | elif sel == 31: 177 | # --- 从原材料还原 PDF --- 178 | p = input("请输入原材料文件夹路径或 build.toml 文件路径: ").strip('"').rstrip('\\/') 179 | if os.path.split(p)[1] == 'build.toml': 180 | if self.settings.load_build_toml(p, True): 181 | self.amb_to_pdf(file_toml, False) 182 | elif os.path.isdir(p): 183 | file_toml = os.path.join(p, 'build.toml') 184 | if os.path.isfile(file_toml): 185 | if self.settings.load_build_toml(file_toml, True): 186 | self.amb_to_pdf(file_toml, True) 187 | else: 188 | print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 build.toml 文件") 189 | else: 190 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 191 | elif sel == 41: 192 | # --- 从 PDF 提取图片 (PDF补丁丁) --- 193 | p = input("请输入 PDF 文件路径: ").strip('"').rstrip('\\/') 194 | if os.path.isfile(p) and p.lower().endswith('.pdf'): 195 | fname = os.path.split(p)[1] 196 | out_dir = os.path.join(os.path.split(p)[0], fname.split('.')[0]) 197 | self.utils.extract_pdf_to_imgs_pdfpatcher(p, out_dir) 198 | else: 199 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 200 | elif sel == 42: 201 | # --- 从 PDF 提取图片 (MuPDF) --- 202 | p = input("请输入 PDF 文件路径: ").strip('"').rstrip('\\/') 203 | if os.path.isfile(p) and p.lower().endswith('.pdf'): 204 | fname = os.path.split(p)[1] 205 | out_dir = os.path.join(os.path.split(p)[0], fname.split('.')[0]) 206 | self.utils.extract_pdf_to_imgs(p, out_dir) 207 | else: 208 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 209 | elif sel == 43: 210 | # --- 将 PDF 转换成图片 (MuPDF) --- 211 | p = input("请输入 PDF 文件路径: ").strip('"').rstrip('\\/') 212 | if os.path.isfile(p) and p.lower().endswith('.pdf'): 213 | fname = os.path.split(p)[1] 214 | out_dir = os.path.join(os.path.split(p)[0], fname.split('.')[0]) 215 | dpi = input("请输入要生成图片的 DPI(回车则默认300): ") 216 | if re.match(r'\d+$', dpi): 217 | self.utils.convert_pdf_to_imgs(p, out_dir, int(dpi)) 218 | else: 219 | self.utils.convert_pdf_to_imgs(p, out_dir) 220 | else: 221 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 222 | elif sel == 44: 223 | # --- 将 图片 合成 PDF (MuPDF) --- 224 | p = input("请输入图片所在文件夹路径(不能包含空格): ").strip('"').rstrip('\\/') 225 | if os.path.isdir(p): 226 | out_file = p+'.pdf' 227 | self.utils.combine_img_to_pdf(p, out_file) 228 | else: 229 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 230 | elif sel == 45: 231 | # --- 将 图片 合成 PDF (FreePic2Pdf) --- 232 | p = input("请输入图片所在文件夹路径(不能包含空格): ").strip('"').rstrip('\\/') 233 | if os.path.isdir(p): 234 | out_file = p+'.pdf' 235 | self.utils.combine_img_to_pdf_fp2p(p, out_file) 236 | else: 237 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 238 | elif sel == 46: 239 | # --- PDF 书签导出/导入 (FreePic2Pdf) --- 240 | file_pdf = input("请输入 PDF 文件路径: ").strip('"').rstrip('\\/') 241 | dir_bkmk = input("请输入书签文件夹路径(导出则直接回车): ").strip('"').rstrip('\\/') 242 | if os.path.isdir(dir_bkmk): 243 | self.utils.eximport_bkmk_fp2p(file_pdf, dir_bkmk, False) 244 | elif dir_bkmk is None or 
len(dir_bkmk) == 0: 245 | fname = os.path.split(file_pdf)[1] 246 | dir_bkmk = os.path.join(os.path.split(file_pdf)[0], fname.split('.')[0]+'_bkmk') 247 | self.utils.eximport_bkmk_fp2p(file_pdf, dir_bkmk) 248 | else: 249 | print(Fore.RED + "\n输入的路径有误" + Fore.RESET) 250 | else: 251 | pass 252 | 253 | def _build_mdict(self): 254 | done_flg = False 255 | if self.settings.templ_choice in ('a', 'A'): 256 | """ 制作图像词典 (模板A) """ 257 | # 生成 txt 源文本 258 | make_result = ImgDictAtmpl(self).make_source_file() 259 | if make_result: 260 | file_final_txt, dir_imgs, file_dict_info = make_result 261 | # 创建输出文件夹 262 | if not os.path.exists(self.settings.dir_output): 263 | os.makedirs(self.settings.dir_output) 264 | # 生成 css 文件 265 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_atmpl) 266 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 267 | if self.settings.split_columns == 2: 268 | with open(os.path.join(self.settings.dir_lib, self.settings.css_split_2), 'r', encoding='utf-8') as fr: 269 | s = fr.read() 270 | with open(file_css, 'w', encoding='utf-8') as fw: 271 | with open(file_css_tmpl, 'r', encoding='utf-8') as fr: 272 | fw.write(fr.read().replace('/**/', s)) 273 | else: 274 | shutil.copy(file_css_tmpl, file_css) 275 | # 开始打包 276 | print('\n------------------\n开始打包……\n') 277 | done_flg = self.utils.pack_to_mdict(self.settings.dir_output, file_final_txt, file_dict_info, dir_imgs) 278 | elif self.settings.templ_choice in ('b', 'B'): 279 | """ 制作图像词典 (模板B) """ 280 | # 生成 txt 源文本 281 | make_result = ImgDictBtmpl(self).make_source_file() 282 | if make_result: 283 | file_final_txt, dir_imgs, file_dict_info = make_result 284 | # 创建输出文件夹 285 | if not os.path.exists(self.settings.dir_output): 286 | os.makedirs(self.settings.dir_output) 287 | # 生成 css 文件 288 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_btmpl) 289 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 290 | if self.settings.split_columns == 2: 291 | with open(os.path.join(self.settings.dir_lib, self.settings.css_split_2), 'r', encoding='utf-8') as fr: 292 | s = fr.read() 293 | with open(file_css, 'w', encoding='utf-8') as fw: 294 | with open(file_css_tmpl, 'r', encoding='utf-8') as fr: 295 | fw.write(fr.read().replace('/**/', s)) 296 | else: 297 | shutil.copy(file_css_tmpl, file_css) 298 | # 开始打包 299 | print('\n------------------\n开始打包……\n') 300 | done_flg = self.utils.pack_to_mdict(self.settings.dir_output, file_final_txt, file_dict_info, dir_imgs) 301 | elif self.settings.templ_choice in ('c', 'C'): 302 | """ 制作文本词典 (模板C) """ 303 | # 生成 txt 源文本 304 | make_result = TextDictCtmpl(self).make_source_file() 305 | if make_result: 306 | file_final_txt, dir_data, file_dict_info = make_result 307 | # 创建输出文件夹 308 | if not os.path.exists(self.settings.dir_output): 309 | os.makedirs(self.settings.dir_output) 310 | # 生成 css 文件 311 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_ctmpl) 312 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 313 | shutil.copy(file_css_tmpl, file_css) 314 | # 开始打包 315 | print('\n------------------\n开始打包……\n') 316 | done_flg = self.utils.pack_to_mdict(self.settings.dir_output, file_final_txt, file_dict_info, dir_data) 317 | elif self.settings.templ_choice in ('d', 'D'): 318 | """ 制作文本词典 (模板D) """ 319 | # 生成 txt 源文本 320 | make_result = TextDictDtmpl(self).make_source_file() 321 | if make_result: 322 | file_final_txt, dir_data, file_dict_info = make_result 323 | # 创建输出文件夹 324 | if 
not os.path.exists(self.settings.dir_output): 325 | os.makedirs(self.settings.dir_output) 326 | # 生成 css 文件 327 | file_css_tmpl = os.path.join(self.settings.dir_lib, self.settings.css_dtmpl) 328 | file_css = os.path.join(self.settings.dir_output, self.settings.fname_css) 329 | shutil.copy(file_css_tmpl, file_css) 330 | # 开始打包 331 | print('\n------------------\n开始打包……\n') 332 | done_flg = self.utils.pack_to_mdict(self.settings.dir_output, file_final_txt, file_dict_info, dir_data) 333 | if done_flg: 334 | print("\n打包完毕。" + Fore.GREEN + "\n\n恭喜, 词典已生成!" + Fore.RESET) 335 | 336 | def _restore_raw(self, xfile, outside_flg): 337 | """ 将词典还原为原材料 """ 338 | # 1.准备参数 339 | extract_flg = False 340 | dict_name = None 341 | templ_choice = None 342 | dir_input, fname = os.path.split(xfile) 343 | # 2.分析 mdx 文件 344 | tmp_restore = os.path.join(self.settings.dir_output_tmp, 'restore') 345 | if not os.path.exists(tmp_restore): 346 | os.makedirs(tmp_restore) 347 | tmp_xfile = os.path.join(tmp_restore, fname) 348 | tmp_xdir = os.path.splitext(tmp_xfile)[0] 349 | if os.path.exists(tmp_xdir): 350 | shutil.rmtree(tmp_xdir) 351 | shutil.copy(xfile, tmp_xfile) 352 | if self.utils.export_mdx(tmp_xfile): 353 | tmp_final_txt = os.path.join(tmp_xdir, fname.split('.')[0]+'.txt') 354 | else: 355 | tmp_final_txt = None 356 | # 分析 info 信息, 确定是否支持还原 357 | for f in os.listdir(tmp_xdir): 358 | fp = os.path.join(tmp_xdir, f) 359 | text = '' 360 | if fp.endswith('.info.html'): 361 | with open(fp, 'r', encoding='utf-8') as fr: 362 | pat = re.compile(r'

([^><]*?), built with AutoMdxBuilder[^><]*?based on template ([A-D])\.', flags=re.I) 363 | pat_multi = re.compile(r'([^><]*?), built with AutoMdxBuilder[^><]*?based on template ([ABD]) in (\d+) volumes\.
', flags=re.I) 364 | text = fr.read() 365 | if pat.search(text): 366 | # 符合条件, 支持还原 367 | dict_name = pat.search(text).group(1) 368 | templ_choice = pat.search(text).group(2) 369 | multi_vols_flg = False 370 | volume_num = 1 371 | text = pat.sub('', text) 372 | extract_flg = True 373 | break 374 | elif pat_multi.search(text): 375 | # (多卷)符合条件, 支持还原 376 | dict_name = pat_multi.search(text).group(1) 377 | templ_choice = pat_multi.search(text).group(2) 378 | multi_vols_flg = True 379 | volume_num = int(pat_multi.search(text).group(3)) 380 | text = pat_multi.sub('', text) 381 | extract_flg = True 382 | break 383 | # 3.开始提取 384 | if extract_flg: 385 | # 创建目标文件夹 386 | if outside_flg: 387 | out_dir = os.path.join(os.path.split(dir_input)[0], fname.split('.')[0]) + '_amb' 388 | else: 389 | out_dir = os.path.splitext(xfile)[0] + '_amb' 390 | if not os.path.exists(out_dir): 391 | os.makedirs(out_dir) 392 | # 提取 info.html 393 | if not re.match(r'\s*$', text): 394 | with open(os.path.join(out_dir, 'info.html'), 'w', encoding='utf-8') as fw: 395 | fw.write(text) 396 | # 提取 index, index_all, syns 等信息 397 | file_css = None 398 | for f in os.listdir(dir_input): 399 | if os.path.splitext(f)[1].lower() == '.css': 400 | file_css = os.path.join(dir_input, f) 401 | if tmp_final_txt: 402 | # 选择函数进行处理 403 | if templ_choice == 'A': 404 | # 模板A无备份索引, 故不保证索引顺序的精准还原 405 | ImgDictAtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name, file_css, multi_vols_flg, volume_num) 406 | elif templ_choice == 'B': 407 | ImgDictBtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name, file_css, multi_vols_flg, volume_num) 408 | elif templ_choice == 'C': 409 | TextDictCtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name) 410 | elif templ_choice == 'D': 411 | TextDictDtmpl(self).extract_final_txt(tmp_final_txt, out_dir, dict_name, multi_vols_flg, volume_num) 412 | else: 413 | print(Fore.RED + "ERROR: " + Fore.RESET + "还原失败") 414 | # 处理 mdd 415 | file_mdd = os.path.splitext(xfile)[0] + '.mdd' 416 | if os.path.isfile(file_mdd) and templ_choice in ('A', 'B'): 417 | dir_data = os.path.join(out_dir, "imgs") 418 | if os.path.exists(dir_data): 419 | shutil.rmtree(dir_data) 420 | self.utils.mdict(['-x', file_mdd, '-d', dir_data]) 421 | elif os.path.isfile(file_mdd) and templ_choice in ('C', 'D'): 422 | dir_data = os.path.join(out_dir, "data") 423 | if os.path.exists(dir_data): 424 | shutil.rmtree(dir_data) 425 | self.utils.mdict(['-x', file_mdd, '-d', dir_data]) 426 | else: 427 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "同路径下未找到相应的 mdd 文件, 将不会生成 imgs/data 文件夹") 428 | print(Fore.GREEN + "\n已提取原材料至目录: " + Fore.RESET + out_dir) 429 | else: 430 | print(Fore.RED + "ERROR: " + Fore.RESET + "词典并非由 AutoMdxBuilder 制作, 不支持还原") 431 | shutil.rmtree(tmp_restore) 432 | 433 | def pdf_to_amb(self, input_path, pdf_flg=True): 434 | """ 从 PDF文件/pdg文件夹 生成 amb 文件夹 """ 435 | # 0.准备路径相关 436 | dir_bkmk = os.path.join(self.settings.dir_output_tmp, 'bkmk') 437 | if os.path.exists(dir_bkmk): 438 | shutil.rmtree(dir_bkmk) 439 | os.makedirs(dir_bkmk) 440 | # 开始处理 441 | if pdf_flg: 442 | fname = os.path.split(input_path)[1] 443 | out_dir = os.path.join(os.path.split(input_path)[0], fname.split('.')[0]+'_amb') 444 | if not os.path.exists(out_dir): 445 | os.makedirs(out_dir) 446 | # 1.导出书签 447 | cur_path = os.getcwd() 448 | self.utils.eximport_bkmk_fp2p(input_path, os.path.join(cur_path, dir_bkmk)) 449 | try: 450 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'r', encoding='utf-16le') as fr: 451 | text = fr.read() 452 | 
line_num = len(re.findall(r'^', text, flags=re.M)) 453 | if line_num <= 3: 454 | print(Fore.YELLOW + "INFO: " + Fore.RESET + "未识别到目录, 将不会生成 toc.txt") 455 | else: 456 | with open(os.path.join(out_dir, 'toc.txt'), 'w', encoding='utf-8') as fw: 457 | fw.write(text) 458 | if line_num > 500: 459 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "书签超过 500 行, 请后续确认是否包含索引, 是的话建议改名为 toc_all.txt") 460 | except UnicodeDecodeError: 461 | shutil.copy(os.path.join(dir_bkmk, "FreePic2Pdf_bkmk.txt"), os.path.join(out_dir, "[utf-16]toc.txt")) 462 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "书签中存在无法识别的字符, 已输出为 utf-16 编码") 463 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'r', encoding='utf-16le') as fr: 464 | mt = re.search(r'(?<=BasePage=)(\d+)', fr.read()) 465 | if mt: 466 | body_start = mt.group(0) 467 | else: 468 | body_start = 1 469 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未识别到正文起始页码, 已设置默认值 1") 470 | # 2.生成 build.toml 471 | shutil.copy(os.path.join(self.settings.dir_lib, "build.toml"), os.path.join(out_dir, "build.toml")) 472 | with open(os.path.join(out_dir, "build.toml"), 'r+', encoding='utf-8') as fr: 473 | text = fr.read() 474 | text = re.sub(r'^templ_choice = "\w"', 'templ_choice = "A"', text, flags=re.I+re.M) 475 | text = re.sub(r'^name = "[^"]+?"', f'name = "{fname.split(".")[0]}"', text, flags=re.I+re.M) 476 | text = re.sub(r'^name_abbr = "[^"]+?"', 'name_abbr = "XXXXXX"', text, flags=re.I+re.M) 477 | text = re.sub(r'^body_start = \d+', f'body_start = {str(body_start)}', text, flags=re.I+re.M) 478 | fr.seek(0) 479 | fr.truncate() 480 | fr.write(text) 481 | # 3.导出图片 482 | if not os.path.exists(os.path.join(out_dir, 'imgs')): 483 | os.makedirs(os.path.join(out_dir, 'imgs')) 484 | self.utils.pdf_to_imgs(input_path, os.path.join(out_dir, 'imgs')) 485 | else: 486 | out_dir = input_path+'_amb' 487 | if not os.path.exists(out_dir): 488 | os.makedirs(out_dir) 489 | # 1.pdg 转 img 490 | if not os.path.exists(os.path.join(out_dir, 'imgs')): 491 | os.makedirs(os.path.join(out_dir, 'imgs')) 492 | print(os.path.join(out_dir, 'imgs')) 493 | self.utils.convert_pdg_to_img(input_path, os.path.join(out_dir, 'imgs')) 494 | # 2.识别词典信息 495 | bkmk_itf = os.path.join(os.path.join(out_dir, 'imgs'), 'FreePic2Pdf.itf') 496 | if os.path.isfile(bkmk_itf): 497 | with open(bkmk_itf, 'r', encoding='utf-16le') as fr: 498 | text = fr.read() 499 | mt_body_start = re.search(r'(?<=TextPage=)(\d+)', text) 500 | mt_name = re.search(r'(?<=Title=)(.+)', text) 501 | if mt_body_start: 502 | body_start = mt_body_start.group(0) 503 | else: 504 | body_start = 1 505 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未识别到正文起始页码, 已设置默认值 1") 506 | if mt_name: 507 | name = mt_name.group(0) 508 | else: 509 | name = os.path.split(input_path)[1] 510 | os.remove(bkmk_itf) 511 | else: 512 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未识别到书籍信息") 513 | # 3.生成 build.toml 514 | shutil.copy(os.path.join(self.settings.dir_lib, "build.toml"), os.path.join(out_dir, "build.toml")) 515 | with open(os.path.join(out_dir, "build.toml"), 'r+', encoding='utf-8') as fr: 516 | text = fr.read() 517 | text = re.sub(r'^templ_choice = "\w"', 'templ_choice = "A"', text, flags=re.I+re.M) 518 | text = re.sub(r'^name = "[^"]+?"', f'name = "{name}"', text, flags=re.I+re.M) 519 | text = re.sub(r'^name_abbr = "[^"]+?"', 'name_abbr = "XXXXXX"', text, flags=re.I+re.M) 520 | text = re.sub(r'^body_start = \d+', f'body_start = {str(body_start)}', text, flags=re.I+re.M) 521 | fr.seek(0) 522 | fr.truncate() 523 | fr.write(text) 524 | shutil.rmtree(dir_bkmk) 525 | 
print(Fore.GREEN + "\n\n预备原材料生成完毕!" + Fore.RESET) 526 | 527 | def amb_to_pdf(self, file_toml, outside_flg): 528 | """ 从 amb 文件夹合成 PDF 文件 """ 529 | # 0.准备路径相关 530 | dir_amb = os.path.split(file_toml)[0] 531 | if outside_flg: 532 | out_file = os.path.join(os.path.split(dir_amb)[0], self.settings.name+'.pdf') 533 | else: 534 | out_file = os.path.join(dir_amb, self.settings.name+'.pdf') 535 | # 准备临时书签文件夹 536 | dir_bkmk_bk = os.path.join(self.settings.dir_lib, 'bkmk') 537 | dir_bkmk = os.path.join(self.settings.dir_output_tmp, 'bkmk') 538 | if os.path.exists(dir_bkmk): 539 | shutil.rmtree(dir_bkmk) 540 | os.makedirs(dir_bkmk) 541 | # 1.生成临时书签 542 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'w', encoding='utf-8') as fw: 543 | with open(os.path.join(dir_bkmk_bk, 'FreePic2Pdf.itf'), 'r+', encoding='utf-8') as fr: 544 | text = re.sub(r'(?<=BasePage=|TextPage=)\d+', str(self.settings.body_start[0]), fr.read()) 545 | fw.write(text) 546 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'w', encoding='utf-8') as fw: 547 | fw.write('正文\t1\n') 548 | toc_flg = False 549 | for fname in os.listdir(dir_amb): 550 | if fname in ('toc.txt', 'toc_all.txt'): 551 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'w', encoding='utf-8') as fw: 552 | with open(os.path.join(dir_amb, fname), 'r', encoding='utf-8') as fr: 553 | text = fr.read() 554 | fw.write(text) 555 | toc_flg = True 556 | break 557 | elif fname == 'index_all.txt': 558 | toc_tmp = os.path.join(self.settings.dir_output_tmp, 'toc_all.txt') 559 | if self.func.index_all_to_toc(os.path.join(dir_amb, fname), toc_tmp): 560 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'w', encoding='utf-8') as fw: 561 | with open(toc_tmp, 'r', encoding='utf-8') as fr: 562 | text = fr.read() 563 | fw.write(text) 564 | toc_flg = True 565 | break 566 | else: 567 | pass 568 | if not toc_flg: 569 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未找到 toc.txt/toc_all.txt/index_all.txt, 生成的 PDF 将不带书签") 570 | # 2.将图片合成PDF 571 | if os.path.isdir(os.path.join(dir_amb, 'imgs')): 572 | self.utils.combine_img_to_pdf_fp2p(os.path.join(dir_amb, 'imgs'), out_file) 573 | # 3.给PDF挂书签 574 | cur_path = os.getcwd() 575 | self.utils.eximport_bkmk_fp2p(out_file, os.path.join(cur_path, dir_bkmk), False) 576 | shutil.rmtree(dir_bkmk) 577 | print(Fore.GREEN + "\n\nPDF生成完毕!" 
+ Fore.RESET) 578 | else: 579 | print(Fore.RED + "ERROR: " + Fore.RESET + "未找到 imgs 文件夹") 580 | 581 | 582 | def print_menu(): 583 | """ 打印选单 """ 584 | # 功能选单 585 | print("\n(〇) 打包/解包") 586 | print(Fore.CYAN + " 1" + Fore.RESET + ".解包 mdx/mdd 文件") 587 | print(Fore.CYAN + " 2" + Fore.RESET + ".将源 txt 文件打包成 mdx 文件") 588 | print(Fore.CYAN + " 3" + Fore.RESET + ".将资料包文件夹打包成 mdd 文件") 589 | print("\n(一) 准备原材料") 590 | print(Fore.CYAN + " 10" + Fore.RESET + ".从 PDF文件/pdg文件夹 生成预备原材料" + Fore.YELLOW + " (还需手动检查完善)" + Fore.RESET) 591 | print(Fore.CYAN + " 11" + Fore.RESET + ".toc_all 和 index_all 互转") 592 | print(Fore.CYAN + " 12" + Fore.RESET + ".合并 toc 和 index 为 index_all") 593 | print(Fore.CYAN + " 13" + Fore.RESET + ".索引扩充(通过标点符号等分词), 提升查得率") 594 | print(Fore.CYAN + " 14" + Fore.RESET + ".繁体简体 txt 文本文件互转") 595 | print("\n(二) 制作词典") 596 | print(Fore.CYAN + " 20" + Fore.RESET + ".生成词典" + Fore.YELLOW + " (需准备好原材料)" + Fore.RESET) 597 | print("\n(三) 还原词典") 598 | print(Fore.CYAN + " 30" + Fore.RESET + ".从词典还原原材料" + Fore.YELLOW + " (仅支持 AMB 1.4 以上版本)" + Fore.RESET) 599 | print(Fore.CYAN + " 31" + Fore.RESET + ".从原材料还原 PDF") 600 | print("\n(四) 其他工具") 601 | print(Fore.CYAN + " 41" + Fore.RESET + ".从 PDF 提取图片 (PDF补丁丁)") 602 | print(Fore.CYAN + " 42" + Fore.RESET + ".从 PDF 提取图片 (MuPDF)") 603 | print(Fore.CYAN + " 43" + Fore.RESET + ".将 PDF 转换成图片 (MuPDF)") 604 | print(Fore.CYAN + " 44" + Fore.RESET + ".将 图片 合成PDF (MuPDF)") 605 | print(Fore.CYAN + " 45" + Fore.RESET + ".将 图片 合成PDF (FreePic2Pdf)") 606 | print(Fore.CYAN + " 46" + Fore.RESET + ".PDF书签导出/导入 (FreePic2Pdf)") 607 | 608 | 609 | def main(): 610 | # 程序开始 611 | amb = AutoMdxBuilder() 612 | print(Fore.CYAN + f"欢迎使用 AutoMdxBuilder {amb.settings.version}, 下面是功能选单:" + Fore.RESET) 613 | while True: 614 | print_menu() 615 | sel = input('\n请输入数字(回车或“0”退出程序): ') 616 | # 执行选择 617 | if re.match(r'\d+$', sel) and int(sel) in range(1, 50): 618 | print('\n------------------') 619 | amb.auto_processing(int(sel)) 620 | print('\n\n------------------------------------') 621 | # 判断是否继续 622 | ctn = input(Fore.CYAN + "回车退出程序, 或输入 Y/y 继续使用 AMB: " + Fore.RESET) 623 | if ctn not in ['Y', 'y']: 624 | break 625 | else: 626 | break 627 | 628 | 629 | if __name__ == '__main__': 630 | just_fix_windows_console() 631 | # logging.basicConfig(format='%(asctime)s | %(message)s', filename=tmp_set.file_log, filemode='w', level=logging.INFO) 632 | try: 633 | main() 634 | # logging.info('The program worked fine.') 635 | except: 636 | # logging.error(traceback.format_exc()) 637 | print(traceback.format_exc()) 638 | print(Fore.RED + "ERROR: " + Fore.RESET + "由于上述原因, 程序已中止运行") 639 | print('\n\n------------------------------------') 640 | input("回车退出程序:") 641 | -------------------------------------------------------------------------------- /ebook_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-15 18:43:07 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | import os 9 | import re 10 | import shutil 11 | import time 12 | from colorama import Fore 13 | # import codecs 14 | from pywinauto.application import Application 15 | from pywinauto.keyboard import send_keys 16 | from pywinauto.timings import Timings 17 | from PIL import Image 18 | import sys 19 | from mdict_utils.__main__ import run as mdict_cmd 20 | # import fitz 21 | # from fitz.__main__ import main as fitz_command 22 | 23 | 24 | class EbookUtils: 25 | """ 
电子书(PDF等)实用工具 """ 26 | def __init__(self, amb): 27 | self.settings = amb.settings 28 | 29 | # ========== (〇) mdict-utils ========== 30 | def mdict(self, parms): 31 | """ 执行 mdict-utils 程序 """ 32 | saved_parms = sys.argv[1:] 33 | sys.argv[1:] = parms 34 | mdict_cmd() 35 | sys.argv[1:] = saved_parms 36 | 37 | def export_mdx(self, mfile): 38 | """ 解包 mdx/mdd (取代 MdxExport.exe) """ 39 | done_flg = True 40 | if os.path.isfile(mfile) and mfile.endswith('.mdx'): 41 | out_dir = os.path.splitext(mfile)[0] 42 | self.mdict(['-x', mfile, '-d', out_dir]) 43 | for fname in os.listdir(out_dir): 44 | fp = os.path.join(out_dir, fname) 45 | if os.path.isfile(fp) and ('description' in fname.split('.')): 46 | fp_new = fp.replace('.description', '.info').replace('.mdx', '') 47 | os.rename(fp, fp_new) 48 | elif os.path.isfile(fp): 49 | fp_new = fp.replace('.mdx', '') 50 | os.rename(fp, fp_new) 51 | # 分析 info 信息, 确定是否支持词条顺序的还原 52 | order_flg = False 53 | for f in os.listdir(out_dir): 54 | fp = os.path.join(out_dir, f) 55 | text = '' 56 | if fp.endswith('.info.html'): 57 | with open(fp, 'r', encoding='utf-8') as fr: 58 | if re.search(r'

[^><]*?, (packed|built) with AutoMdxBuilder[^><]*?\.
', fr.read(), flags=re.I): 59 | # 符合条件, 支持词条顺序的还原 60 | order_flg = True 61 | break 62 | if order_flg: 63 | # 按编号精准还原源 txt 64 | xname = os.path.split(mfile)[1] 65 | file_final_txt = os.path.join(out_dir, xname.split('.')[0]+'.txt') 66 | entries = [] 67 | eid = '99999999' 68 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 69 | text = '' 70 | for line in fr: 71 | if re.match(r'', line): 72 | eid = re.match(r'', line).group(1) 73 | elif not re.match(r'\s*$', line): 74 | text += line 75 | else: 76 | text += line 77 | entries.append({"eid": eid, "text": text}) 78 | eid = '99999999' 79 | text = '' 80 | if eid != '': 81 | entries.sort(key=lambda x: x["eid"], reverse=False) 82 | with open(file_final_txt, 'w', encoding='utf-8') as fw: 83 | for entry in entries: 84 | fw.write(entry["text"]) 85 | else: 86 | print(Fore.YELLOW + "INFO: " + Fore.RESET + "检测到词典并非由 AMB 生成, 不保证词条顺序的准确还原") 87 | elif os.path.isfile(mfile) and mfile.endswith('.mdd'): 88 | cur_dir, mname = os.path.split(mfile) 89 | out_dir = os.path.join(os.path.splitext(mfile)[0], 'data') 90 | if os.path.exists(out_dir): 91 | shutil.rmtree(out_dir) 92 | # 检查是否存在 mdd 分包 93 | multi_mdd_flg = False 94 | mdd_names = [mname] 95 | for fname in os.listdir(cur_dir): 96 | if re.search(r'\.\d+\.mdd$', fname.lower()): 97 | multi_mdd_flg = True 98 | mdd_names.append(fname) 99 | # 按检查结果区分处理 100 | if multi_mdd_flg and input('检查到目录下存在 mdd 分包, 是否全部解包 (Y/N): ') in ('Y', 'y'): 101 | mdd_names = list(set(mdd_names)) 102 | mdd_names.sort() 103 | for mdd_name in mdd_names: 104 | print(f"开始解压 '{mdd_name}' :\n") 105 | self.mdict(['-x', os.path.join(cur_dir, mdd_name), '-d', out_dir]) 106 | else: 107 | self.mdict(['-x', mfile, '-d', out_dir]) 108 | else: 109 | print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误") 110 | done_flg = False 111 | return done_flg 112 | 113 | def pack_to_mdict(self, dir_output, file_final_txt, file_dict_info, dir_data): 114 | """ 打包 mdx/mdd (取代 MdxBuilder.exe) """ 115 | mdx_flg = True 116 | mdd_flg = True 117 | # 打包 mdx 118 | print('正在生成 mdx 文件……\n') 119 | ftitle = os.path.join(dir_output, os.path.splitext(os.path.split(file_final_txt)[1])[0]) 120 | if os.path.exists(file_final_txt) and os.path.exists(file_dict_info): 121 | # 给词条添加编号信息 122 | tmp_final_txt = os.path.join(os.path.join(self.settings.dir_bundle, '_tmp'), 'tmp_final.txt') 123 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 124 | with open(tmp_final_txt, 'w', encoding='utf-8') as fw: 125 | n = 0 126 | link_flg = False 127 | for line in fr: 128 | if re.match(r'@@@LINK=', line, flags=re.I): 129 | link_flg = True 130 | if (not link_flg) and re.match(r'\s*$', line): 131 | n += 1 132 | fw.write(f'\n') 133 | link_flg = False 134 | fw.write(line) 135 | self.mdict(['--description', file_dict_info, '--encoding', 'utf-8', '-a', tmp_final_txt, ftitle+'.mdx']) 136 | else: 137 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件 {file_final_txt} 或 {file_dict_info} 不存在") 138 | mdx_flg = False 139 | # 打包 mdd 140 | if dir_data is not None: 141 | mdd_flg = self.pack_to_mdd(dir_data, ftitle) 142 | if mdx_flg and mdd_flg: 143 | return True 144 | else: 145 | return False 146 | 147 | def pack_to_mdd(self, dir_data, ftitle): 148 | """ 仅打包 mdd (取代 MdxBuilder.exe) """ 149 | done_flg = True 150 | pack_flg = True 151 | if ftitle is None: 152 | ftitle = dir_data 153 | # 判断是否打包 154 | if os.path.exists(dir_data) and len(os.listdir(dir_data)) > 0: 155 | if os.path.exists(ftitle+'.mdd'): 156 | a = input(f'文件 "{ftitle}.mdd" 已存在, 是否重新打包 mdd (Y/N): ') 157 | if a not in ('Y', 'y'): 158 | pack_flg = 
False 159 | else: 160 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件夹 {dir_data} 不存在或为空") 161 | pack_flg = False 162 | done_flg = False 163 | # 开始打包 164 | if pack_flg: 165 | print('正在生成 mdd 文件……\n') 166 | # 检查子文件夹的数量 167 | sub_dirs = [] 168 | for item in os.listdir(dir_data): 169 | if os.path.isdir(os.path.join(dir_data, item)): 170 | sub_dirs.append(os.path.join(dir_data, item)) 171 | # 如果有2个子文件夹以上, 再计算子文件夹大小, 如果大小超过 1.5G, 将分包 172 | split_flg = False 173 | size_sum = 0 174 | if len(sub_dirs) > 1: 175 | # 判断子文件夹大小 176 | for sub_dir in sub_dirs: 177 | for fname in os.listdir(sub_dir): 178 | if os.path.isfile(os.path.join(sub_dir, fname)): 179 | size_sum += os.path.getsize(os.path.join(sub_dir, fname)) 180 | if size_sum > 1536000000: 181 | split_flg = True 182 | break 183 | # 按检查结果开始处理 184 | if split_flg: 185 | size_sum = 0 186 | print(Fore.YELLOW + "INFO: " + Fore.RESET + "资料文件夹超过 1.5G, 将自动分包") 187 | # 创建临时文件夹 188 | tmp_dir = os.path.join(os.path.split(dir_data)[0], '_packing') 189 | if not os.path.exists(tmp_dir): 190 | os.makedirs(tmp_dir) 191 | pack_list = [] 192 | pack = [] 193 | n = 0 194 | # 对每个子文件夹作判断 195 | for i in range(len(sub_dirs)): 196 | for fname in os.listdir(sub_dirs[i]): 197 | if os.path.isfile(os.path.join(sub_dirs[i], fname)): 198 | size_sum += os.path.getsize(os.path.join(sub_dirs[i], fname)) 199 | if size_sum > 1024000000: 200 | size_sum = 0 201 | pack.append(sub_dirs[i]) 202 | pack_list.append(pack) 203 | pack = [] 204 | break 205 | pack.append(sub_dirs[i]) 206 | n = i 207 | # 1.打包子文件夹 208 | mdd_rk = 0 209 | for sds in pack_list: 210 | for sd in sds: 211 | # 移动到临时文件夹中 212 | os.rename(sd, os.path.join(tmp_dir, os.path.split(sd)[1])) 213 | # 移完之后打包 214 | if mdd_rk == 0: 215 | self.mdict(['-a', tmp_dir, ftitle+'.mdd']) 216 | else: 217 | self.mdict(['-a', tmp_dir, f'{ftitle}.{str(mdd_rk)}.mdd']) 218 | # 打包完再移回去 219 | for fname in os.listdir(tmp_dir): 220 | os.rename(os.path.join(tmp_dir, fname), os.path.join(dir_data, fname)) 221 | mdd_rk += 1 222 | # 1.打包剩余部分 223 | # 移动文件夹部分(如果有) 224 | if n == len(sub_dirs) - 1: 225 | for sd in pack: 226 | os.rename(sd, os.path.join(tmp_dir, os.path.split(sd)[1])) 227 | # 移动文件部分(如果有) 228 | for item in os.listdir(dir_data): 229 | if not os.path.isdir(os.path.join(dir_data, item)): 230 | os.rename(os.path.join(dir_data, item), os.path.join(tmp_dir, item)) 231 | # 打包 232 | if len(os.listdir(tmp_dir)) == 0: 233 | pass 234 | else: 235 | self.mdict(['-a', tmp_dir, f'{ftitle}.{str(mdd_rk)}.mdd']) 236 | # 移回去 237 | for fname in os.listdir(tmp_dir): 238 | os.rename(os.path.join(tmp_dir, fname), os.path.join(dir_data, fname)) 239 | # 删除临时文件夹 240 | if os.path.exists(tmp_dir): 241 | os.rmdir(tmp_dir) 242 | else: 243 | self.mdict(['-a', dir_data, ftitle+'.mdd']) 244 | return done_flg 245 | 246 | # ========== (一) From PDF to Images ========== 247 | def pdf_to_imgs(self, file_pdf, dir_out): 248 | """ 自动判断文字版/图片版PDF, 并选择最优方法导出图像 """ 249 | # 准备环境 250 | file_exe = os.path.join(os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'MuPDF'), 'mutool.exe') 251 | dir_tmp = os.path.join(self.settings.dir_bundle, '_tmp') 252 | if not os.path.exists(dir_tmp): 253 | os.makedirs(dir_tmp) 254 | dir_tmp_mp = os.path.join(dir_tmp, 'MuPDF_tmp') 255 | if not os.path.exists(dir_tmp_mp): 256 | os.makedirs(dir_tmp_mp) 257 | tmp_txt = os.path.join(dir_tmp_mp, 'text.txt') 258 | # 判断是文字版还是图片版PDF 259 | img_pdf_flg = True 260 | os.system(f'{file_exe} draw -o {tmp_txt} -F text "{file_pdf}" 2-11') 261 | with open(tmp_txt, 'r', encoding='utf-8') as fr: 262 | word = 
re.sub(r'[\r\n\s]', '', fr.read()) 263 | if len(word) > 50: 264 | img_pdf_flg = False 265 | # 开始处理 266 | if img_pdf_flg: 267 | self.extract_pdf_to_imgs_pdfpatcher(file_pdf, dir_out) 268 | else: 269 | self.convert_pdf_to_imgs(file_pdf, dir_out) 270 | shutil.rmtree(dir_tmp_mp) 271 | 272 | def convert_pdf_to_imgs(self, file_pdf, dir_out, dpi=300): 273 | """ 使用 mutool.exe 按 DPI 参数转换成图片 (推荐用于文字版PDF) """ 274 | # 准备文件夹 275 | file_exe = os.path.join(os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'MuPDF'), 'mutool.exe') 276 | if not os.path.exists(dir_out): 277 | os.makedirs(dir_out) 278 | file_png = os.path.join(dir_out, '%06d.png') 279 | # 开始转换 280 | os.system(f'{file_exe} draw -o "{file_png}" -F png -r {str(dpi)} "{file_pdf}"') 281 | print('转换完成!') 282 | 283 | # def convert_pdf_to_imgs_fitz(self, file_pdf, dir_out, dpi=300): 284 | # """ 使用 fitz(mupdf), 按 DPI 等参数转换成图片 """ 285 | # # 读取 pdf 286 | # doc = fitz.open(file_pdf) 287 | # mat = fitz.Matrix(1, 1) 288 | # count = 0 289 | # for p in doc: 290 | # count += 1 291 | # # 开始导出 292 | # if not os.path.exists(dir_out): 293 | # os.makedirs(dir_out) 294 | # print('转换中……') 295 | # for i in range(count): 296 | # fname = f"{str(i+1).zfill(8)}.png" 297 | # page = doc.load_page(i) 298 | # pix = page.get_pixmap(matrix=mat, dpi=dpi, colorspace=fitz.csGRAY, alpha=False) 299 | # pix.save(os.path.join(dir_out, fname)) 300 | # doc.close() 301 | # print('转换完成!') 302 | 303 | def extract_pdf_to_imgs(self, file_pdf, dir_out): 304 | """ Extracting images with mutool.exe (Windows only) """ 305 | # 1.extract to tmp folder 306 | file_exe = os.path.join(os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'MuPDF'), 'mutool.exe') 307 | dir_tmp = os.path.join(self.settings.dir_bundle, '_tmp') 308 | if not os.path.exists(dir_tmp): 309 | os.makedirs(dir_tmp) 310 | dir_tmp_me = os.path.join(dir_tmp, 'MuPDF_extract') 311 | if not os.path.exists(dir_tmp_me): 312 | os.makedirs(dir_tmp_me) 313 | os.chdir(dir_tmp_me) 314 | os.system(f'{file_exe} extract "{file_pdf}"') 315 | os.chdir(self.settings.dir_bundle) 316 | # 2.remove to destination 317 | imgs = [] 318 | for fname in os.listdir(dir_tmp_me): 319 | ext = os.path.splitext(fname)[1].lower() 320 | if ext in self.settings.img_exts: 321 | imgs.append({"path": os.path.join(dir_tmp_me, fname), "ext": ext}) 322 | if not os.path.exists(dir_out): 323 | os.makedirs(dir_out) 324 | imgs.sort(key=lambda x: x["path"], reverse=False) 325 | n = 0 326 | for img in imgs: 327 | n += 1 328 | os.rename(img["path"], os.path.join(dir_out, str(n).zfill(6)+img["ext"])) 329 | shutil.rmtree(dir_tmp_me) 330 | print('提取完成!') 331 | 332 | def extract_pdf_to_imgs_pdfpatcher(self, file_pdf, dir_out): 333 | """ Extracting images with PDFPatcher.exe (Windows only) """ 334 | # 0.配置程序选项 335 | dir_program = os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'PDFPatcher') 336 | file_conf_bak = os.path.join(self.settings.dir_lib, 'PDFPatcher_AppConfig.json') 337 | file_conf = os.path.join(dir_program, 'AppConfig.json') 338 | shutil.copy(file_conf_bak, file_conf) 339 | # 1.启动 PDFPatcher 程序, 配置提取选项 340 | Timings.fast() 341 | app = Application(backend='win32').start(os.path.join(dir_program, 'PDFPatcher.exe')) 342 | dlg_main = app.window(title_re='.*PDF.*补丁丁') 343 | dlg_main.wait('ready', timeout=10) 344 | send_keys('%{g}tt') 345 | dlg_extract = dlg_main 346 | dlg_extract.wait('ready', timeout=2).children()[38].set_text(file_pdf) 347 | dlg_extract.wait('ready', timeout=2).children()[33].set_text(dir_out) 348 | # 2.开始提取 349 | 
dlg_extract.wait('ready', timeout=2).children()[6].click() 350 | time.sleep(0.2) 351 | # print(dlg_extract.children()[52].GetProperties()) 352 | while True: 353 | if '返回' in dlg_extract.children()[52].texts(): 354 | dlg_extract.children()[52].click() 355 | app.kill() 356 | break 357 | else: 358 | time.sleep(0.2) 359 | print('提取完成!') 360 | 361 | # def extract_pdf_to_imgs_fitz(self, file_pdf, dir_out): 362 | # """ 使用 fitz(mupdf), 如果生成了JBIG2加密的 jb2,则还需要使用 jbig2dec 解密成 png """ 363 | # # 准备参数 364 | # cmd = ['extract', str(file_pdf), '-images', '-output', str(dir_out)] 365 | # saved_parms = sys.argv[1:] 366 | # sys.argv[1:] = cmd 367 | # # 开始导出 368 | # if not os.path.exists(dir_out): 369 | # os.makedirs(dir_out) 370 | # print('提取中……') 371 | # fitz_command() 372 | # sys.argv[1:] = saved_parms 373 | # print('提取完成!') 374 | 375 | # ========== (二) From Images to PDF ========== 376 | def combine_img_to_pdf(self, dir_imgs, file_pdf): 377 | """ use mutool.exe to combine images to pdf file (Windows only) """ 378 | # prepare paths 379 | file_exe = os.path.join(os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'MuPDF'), 'mutool.exe') 380 | dir_tmp = os.path.join(self.settings.dir_bundle, '_tmp') 381 | if not os.path.exists(dir_tmp): 382 | os.makedirs(dir_tmp) 383 | dir_pcs = os.path.join(dir_tmp, 'MuPDF_pcs') 384 | dir_pdf_frag = os.path.join(dir_tmp, 'MuPDF_pdf_frag') 385 | dir_pdf_merge = os.path.join(dir_tmp, 'MuPDF_pdf_merge') 386 | if not os.path.exists(dir_pcs): 387 | os.makedirs(dir_pcs) 388 | if not os.path.exists(dir_pdf_frag): 389 | os.makedirs(dir_pdf_frag) 390 | if not os.path.exists(dir_pdf_merge): 391 | os.makedirs(dir_pdf_merge) 392 | file_pcs = os.path.join(self.settings.dir_lib, 'MuPDF_pcs.txt') 393 | # read image files to get sizes 394 | imgs = [] 395 | for fname in os.listdir(dir_imgs): 396 | fp = os.path.join(dir_imgs, fname) 397 | if os.path.splitext(fp)[1].lower() in self.settings.img_exts: 398 | img = { 399 | "fname": fname, 400 | "path": fp, 401 | "size": Image.open(fp).size 402 | } 403 | imgs.append(img) 404 | imgs.sort(key=lambda x: x["fname"], reverse=False) 405 | # generate pcs(Page content streams) txt file 406 | with open(file_pcs, 'r', encoding='utf-8') as fr: 407 | text = fr.read() 408 | page_num = 0 409 | txts = [] 410 | for img in imgs: 411 | page_num += 1 412 | pcs = text.replace('', str(page_num).zfill(6)) 413 | pcs = pcs.replace('', img["path"]) 414 | pcs = pcs.replace('', str(img["size"][0])) 415 | pcs = pcs.replace('', str(img["size"][1])) 416 | txt = os.path.join(dir_pcs, str(page_num).zfill(6)+'.txt') 417 | with open(txt, 'w', encoding='utf-8') as fw: 418 | fw.write(pcs) 419 | txts.append(txt) 420 | # start to create pdf fragments 421 | pdfs = [] 422 | n, k, step = 1, 1, 20 423 | total_step = int(page_num/step + 1) 424 | while k <= total_step: 425 | pcs_str = '' 426 | bound = k*step 427 | while n <= min(bound, page_num): 428 | pcs_str = pcs_str + ' ' + txts[n-1] 429 | n += 1 430 | tmp_pdf = os.path.join(dir_pdf_frag, str(k).zfill(3)+'.pdf') 431 | os.system(f'{file_exe} create -o {tmp_pdf} -O compress-images {pcs_str}') 432 | print(f'[{str(min(n,page_num))}/{str(page_num)}]PDF合成中') 433 | pdfs.append(tmp_pdf) 434 | k += 1 435 | # merge fragments 436 | pdf_str = '' 437 | file_num = len(pdfs) 438 | n, k, step = 1, 1, 10 439 | total_step = int(file_num/step + 1) 440 | while k <= total_step: 441 | merge_str = '' 442 | bound = k*step 443 | while n <= min(bound, file_num): 444 | merge_str = merge_str + ' ' + pdfs[n-1] 445 | n += 1 446 | tmp_pdf = 
os.path.join(dir_pdf_merge, str(k).zfill(2)+'.pdf') 447 | os.system(f'{file_exe} merge -o {tmp_pdf} {merge_str}') 448 | pdf_str = pdf_str + ' ' + tmp_pdf 449 | k += 1 450 | # output final single file 451 | os.system(f'{file_exe} merge -o "{file_pdf}" {pdf_str}') 452 | shutil.rmtree(dir_pcs) 453 | shutil.rmtree(dir_pdf_frag) 454 | shutil.rmtree(dir_pdf_merge) 455 | print('合成完成!') 456 | 457 | def combine_img_to_pdf_fp2p(self, dir_imgs, file_pdf): 458 | """ 使用 FreePic2Pdf.exe 图像合成 pdf """ 459 | # 0.配置转换选项, 设定图像文件夹 460 | dir_program = os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'FreePic2Pdf') 461 | file_ini_bak = os.path.join(self.settings.dir_lib, 'FreePic2Pdf.ini') 462 | file_ini = os.path.join(dir_program, 'FreePic2Pdf.ini') 463 | with open(file_ini_bak, 'r', encoding='utf-16le') as fr: 464 | para_item = 'PARA_DIR_SRC='+dir_imgs.replace('\\', '\\\\') 465 | text = re.sub(r'^PARA_DIR_SRC=.+$', para_item, fr.read(), flags=re.M) 466 | with open(file_ini, 'w', encoding='utf-16le') as fw: 467 | fw.write(text) 468 | # 1.启动 FreePic2Pdf 程序 469 | Timings.fast() 470 | app = Application(backend='win32').start(os.path.join(dir_program, 'FreePic2Pdf.exe')) 471 | dlg_main = app.FreePic2Pdf 472 | # 2.设定输出 pdf 文件路径 473 | dlg_main.wait('ready', timeout=10).children()[32].set_edit_text(file_pdf) 474 | # 3.开始合成 pdf 475 | dlg_main.children()[20].click() # 点击执行 476 | while True: 477 | if app.window(title='FreePic2Pdf', predicate_func=lambda dlg: len(dlg.children()) == 3).exists(): 478 | app.window( 479 | title='FreePic2Pdf', predicate_func=lambda dlg: len(dlg.children()) == 3 480 | ).wait('ready', timeout=2).children()[0].click() 481 | app.kill() 482 | break 483 | else: 484 | time.sleep(0.2) 485 | print('PDF 生成完毕!') 486 | 487 | # ========== (三) From Other Formats to Images ========== 488 | def convert_pdg_to_img(self, dir_pdg, dir_out): 489 | """ 使用 Pdg2Pic.exe 转换 pdgs 为 imgs """ 490 | # 0.配置转换选项, 设定输出文件夹 491 | dir_program = os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'Pdg2Pic') 492 | file_ini_bak = os.path.join(self.settings.dir_lib, 'Pdg2Pic.ini') 493 | file_ini = os.path.join(dir_program, 'Pdg2Pic.ini') 494 | with open(file_ini_bak, 'r', encoding='utf-16le') as fr: 495 | para_item = 'PARA_DIR_TGT='+dir_out.replace('\\', '\\\\') 496 | text = re.sub(r'^PARA_DIR_TGT=.+$', para_item, fr.read(), flags=re.M) 497 | with open(file_ini, 'w', encoding='utf-16le') as fw: 498 | fw.write(text) 499 | # 1.启动 Pdg2Pic 程序 500 | Timings.fast() 501 | app = Application(backend='win32').start(os.path.join(dir_program, 'Pdg2Pic.exe')) 502 | dlg_main = app.Pdg2Pic 503 | # 2.读取输入的 PDG 文件夹 504 | dlg_main.wait('ready', timeout=10).children()[3].click() # 打开文件夹选择框 505 | dlg_sel = app.window(title=u'选择存放PDG文件的文件夹') 506 | dlg_sel.wait('ready', timeout=5).children()[6].set_text(dir_pdg) 507 | dlg_sel.children()[9].click() 508 | app.window(title=u'格式统计').wait('ready', timeout=3).children()[0].click() 509 | # dlg_sum = app.window(title=u'格式统计').wait('ready', timeout=3) 510 | # while True: 511 | # if 'OK' in dlg_sum.children()[0].texts(): 512 | # dlg_sum.children()[0].click() 513 | # break 514 | # else: 515 | # time.sleep(0.05) 516 | # 3.开始转换 517 | while True: 518 | if not app.window(title=u'格式统计').exists(): 519 | dlg_main.children()[0].click() # 点击执行 520 | break 521 | else: 522 | time.sleep(0.05) 523 | while True: 524 | if app.window(title='Pdg2Pic', predicate_func=lambda dlg: len(dlg.children()) == 3).exists(): 525 | app.window( 526 | title='Pdg2Pic', predicate_func=lambda dlg: len(dlg.children()) == 3 
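                    # Note: the finished prompt is told apart from the Pdg2Pic main window purely by its
                    # child-control count of 3 (presumably an icon, the message text and an OK button;
                    # this is inferred from the predicate above rather than from any Pdg2Pic documentation),
                    # and children()[0] on the following line is assumed to be that OK button.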
527 | ).wait('ready', timeout=2).children()[0].click() 528 | app.kill() 529 | break 530 | else: 531 | time.sleep(0.2) 532 | print('转换完成!') 533 | 534 | # ========== (四) PDF Bookmark Management ========== 535 | def eximport_bkmk_fp2p(self, file_pdf, dir_bkmk, export_flg=True): 536 | """ 使用 FreePic2Pdf.exe 向/从 pdf 文件中导入/导出书签 """ 537 | dir_program = os.path.join(os.path.join(self.settings.dir_bundle, 'tools'), 'FreePic2Pdf') 538 | # 1.启动 FreePic2Pdf 程序 539 | Timings.fast() 540 | app = Application(backend='win32').start(os.path.join(dir_program, 'FreePic2Pdf.exe')) 541 | dlg_main = app.FreePic2Pdf 542 | dlg_main.wait('ready', timeout=10).children()[30].click() # 点击进入书签导入/导出窗口 543 | dlg_iebkmk = app.window(title=u'Import/Export PDF Bookmark') 544 | if export_flg: 545 | dlg_iebkmk.wait('ready', timeout=5).children()[26].select(1) # 切换到书签导出栏 546 | # 2.选定 pdf 文件 547 | time.sleep(0.1) 548 | dlg_iebkmk.children()[4].click() # 打开文件选择框 549 | dlg_sel_pdf = app.window(title=u'Select File') 550 | dlg_sel_pdf.wait('ready', timeout=5).children()[12].set_text(file_pdf) 551 | dlg_sel_pdf.children()[16].click() # 选中待处理的 pdf 文件 552 | # 3.选定书签文件夹 553 | if not os.path.exists(dir_bkmk): 554 | os.makedirs(dir_bkmk) 555 | while True: 556 | if not app.window(title=u'Select File').exists(): 557 | break 558 | else: 559 | time.sleep(0.05) 560 | dlg_iebkmk.children()[9].click() # 打开文件夹选择框 561 | dlg_sel_folder = app.window(title=u'Source Folder') 562 | dlg_sel_folder.wait('ready', timeout=5).children()[6].set_edit_text(dir_bkmk) 563 | dlg_sel_folder.children()[9].click() 564 | # 3.开始导入/导出 565 | while True: 566 | if not app.window(title=u'Source Folder').exists(): 567 | dlg_iebkmk.children()[0].click() # 点击执行 568 | break 569 | else: 570 | time.sleep(0.05) 571 | while True: 572 | if app.window(title='FreePic2Pdf', predicate_func=lambda dlg: len(dlg.children()) == 3).exists(): 573 | app.window( 574 | title='FreePic2Pdf', predicate_func=lambda dlg: len(dlg.children()) == 3 575 | ).wait('ready', timeout=2).children()[0].click() 576 | app.kill() 577 | break 578 | else: 579 | time.sleep(0.2) 580 | if export_flg: 581 | # [备用1]utf-16判断有无BOM 582 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'rb') as frb: 583 | # encoded_text = frb.read() 584 | # bom = codecs.BOM_UTF16_LE 585 | # if encoded_text.startswith(bom): 586 | # bkmk_itf = encoded_text[len(bom):].decode('utf-16le') 587 | # else: 588 | # bkmk_itf = encoded_text.decode('utf-16le') 589 | # base_page = re.search(r'(?<=BasePage=)(\d+)', bkmk_itf) 590 | # if base_page: 591 | # bkmk_itf = re.sub(r'^TextPage=$', 'TextPage='+base_page.group(0), bkmk_itf, flags=re.M) 592 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'rb') as frb: 593 | # encoded_text = frb.read() 594 | # bom = codecs.BOM_UTF16_LE 595 | # if encoded_text.startswith(bom): 596 | # bkmk_text = encoded_text[len(bom):].decode('utf-16le') 597 | # else: 598 | # bkmk_text = encoded_text.decode('utf-16le') 599 | # [备用2]考虑是否一律转utf-8 600 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'r', encoding='utf-16') as fr: 601 | # bkmk_itf = fr.read() 602 | # base_page = re.search(r'(?<=BasePage=)(\d+)', bkmk_itf) 603 | # if base_page: 604 | # bkmk_itf = re.sub(r'^TextPage=$', 'TextPage='+base_page.group(0), bkmk_itf, flags=re.M) 605 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'r', encoding='utf-16') as fr: 606 | # bkmk_text = fr.read() 607 | # dir_bkmk_bk = os.path.join(self.settings.dir_lib, 'bkmk') 608 | # shutil.copy(os.path.join(dir_bkmk_bk, "FreePic2Pdf.itf"), 
os.path.join(dir_bkmk, "FreePic2Pdf.itf")) 609 | # shutil.copy(os.path.join(dir_bkmk_bk, "FreePic2Pdf_bkmk.txt"), os.path.join(dir_bkmk, "FreePic2Pdf_bkmk.txt")) 610 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'w', encoding='utf-8') as fw: 611 | # fw.write(bkmk_itf) 612 | # with open(os.path.join(dir_bkmk, 'FreePic2Pdf_bkmk.txt'), 'w', encoding='utf-8') as fw: 613 | # fw.write(bkmk_text) 614 | with open(os.path.join(dir_bkmk, 'FreePic2Pdf.itf'), 'r+', encoding='utf-16le') as fr: 615 | bkmk_itf = fr.read() 616 | base_page = re.search(r'(?<=BasePage=)(\d+)', bkmk_itf) 617 | if base_page: 618 | bkmk_itf = re.sub(r'^TextPage=$', 'TextPage='+base_page.group(0), bkmk_itf, flags=re.M) 619 | fr.seek(0) 620 | fr.truncate() 621 | fr.write(bkmk_itf) 622 | print('书签导出完成!') 623 | else: 624 | print('书签导入完成!') 625 | -------------------------------------------------------------------------------- /func_lib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:53 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | import os 9 | import re 10 | import shutil 11 | from copy import copy 12 | from datetime import datetime 13 | # import chardet 14 | from colorama import Fore 15 | from opencc import OpenCC 16 | 17 | 18 | class FuncLib(): 19 | """ functions for invoking """ 20 | def __init__(self, amb): 21 | self.settings = amb.settings 22 | 23 | def index_all_to_toc(self, file_index_all, file_toc_all, vol_i=0, fill_flg=False): 24 | """ index_all 文件转 toc_all 文件 """ 25 | done_flg = True 26 | if self.text_file_check(file_index_all) == 2: 27 | # 读取 28 | dcts = [] 29 | with open(file_index_all, 'r', encoding='utf-8') as fr: 30 | level = 0 31 | i = 0 32 | for line in fr: 33 | i += 1 34 | # 要先扫描章节再扫描词条 35 | mth_stem = self.settings.pat_stem.match(line) 36 | if mth_stem: 37 | # 无卷标章节 38 | level = int(mth_stem.group(1)) 39 | if mth_stem.group(3) == '': 40 | dcts.append({"level": level, "name": mth_stem.group(2), "page": 0, "vol_n": vol_i+1}) 41 | else: 42 | dcts.append({"level": level, "name": mth_stem.group(2), "page": int(mth_stem.group(3)), "vol_n": vol_i+1}) 43 | elif self.settings.pat_stem_vol.match(line): 44 | # 有卷标章节 45 | mth_vol_stem = self.settings.pat_stem_vol.match(line) 46 | level = int(mth_vol_stem.group(1)) 47 | if mth_vol_stem.group(4) == '': 48 | dcts.append({"level": level, "name": mth_vol_stem.group(2), "page": 0, "vol_n": int(mth_vol_stem.group(3))}) 49 | else: 50 | dcts.append({"level": level, "name": mth_vol_stem.group(2), "page": int(mth_vol_stem.group(4)), "vol_n": int(mth_vol_stem.group(3))}) 51 | elif self.settings.pat_index.match(line): 52 | # 无卷标词条 53 | mth = self.settings.pat_index.match(line) 54 | dcts.append({"level": level+1, "name": mth.group(1), "page": int(mth.group(2)), "vol_n": vol_i+1}) 55 | elif self.settings.pat_index_vol.match(line): 56 | # 有卷标词条 57 | mth_vol = self.settings.pat_index_vol.match(line) 58 | dcts.append({"level": level+1, "name": mth_vol.group(1), "page": int(mth_vol.group(3)), "vol_n": int(mth_vol.group(2))}) 59 | else: 60 | print(Fore.RED + "ERROR: " + Fore.RESET + f"第 {i} 行格式有误, 请检查") 61 | done_flg = False 62 | break 63 | # 输出 64 | if done_flg: 65 | with open(file_toc_all, 'w', encoding='utf-8') as fw: 66 | p_fill = 1 67 | for x in range(len(dcts)): 68 | dct = dcts[x] 69 | # 判断是否要加卷标 70 | if dct["vol_n"] == 1: 71 | s_vol = '' 72 | else: 73 | s_vol = '['+str(dct["vol_n"])+']' 74 | 
# 开始写入 75 | if dct["page"] != 0: 76 | fw.write('\t'*dct["level"] + f'{dct["name"]}\t{s_vol}{str(dct["page"])}\n') 77 | elif fill_flg: 78 | # 向后检索页码来填充 79 | for d in dcts[x+1:]: 80 | if d["page"] != 0: 81 | p_fill = d["page"] 82 | break 83 | fw.write('\t'*dct["level"] + f'{dct["name"]}\t{s_vol}{str(p_fill)}\n') 84 | # 如果向后仍未检索到页码(待补充) 85 | else: 86 | fw.write('\t'*dct["level"] + f'{dct["name"]}\n') 87 | else: 88 | done_flg = False 89 | return done_flg 90 | 91 | def toc_all_to_index(self, file_toc_all, file_index_all): 92 | """ toc_all 文件转 index_all 文件 """ 93 | if self.text_file_check(file_toc_all) == 2: 94 | # 读取 toc_all.txt 95 | pairs = self.read_toc_file(file_toc_all) 96 | # 识别收集非章节的词条索引 97 | index, entries_tmp = [], [] 98 | child_flg = False 99 | for i in range(1, len(pairs)): 100 | if pairs[i]["level"] == pairs[i-1]["level"]: 101 | if child_flg: 102 | # 满足条件, 继续收集 103 | entries_tmp.append(i) 104 | elif pairs[i]["level"] > pairs[i-1]["level"]: 105 | # 是展开节点, 开启收集 106 | entries_tmp = [] 107 | child_flg = True 108 | entries_tmp.append(i) 109 | else: 110 | # 展开结束, 归档, 清空篮子 111 | index += entries_tmp 112 | entries_tmp = [] 113 | child_flg = False 114 | # 补漏(因为最末一次收集可能未归档) 115 | if len(entries_tmp) > 0: 116 | index += entries_tmp 117 | # 生成 index_all.txt 118 | with open(file_index_all, 'w', encoding='utf-8') as fw: 119 | for i in range(len(pairs)): 120 | vol_n = pairs[i]["vol_n"] # 若 vol_n 大于 1 则标示分卷号 121 | if i in index: 122 | # 检查是否存在索引条无页码 123 | if pairs[i]["page"] == 0: 124 | str_p = '1' 125 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"第 {i+1} 行普通索引条无页码, 已设置为默认值 1") 126 | else: 127 | str_p = str(pairs[i]["page"]) 128 | # 写入索引条 129 | if vol_n > 1: 130 | fw.write(f'{pairs[i]["title"]}\t[{str(vol_n)}]{str_p}\n') 131 | else: 132 | fw.write(f'{pairs[i]["title"]}\t{str_p}\n') 133 | elif pairs[i]["page"] == 0: 134 | fw.write(f'【L{str(pairs[i]["level"])}】{pairs[i]["title"]}\t\n') 135 | else: 136 | if vol_n > 1: 137 | fw.write(f'【L{str(pairs[i]["level"])}】{pairs[i]["title"]}\t[{str(vol_n)}]{str(pairs[i]["page"])}\n') 138 | else: 139 | fw.write(f'【L{str(pairs[i]["level"])}】{pairs[i]["title"]}\t{str(pairs[i]["page"])}\n') 140 | return True 141 | else: 142 | return False 143 | 144 | def read_toc_file(self, file_toc, vol_i=0): 145 | """ 读取 toc/toc_all 文件 """ 146 | pairs = [] 147 | with open(file_toc, 'r', encoding='utf-8') as fr: 148 | i = 1 149 | for line in fr: 150 | mth = self.settings.pat_toc.match(line) 151 | if mth: 152 | pair = { 153 | "level": len(mth.group(1)), 154 | "title": mth.group(2), 155 | "page": int(mth.group(3)), 156 | "vol_n": vol_i+1 157 | } 158 | pairs.append(pair) 159 | elif self.settings.pat_toc_blank.match(line): 160 | mth_blank = self.settings.pat_toc_blank.match(line) 161 | pair = { 162 | "level": len(mth_blank.group(1)), 163 | "title": mth_blank.group(2), 164 | "page": 0, 165 | "vol_n": vol_i+1 166 | } 167 | pairs.append(pair) 168 | elif self.settings.pat_toc_vol.match(line): 169 | mth_vol = self.settings.pat_toc_vol.match(line) 170 | pair = { 171 | "level": len(mth_vol.group(1)), 172 | "title": mth_vol.group(2), 173 | "page": int(mth_vol.group(4)), 174 | "vol_n": int(mth_vol.group(3)) 175 | } 176 | pairs.append(pair) 177 | else: 178 | print(Fore.RED + "ERROR: " + Fore.RESET + f"第 {i} 行未匹配, 已忽略") 179 | pairs = [] 180 | break 181 | i += 1 182 | return pairs 183 | 184 | def read_index_file(self, file_index, vol_i=0): 185 | """ 读取 index 文件 """ 186 | pairs = [] 187 | with open(file_index, 'r', encoding='utf-8') as fr: 188 | i = 1 189 | for line in fr: 190 | mth = 
self.settings.pat_index.match(line) 191 | if mth: 192 | pair = { 193 | "title": mth.group(1), 194 | "page": int(mth.group(2)), 195 | "vol_n": vol_i+1 196 | } 197 | pairs.append(pair) 198 | elif self.settings.pat_index_vol.match(line): 199 | mth_vol = self.settings.pat_index_vol.match(line) 200 | pair = { 201 | "title": mth_vol.group(1), 202 | "page": int(mth_vol.group(3)), 203 | "vol_n": int(mth_vol.group(2)) 204 | } 205 | pairs.append(pair) 206 | else: 207 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"第 {i} 行未匹配, 已忽略") 208 | i += 1 209 | return pairs 210 | 211 | def merge_to_index_all(self, file_toc, file_index, file_index_all): 212 | """ 将 toc 和 index 文件合并成 index_all 文件 """ 213 | # 思路: 先合并为 toc_all, 再转换成 index_all 214 | if self.text_file_check(file_toc) == 2: 215 | done_flg = True 216 | # 1.读取 toc 217 | toc_pairs = self.read_toc_file(file_toc) 218 | if toc_pairs: 219 | # 判断 toc 是否是有序的(无序则不支持合并), 同时生成新页码(填充无页码章节) 220 | rank_last = -100000 221 | p_fill = 1 222 | for i in range(len(toc_pairs)): 223 | toc_pairs[i]["page_new"] = copy(toc_pairs[i]["page"]) 224 | dct = toc_pairs[i] 225 | if dct["page"] == 0: 226 | # 向后检索页码来填充 227 | for d in toc_pairs[i+1:]: 228 | if d["page"] != 0: 229 | p_fill = d["page"] 230 | break 231 | toc_pairs[i]["page_new"] = copy(p_fill) 232 | elif dct["vol_n"]*100000+dct["page"] < rank_last: 233 | print(Fore.RED + "ERROR: " + Fore.RESET + f"目录文件第 {i} 行页码乱序, 不支持合并") 234 | done_flg = False 235 | break 236 | else: 237 | rank_last = dct["vol_n"]*100000+dct["page"] 238 | else: 239 | done_flg = False 240 | # 2.读取 index 241 | index_pairs = self.read_index_file(file_index) 242 | if index_pairs: 243 | # 排序确保是有序的 244 | index_pairs.sort(key=lambda x: x["vol_n"]*100000+x["page"], reverse=False) 245 | else: 246 | print(Fore.RED + "ERROR: " + Fore.RESET + "读取索引文件失败") 247 | done_flg = False 248 | # 3.排序合并 toc 和 index 249 | if done_flg: 250 | toc_unsure = [] 251 | toc_wrong = [] 252 | file_tmp = os.path.join(self.settings.dir_output_tmp, self.settings.fname_toc_all) 253 | with open(file_tmp, 'w', encoding='utf-8') as fw: 254 | i = 0 255 | j = 0 256 | # toc 除最后一行 257 | for i in range(len(toc_pairs)-1): 258 | level = toc_pairs[i]["level"] 259 | # 先写入目录条 260 | if toc_pairs[i]["vol_n"] > 1: 261 | vol_toc = '['+str(toc_pairs[i]["vol_n"])+']' 262 | else: 263 | vol_toc = '' 264 | if toc_pairs[i]["page"] != 0: 265 | fw.write('\t'*level + f'{toc_pairs[i]["title"]}\t{vol_toc}{str(toc_pairs[i]["page"])}\n') 266 | else: 267 | fw.write('\t'*level + f'{toc_pairs[i]["title"]}\n') 268 | # 2.写入符合的索引行 269 | for x in range(j, len(index_pairs)): 270 | if index_pairs[x]["vol_n"] > 1: 271 | vol_index = '['+str(index_pairs[x]["vol_n"])+']' 272 | else: 273 | vol_index = '' 274 | rk = index_pairs[x]["vol_n"]*100000+index_pairs[x]["page"] 275 | # a.小于当前章节: 写入(排序错误) 276 | if (rk < toc_pairs[i]["vol_n"]*100000+toc_pairs[i]["page_new"]): 277 | fw.write('\t'*(level+1) + f'{index_pairs[x]["title"]}\t{vol_index}{str(index_pairs[x]["page"])}\n') 278 | j = x + 1 279 | if toc_pairs[i] not in toc_wrong: 280 | toc_wrong.append(toc_pairs[i]) 281 | elif (rk == toc_pairs[i]["vol_n"]*100000+toc_pairs[i]["page_new"]) and (rk == toc_pairs[i+1]["vol_n"]*100000+toc_pairs[i+1]["page_new"]): 282 | j = x 283 | break 284 | # b.等于当前章节, 小于后一章节: 写入(词条和章节孰前孰后存疑,故记录) 285 | elif (rk == toc_pairs[i]["vol_n"]*100000+toc_pairs[i]["page_new"]) and (rk < toc_pairs[i+1]["vol_n"]*100000+toc_pairs[i+1]["page_new"]): 286 | fw.write('\t'*(level+1) + f'{index_pairs[x]["title"]}\t{vol_index}{str(index_pairs[x]["page"])}\n') 287 | j = x + 1 288 | 
if toc_pairs[i] not in toc_unsure: 289 | toc_unsure.append(toc_pairs[i]) 290 | # c.大于当前章节, 小于后一章节: 写入 291 | elif (rk > toc_pairs[i]["vol_n"]*100000+toc_pairs[i]["page_new"]) and (rk < toc_pairs[i+1]["vol_n"]*100000+toc_pairs[i+1]["page_new"]): 292 | fw.write('\t'*(level+1) + f'{index_pairs[x]["title"]}\t{vol_index}{str(index_pairs[x]["page"])}\n') 293 | j = x + 1 294 | # d.剩余情况: 大于当前章节,大于等于后一章节 295 | else: 296 | j = x 297 | break 298 | # 补 toc 的最后一行 299 | level = toc_pairs[-1]["level"] 300 | if toc_pairs[-1]["vol_n"] > 1: 301 | vol_toc = '['+str(toc_pairs[-1]["vol_n"])+']' 302 | else: 303 | vol_toc = '' 304 | if toc_pairs[-1]["page"] != 0: 305 | fw.write('\t'*level + f'{toc_pairs[-1]["title"]}\t{vol_toc}{str(toc_pairs[-1]["page"])}\n') 306 | else: 307 | fw.write('\t'*level + f'{toc_pairs[-1]["title"]}\n') 308 | # 写入剩余的索引行 309 | for x in range(j, len(index_pairs)): 310 | if index_pairs[x]["vol_n"] > 1: 311 | vol_index = '['+str(index_pairs[x]["vol_n"])+']' 312 | else: 313 | vol_index = '' 314 | fw.write('\t'*(level+1) + f'{index_pairs[x]["title"]}\t{vol_index}{str(index_pairs[x]["page"])}\n') 315 | if index_pairs[x]["vol_n"]*100000+index_pairs[x]["page"] < toc_pairs[-1]["vol_n"]*100000+toc_pairs[-1]["page_new"]: 316 | if toc_pairs[-1] not in toc_wrong: 317 | toc_wrong.append(toc_pairs[-1]) 318 | elif index_pairs[x]["vol_n"]*100000+index_pairs[x]["page"] == toc_pairs[-1]["vol_n"]*100000+toc_pairs[-1]["page_new"]: 319 | if toc_pairs[-1] not in toc_unsure: 320 | toc_unsure.append(toc_pairs[-1]) 321 | if self.toc_all_to_index(file_tmp, file_index_all): 322 | print(Fore.GREEN + "\n处理完成, 生成在同 index.txt 目录下" + Fore.RESET) 323 | # 输出错误和存疑的 toc 部分以便检查 324 | if toc_wrong or toc_unsure: 325 | fp = os.path.join(os.path.split(file_index_all)[0], '_need_checking.log') 326 | with open(fp, 'w', encoding='utf-8') as fw: 327 | if toc_wrong: 328 | fw.write('========= 排序错误 ==========\n') 329 | for t in toc_wrong: 330 | if t["vol_n"] > 1: 331 | vol_toc = '['+str(t["vol_n"])+']' 332 | else: 333 | vol_toc = '' 334 | if t["page"] == 0: 335 | fw.write(f'【L{str(t["level"])}】{t["title"]}\t\n') 336 | else: 337 | fw.write(f'【L{str(t["level"])}】{t["title"]}\t{vol_toc}{str(t["page"])}\n') 338 | if toc_unsure: 339 | fw.write('========= 排序存疑 ==========\n') 340 | for t in toc_unsure: 341 | if t["vol_n"] > 1: 342 | vol_toc = '['+str(t["vol_n"])+']' 343 | else: 344 | vol_toc = '' 345 | if t["page"] == 0: 346 | fw.write(f'【L{str(t["level"])}】{t["title"]}\t\n') 347 | else: 348 | fw.write(f'【L{str(t["level"])}】{t["title"]}\t{vol_toc}{str(t["page"])}\n') 349 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "存在排序存疑的条目, 已记录在日志 _need_checking.log 中,需手动调整完善") 350 | else: 351 | print(Fore.RED + "ERROR: " + Fore.RESET + "读取目录文件失败") 352 | 353 | def read_index_all_file(self, file_index_all, img_dict_flg=True, vol_i=0, navi_flg=False): 354 | done_flg = True 355 | dcts = [] 356 | dct_chaps = [] 357 | tail_ids = [] 358 | # 用于收集末章节的子词条 359 | tail_list = [] 360 | tail = {"id": 0, "children": []} 361 | with open(file_index_all, 'r', encoding='utf-8') as fr: 362 | if img_dict_flg: 363 | pat1 = re.compile(r'【L(\d+)】([^\t]+)\t([\[\d\]]*\-\d+|[\[\d\]]*\d*)[\r\n]*$') # 匹配章节词头(有/无卷标) 364 | pat2 = re.compile(r'([^\t]+)\t([\[\d\]]*\-?\d+)[\r\n]*$') # 匹配词条词头(有/无卷标) 365 | else: 366 | pat1 = self.settings.pat_stem_text # 匹配章节词头 367 | pat2 = self.settings.pat_tab # 匹配词条词头 368 | pat3 = self.settings.pat_index_blank # 匹配仅导航 369 | i = 0 370 | navi_bar = [None for i in range(10)] 371 | navi_bar_tmp = [] 372 | for line in fr: 373 | i += 1 374 | checked_flg = 
False 375 | vol_n = vol_i+1 376 | # 匹配章节 377 | if pat1.match(line): 378 | mth = pat1.match(line) 379 | # 读取页码/词条内容, 分卷号 380 | if img_dict_flg and mth.group(3) == '': 381 | body = 0 382 | elif img_dict_flg and re.match(r'\-?\d+$', mth.group(3)): 383 | body = int(mth.group(3)) 384 | elif img_dict_flg and re.match(r'\[(\d+)\](\-?\d+)$', mth.group(3)): 385 | mth_mth1 = re.match(r'\[(\d+)\](\-?\d+)$', mth.group(3)) 386 | vol_n, body = int(mth_mth1.group(1)), int(mth_mth1.group(2)) 387 | elif img_dict_flg: 388 | print(Fore.RED + "ERROR: " + Fore.RESET + f"第 {i} 行未匹配, 请检查") 389 | done_flg = False 390 | break 391 | else: 392 | body = mth.group(3) 393 | dct = { 394 | "id": i, 395 | "level": int(mth.group(1)), 396 | "title": mth.group(2), 397 | "body": body, 398 | "vol_n": vol_n 399 | } 400 | # navi_bar 构造 401 | navi_bar[int(mth.group(1))] = mth.group(2) 402 | navi_bar_tmp = navi_bar[:int(mth.group(1))+1] 403 | dct["navi_bar"] = copy(navi_bar_tmp) 404 | dct_chaps.append(dct) 405 | # 子词条清“篮子” 406 | if len(tail["children"]) != 0: 407 | tail_list.append({"id": tail["id"], "children": tail["children"]}) 408 | tail_ids.append(tail["id"]) 409 | checked_flg = True 410 | tail["id"] = i 411 | tail["children"] = [] 412 | # 匹配词条 413 | elif pat2.match(line): 414 | mth = pat2.match(line) 415 | if img_dict_flg and re.match(r'\-?\d+$', mth.group(2)): 416 | body = int(mth.group(2)) 417 | elif img_dict_flg and re.match(r'\[(\d+)\](\-?\d+)$', mth.group(2)): 418 | mth_mth1 = re.match(r'\[(\d+)\](\-?\d+)$', mth.group(2)) 419 | vol_n, body = int(mth_mth1.group(1)), int(mth_mth1.group(2)) 420 | elif img_dict_flg: 421 | print(Fore.RED + "ERROR: " + Fore.RESET + f"第 {i} 行未匹配, 请检查") 422 | done_flg = False 423 | break 424 | else: 425 | body = mth.group(2) 426 | dct = { 427 | "id": i, 428 | "level": -1, 429 | "title": mth.group(1), 430 | "body": body, 431 | "vol_n": vol_n 432 | } 433 | dct["navi_bar"] = navi_bar_tmp + [mth.group(1)] 434 | # 收集子词条 435 | tail["children"].append(mth.group(1)) 436 | # 匹配仅导航 437 | elif navi_flg and pat3.match(line): 438 | mth = pat3.match(line) 439 | dct = { 440 | "id": i, 441 | "level": -1, 442 | "title": mth.group(1), 443 | "body": '', 444 | "vol_n": vol_n 445 | } 446 | dct["navi_bar"] = navi_bar_tmp + [mth.group(1)] 447 | # 收集子词条 448 | tail["children"].append(mth.group(1)) 449 | else: 450 | print(Fore.RED + "ERROR: " + Fore.RESET + f"第 {i} 行未匹配, 请检查") 451 | done_flg = False 452 | break 453 | dcts.append(dct) 454 | # 遍历完成后补漏 455 | if not checked_flg and len(tail["children"]) != 0: 456 | tail_list.append({"id": tail["id"], "children": tail["children"]}) 457 | tail_ids.append(tail["id"]) 458 | # 用于收集大章节的子章节 459 | stem_ids = [] 460 | stem_list = [] 461 | stem = {"id": 0, "children": []} 462 | for i in range(len(dct_chaps)-1): 463 | dct_obj = dct_chaps[i] 464 | stem["id"] = dct_obj["id"] 465 | stem["children"] = [] 466 | checked_flg = False 467 | for dct in dct_chaps[i+1:]: 468 | if dct["level"] == dct_obj["level"]+1: 469 | stem["children"].append(dct["title"]) 470 | elif dct["level"] <= dct_obj["level"]: 471 | # 收集子章节 472 | if len(stem["children"]) != 0: 473 | stem_list.append({"id": stem["id"], "children": stem["children"]}) 474 | stem_ids.append(stem["id"]) 475 | checked_flg = True 476 | break 477 | # 补漏收 478 | if not checked_flg and len(stem["children"]) != 0: 479 | stem_list.append({"id": stem["id"], "children": stem["children"]}) 480 | stem_ids.append(stem["id"]) 481 | # 检查 482 | if len(tail_ids+stem_ids) != len(set(tail_ids+stem_ids)): 483 | done_flg = False 484 | set_tail_ids = set(tail_ids) 
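            # Note: tail_ids holds chapters whose children are plain entry lines, stem_ids holds chapters
            # whose children are sub-chapters; a well-formed index_all.txt never gives one line both roles,
            # so the intersection printed below pinpoints the conflicting line numbers. Hypothetical
            # example: tail_ids = {12, 30} and stem_ids = {12, 45} would report {12}, meaning the
            # levels/indentation around line 12 of index_all.txt need manual fixing.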
485 | set_stem_ids = set(stem_ids) 486 | print("层级矛盾行: ", set_tail_ids.intersection(set_stem_ids)) 487 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件 {file_index_all} 解析出现矛盾, 请检查索引顺序") 488 | else: 489 | # 整合所有信息 490 | for dct in dcts: 491 | if dct["level"] == -1: 492 | dct["children"] = [] 493 | dct["entry_list"] = False 494 | elif dct["id"] in tail_ids: 495 | for item in tail_list: 496 | if dct["id"] == item["id"]: 497 | dct["children"] = item["children"] 498 | dct["entry_list"] = True 499 | break 500 | elif dct["id"] in stem_ids: 501 | for item in stem_list: 502 | if dct["id"] == item["id"]: 503 | dct["children"] = item["children"] 504 | dct["entry_list"] = False 505 | break 506 | else: 507 | dct["children"] = [] 508 | dct["entry_list"] = False 509 | if done_flg: 510 | return dcts 511 | else: 512 | print(Fore.RED + "全索引文件读取失败: " + Fore.RESET + file_index_all) 513 | return None 514 | 515 | def make_relinks_syn(self, file_syns, file_out): 516 | """ 生成同义词重定向 """ 517 | words = [] 518 | # 1.读取重定向索引 519 | syns = [] 520 | fname = os.path.split(file_syns)[1] 521 | with open(file_syns, 'r', encoding='utf-8') as fr: 522 | i = 1 523 | for line in fr: 524 | mth = self.settings.pat_tab.match(line) 525 | if mth: 526 | syns.append({"syn": mth.group(1), "origin": mth.group(2)}) 527 | else: 528 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"{fname} 第 {i} 行未匹配, 已忽略") 529 | i += 1 530 | # 2.生成重定向 531 | with open(file_out, 'w', encoding='utf-8') as fw: 532 | for syn in syns: 533 | fw.write(f'{syn["syn"]}\n@@@LINK={syn["origin"]}\n\n') 534 | words.append(syn["syn"]) 535 | print("重定向(同义词)词条已生成") 536 | return words 537 | 538 | def make_relinks_st(self, words, file_out): 539 | converter_s2t = OpenCC('s2t.json') 540 | converter_t2s = OpenCC('t2s.json') 541 | to_words = [] 542 | # 生成繁简通搜重定向 543 | with open(file_out, 'w', encoding='utf-8') as fw: 544 | for word in words: 545 | # 简转繁 546 | to_word = converter_s2t.convert(word) 547 | if to_word != word and to_word not in to_words: 548 | fw.write(f'{to_word}\n@@@LINK={word}\n\n') 549 | to_words.append(to_word) 550 | # 繁转简 551 | to_word = converter_t2s.convert(word) 552 | if to_word != word and to_word not in to_words: 553 | fw.write(f'{to_word}\n@@@LINK={word}\n\n') 554 | to_words.append(to_word) 555 | print("重定向(繁简)词条已生成") 556 | 557 | def make_relinks_split(self, file_in, file_out, n_chars=2): 558 | relinks = [] 559 | pat = re.compile(r'[;,。\?\!\,\.\;]+') 560 | with open(file_in, 'r', encoding='utf-8') as fr: 561 | for line in fr: 562 | headword = line.rstrip() 563 | for s in pat.split(headword): 564 | if (s != headword) and (not pat.match(s)) and (len(s) >= n_chars): 565 | relink = s + '\t' + headword + '\n' 566 | if relink not in relinks: 567 | relinks.append(relink) 568 | if relinks: 569 | with open(file_out, 'w', encoding='utf-8') as fw: 570 | for relink in relinks: 571 | fw.write(relink) 572 | return True 573 | else: 574 | print(Fore.RED + "ERROR: " + Fore.RESET + "分词结果为空") 575 | return False 576 | 577 | def simp_trad_trans(self, file_in, file_out, trans_type): 578 | """ 繁简转换 """ 579 | if trans_type == 'T': 580 | converter_s2t = OpenCC('s2t.json') 581 | with open(file_out, 'w', encoding='utf-8') as fw: 582 | with open(file_in, 'r', encoding='utf-8') as fr: 583 | for line in fr: 584 | # 简转繁 585 | fw.write(converter_s2t.convert(line)) 586 | else: 587 | converter_t2s = OpenCC('t2s.json') 588 | with open(file_out, 'w', encoding='utf-8') as fw: 589 | with open(file_in, 'r', encoding='utf-8') as fr: 590 | for line in fr: 591 | # 繁转简 592 | 
fw.write(converter_t2s.convert(line)) 593 | print(f"\n转换结果已生成: {file_out}") 594 | 595 | def text_file_check(self, text_file): 596 | if not os.path.exists(text_file) or not os.path.isfile(text_file): 597 | print(Fore.YELLOW + "INFO: " + Fore.RESET + f"文件 {text_file} 不存在") 598 | return 0 599 | else: 600 | text = '' 601 | with open(text_file, 'r', encoding='utf-8') as fr: 602 | i = 0 603 | for line in fr: 604 | i += 1 605 | if i < 6: 606 | text += line 607 | else: 608 | break 609 | if re.match(r'\s*$', text): 610 | print(Fore.RED + "ERROR: " + Fore.RESET + f"文件 {text_file} 内容为空") 611 | return 1 612 | else: 613 | return 2 614 | 615 | def merge_and_count(self, file_list, file_final): 616 | # 筛选出有效文件 617 | parts = [] 618 | for f in file_list: 619 | if os.path.exists(f): 620 | parts.append(f) 621 | # 开始计数和合并 622 | entry_total = 0 623 | if len(parts) == 1 and file_final in parts: 624 | # 只有单个文件自身, 则不需要写 625 | with open(file_final, 'r', encoding='utf-8') as fr: 626 | for line in fr: 627 | if line == '\n': 628 | entry_total += 1 629 | else: 630 | # 用临时文件存储, 完了再重命名 631 | file_tmp = os.path.join(self.settings.dir_output_tmp, 'tmp.xxx') 632 | with open(file_tmp, 'a', encoding='utf-8') as fa: 633 | for part in parts: 634 | with open(part, 'r', encoding='utf-8') as fr: 635 | for line in fr: 636 | if line == '\n': 637 | entry_total += 1 638 | fa.write(line) 639 | if os.path.isfile(file_final): 640 | os.remove(file_final) 641 | os.rename(file_tmp, file_final) 642 | return entry_total 643 | 644 | def generate_info_html(self, file_info_raw, file_out, dict_name, templ_choice=None, volume_num=None): 645 | with open(file_out, 'w', encoding='utf-8') as fw: 646 | # 读取 info.html 647 | if file_info_raw and os.path.isfile(file_info_raw): 648 | with open(file_info_raw, 'r', encoding='utf-8') as fr: 649 | fw.write(fr.read().rstrip()) 650 | # 打上 AMB 标志 (有模板则是制作, 没有则认为是打包) 651 | if templ_choice and volume_num: 652 | fw.write(f"\n

{dict_name}, built with AutoMdxBuilder {self.settings.version} on {datetime.now().strftime('%Y/%m/%d')}, based on template {templ_choice.upper()} in {volume_num} volumes.
\n") 653 | elif templ_choice: 654 | fw.write(f"\n

{dict_name}, built with AutoMdxBuilder {self.settings.version} on {datetime.now().strftime('%Y/%m/%d')}, based on template {templ_choice.upper()}.
\n") 655 | else: 656 | fw.write(f"\n

{dict_name}, packed with AutoMdxBuilder {self.settings.version} on {datetime.now().strftime('%Y/%m/%d')}.
\n") 657 | return True 658 | 659 | def get_item_list(self, dct): 660 | html = '' 661 | if dct["level"] == -1: 662 | pass 663 | elif dct["entry_list"]: 664 | html += '

' 665 | i = 0 666 | for item in dct["children"]: 667 | i += 1 668 | if i == 1: 669 | html += f'{item}' 670 | else: 671 | html += f'{item}' 672 | html += '

\n' 673 | elif len(dct["children"]) != 0: 674 | html += '
    ' 675 | for item in dct["children"]: 676 | html += f'
  • {item}
  • ' 677 | html += '
\n' 678 | else: 679 | pass 680 | return html 681 | 682 | # def _detect_code(self, text_file): 683 | # with open(text_file, 'rb') as frb: 684 | # data = frb.read() 685 | # dcts = chardet.detect(data) 686 | # return dcts["encoding"] 687 | 688 | def prepare_imgs(self, dir_imgs_in, dir_imgs_out, volume_num=None): 689 | print('开始处理图像...') 690 | imgs = [] 691 | img_lens = [] 692 | if volume_num: 693 | # 整理图像 694 | lst_dir_imgs = [] 695 | for i in range(volume_num): 696 | dir_tmp = os.path.join(dir_imgs_out, os.path.split(dir_imgs_in["main"][i])[1]) 697 | imgs_tmp = self._proc_img_vol(dir_imgs_in["main"][i], dir_tmp, True, i) 698 | imgs += imgs_tmp 699 | img_lens.append(len(imgs_tmp)) 700 | print(f"第 {i+1} 卷已完成") 701 | lst_dir_imgs.append(dir_tmp) 702 | for fp in dir_imgs_in["others"]: 703 | dir_tmp = os.path.join(dir_imgs_out, os.path.split(fp)[1]) 704 | if os.path.exists(dir_tmp): 705 | size_in = sum(os.path.getsize(os.path.join(fp, f)) for f in os.listdir(fp) if os.path.isfile(os.path.join(fp, f))) 706 | size_out = sum(os.path.getsize(os.path.join(dir_tmp, f)) for f in os.listdir(dir_tmp) if os.path.isfile(os.path.join(dir_tmp, f))) 707 | if size_out == 0 or size_out != size_in: 708 | shutil.rmtree(dir_tmp) 709 | shutil.copytree(fp, dir_tmp) 710 | else: 711 | shutil.copytree(fp, dir_tmp) 712 | lst_dir_imgs.append(dir_tmp) 713 | # 清除 _tmp/imgs 中无关的文件,文件夹 714 | for fname in os.listdir(dir_imgs_out): 715 | fp = os.path.join(dir_imgs_out, fname) 716 | if os.path.isfile(fp): 717 | os.remove(fp) 718 | elif os.path.isdir(fp) and fp not in lst_dir_imgs: 719 | shutil.rmtree(fp) 720 | else: 721 | imgs = self._proc_img_vol(dir_imgs_in, dir_imgs_out) 722 | img_lens.append(len(imgs)) 723 | print('\n图像处理完毕。') 724 | return imgs, img_lens 725 | 726 | def _proc_img_vol(self, dir_imgs_in, dir_imgs_out, multi_vols_flg=False, vol_i=0): 727 | """ 图像预处理(重命名等) """ 728 | # 0.图像拷贝判断 729 | copy_flg = True 730 | if os.path.exists(dir_imgs_out): 731 | size_in = sum(os.path.getsize(os.path.join(dir_imgs_in, f)) for f in os.listdir(dir_imgs_in) if os.path.isfile(os.path.join(dir_imgs_in, f))) 732 | size_out = sum(os.path.getsize(os.path.join(dir_imgs_out, f)) for f in os.listdir(dir_imgs_out) if os.path.isfile(os.path.join(dir_imgs_out, f))) 733 | # 为空或不一样, 则重新处理 734 | if size_out == 0 or size_out != size_in: 735 | shutil.rmtree(dir_imgs_out) 736 | os.makedirs(dir_imgs_out) 737 | else: 738 | copy_flg = False 739 | else: 740 | os.makedirs(dir_imgs_out) 741 | # 1.获取图像文件列表 742 | num_flg = True # 图像文件名是否纯数字 743 | img_files = [] 744 | for fname in os.listdir(dir_imgs_in): 745 | fpath = os.path.join(dir_imgs_in, fname) 746 | if os.path.isfile(fpath) and fpath.endswith(tuple(self.settings.img_exts)): 747 | img_files.append(fpath) 748 | if not re.match(r'\d+', fname.split('.')[0]): 749 | num_flg = False 750 | # 按旧文件名排序 751 | if num_flg: 752 | img_files.sort(key=lambda x: int(os.path.split(x)[1].split('.')[0]), reverse=False) # 按数字排 753 | else: 754 | img_files.sort(reverse=False) # 按字符串排 755 | # 2.重命名 756 | dname = os.path.split(dir_imgs_out)[1].strip('\\/') 757 | imgs = [] 758 | n = 0 759 | len_digit = self.settings.len_digit # 获取序号位数 760 | for img_file in img_files: 761 | n += 1 762 | f_dir, f_name = os.path.split(img_file) 763 | f_ext = os.path.splitext(f_name)[1] 764 | # 区分正文和辅页, 辅页前缀'A', 正文前缀'B' 765 | if multi_vols_flg: 766 | # 分卷 767 | if n < self.settings.body_start[vol_i]: 768 | i_str = str(n).zfill(len_digit) 769 | f_title_new = f'{self.settings.name_abbr}[{str(vol_i+1).zfill(2)}]_A{i_str}' 770 | else: 771 | i_str = 
str(n-self.settings.body_start[vol_i]+1).zfill(len_digit) 772 | f_title_new = f'{self.settings.name_abbr}[{str(vol_i+1).zfill(2)}]_B{i_str}' 773 | imgs.append({'vol_n': vol_i+1, 'title': f_title_new, 'path': dname+'/'+f_title_new+f_ext, 'i_in_vol': n-1}) 774 | else: 775 | # 非分卷 776 | if n < self.settings.body_start[vol_i]: 777 | i_str = str(n).zfill(len_digit) 778 | f_title_new = f'{self.settings.name_abbr}_A{i_str}' 779 | else: 780 | i_str = str(n-self.settings.body_start[vol_i]+1).zfill(len_digit) 781 | f_title_new = f'{self.settings.name_abbr}_B{i_str}' 782 | imgs.append({'vol_n': vol_i+1, 'title': f_title_new, 'path': f_title_new+f_ext, 'i_in_vol': n-1}) 783 | # 复制新文件到输出文件夹 784 | if copy_flg: 785 | shutil.copy(img_file, os.path.join(dir_imgs_out, f_title_new+f_ext)) 786 | return imgs 787 | -------------------------------------------------------------------------------- /images/amb_folder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/amb_folder.png -------------------------------------------------------------------------------- /images/auto_split.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/auto_split.png -------------------------------------------------------------------------------- /images/img_dict_atmpl.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/img_dict_atmpl.gif -------------------------------------------------------------------------------- /images/img_dict_btmpl.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/img_dict_btmpl.gif -------------------------------------------------------------------------------- /images/imgs_order.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/imgs_order.png -------------------------------------------------------------------------------- /images/index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/index.png -------------------------------------------------------------------------------- /images/index_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/index_all.png -------------------------------------------------------------------------------- /images/settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/settings.png -------------------------------------------------------------------------------- /images/syns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/syns.png 
-------------------------------------------------------------------------------- /images/text_dict_ctmpl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/text_dict_ctmpl.png -------------------------------------------------------------------------------- /images/text_dict_dtmpl.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/text_dict_dtmpl.gif -------------------------------------------------------------------------------- /images/toc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/toc.png -------------------------------------------------------------------------------- /images/work_dir_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/images/work_dir_tree.png -------------------------------------------------------------------------------- /lib/FreePic2Pdf.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/lib/FreePic2Pdf.ini -------------------------------------------------------------------------------- /lib/MuPDF_pcs.txt: -------------------------------------------------------------------------------- 1 | %%MediaBox 0 0 2 | %%Rotate 0 3 | %%Image Im 4 | 5 | % Draw an image. 
6 | q 7 | 0 0 0 0 cm 8 | /Im Do 9 | Q 10 | -------------------------------------------------------------------------------- /lib/PDFPatcher_AppConfig.json: -------------------------------------------------------------------------------- 1 | {"检查更新时间":"2023-11-18T11:27:03","检查更新间隔":14,"保存程序设置":true,"文档加载模式":"优化处理效率","编码设置":{},"信息文件导出设置":{"导出文档属性":true,"导出文档书签":true,"导出页面链接":true,"导出阅读器设置":true,"导出页面设置":true,"导出编录信息":false,"导出页面内容":false,"导出页面字典":false,"导出图片":false,"导出解码文本":false,"导出命令操作符":false,"导出二进制流":false,"解析命名位置":false,"导出尺寸单位":{"单位":"厘米"},"文本编码":"系统默认"},"信息文件导入设置":{"导入文档属性":true,"导入文档书签":true,"导入页面链接":true,"保留页面链接":false,"导入阅读器设置":true,"导入页面设置":true},"PDF文件处理设置":{"页面布局":{"边框调整值":{"上":0,"右":0,"左":0,"下":0,"比例":false},"指定尺寸":{"名称":"等同原始内容尺寸","高度":0,"宽度":0},"页面筛选":"NotSpecified","旋转角度":0,"拉伸内容":false,"水平对齐":"Center","垂直对齐":"Middle","基准页面":0},"水平DPI":0,"垂直DPI":0,"校正图片旋转角度":false,"优化黑白图片压缩算法":false,"黑白图片自动透明":true,"指定元数据":{"指定文档元数据属性":false,"重写XML元数据属性":false},"阅读器设置":{"删除缩放比例":false,"强制内部链接":false,"书签状态":"保持不变","指定阅读器设置":false,"隐藏菜单":false,"隐藏工具栏":false,"隐藏程序界面":false,"适合窗口":false,"窗口居中":false,"显示文档标题":false},"压缩索引表和书签":true,"统一页面方向":false,"旋转源页面方向":false,"旋转方向":false},"PDF文档设置":{"嵌入字库":false,"删除文本尾随空白":false,"允许替换字库":false,"修复内容流":false,"删除批注":false,"删除导航书签":false,"删除使用限制":false,"删除文档自动动作":false,"删除页面自动动作":false,"删除页面表单":false,"删除链接批注":false,"删除页面元数据":false,"删除页面文本":false,"删除页面缩略图":false,"删除XML元数据":false,"优化黑白图片压缩算法":false,"页面布局":{"边框调整值":{"上":0,"右":0,"左":0,"下":0,"比例":false},"指定尺寸":{"名称":"等同原始内容尺寸","高度":0,"宽度":0},"页面筛选":"NotSpecified","旋转角度":0,"拉伸内容":false,"水平对齐":"Center","垂直对齐":"Middle","基准页面":0},"指定元数据":{"指定文档元数据属性":false,"重写XML元数据属性":false},"阅读器设置":{"删除缩放比例":false,"强制内部链接":false,"书签状态":"保持不变","指定阅读器设置":false,"隐藏菜单":false,"隐藏工具栏":false,"隐藏程序界面":false,"适合窗口":false,"窗口居中":false,"显示文档标题":false},"压缩索引表和书签":true,"统一页面方向":false,"旋转源页面方向":false,"旋转方向":false},"PDF编辑器设置":{"嵌入字库":false,"删除文本尾随空白":false,"允许替换字库":false,"修复内容流":false,"删除批注":false,"删除导航书签":false,"删除使用限制":false,"删除文档自动动作":false,"删除页面自动动作":false,"删除页面表单":false,"删除链接批注":false,"删除页面元数据":false,"删除页面文本":false,"删除页面缩略图":false,"删除XML元数据":false,"优化黑白图片压缩算法":false,"页面布局":{"边框调整值":{"上":0,"右":0,"左":0,"下":0,"比例":false},"指定尺寸":{"名称":"等同原始内容尺寸","高度":0,"宽度":0},"页面筛选":"NotSpecified","旋转角度":0,"拉伸内容":false,"水平对齐":"Center","垂直对齐":"Middle","基准页面":0},"指定元数据":{"指定文档元数据属性":false,"重写XML元数据属性":false},"阅读器设置":{"删除缩放比例":false,"强制内部链接":false,"书签状态":"保持不变","指定阅读器设置":false,"隐藏菜单":false,"隐藏工具栏":false,"隐藏程序界面":false,"适合窗口":false,"窗口居中":false,"显示文档标题":false},"压缩索引表和书签":true,"统一页面方向":false,"旋转源页面方向":false,"旋转方向":false},"自动生成书签设置":{"最小标题尺寸":13,"第一行为标题":false,"忽略单字符标题":false,"忽略数字标题":false,"合并相邻标题":true,"合并不同尺寸标题":false,"合并不同字体标题":true,"忽略重叠文本":false,"自动组织标题层次":true,"列出字体统计信息":true,"列出所有字体":false,"排版":"混合","最大合并行距":1.5,"识别分栏":true,"为首页生成书签":true,"Y轴偏移":1,"定位到页面顶端":0,"导出文本位置信息":false},"导出图像设置":{"自动指定输出位置":true,"避免重复导出图片":false,"合并图片":false,"合并JPG图片为PNG":true,"垂直翻转图片":false,"反转黑白图片颜色":false,"黑白图片导出为PNG":true,"导出批注图片":false,"最小高度":0,"最小宽度":0,"导出路径":"","文件名称掩码":"000000","导出掩模":false,"取反掩模":false},"转为图片设置":{"自动指定输出位置":true,"图片格式":"Png","垂直翻转图片":false,"水平翻转图片":false,"图片颜色":"Rgb","反转图片颜色":false,"JPEG质量":0,"旋转角度":0,"图片宽度":0,"图片比例":1,"分辨率":72,"尺寸模式":false,"文件名称掩码":"0000","适合区域":false,"隐藏批注":false,"减少颜色":false,"伽马校正":1,"染色":16777215},"提取页面设置":{"压缩文档":true,"保留文档属性":true,"保留文档书签":true,"删除无效书签":true,"解除文档限制":true,"添加编号":true,"拆分方式":0,"按页数拆分":1},"文本识别设置":{"识别语言":2052,"旋转校正":false,"拉伸校正":false,"排版":"混合","识别分栏":true,"目录识别模式":false,"压缩空白":false,"删除汉字间空白":false,"识别前保留图像颜色":false,"导出原始识别结果
":false,"在屏幕输出识别文本":false},"工具栏设置":{"Buttons":[{"ID":"Editor","按钮名称":"编辑器","显示按钮文字":true,"显示按钮":true},{"ID":"Patcher","按钮名称":"批量修改文档","显示按钮文字":true,"显示按钮":true},{"ID":"Merger","按钮名称":"合并文档","显示按钮文字":true,"显示按钮":true},{"ID":"Ocr","按钮名称":"识别文本","显示按钮文字":false,"显示按钮":true},{"ID":"BookmarkGenerator","按钮名称":"自动书签","显示按钮文字":false,"显示按钮":true},{"ID":"Rename","按钮名称":"批量重命名","显示按钮文字":false,"显示按钮":false},{"ID":"ExtractPages","按钮名称":"提取页面或拆分文档","显示按钮文字":false,"显示按钮":true},{"ID":"ExtractImages","按钮名称":"提取图片","显示按钮文字":true,"显示按钮":true},{"ID":"RenderPages","按钮名称":"转换页面为图片","显示按钮文字":false,"显示按钮":true},{"ID":"Inspector","按钮名称":"结构探查器","显示按钮文字":false,"显示按钮":false},{"ID":"InfoExchanger","按钮名称":"导出导入信息文件","显示按钮文字":false,"显示按钮":true},{"ID":"Options","按钮名称":"程序配置","显示按钮文字":false,"显示按钮":false}]},"窗口设置":{"状态":"Normal","左":58,"上":33,"宽":928,"高":678},"最近使用的文档":{"源文件":["D:\\汉语方言词汇.pdf"],"文件夹":["D:\\汉语方言词汇"]}} -------------------------------------------------------------------------------- /lib/Pdg2Pic.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/lib/Pdg2Pic.ini -------------------------------------------------------------------------------- /lib/atmpl.css: -------------------------------------------------------------------------------- 1 | /*预定义*/ 2 | ul { 3 | margin-right: 0px; 4 | margin-left: 32px; 5 | margin-top: 4px; 6 | margin-bottom: 4px; 7 | padding: 0px; 8 | } 9 | p { 10 | text-indent: 2em; 11 | margin:4px auto; 12 | } 13 | a {text-decoration:none} 14 | 15 | /*----------导航------------*/ 16 | /*导航框*/ 17 | div.top-navi{ 18 | margin: 0px 0px 0px 0px; 19 | padding: 3px 0px; 20 | border-top: 3px solid #3b264d; 21 | background-color: #CC9933; /* CD284C */ 22 | font-weight: bold; 23 | text-align: center; 24 | } 25 | div.bottom-navi{ 26 | margin: 0px 0px 0px 0px; 27 | padding: 3px 0px; 28 | border-bottom: 1px solid #3b264d;/*#ef8b14; #8470FF*/ 29 | background-color: #EAEAEA; 30 | text-align: center; 31 | } 32 | /*导航链接*/ 33 | div.top-navi a:link {color: #ffffff;} 34 | div.top-navi a:hover {background:green;} 35 | div.top-navi a:visited {color: #ffffff} /* FFFF99 */ 36 | div.bottom-navi a:link {color: blue;} 37 | div.bottom-navi a:hover {background:yellow;} 38 | span.navi-item{ 39 | margin: 0px 0px 0px 0px; 40 | } 41 | span.navi-item-left{ 42 | margin: 0px 0px 0px 4px; 43 | float: left; 44 | } 45 | span.navi-item-middle span.navi-item{ 46 | margin: 0px 6px 0px 6px; 47 | } 48 | span.navi-item-right{ 49 | margin: 0px 4px 0px 0px; 50 | float: right; 51 | } 52 | 53 | /*----------词条------------*/ 54 | /*---图片---*/ 55 | div.main-img img{ 56 | text-align: center; 57 | width: 100%; 58 | } 59 | /**/ 60 | 61 | /*---目录标题---*/ 62 | div.toc-title { 63 | text-align: center; 64 | margin-bottom: 8px; 65 | font-weight: bold; 66 | font-size: 120%; 67 | } 68 | /*---目录链接---*/ 69 | div.toc-text a:link {color: blue;} 70 | div.toc-text a:hover {background:yellow;} 71 | -------------------------------------------------------------------------------- /lib/auto_split_2.css: -------------------------------------------------------------------------------- 1 | /*图片自适应单双栏 (如不需要则注释下面两段, 并开启 display:none 那行)*/ 2 | @media screen and (max-width:720px) { 3 | div.main-img {width: 100%;overflow:hidden;} 4 | div.main-img div.left{position:relative;width: 200%;z-index:2;} 5 | div.main-img div.right{position:relative;width: 200%;margin-left:-100%;z-index:1;} 6 | } /*【额外pic设置】div.main-img div.pic{clip-path: polygon(0 0, 
100% 0, 100% 94%, 0 95%);margin-top: 0.5em;}*/ /* margin-bottom: -4.5em; */ 7 | @media screen and (min-width:721px) { 8 | div.main-img{width:100%;overflow:hidden;margin-top: 0.5em;} 9 | div.main-img div.left{position:relative;display:hidden;margin:0;padding:0;} 10 | div.main-img div.right{position:relative;width:100%; display: none;} 11 | } /* 【right部分】高度H÷宽度W margin-top: -147.3235%; */ 12 | /*div.main-img div.right {display:none;}*/ 13 | -------------------------------------------------------------------------------- /lib/bkmk/FreePic2Pdf.itf: -------------------------------------------------------------------------------- 1 | [Images] 2 | 3 | [Font] 4 | Language=GBK 5 | FontSize=7 6 | Margin=0.5 7 | 8 | [Bkmk] 9 | File=FreePic2Pdf_bkmk.txt 10 | AddAsText=0 11 | ShowBkmk=1 12 | ShowAll=0 13 | BasePage=63 14 | 15 | [Main] 16 | ContentsPage= 17 | TextPage=63 18 | -------------------------------------------------------------------------------- /lib/bkmk/FreePic2Pdf_bkmk.txt: -------------------------------------------------------------------------------- 1 | 封面 -62 2 | 前言 -59 3 | 凡例 -57 4 | 方言音系简介 -54 5 | 一、北京话声韵调 -54 6 | 二、济南话声韵调 -52 7 | 三、西安话声韵调 -50 8 | 四、太原话声韵调 -48 9 | 五、武汉话声韵调 -46 10 | 六、成都话声韵调 -45 11 | 七、合肥话声韵调 -44 12 | 八、扬州话声韵调 -42 13 | 九、苏州话声韵调 -41 14 | 十、温州话声韵调 -38 15 | 十一、长沙话声韵调 -35 16 | 十二、双峰话声韵调 -34 17 | 十三、南昌话声韵调 -32 18 | 十四、梅县话声韵调 -30 19 | 十五、广州话声韵调 -28 20 | 十六、阳江话声韵调 -26 21 | 十七、厦门话声韵调 -23 22 | 十八、潮州话声韵调 -21 23 | 十九、福州话声韵调 -19 24 | 二十、建瓯话声韵调 -15 25 | 分类词目 -12 26 | 正文 1 27 | 普通话音序索引 617 28 | 封底 629 29 | -------------------------------------------------------------------------------- /lib/bkmk_utf16le/FreePic2Pdf.itf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/lib/bkmk_utf16le/FreePic2Pdf.itf -------------------------------------------------------------------------------- /lib/bkmk_utf16le/FreePic2Pdf_bkmk.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/lib/bkmk_utf16le/FreePic2Pdf_bkmk.txt -------------------------------------------------------------------------------- /lib/btmpl.css: -------------------------------------------------------------------------------- 1 | /*预定义*/ 2 | ul { 3 | margin-right: 0px; 4 | margin-left: 32px; 5 | margin-top: 4px; 6 | margin-bottom: 4px; 7 | padding: 0px; 8 | } 9 | p { 10 | text-indent: 2em; 11 | margin:4px auto; 12 | } 13 | a {text-decoration:none} 14 | 15 | /*----------导航------------*/ 16 | /*原索引 (不展示)*/ 17 | div.index-all {display: none;} 18 | /*导航框*/ 19 | div.top-navi-level{ 20 | margin: 0px 0px 0px 0px; 21 | padding: 3px 0px 3px 4px; 22 | border-top: 3px solid #3b264d; 23 | background-color: #CC9933; /* CD284C */ 24 | font-weight: bold; 25 | text-align: left; 26 | } 27 | span.sep-navi {margin-left: 4px; margin-right: 4px; color: black; font-size: 90%;} 28 | div.top-navi{ 29 | margin: 0px 0px 0px 0px; 30 | padding: 3px 0px; 31 | border-top: 3px solid #3b264d; 32 | background-color: #CC9933; /* CD284C */ 33 | font-weight: bold; 34 | text-align: center; 35 | } 36 | div.bottom-navi{ 37 | margin: 0px 0px 0px 0px; 38 | padding: 3px 0px; 39 | border-bottom: 1px solid #3b264d;/*#ef8b14; #8470FF*/ 40 | background-color: #EAEAEA; 41 | text-align: center; 42 | } 43 | /*导航链接*/ 44 | div.top-navi a:link {color: #ffffff;} 45 | div.top-navi a:hover {background:green;} 46 | div.top-navi 
a:visited {color: #ffffff} /* FFFF99 */ 47 | div.bottom-navi a:link {color: blue;} 48 | div.bottom-navi a:hover {background:yellow;} 49 | /*导航链接 (多级) */ 50 | div.top-navi-level a:link {color: #ffffff;} 51 | div.top-navi-level a:hover {background:green;} 52 | div.top-navi-level a:visited {color: #ffffff} /* FFFF99 */ 53 | div.top-navi-level span.navi-item-entry a:link {color: #FFFF99;} 54 | div.top-navi-level span.navi-item-entry a:hover {background:green;} 55 | div.top-navi-level span.navi-item-entry a:visited {color: #FFFF99} /* FFFF99 */ 56 | span.navi-item{ 57 | margin: 0px 0px 0px 0px; 58 | } 59 | span.navi-item-entry{ 60 | margin: 0px 0px 0px 0px; 61 | } 62 | span.navi-item-left{ 63 | margin: 0px 0px 0px 4px; 64 | float: left; 65 | } 66 | span.navi-item-middle span.navi-item{ 67 | margin: 0px 6px 0px 6px; 68 | } 69 | span.navi-item-right{ 70 | margin: 0px 4px 0px 0px; 71 | float: right; 72 | } 73 | 74 | /*----------词条------------*/ 75 | /*---图片---*/ 76 | div.main-img img{ 77 | text-align: center; 78 | width: 100%; 79 | } 80 | /**/ 81 | 82 | /*---目录标题---*/ 83 | div.toc-title { 84 | text-align: center; 85 | margin-bottom: 8px; 86 | font-weight: bold; 87 | font-size: 120%; 88 | } 89 | /*---目录链接---*/ 90 | div.toc-text a:link {color: blue;} 91 | div.toc-text a:hover {background:yellow;} 92 | span.sep-list {color: grey; font-size: 90%;} 93 | -------------------------------------------------------------------------------- /lib/build.toml: -------------------------------------------------------------------------------- 1 | # 词典制作的配置文件(用于 AutoMdxBuilder 1.4 版本及以上) 2 | 3 | [global] 4 | templ_choice = "B" # 【重要】选择要应用的模板, 同时需完成下方对应模板的具体配置(如果有的话) 5 | name = "文史工具书词典" # 书名 6 | name_abbr = "WSGJSCD" # 书名首字母缩写 7 | simp_trad_flg = false # 是否需要繁简通搜 8 | add_extra_navis = false # 是否需要添加额外的导航栏(index_all_navi_\d+.txt) 9 | multi_volume = false # 是否是多卷的 10 | # 多卷模式下可以标示每个分卷名 (作用于 toc, index_all/toc_all) 11 | #vol_names = [ 12 | # "政治斗争卷", 13 | # "政治人物卷", 14 | # "军事卷" 15 | #] 16 | 17 | 18 | [template] 19 | [template.a] 20 | # 图像词典 (模板A) 21 | # 必需材料: imgs(文件夹), index/toc 22 | # 可选材料: syns, info 23 | body_start = 1 # 正文起始页为第几张图(>=1) 24 | auto_split_columns = 1 # (可选)自适应分栏数 (默认1表示不分栏) 25 | body_end_page = 99999 # (可选)最大正文页码 (用于自适应分栏范围的判断, 默认到最后一页) 26 | 27 | # (可选)导航栏链接, 有目录 (toc) 就可以设置 28 | #navi_items = [ 29 | # {a = "凡例",ref = "凡例"}, 30 | # {a = "北京",ref = "一、北京话声韵调"}, 31 | # {a = "苏州",ref = "九、苏州话声韵调"}, 32 | # {a = "武汉",ref = "五、武汉话声韵调"}, 33 | # {a = "成都",ref = "六、成都话声韵调"} 34 | #] 35 | 36 | 37 | [template.b] 38 | # 图像词典 (模板B) 39 | # 必需材料: imgs(文件夹), index_all/toc_all 40 | # 可选材料: syns, info 41 | body_start = 1 # 正文起始页为第几张图(>=1) 42 | auto_split_columns = 1 # (可选)自适应分栏数 (默认1表示不分栏) 43 | body_end_page = 99999 # (可选)最大正文词条页码 (用于自适应分栏范围的判断, 默认到最后一页) 44 | add_extra_index = false # 添加额外的 index.txt 文件 45 | 46 | 47 | [template.c] 48 | # 文本词典 (模板C) 49 | # 必需材料: index 50 | # 可选材料: data(文件夹), syns, info 51 | add_headwords = true 52 | 53 | [template.d] 54 | # 文本词典 (模板D) 55 | # 必需材料: index_all 56 | # 可选材料: data(文件夹), syns, info 57 | add_headwords = true 58 | -------------------------------------------------------------------------------- /lib/ctmpl.css: -------------------------------------------------------------------------------- 1 | /*预定义*/ 2 | h1 {text-align:center; font-size: 150%;} 3 | ul { 4 | margin-right: 0px; 5 | margin-left: 32px; 6 | margin-top: 4px; 7 | margin-bottom: 4px; 8 | padding: 0px; 9 | } 10 | p { 11 | text-indent: 2em; 12 | margin:4px auto; 13 | } 14 | a {text-decoration:none} 15 | 16 | 
/*----------词条------------*/ 17 | div.readertdetaillink {display: none;} 18 | /*词目*/ 19 | div.entry-headword { 20 | text-align: left; 21 | font-weight: bold; 22 | font-size: 110%; 23 | color: #2F49A5; 24 | } 25 | /*目录标题*/ 26 | div.toc-title { 27 | text-align: center; 28 | margin-bottom: 8px; 29 | font-weight: bold; 30 | font-size: 120%; 31 | } 32 | /*正文*/ 33 | div.entry-body {} 34 | /*数据来源*/ 35 | p.source {text-align: right; color: grey; font-size: 90%;} 36 | 37 | /*----------三方补充------------*/ 38 | div.tocTitle {text-align: center; margin-bottom: 8px; font-weight: bold; font-size: 120%;} /*目录标题*/ 39 | div.tocText {} /*词条内容*/ 40 | div.entryTitle {color: #00578E; font-weight: bold; font-size: 110%;} /*词目*/ 41 | div.entryText {} /*词条内容*/ 42 | div.title {} 43 | span#titleText {color: #00578E; font-weight: bold; font-size: 110%;} /*词目*/ 44 | span#titlePY {margin-left: 6px; color: yellow; font-weight: normal; font-size: 110%;} /*拼音*/ 45 | span#titleTextEn {margin-left: 6px; color: #134f28; font-weight: normal; font-size: 95%;} /*英文*/ 46 | span#titleOtherText {margin-left: 4px; color: grey; font-weight: normal; font-size: 95%;} /*其他*/ 47 | div.contentText {} 48 | -------------------------------------------------------------------------------- /lib/dtmpl.css: -------------------------------------------------------------------------------- 1 | /*预定义*/ 2 | h1 {text-align:center; font-size: 150%;} 3 | ul { 4 | margin-right: 0px; 5 | margin-left: 32px; 6 | margin-top: 4px; 7 | margin-bottom: 4px; 8 | padding: 0px; 9 | } 10 | p { 11 | text-indent: 2em; 12 | margin:4px auto; 13 | } 14 | a {text-decoration:none} 15 | 16 | /*----------导航------------*/ 17 | /*原索引 (不展示)*/ 18 | div.index-all {display: none;} 19 | div.readertdetaillink {display: none;} 20 | /*导航框*/ 21 | div.top-navi-level { 22 | margin: 12px 0px 6px 0px; 23 | padding: 3px 0px; 24 | border-top: 2px solid #ef8b14; 25 | border-bottom: 1px solid #ef8b14;/*#ef8b14; #8470FF*/ 26 | } 27 | span.sep-navi {margin-left: 4px; margin-right: 4px; color: black; font-size: 90%;} 28 | div.bottom-navi { 29 | margin: 12px 0px 6px 0px; 30 | padding: 3px 0px; 31 | border-top: 1px dotted #3c4457; 32 | border-bottom: 1px dotted #3c4457;/*#ef8b14; #8470FF #ef8b14*/ 33 | text-align:center; 34 | color:#3c4457; 35 | } 36 | /*导航链接*/ 37 | div.bottom-navi a:link {color: grey;} 38 | div.bottom-navi a:hover {background:yellow;} 39 | div.bottom-navi a:visited {color: grey} 40 | /*导航链接 (多级) */ 41 | div.top-navi-level a:link {color: #7c0000;} 42 | div.top-navi-level a:hover {background: yellow;} 43 | div.top-navi-level a:visited {color: #7c0000} 44 | div.top-navi-level span.navi-item-entry a:link {color: #C30A50;} 45 | div.top-navi-level span.navi-item-entry a:hover {background: yellow;} 46 | div.top-navi-level span.navi-item-entry a:visited {color: #C30A50} 47 | span.navi-item{ 48 | margin: 0px 0px 0px 0px; 49 | } 50 | span.navi-item-entry{ 51 | margin: 0px 0px 0px 0px; 52 | } 53 | span.navi-item-left{ 54 | margin: 0px 0px 0px 4px; 55 | float: left; 56 | } 57 | span.navi-item-middle span.navi-item{ 58 | margin: 0px 6px 0px 6px; 59 | } 60 | span.navi-item-right{ 61 | margin: 0px 4px 0px 0px; 62 | float: right; 63 | } 64 | 65 | /*----------词条------------*/ 66 | /*目录标题*/ 67 | div.toc-title { 68 | text-align: center; 69 | margin-bottom: 8px; 70 | font-weight: bold; 71 | font-size: 120%; 72 | } 73 | /*目录链接*/ 74 | div.toc-text a:link {color: blue;} 75 | div.toc-text a:hover {background:yellow;} 76 | span.sep-list {color: grey; font-size: 90%;} 77 | /*词目*/ 78 | div.entry-headword { 
79 | text-align: left; 80 | font-weight: bold; 81 | font-size: 110%; 82 | color: #a50000; 83 | } 84 | /*正文*/ 85 | div.entry-body {} 86 | /*数据来源*/ 87 | p.source {text-align: right; color: grey; font-size: 90%;} 88 | 89 | /*----------三方补充------------*/ 90 | div.tocTitle {text-align: center; margin-bottom: 8px; font-weight: bold; font-size: 120%;} /*目录标题*/ 91 | div.tocText {} /*词条内容*/ 92 | div.entryTitle {color: #00578E; font-weight: bold; font-size: 110%;} /*词目*/ 93 | div.entryText {} /*词条内容*/ 94 | div.title {} 95 | span#titleText {color: #00578E; font-weight: bold; font-size: 110%;} /*词目*/ 96 | span#titlePY {margin-left: 6px; color: yellow; font-weight: normal; font-size: 110%;} /*拼音*/ 97 | span#titleTextEn {margin-left: 6px; color: #134f28; font-weight: normal; font-size: 95%;} /*英文*/ 98 | span#titleOtherText {margin-left: 4px; color: grey; font-weight: normal; font-size: 95%;} /*其他*/ 99 | div.contentText {} 100 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | colorama==0.4.6 2 | tomlkit==0.12.2 3 | tomli==2.0.1 4 | mdict-utils==1.3.12 5 | OpenCC==1.1.1 6 | Pillow==10.1.0 7 | PyMuPDF==1.23.6 8 | pywinauto==0.6.8 9 | pywin32==306 10 | pywin32-ctypes==0.2.2 11 | -------------------------------------------------------------------------------- /settings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:58 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | import os 9 | import sys 10 | import re 11 | from tomli import load 12 | from tomlkit import loads 13 | from colorama import Fore 14 | 15 | 16 | class Settings: 17 | """ 词典设置 """ 18 | # 【提示】 AMB 1.4 及以后的版本已不在此处配置词典, 请移步 build.toml 文件 19 | def __init__(self): 20 | # 程序版本 21 | self.version = '1.6' 22 | 23 | # 输入文件 24 | self.dname_imgs = 'imgs' 25 | self.img_exts = ['.jpg', 'jpeg', '.jp2', '.png', '.gif', '.bmp', '.tif', '.tiff'] 26 | self.len_digit = 6 27 | self.dname_data = 'data' 28 | self.fname_index = 'index.txt' 29 | self.fname_index_all = 'index_all.txt' 30 | self.fname_toc_all = 'toc_all.txt' 31 | self.fname_toc = 'toc.txt' 32 | self.fname_syns = 'syns.txt' 33 | self.fname_dict_info = 'info.html' 34 | 35 | # 输出文件 36 | if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'): 37 | self.dir_bundle = sys._MEIPASS 38 | else: 39 | self.dir_bundle = os.getcwd() 40 | self.dir_output_tmp = os.path.join(self.dir_bundle, '_tmp') 41 | if not os.path.exists(self.dir_output_tmp): 42 | os.makedirs(self.dir_output_tmp) 43 | self.dir_index = os.path.join(self.dir_output_tmp, 'index') 44 | self.dir_toc = os.path.join(self.dir_output_tmp, 'toc') 45 | self.dir_index_all = os.path.join(self.dir_output_tmp, 'index_all') 46 | self.fname_entries_text = 'entries_text.txt' 47 | self.fname_entries_img = 'entries_img.txt' 48 | self.fname_entries_toc = 'entries_toc.txt' 49 | self.fname_entries_with_navi = 'entries_with_navi.txt' 50 | self.fname_entries_with_navi_text = 'entries_with_navi_text.txt' 51 | self.fname_relinks_syn = 'relinks_syn.txt' 52 | self.fname_relinks_st = 'relinks_st.txt' 53 | self.fname_relinks_index = 'relinks_index.txt' # template B 54 | self.fname_relinks_headword = 'relinks_headword.txt' 55 | self.file_log = os.path.join(self.dir_bundle, '_log.log') 56 | 57 | # 文本格式 58 | # index/index_all 59 | self.pat_stem = 
re.compile(r'【L(\d+)】([^\t]+)\t(\-\d+|\d*)[\r\n]*$') # 匹配图像词典全索引的主干条目 (有页码/无页码) 60 | self.pat_stem_vol = re.compile(r'【L(\d+)】([^\t]+)\t\[(\d+)\](\-\d+|\d*)[\r\n]*$') # [有卷标]匹配图像词典全索引的主干条目 (有页码/无页码) 61 | self.pat_stem_text = re.compile(r'【L(\d+)】([^\t]+)\t([^\t\r\n]*)[\r\n]*$') # 匹配文本词典全索引的主干条目 (有内容/无内容) 62 | self.pat_index = re.compile(r'([^\t]+)\t(\-?\d+)[\r\n]*$') # 匹配图像词典索引 (有页码) 63 | self.pat_index_vol = re.compile(r'([^\t]+)\t\[(\d+)\](\-?\d+)[\r\n]*$') # [有卷标]匹配图像词典索引 (有页码) 64 | self.pat_index_blank = re.compile(r'([^\t\r\n]+)[\t\r\n]*$') # 匹配导航 index_all, 无内容 65 | # toc 66 | self.pat_toc = re.compile(r'(\t*)([^\t]+)\t(\-?\d+)[\r\n]*$') # 匹配图像词典目录 (有页码) 67 | self.pat_toc_vol = re.compile(r'(\t*)([^\t]+)\t\[(\d+)\](\-?\d+)[\r\n]*$') # [有卷标]匹配图像词典目录 (有页码) 68 | self.pat_toc_blank = re.compile(r'(\t*)([^\t\r\n]+)[\t\r\n]*$') # 匹配图像词典目录 (无页码) 69 | # TAB分隔(通用) 70 | self.pat_tab = re.compile(r'([^\t]+)\t([^\t\r\n]+)[\r\n]*$') 71 | # 提取 72 | self.pat_relink = re.compile(r'^([^\r\n]+)[\r\n]+@@@LINK=([^\r\n]+)[\r\n]+[\r\n]*$', flags=re.M) 73 | 74 | # 预设样式/模板 75 | self.dir_lib = os.path.join(self.dir_bundle, 'lib') 76 | self.build_tmpl = 'build.toml' 77 | self.css_atmpl = 'atmpl.css' 78 | self.css_btmpl = 'btmpl.css' 79 | self.css_ctmpl = 'ctmpl.css' 80 | self.css_dtmpl = 'dtmpl.css' 81 | self.css_split_2 = 'auto_split_2.css' 82 | 83 | # 预设值 84 | self.body_start = [1] 85 | self.split_columns = 1 86 | self.body_end_page = [99999] 87 | self.add_headwords = True 88 | self.multi_volume = False 89 | self.volume_num = 1 90 | self.vol_names = [None] 91 | self.add_extra_index = False # template B 92 | 93 | def load_build_toml(self, file_toml, pdf_flg=False, outside_flg=True): 94 | build_flg = True 95 | # 输入文件夹 96 | self.dir_input = os.path.split(file_toml)[0] 97 | self.dir_input_tmp = os.path.join(self.dir_input, '_tmp') 98 | with open(file_toml, 'rb') as fr: 99 | try: 100 | build = load(fr) 101 | # --- 通用设置 --- 102 | self.name = build["global"]["name"] # 书名 103 | self.name_abbr = build["global"]["name_abbr"].upper() # 书名首字母缩写 104 | self.simp_trad_flg = build["global"].get("simp_trad_flg", False) # 是否要繁简通搜 105 | self.add_extra_navis = build["global"].get("add_extra_navis", False) # 是否要额外导航栏 106 | # --- 区别设置 --- 107 | self.templ_choice = build["global"]["templ_choice"].upper() # 模板选择 108 | self.multi_volume = build["global"].get("multi_volume", False) 109 | # 模板 A, B 110 | if self.templ_choice in ('A', 'B'): 111 | # --- 1.独有部分 ---- 112 | if self.templ_choice == 'A': 113 | label = 'a' 114 | self.navi_items = build["template"][label].get("navi_items", []) 115 | for item in self.navi_items: 116 | if item["ref"] == "": 117 | item["ref"] = item["a"] 118 | else: 119 | label = 'b' 120 | self.add_extra_index = build["template"][label].get("add_extra_index", False) 121 | # --- 2.共有部分 ---- 122 | # body_start 123 | self.body_start = build["template"][label]["body_start"] # 正文起始页为第几张图(>=1) 124 | if isinstance(self.body_start, int): 125 | self.body_start = [self.body_start] 126 | # 卷数, 卷名(默认全 None) 127 | self.volume_num = len(self.body_start) 128 | if self.multi_volume: 129 | get_vol_names = build["global"].get("vol_names", self.vol_names[0]) 130 | if not get_vol_names: 131 | self.vol_names = [None for i in range(self.volume_num)] 132 | elif isinstance(get_vol_names, list) and len(get_vol_names) == self.volume_num: 133 | self.vol_names = get_vol_names 134 | elif isinstance(get_vol_names, list) and len(get_vol_names) != self.volume_num: 135 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 body_start 和 
vol_names 数目不匹配") 136 | build_flg = False 137 | else: 138 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 vol_names 设置有误") 139 | build_flg = False 140 | # 分栏 (可选) 141 | self.split_columns = build["template"][label].get("auto_split_columns", 1) 142 | get_body_end_page = build["template"][label].get("body_end_page", self.body_end_page[0]) 143 | if self.multi_volume: 144 | self.body_end_page = [self.body_end_page[0] for i in range(self.volume_num)] 145 | if isinstance(get_body_end_page, int): 146 | self.body_end_page = [get_body_end_page for i in range(self.volume_num)] 147 | elif isinstance(get_body_end_page, list): 148 | if len(get_body_end_page) > self.volume_num: 149 | build_flg = False 150 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 body_end_page 数目超过了分卷数") 151 | else: 152 | for i in range(len(get_body_end_page)): 153 | self.body_end_page[i] = get_body_end_page[i] 154 | else: 155 | build_flg = False 156 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 body_end_page 格式有误") 157 | else: 158 | if isinstance(get_body_end_page, int): 159 | self.body_end_page[0] = get_body_end_page 160 | elif isinstance(get_body_end_page, list): 161 | self.body_end_page[0] = get_body_end_page[0] 162 | else: 163 | build_flg = False 164 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 body_end_page 格式有误") 165 | # 模板 C 166 | elif self.templ_choice == 'C': 167 | self.add_headwords = build["template"]["c"].get("add_headwords", True) 168 | # 模板 D 169 | elif self.templ_choice == 'D': 170 | self.add_headwords = build["template"]["d"].get("add_headwords", True) 171 | self.vol_names = build["global"].get("vol_names", self.vol_names) 172 | if not isinstance(self.vol_names, list): 173 | build_flg = False 174 | print(Fore.RED + "ERROR: " + Fore.RESET + "build.toml 中 vol_names 格式有误") 175 | # 设定其他变量 176 | self.fname_final_txt = f"{self.name}.txt" 177 | self.fname_css = f"{self.name_abbr.lower()}.css" 178 | # 确定输出文件夹 179 | if pdf_flg: 180 | pass 181 | elif outside_flg: 182 | self.dir_output = os.path.join(os.path.split(self.dir_input)[0], self.name) + '_mdict' 183 | else: 184 | self.dir_output = os.path.join(self.dir_input, self.name) + '_mdict' 185 | except: 186 | build_flg = False 187 | print(Fore.RED + "ERROR: " + Fore.RESET + "读取 build.toml 文件失败, 请检查格式是否规范、选项是否遗漏") 188 | # 生成 TOML 对象 189 | if build_flg: 190 | with open(file_toml, 'r', encoding='utf-8') as fr: 191 | self.build = loads(fr.read()) 192 | return build_flg 193 | -------------------------------------------------------------------------------- /templates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Litles/AutoMdxBuilder/dc1d61a1fc2214d03368218a44aa636a68bd6596/templates/__init__.py -------------------------------------------------------------------------------- /templates/text_dict_ctmpl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:41 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | import os 9 | import re 10 | from tomlkit import dumps 11 | from colorama import Fore 12 | 13 | 14 | class TextDictCtmpl: 15 | """ 文本词典(模板C) """ 16 | def __init__(self, amb): 17 | self.settings = amb.settings 18 | self.func = amb.func 19 | 20 | def make_source_file(self): 21 | """ 制作预备 txt 源文本 """ 22 | # 清空临时目录下所有文件 23 | for fname in os.listdir(self.settings.dir_output_tmp): 24 
| fpath = os.path.join(self.settings.dir_output_tmp, fname) 25 | if os.path.isfile(fpath): 26 | os.remove(fpath) 27 | # 初始化, 检查原材料: index, syns, info, data 28 | check_result = self._check_raw_files() 29 | # 开始制作 30 | if check_result: 31 | print('\n材料检查通过, 开始制作词典……\n') 32 | # 预定义输出文件名 33 | file_final_txt = os.path.join(self.settings.dir_output_tmp, self.settings.fname_final_txt) 34 | file_dict_info = os.path.join(self.settings.dir_output_tmp, self.settings.fname_dict_info) 35 | # 1.分步生成各部分源文本 36 | file_1 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_entries_text) # 文本词条 37 | file_2 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_relinks_syn) # 同义词重定向 38 | file_3 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_relinks_st) # 繁简重定向 39 | # 判断是否要生成额外导航栏 40 | # if check_result[4]: 41 | # file_0, navi_bars = self._gen_extra_navi_bars(check_result[4]) 42 | # (1) 生成文本(主)词条 43 | headwords = self._make_entries_text(check_result[0], file_1) 44 | # (2) 生成同义词重定向 45 | if check_result[1]: 46 | headwords += self.func.make_relinks_syn(check_result[1], file_2) 47 | # (3) 生成繁简通搜重定向 48 | if self.settings.simp_trad_flg: 49 | self.func.make_relinks_st(headwords, file_3) 50 | # 2.合并成最终 txt 源文本 51 | entry_total = self.func.merge_and_count([file_1, file_2, file_3], file_final_txt) 52 | print(f'\n源文本 "{self.settings.fname_final_txt}"(共 {entry_total} 词条)生成完毕!') 53 | # 3.生成 info.html 54 | self.func.generate_info_html(check_result[2], file_dict_info, self.settings.name, 'C') 55 | # 返回制作结果 56 | return [file_final_txt, check_result[3], file_dict_info] 57 | else: 58 | print(Fore.RED + "\n材料检查不通过, 请确保材料准备无误再执行程序" + Fore.RESET) 59 | return None 60 | 61 | def _gen_extra_navi_bars(self, lst_file_index_all): 62 | """ 生成额外导航栏 """ 63 | # for file_index_all in lst_file_index_all: 64 | # dcts = self.func.read_index_all_file(file_index_all) 65 | # self.gen_top_navi_bar(dcts) 66 | 67 | def extract_final_txt(self, file_final_txt, out_dir, dict_name): 68 | """ 从模板C词典的源 txt 文本中提取 index, syns 信息 """ 69 | dcts = [] 70 | syns = [] 71 | # (一) 分析提取源 txt 文本 72 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 73 | text = fr.read() 74 | # 1.提取 index_all 75 | pat_index = re.compile(r'^.+?
(.+?)
$', flags=re.M+re.S) 76 | for t in pat_index.findall(text): 77 | dct = { 78 | "id": t[0], 79 | "name": t[1], 80 | "body": t[2] 81 | } 82 | dcts.append(dct) 83 | # 2.识别 name_abbr 84 | mth = re.search(r'^$', text, flags=re.M) 85 | if mth: 86 | name_abbr = mth.group(1).upper() 87 | else: 88 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未识别到词典缩略字母, 已设置默认值") 89 | name_abbr = 'XXXXCD' 90 | # 3.提取 syns 91 | for t in self.settings.pat_relink.findall(text): 92 | syns.append((t[0], t[1])) 93 | # (二) 整理输出提取结果 94 | # 1.index.txt 95 | dcts.sort(key=lambda dct: dct["id"], reverse=False) 96 | with open(os.path.join(out_dir, 'index.txt'), 'w', encoding='utf-8') as fw: 97 | for dct in dcts: 98 | fw.write(f'{dct["name"]}\t{dct["body"]}\n') 99 | # 2.syns.txt 100 | if syns: 101 | with open(os.path.join(out_dir, 'syns.txt'), 'w', encoding='utf-8') as fw: 102 | for s in syns: 103 | fw.write(f'{s[0]}\t{s[1]}\n') 104 | # 3.build.toml 文件 105 | self.settings.load_build_toml(os.path.join(self.settings.dir_lib, self.settings.build_tmpl), False) 106 | self.settings.build["global"]["templ_choice"] = "C" 107 | self.settings.build["global"]["name"] = dict_name 108 | self.settings.build["global"]["name_abbr"] = name_abbr 109 | # 判断 add_headwords 110 | if not re.search(r'^
[^<]+
$', text, flags=re.M): 111 | self.settings.build["template"]["c"]["add_headwords"] = False 112 | with open(os.path.join(out_dir, 'build.toml'), 'w', encoding='utf-8') as fw: 113 | fw.write(dumps(self.settings.build)) 114 | 115 | def _make_entries_text(self, file_index, file_out): 116 | headwords = [] 117 | """ (一) 生成文本(主)词条 """ 118 | with open(file_out, 'a', encoding='utf-8') as fa: 119 | with open(file_index, 'r', encoding='utf-8') as fr: 120 | i = 0 121 | for line in fr: 122 | i += 1 123 | if self.settings.pat_tab.match(line): 124 | mth = self.settings.pat_tab.match(line) 125 | part_title = f'{mth.group(1)}\n' 126 | part_css = f'\n' 127 | part_index = f'\n' 128 | if not self.settings.add_headwords: 129 | part_headword = '' 130 | else: 131 | part_headword = f'
{mth.group(1)}
\n' 132 | if re.match(r'<(p|div|html|body|title|head)', mth.group(2), flags=re.I): 133 | part_body = f'
{mth.group(2)}
\n' 134 | else: 135 | part_body = f'

{mth.group(2)}

\n' 136 | # 将完整词条写入文件 137 | fa.write(part_title+part_css+part_index+part_headword+part_body+'\n') 138 | headwords.append(mth.group(1)) 139 | else: 140 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"第 {i} 行未匹配, 已忽略") 141 | print("文本词条已生成") 142 | return headwords 143 | 144 | def _check_raw_files(self): 145 | """ 检查原材料 146 | * 必要文本存在(文本编码均要是 utf-8 无 bom) 147 | * 检查 info.html 的编码 148 | """ 149 | check_result = [] 150 | # 预定义输入文件路径 151 | file_index = os.path.join(self.settings.dir_input, self.settings.fname_index) 152 | file_syns = os.path.join(self.settings.dir_input, self.settings.fname_syns) 153 | file_dict_info = os.path.join(self.settings.dir_input, self.settings.fname_dict_info) 154 | dir_data = os.path.join(self.settings.dir_input, self.settings.dname_data) 155 | # 1.扫描识别 index 文件 156 | pass_flg = True 157 | index_check_num = self.func.text_file_check(file_index) 158 | if index_check_num == 2: 159 | with open(file_index, 'r', encoding='utf-8') as fr: 160 | i = 0 161 | for line in fr: 162 | i += 1 163 | if not self.settings.pat_tab.match(line): 164 | print(Fore.RED + "ERROR: " + Fore.RESET + f"index.txt 第 {i} 行未匹配, 请检查") 165 | pass_flg = False 166 | break 167 | elif index_check_num == 1: 168 | pass_flg = False 169 | else: 170 | pass_flg = False 171 | print(Fore.RED + "ERROR: " + Fore.RESET + "未读取到 index 文件") 172 | if pass_flg: 173 | check_result.append(file_index) 174 | # 2.检查同义词文件: 若存在就要合格 175 | syns_check_num = self.func.text_file_check(file_syns) 176 | if syns_check_num == 0: 177 | check_result.append(None) 178 | elif syns_check_num == 2: 179 | check_result.append(file_syns) 180 | # 3.检查 info.html: 若存在就要合格 181 | info_check_num = self.func.text_file_check(file_dict_info) 182 | if info_check_num == 0: 183 | check_result.append(None) 184 | elif info_check_num == 2: 185 | check_result.append(file_dict_info) 186 | # 4.检查 data 文件夹 187 | if os.path.isdir(dir_data) and len(os.listdir(dir_data)) != 0: 188 | check_result.append(dir_data) 189 | elif os.path.isdir(dir_data): 190 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "data 文件夹为空, 已忽略将不打包") 191 | check_result.append(None) 192 | else: 193 | check_result.append(None) 194 | # 返回最终检查结果 195 | if len(check_result) == 4: 196 | return check_result 197 | else: 198 | return None 199 | -------------------------------------------------------------------------------- /templates/text_dict_dtmpl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2023-11-16 00:00:48 4 | # @Author : Litles (litlesme@gmail.com) 5 | # @Link : https://github.com/Litles 6 | # @Version : 1.6 7 | 8 | import os 9 | import re 10 | import shutil 11 | from tomlkit import dumps 12 | from colorama import Fore 13 | 14 | 15 | class TextDictDtmpl: 16 | """ 文本词典(模板D) """ 17 | def __init__(self, amb): 18 | self.settings = amb.settings 19 | self.func = amb.func 20 | 21 | def make_source_file(self): 22 | """ 制作预备 txt 源文本 """ 23 | # 清空临时目录下所有文件 24 | for fname in os.listdir(self.settings.dir_output_tmp): 25 | fpath = os.path.join(self.settings.dir_output_tmp, fname) 26 | if os.path.isfile(fpath): 27 | os.remove(fpath) 28 | # 初始化, 检查原材料: index_all, syns, info, data 29 | check_result = self._check_raw_files() 30 | # 开始制作 31 | if check_result: 32 | print('\n材料检查通过, 开始制作词典……\n') 33 | # 预定义输出文件名 34 | file_final_txt = os.path.join(self.settings.dir_output_tmp, self.settings.fname_final_txt) 35 | file_dict_info = os.path.join(self.settings.dir_output_tmp, self.settings.fname_dict_info) 36 | # 
1.分步生成各部分源文本 37 | file_1 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_entries_with_navi_text) # 文本(有导航栏)词条 38 | file_2 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_relinks_syn) # 同义词重定向 39 | file_3 = os.path.join(self.settings.dir_output_tmp, self.settings.fname_relinks_st) # 繁简重定向 40 | # (1) 生成文本(主)词条, 带层级导航 41 | headwords = self._make_entries_with_navi(check_result[0], file_1) 42 | if headwords: 43 | # (2) 生成近义词重定向 44 | if check_result[1]: 45 | headwords += self.func.make_relinks_syn(check_result[1], file_2) 46 | # (3) 生成繁简通搜重定向 47 | if self.settings.simp_trad_flg: 48 | self.func.make_relinks_st(headwords, file_3) 49 | # 2.合并成最终 txt 源文本 50 | entry_total = self.func.merge_and_count([file_1, file_2, file_3], file_final_txt) 51 | print(f'\n源文本 "{self.settings.fname_final_txt}"(共 {entry_total} 词条)生成完毕!') 52 | # 3.生成 info.html 53 | if self.settings.multi_volume: 54 | self.func.generate_info_html(check_result[2], file_dict_info, self.settings.name, 'D', self.settings.volume_num) 55 | else: 56 | self.func.generate_info_html(check_result[2], file_dict_info, self.settings.name, 'D') 57 | # 返回制作结果 58 | return [file_final_txt, check_result[3], file_dict_info] 59 | else: 60 | return None 61 | else: 62 | print(Fore.RED + "\n材料检查不通过, 请确保材料准备无误再执行程序" + Fore.RESET) 63 | return None 64 | 65 | def extract_final_txt(self, file_final_txt, out_dir, dict_name, multi_vols_flg=False, volume_num=1): 66 | """ 从模板D词典的源 txt 文本中提取 index, syns 信息 """ 67 | dcts = [] 68 | syns = [] 69 | # (一) 分析提取源 txt 文本 70 | with open(file_final_txt, 'r', encoding='utf-8') as fr: 71 | text = fr.read() 72 | # 1.提取 index_all 73 | pat_index = re.compile(r'^.+?(
[^\r\n]+
)$', flags=re.M+re.S) 74 | for t in pat_index.findall(text): 75 | if t[2].startswith('
'): 76 | body = re.search(r'
(.+?)
$', t[2], flags=re.M).group(1) 77 | else: 78 | body = '' 79 | dct = { 80 | "id": t[0], 81 | "name": t[1], 82 | "body": body 83 | } 84 | dcts.append(dct) 85 | # 2.识别 name_abbr 86 | mth = re.search(r'^$', text, flags=re.M) 87 | if mth: 88 | name_abbr = mth.group(1).upper() 89 | else: 90 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "未识别到词典缩略字母, 已设置默认值") 91 | name_abbr = 'XXXXCD' 92 | # 3.提取 syns 93 | for t in self.settings.pat_relink.findall(text): 94 | if not t[1].startswith(name_abbr): 95 | syns.append((t[0], t[1])) 96 | # (二) 整理输出提取结果 97 | # 1.index_all.txt 98 | dcts.sort(key=lambda dct: dct["id"], reverse=False) 99 | with open(os.path.join(out_dir, 'index_all.txt'), 'w', encoding='utf-8') as fw: 100 | for dct in dcts: 101 | if dct["body"] == '': 102 | fw.write(f'{dct["name"]}\t\n') 103 | else: 104 | fw.write(f'{dct["name"]}\t{dct["body"]}\n') 105 | # 2.syns.txt 106 | if syns: 107 | with open(os.path.join(out_dir, 'syns.txt'), 'w', encoding='utf-8') as fw: 108 | for s in syns: 109 | fw.write(f'{s[0]}\t{s[1]}\n') 110 | # 3. build.toml 文件 111 | self.settings.load_build_toml(os.path.join(self.settings.dir_lib, self.settings.build_tmpl), False) 112 | self.settings.build["global"]["templ_choice"] = "D" 113 | self.settings.build["global"]["name"] = dict_name 114 | self.settings.build["global"]["name_abbr"] = name_abbr 115 | # 判断 add_headwords 116 | if not re.search(r'^
[^<]+
$', text, flags=re.M): 117 | self.settings.build["template"]["d"]["add_headwords"] = False 118 | with open(os.path.join(out_dir, 'build.toml'), 'w', encoding='utf-8') as fw: 119 | fw.write(dumps(self.settings.build)) 120 | 121 | def _make_entries_with_navi(self, file_index_all, file_out): 122 | headwords = [] 123 | """ (一) 生成文本(主)词条, 带层级导航 """ 124 | # 1.读取全索引文件 125 | dcts = self.func.read_index_all_file(file_index_all, False) 126 | # 2.生成主体词条 127 | if dcts: 128 | with open(file_out, 'w', encoding='utf-8') as fw: 129 | tops = [] 130 | headwords_stem = [] 131 | i = 0 132 | len_dcts = len(dcts) 133 | for dct in dcts: 134 | part_css = f'\n' 135 | # 词头, 索引备份 136 | if dct["level"] == -1: 137 | part_title = f'{dct["title"]}\n' 138 | part_index = f'\n' 139 | else: 140 | part_title = f'{self.settings.name_abbr}_{dct["title"]}\n' 141 | part_index = f'\n' 142 | # top-navi-level 部分 143 | part_top = '
' 144 | part_top += f'🕮' 145 | for x in range(len(dct["navi_bar"])): 146 | cname = 'navi-item' 147 | link_name = f'{self.settings.name_abbr}_{dct["navi_bar"][x]}' 148 | if x == len(dct["navi_bar"])-1 and dct["level"] == -1: 149 | cname = 'navi-item-entry' 150 | link_name = dct["navi_bar"][x] 151 | aname = dct["navi_bar"][x] 152 | part_top += f'»{aname}' 153 | part_top += '
\n' 154 | # item-list 部分 155 | part_list = self.func.get_item_list(dct) 156 | # 词条部分 157 | if dct["level"] != -1 and dct["body"] == '': 158 | part_headword = '' 159 | part_body = '' 160 | elif dct["level"] != -1 and dct["body"] != '': 161 | part_headword = '' 162 | part_body = f'
{dct["body"]}
\n' 163 | elif not self.settings.add_headwords: 164 | part_headword = '' 165 | part_body = f'
{dct["body"]}
\n' 166 | elif re.match(r'<(p|div|html|body|title|head)', dct["body"], flags=re.I): 167 | part_headword = f'
{dct["title"]}
\n' 168 | part_body = f'
{dct["body"]}
\n' 169 | else: 170 | part_headword = f'
{dct["title"]}
\n' 171 | part_body = f'

{dct["body"]}

\n' 172 | # bottom-navi 部分 173 | part_left = '' 174 | part_right = '' 175 | if i == 0: 176 | # 只有右 177 | if dcts[i+1]["level"] != -1: 178 | part_right = f'{dcts[i+1]["title"]} ☛' 179 | else: 180 | part_right = f'{dcts[i+1]["title"]} ☛' 181 | elif i == len_dcts-1: 182 | # 只有左 183 | if dcts[i-1]["level"] != -1: 184 | part_left = f'☚ {dcts[i-1]["title"]}' 185 | else: 186 | part_left = f'☚ {dcts[i-1]["title"]}' 187 | else: 188 | if dcts[i-1]["level"] != -1: 189 | part_left = f'☚ {dcts[i-1]["title"]}' 190 | else: 191 | part_left = f'☚ {dcts[i-1]["title"]}' 192 | if dcts[i+1]["level"] != -1: 193 | part_right = f'{dcts[i+1]["title"]} ☛' 194 | else: 195 | part_right = f'{dcts[i+1]["title"]} ☛' 196 | part_bottom = '
' + part_left + '   ' + part_right + '
\n' 197 | # 合并写入 198 | fw.write(part_title+part_css+part_index+part_top+part_list+part_headword+part_body+part_bottom+'\n') 199 | headwords.append(dct["title"]) 200 | # 收集顶级章节 201 | if dct["level"] != -1: 202 | if dct["level"] == 0: 203 | tops.append(dct["title"]) 204 | elif dct["level"] == 1 and self.settings.multi_volume: 205 | pass 206 | else: 207 | headwords_stem.append(dct["title"]) 208 | i += 1 209 | # 3.写入总目词条 210 | toc_entry = f'TOC_{self.settings.name_abbr}\n' 211 | toc_entry += f'\n' 212 | toc_entry += f'\n' 213 | toc_entry += '
    ' 214 | for top in tops: 215 | toc_entry += f'
  • {top}
  • ' 216 | toc_entry += '
' + '   ' + '
\n' 217 | toc_entry += '
\n\n' 218 | fw.write(toc_entry) 219 | # 4.章节重定向 220 | for word in headwords_stem: 221 | fw.write(f'{word}\n@@@LINK={self.settings.name_abbr}_{word}\n\n') 222 | print("文本词条(有导航栏)已生成") 223 | else: 224 | pass 225 | return headwords 226 | 227 | def _check_index_alls(self, dir_input, dir_out): 228 | """ 检查 index_all 文本 """ 229 | pass_flg = True 230 | file_index_all = os.path.join(dir_input, self.settings.fname_index_all) 231 | # 1.扫描识别总 index_all 文件 232 | final_index_all = os.path.join(dir_out, self.settings.fname_index_all) 233 | index_check_num = self.func.text_file_check(file_index_all) 234 | if index_check_num == 2: 235 | shutil.copy(file_index_all, final_index_all) 236 | # 读取检查总 index_all 文件 237 | with open(final_index_all, 'r', encoding='utf-8') as fr: 238 | i = 0 239 | for line in fr: 240 | i += 1 241 | mth_stem = self.settings.pat_stem_text.match(line) 242 | if mth_stem: 243 | # 章节 244 | pass 245 | elif self.settings.pat_tab.match(line): 246 | # 词条 247 | pass 248 | else: 249 | print(Fore.RED + "ERROR: " + Fore.RESET + f"index_all.txt 第 {i} 行未匹配, 请检查") 250 | pass_flg = False 251 | break 252 | elif index_check_num == 1: 253 | pass_flg = False 254 | elif self.settings.multi_volume: 255 | # 2.扫描识别分 index_all 256 | lst_file_index_all = [] 257 | pat1 = re.compile(r'index_all_(\d+)', flags=re.I) 258 | lst_n = [] 259 | for fname in os.listdir(dir_input): 260 | if fname.endswith('.txt') and pat1.match(fname): 261 | vol_n = int(pat1.match(fname).group(1)) 262 | fp = os.path.join(dir_input, fname) 263 | if vol_n not in lst_n: 264 | index_check_num = self.func.text_file_check(fp) 265 | if index_check_num == 1: 266 | pass_flg = False 267 | break 268 | elif index_check_num == 2: 269 | lst_file_index_all.append({"vol_n": vol_n, "path": fp}) 270 | lst_n.append(vol_n) 271 | if pass_flg and not lst_file_index_all: 272 | print(Fore.RED + "ERROR: " + Fore.RESET + "未读取到 index_all 文件") 273 | pass_flg = False 274 | elif pass_flg: 275 | self.settings.volume_num = len(lst_file_index_all) 276 | # 3.合并各 index_all 文本, 顺便检查格式 277 | lst_file_index_all.sort(key=lambda dct: dct["vol_n"], reverse=False) 278 | pat_vname = re.compile(r'index_all_\d+_(.+?)\.txt', flags=re.I) 279 | with open(final_index_all, 'w', encoding='utf-8') as fw: 280 | break_flg = False 281 | for x in range(len(lst_file_index_all)): 282 | fname = os.path.split(lst_file_index_all[x]["path"])[1] 283 | with open(lst_file_index_all[x]["path"], 'r', encoding='utf-8') as fr: 284 | # 获取卷名, 写入卷标 285 | try: 286 | vname = self.settings.vol_names[x] 287 | except IndexError: 288 | vname = None 289 | if not vname: 290 | if pat_vname.match(fname): 291 | vname = pat_vname.match(fname).group(1) 292 | else: 293 | vname = '第'+str(lst_file_index_all[x]["vol_n"]).zfill(2)+'卷' 294 | fw.write('【L0】'+vname+'\t\n') 295 | # 整合开始 296 | i = 0 297 | for line in fr: 298 | i += 1 299 | mth_stem = self.settings.pat_stem_text.match(line) 300 | if mth_stem: 301 | # 章节 302 | if mth_stem.group(3) == '': 303 | fw.write(f'【L{str(int(mth_stem.group(1))+1)}】{mth_stem.group(2)}\t\n') 304 | else: 305 | fw.write(f'【L{str(int(mth_stem.group(1))+1)}】{mth_stem.group(2)}\t{mth_stem.group(3)}\n') 306 | elif self.settings.pat_tab.match(line): 307 | # 词条 308 | mth = self.settings.pat_tab.match(line) 309 | fw.write(f'{mth.group(1)}\t{mth.group(2)}\n') 310 | else: 311 | print(Fore.RED + "ERROR: " + Fore.RESET + f"{fname} 第 {i} 行未匹配, 请检查") 312 | pass_flg = False 313 | break_flg = True 314 | break 315 | if break_flg: 316 | break 317 | if pass_flg: 318 | return final_index_all 319 | else: 320 | return 
None 321 | 322 | def _check_raw_files(self): 323 | """ 检查原材料 324 | * 必要文本存在(文本编码均要是 utf-8 无 bom) 325 | * 检查 info.html 的编码 326 | """ 327 | check_result = [] 328 | # 预定义输入文件路径 329 | dir_input = self.settings.dir_input 330 | file_index_all = os.path.join(dir_input, self.settings.fname_index_all) 331 | file_syns = os.path.join(dir_input, self.settings.fname_syns) 332 | file_dict_info = os.path.join(dir_input, self.settings.fname_dict_info) 333 | dir_data = os.path.join(dir_input, self.settings.dname_data) 334 | # 准备临时文件夹 335 | dir_index_all = self.settings.dir_index_all 336 | if os.path.exists(dir_index_all): 337 | shutil.rmtree(dir_index_all) 338 | os.makedirs(dir_index_all) 339 | else: 340 | os.makedirs(dir_index_all) 341 | file_index_all = self._check_index_alls(dir_input, dir_index_all) 342 | # 1.检查索引文件: 必须存在且合格 343 | if file_index_all: 344 | check_result.append(file_index_all) 345 | # 2.检查同义词文件: 若存在就要合格 346 | syns_check_num = self.func.text_file_check(file_syns) 347 | if syns_check_num == 0: 348 | check_result.append(None) 349 | elif syns_check_num == 2: 350 | check_result.append(file_syns) 351 | # 3.检查 info.html: 若存在就要合格 352 | info_check_num = self.func.text_file_check(file_dict_info) 353 | if info_check_num == 0: 354 | check_result.append(None) 355 | elif info_check_num == 2: 356 | check_result.append(file_dict_info) 357 | # 4.检查 data 文件夹 358 | if os.path.isdir(dir_data) and len(os.listdir(dir_data)) != 0: 359 | check_result.append(dir_data) 360 | elif os.path.isdir(dir_data): 361 | print(Fore.MAGENTA + "WARN: " + Fore.RESET + "data 文件夹为空, 已忽略将不打包") 362 | check_result.append(None) 363 | else: 364 | check_result.append(None) 365 | # 返回最终检查结果 366 | if len(check_result) == 4: 367 | return check_result 368 | else: 369 | return None 370 | --------------------------------------------------------------------------------
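For quick reference, the following is a minimal, illustrative sketch of the input line formats implied by the regular expressions defined in settings.py and by the redirect writer make_relinks_syn / the entry counter merge_and_count in the library code above. The regex patterns are copied verbatim from settings.py; the sample headwords, volume labels and page numbers are assumed placeholders (loosely drawn from lib/build.toml and lib/bkmk/FreePic2Pdf_bkmk.txt), not data shipped with AutoMdxBuilder.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Illustrative sketch only: what index.txt / index_all.txt / syns.txt lines are
# expected to look like, per the patterns in settings.py. Sample values are assumptions.
import re

pat_index     = re.compile(r'([^\t]+)\t(\-?\d+)[\r\n]*$')               # index.txt: headword<TAB>page (negative = front matter)
pat_index_vol = re.compile(r'([^\t]+)\t\[(\d+)\](\-?\d+)[\r\n]*$')      # multi-volume index: headword<TAB>[volume]page
pat_stem      = re.compile(r'【L(\d+)】([^\t]+)\t(\-\d+|\d*)[\r\n]*$')   # index_all.txt chapter line: 【L<level>】chapter<TAB>page
pat_tab       = re.compile(r'([^\t]+)\t([^\t\r\n]+)[\r\n]*$')           # syns.txt: synonym<TAB>original headword

assert pat_index.match('凡例\t-57\n')                # front-matter pages use negative numbers
assert pat_index_vol.match('凡例\t[01]-57\n')        # volume number in square brackets
assert pat_stem.match('【L1】方言音系简介\t-54\n')     # chapter entry at level 1
assert pat_tab.match('北京\t一、北京话声韵调\n')       # synonym redirected to an existing headword

# In the packed .txt source, entries are separated by a blank line (merge_and_count
# counts those blank lines) and redirects are written with @@@LINK (make_relinks_syn):
sample_redirect = '北京\n@@@LINK=一、北京话声韵调\n\n'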
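In the same spirit, here is a small sketch of the image file-naming rule applied by _proc_img_vol: pages before body_start get an 'A' prefix (auxiliary/front-matter pages), body pages get a 'B' prefix with the counter restarted at 1, and multi-volume builds insert a zero-padded volume tag. The values name_abbr='WSGJSCD' and len_digit=6 come from lib/build.toml and settings.py; body_start=3 is an arbitrary example, and the helper below is an illustrative stand-in, not the function shipped in the repository.

# Illustrative reimplementation of the renaming rule in _proc_img_vol (assumed sample values).
def new_title(name_abbr, n, body_start, len_digit=6, vol_i=None):
    vol_tag = f'[{str(vol_i + 1).zfill(2)}]' if vol_i is not None else ''
    if n < body_start:                       # auxiliary (front-matter) page
        return f'{name_abbr}{vol_tag}_A{str(n).zfill(len_digit)}'
    return f'{name_abbr}{vol_tag}_B{str(n - body_start + 1).zfill(len_digit)}'

print(new_title('WSGJSCD', 1, 3))            # WSGJSCD_A000001     (1st image, front matter)
print(new_title('WSGJSCD', 3, 3))            # WSGJSCD_B000001     (body starts at image 3)
print(new_title('WSGJSCD', 4, 3, vol_i=0))   # WSGJSCD[01]_B000002 (volume 1, 2nd body page)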