# Repository layout recorded at the top of this dump:
# ├── .cross_platform
#     ├── auto_mdx_builder.py
#     └── ebook_utils.py
# ├── LICENSE
# ├── README.md
# ├── auto_mdx_builder.py
# ├── ebook_utils.py
# ├── func_lib.py
# ├── images
#     ├── amb_folder.png
#     ├── auto_split.png
#     ├── img_dict_atmpl.gif
#     ├── img_dict_btmpl.gif
#     ├── imgs_order.png
#     ├── index.png
#     ├── index_all.png
#     ├── settings.png
#     ├── syns.png
#     ├── text_dict_ctmpl.png
#     ├── text_dict_dtmpl.gif
#     ├── toc.png
#     └── work_dir_tree.png
# ├── lib
#     ├── FreePic2Pdf.ini
#     ├── MuPDF_pcs.txt
#     ├── PDFPatcher_AppConfig.json
#     ├── Pdg2Pic.ini
#     ├── atmpl.css
#     ├── auto_split_2.css
#     ├── bkmk
#     │   ├── FreePic2Pdf.itf
#     │   └── FreePic2Pdf_bkmk.txt
#     ├── bkmk_utf16le
#     │   ├── FreePic2Pdf.itf
#     │   └── FreePic2Pdf_bkmk.txt
#     ├── btmpl.css
#     ├── build.toml
#     ├── ctmpl.css
#     └── dtmpl.css
# ├── requirements.txt
# ├── settings.py
# └── templates
#     ├── __init__.py
#     ├── img_dict_atmpl.py
#     ├── img_dict_btmpl.py
#     ├── text_dict_ctmpl.py
#     └── text_dict_dtmpl.py
#
# File: /.cross_platform/auto_mdx_builder.py
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2023-11-16 00:00:17
# @Author  : Litles (litlesme@gmail.com)
# @Link    : https://github.com/Litles
# @Version : 1.5

import logging
import traceback
import os
import re
import shutil
from colorama import Fore, just_fix_windows_console
from settings import Settings
from func_lib import FuncLib
from img_dict_atmpl import ImgDictAtmpl
from img_dict_btmpl import ImgDictBtmpl
from text_dict_ctmpl import TextDictCtmpl
from text_dict_dtmpl import TextDictDtmpl
from ebook_utils import EbookUtils


class AutoMdxBuilder:
    """Image dictionary builder program.

    Reads the user's menu selection and delegates the actual work to the
    ``Settings``, ``FuncLib`` and ``EbookUtils`` helpers created in
    ``__init__`` and to the four template classes imported above.
    """

    def __init__(self):
        self.settings = Settings()
        self.func = FuncLib(self)
        self.utils = EbookUtils(self)

    def auto_processing(self, sel):
        """Run the action that belongs to menu selection ``sel``.

        Handled selections:
            1   unpack an mdx/mdd file
            2   pack a source txt file into an mdx file
            3   pack a resource folder into an mdd file
            11  generate index_all.txt from toc_all.txt
            12  merge toc.txt and index.txt into index_all.txt
            20  build a dictionary from a raw-material folder / build.toml
            30  restore raw materials from a dictionary
            32  restore toc_all.txt from index_all.txt
            41  extract the images embedded in a PDF (MuPDF)
            42  render the pages of a PDF to images (MuPDF)

        Any other value (including 10, 31, 43 and 44, which have no handler
        in this file) is ignored.

        Args:
            sel: the menu number chosen by the user.
        """
        handlers = {
            1: self._opt_unpack_mdx,
            2: self._opt_pack_txt_to_mdx,
            3: self._opt_pack_dir_to_mdd,
            11: self._opt_toc_all_to_index_all,
            12: self._opt_merge_toc_and_index,
            20: self._opt_build_dictionary,
            30: self._opt_restore_dictionary,
            32: self._opt_index_all_to_toc_all,
            41: lambda: self._opt_pdf_to_images(render=False),
            42: lambda: self._opt_pdf_to_images(render=True),
        }
        handler = handlers.get(sel)
        if handler is not None:
            handler()

    @staticmethod
    def _ask_path(prompt):
        """Prompt for a path; drop surrounding quotes/backslashes and trailing '/'."""
        return input(prompt).strip('"\\').rstrip('/')

    def _opt_unpack_mdx(self):
        """Option 1: unpack an mdx/mdd file next to the original."""
        mfile = input("请输入要解包的 mdx/mdd 文件路径: ").strip('"')
        if self.utils.export_mdx(mfile):
            print(Fore.GREEN + "\n已输出在同目录下: " + Fore.RESET + os.path.splitext(mfile)[0])

    def _opt_pack_txt_to_mdx(self):
        """Option 2: pack a source txt file (plus optional ``data`` folder) into mdx/mdd."""
        file_final_txt = input("请输入要打包的 txt 文件路径: ").strip('"')
        if self.func.text_file_check(file_final_txt) != 2:
            print(Fore.RED + "\n材料检查不通过, 请确保材料准备无误再执行程序" + Fore.RESET)
            return
        dir_curr, fname_txt = os.path.split(file_final_txt)
        stem = os.path.splitext(fname_txt)[0]
        # Check the data folder; without usable resources no mdd is packed.
        dir_data = os.path.join(dir_curr, 'data')
        if not os.path.exists(dir_data):
            print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"文件夹 {dir_data} 不存在, 已默认不打包 mdd")
            dir_data = None
        elif not os.listdir(dir_data):
            print(Fore.MAGENTA + "WARN: " + Fore.RESET + f"文件夹 {dir_data} 为空, 已默认不打包 mdd")
            dir_data = None
        # Pick the raw info file: an html file named after the txt wins over
        # a plain info.html, so the scan only stops on the former.
        file_info_raw = None
        # os.path.split() yields '' for a bare file name and os.listdir('')
        # raises FileNotFoundError, so fall back to the current directory.
        for fname in os.listdir(dir_curr or os.curdir):
            if fname == 'info.html':
                file_info_raw = os.path.join(dir_curr, fname)
            elif fname.endswith('.html') and fname.startswith(stem):
                file_info_raw = os.path.join(dir_curr, fname)
                break
        file_dict_info = self.func.generate_info_html(stem, file_info_raw, None)
        # Pack
        print('\n------------------\n开始打包……\n')
        if self.utils.pack_to_mdict(file_final_txt, file_dict_info, dir_data, dir_curr):
            print(Fore.GREEN + "\n打包完毕。" + Fore.RESET)

    def _opt_pack_dir_to_mdd(self):
        """Option 3: pack a resource folder into an mdd file."""
        # Drop surrounding quotes/backslashes and every trailing path separator.
        dir_data = input("请输入要打包的资料文件夹路径: ").strip('"\\').rstrip('/\\')
        print('\n------------------\n开始打包……\n')
        if self.utils.pack_to_mdd(dir_data, None):
            print(Fore.GREEN + "\n打包完毕。" + Fore.RESET)

    def _opt_toc_all_to_index_all(self):
        """Option 11: generate index_all.txt (same folder) from toc_all.txt."""
        file_toc_all = input("请输入 toc_all.txt 的文件路径: ").strip('"')
        file_index_all = os.path.join(os.path.split(file_toc_all)[0], 'index_all.txt')
        if self.func.toc_all_to_index(file_toc_all, file_index_all):
            print(Fore.GREEN + "\n处理完成, 生成在同目录下" + Fore.RESET)
        else:
            print(Fore.RED + "\n文件检查不通过, 请确保文件准备无误再执行程序" + Fore.RESET)

    def _opt_merge_toc_and_index(self):
        """Option 12: merge toc.txt and index.txt into index_all.txt (beside index.txt)."""
        file_toc = input("(1) 请输入 toc.txt 的文件路径: ").strip('"')
        file_index = input("(2) 请输入 index.txt 的文件路径: ").strip('"')
        file_index_all = os.path.join(os.path.split(file_index)[0], 'index_all.txt')
        self.func.merge_to_index_all(file_toc, file_index, file_index_all)

    def _opt_build_dictionary(self):
        """Option 20: load build.toml (given directly or via its folder) and build."""
        p = self._ask_path("请输入原材料文件夹路径或 build.toml 文件路径: ")
        if os.path.split(p)[1] == 'build.toml':
            if self.settings.load_build_toml(p, False, False):
                self._build_mdict()
        elif os.path.isdir(p):
            file_toml = os.path.join(p, 'build.toml')
            if not os.path.isfile(file_toml):
                print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 build.toml 文件")
            elif self.settings.load_build_toml(file_toml, False, True):
                self._build_mdict()
        else:
            print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误")

    def _opt_restore_dictionary(self):
        """Option 30: restore raw materials from an mdx/mdd file or a dictionary folder."""
        p = self._ask_path("请输入词典的文件夹或 mdx/mdd 文件路径: ")
        ext = os.path.splitext(p)[1]
        if os.path.isfile(p) and ext == '.mdx':
            self._restore_raw(p, False)
        elif os.path.isfile(p) and ext == '.mdd':
            # An mdd is restored through the mdx that shares its base name.
            xfile = p[:-1] + 'x'
            if os.path.isfile(xfile):
                self._restore_raw(xfile, False)
            else:
                # Previously this case ended silently; tell the user why.
                print(Fore.RED + "ERROR: " + Fore.RESET + "未找到与该 mdd 同名的 mdx 文件")
        elif os.path.isdir(p):
            # Only the first mdx found in the folder is restored.
            for m in os.listdir(p):
                if m.endswith('.mdx'):
                    self._restore_raw(os.path.join(p, m), True)
                    break
            else:
                print(Fore.RED + "ERROR: " + Fore.RESET + "文件夹内未找到 mdx 文件")
        else:
            print(Fore.RED + "ERROR: " + Fore.RESET + "路径输入有误")

    def _opt_index_all_to_toc_all(self):
        """Option 32: restore toc_all.txt (same folder) from index_all.txt."""
        file_index_all = input("请输入 index_all.txt 的文件路径: ").strip('"')
        file_toc_all = os.path.join(os.path.split(file_index_all)[0], 'toc_all.txt')
        if self.func.index_to_toc(file_index_all, file_toc_all):
            print(Fore.GREEN + "\n处理完成, 生成在同目录下" + Fore.RESET)
        else:
            print(Fore.RED + "\n文件检查不通过, 请确保所有词目都有对应页码" + Fore.RESET)

    def _opt_pdf_to_images(self, render):
        """Options 41/42: extract (``render=False``) or render (``render=True``) PDF images.

        The output folder sits beside the PDF and is named after it without
        the extension.
        """
        p = self._ask_path("请输入 PDF 文件路径: ")
        if not (os.path.isfile(p) and p.lower().endswith('.pdf')):
            print(Fore.RED + "\n输入的路径有误" + Fore.RESET)
            return
        dir_pdf, fname = os.path.split(p)
        # splitext() removes only the final extension, so "a.b.pdf" maps to
        # "a.b" instead of being cut down to "a".
        out_dir = os.path.join(dir_pdf, os.path.splitext(fname)[0])
        if not render:
            self.utils.extract_pdf_to_imgs_fitz(p, out_dir)
            return
        dpi = input("请输入要生成图片的 DPI(回车则默认300): ")
        if re.match(r'^\d+$', dpi):
            self.utils.convert_pdf_to_imgs_fitz(p, out_dir, int(dpi))
        else:
            # Empty or non-numeric answer: let the callee use its default DPI.
            self.utils.convert_pdf_to_imgs_fitz(p, out_dir)

    def _build_mdict(self):
        """Build the dictionary for the template selected in ``settings.templ_choice``.

        Templates A and B (image dictionaries) return their own image folder
        from ``make_source_file``; templates C and D (text dictionaries) use
        the data folder inside the input directory when it exists and is not
        empty. An unrecognised template choice does nothing.
        """
        # (accepted choices, template class, settings attribute holding the
        #  css template file name, whether make_source_file returns an image dir)
        templates = (
            (('a', 'A'), ImgDictAtmpl, 'css_atmpl', True),
            (('b', 'B'), ImgDictBtmpl, 'css_btmpl', True),
            (('c', 'C'), TextDictCtmpl, 'css_ctmpl', False),
            (('d', 'D'), TextDictDtmpl, 'css_dtmpl', False),
        )
        done_flg = False
        for choices, templ_cls, css_attr, has_img_dir in templates:
            if self.settings.templ_choice not in choices:
                continue
            # Generate the txt source text
            result = templ_cls(self).make_source_file()
            if has_img_dir:
                proc_flg, file_final_txt, dir_data, file_dict_info = result
            else:
                proc_flg, file_final_txt, file_dict_info = result
            if proc_flg:
                self._prepare_output_dir(getattr(self.settings, css_attr))
                if not has_img_dir:
                    dir_data = self._input_data_dir()
                done_flg = self.utils.pack_to_mdict(
                    file_final_txt, file_dict_info, dir_data, self.settings.dir_output)
            break
        if done_flg:
            print("\n打包完毕。" + Fore.GREEN + "\n\n恭喜, 词典已生成!" + Fore.RESET)

    def _prepare_output_dir(self, css_tmpl_name):
        """Create the output folder, copy the template css into it and announce packing."""
        os.makedirs(self.settings.dir_output, exist_ok=True)
        file_css_tmpl = os.path.join(self.settings.dir_lib, css_tmpl_name)
        file_css = os.path.join(self.settings.dir_output, self.settings.fname_css)
        shutil.copy(file_css_tmpl, file_css)
        print('\n------------------\n开始打包……\n')

    def _input_data_dir(self):
        """Return the input data folder, or None when it is missing or empty."""
        dir_data = os.path.join(self.settings.dir_input, self.settings.dname_data)
        if not os.path.exists(dir_data) or not os.listdir(dir_data):
            return None
        return dir_data

    def _restore_raw(self, xfile, outside_flg):
        """ Restore a dictionary to raw materials """
        # 1. Prepare parameters
        extract_flg = False
        dict_name = None
        templ_choice = None
        dir_input, fname = os.path.split(xfile)
        # 2. Analyse the mdx file
        tmp_restore = os.path.join(self.settings.dir_output_tmp, 'restore')
        if not os.path.exists(tmp_restore):
            os.makedirs(tmp_restore)
        tmp_xfile = os.path.join(tmp_restore, fname)
        tmp_xdir = os.path.splitext(tmp_xfile)[0]
        if os.path.exists(tmp_xdir):
            shutil.rmtree(tmp_xdir)
        shutil.copy(xfile, tmp_xfile)
        if self.utils.export_mdx(tmp_xfile):
            tmp_final_txt = os.path.join(tmp_xdir, fname.split('.')[0]+'.txt')
            # Analyse the info file to decide whether restoring is supported
            for f in os.listdir(tmp_xdir):
                fp = os.path.join(tmp_xdir, f)
                text = ''
                if fp.endswith('.info.html'):
                    with open(fp, 'r', encoding='utf-8') as fr:
                        # NOTE(review): the visible source is cut off inside the
                        # next statement, which begins with
                        #     pat = re.compile(r'
                        # Everything from there to the end of this method is
                        # missing here and must be taken from the upstream file;
                        # the placeholder below only keeps this block parseable.
                        pass
' 665 | i = 0 666 | for item in dct["children"]: 667 | i += 1 668 | if i == 1: 669 | html += f'{item}' 670 | else: 671 | html += f'/{item}' 672 | html += '
{mth.group(2)}
{dct["body"]}