├── LICENSE ├── README.md └── write_markup.py /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to <https://unlicense.org> 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BookxNote-Tools 2 | 3 | BookxNote可以看作是win端上的MarginNote,是一款小巧轻便PDF电子书阅读器。它不仅可以为用户提供多种阅读模式,而且还可以自动记忆上次阅读的页面位置,在最近阅读文档打开,自动跳转到上次位置,有助于学习与思考,让读者的大脑更高效的记忆,界面清爽、干净、专注阅读。 4 | 5 | 我的主要需求是读文献,在把BookxNote与知之阅读和liquidText(免费版)比较后倾向于使用BookxNote,因为感觉顺手一些,而且速度很快(我电脑上liquidText的启动速度非常硬伤)。 6 | 但BookxNote对我一个非常致命的问题就是标注没法保存回pdf上,我能理解用这类软件精读后的pdf就是药渣,但在存储不值钱的条件下保留一份原文件并没有什么压力。而且文献这种东西还是很有可能要重新再读的……有个上次阅读的记录会好一些。 7 | BookxNote的同步也是个问题,对于不在收集篮里的文件都是使用绝对路径定位的,这没法和zotero等文献管理工具联合使用。 8 | 而且标注不能导出为markdown,(相对)很难和joplin、obsidian之类的软件联合使用。 9 | 10 | 好在作者是用json保存的标注,我能写个外置的脚本满足这些需求。 11 | 12 | 我目前的使用方式: 13 | 14 | 1. 在zotero里直接用bookxnote打开pdf 15 | 2. 如果需要精读就将pdf保存至笔记文件夹 16 | 3. 阅读完运行写入pdf的脚本 17 | 4. 脚本从笔记文件夹中找到pdf和标注 写在一起后保存在单独的output文件夹里 18 | 5. 使用zotfile的源文件夹功能重新获取到标注过的pdf 再利用重复项合并之类的方式合在一起 19 | 6. 
"""Write BookxNote markups back into the PDFs they annotate.

The script is meant to sit in the BookxNote note-data directory.  For every
``notebooks/*/markups.json`` it opens the PDF stored in that notebook's
``resources`` folder, adds one PDF annotation per markup and saves the result
into ``output``.  A ``pdf_log.json`` kept in ``notebooks`` remembers the
newest markup date per PDF so that unchanged notebooks are skipped.

Notes carried over from the project README:

* Workflow step 6: export the markups to HTML and convert them to Markdown
  (e.g. with pandoc) for Joplin / Obsidian.  A Markdown exporter and a script
  that cleans already-merged PDFs out of ``output`` are not written yet.
* Requirements: Python 3, PyMuPDF (the README pins "1.1.8"), BookxNote Pro
  V2.0.0.1026.  Other BookxNote versions are untested.
* Only underline, highlight, text box, rectangle and circle markups are
  handled well.  Line markups, colours and line widths are known to be
  imperfect.
"""
import json
import os
from glob import glob

try:
    import fitz  # PyMuPDF
except ImportError:
    # The script finishes with a console pause, so a missing dependency is
    # reported from main() instead of killing the process at import time.
    fitz = None

# The script lives in the note-data directory; annotated PDFs go to output.
note_path = './notebooks'
dst_pdf_path = './output'


def path_join(*paths):
    """Join *paths* and normalise the result with ``os.path.normcase``."""
    return os.path.normcase(os.path.join(*paths))


def Hex_to_RGB(hex):
    """Convert a BookxNote colour string into an ``(r, g, b)`` float tuple.

    Colours look like ``'ffffed99'``.  The first two hex digits are skipped
    (the original author notes they appear to be unused); the remaining three
    pairs are each scaled from 0..255 to 0..1.
    """
    # Every channel is divided by 255; the original divided green by 25,
    # which produced values far above 1.
    return tuple(int(hex[start:start + 2], 16) / 255 for start in (2, 4, 6))


def _find_text_quads(page, text):
    """Return the search hits for the longest prefix of *text* found on *page*.

    The text is shortened one character at a time until the page search
    returns something.  An empty list is returned once nothing is left, so
    the loop always terminates.
    """
    while text:
        quads = page.searchFor(text, quads=True)
        if quads:
            return quads
        text = text[:-1]
    return []


def _add_text_markup(page, markup):
    """Add underline or highlight annotations; return the last one or None."""
    annot = None
    # TODO: for multi-line selections create one annotation per line.
    for quad in _find_text_quads(page, markup['textblocks'][0]['text']):
        if 'underline' in markup:
            annot = page.addUnderlineAnnot(quad)
            annot.setColors(stroke=Hex_to_RGB(markup['linecolor']))
            annot.set_border(width=markup['linewidth'])
        else:
            annot = page.addHighlightAnnot(quad)
            annot.setColors(stroke=Hex_to_RGB(markup['fillcolor']))
        annot.update()
    return annot


def _add_line_markup(page, markup):
    """Add one line annotation per coordinate group; return the last one."""
    coords = markup["rect"]
    if len(coords) % 4 != 0:
        raise ValueError(
            'Line markup has {} coordinates, expected a multiple of 4'.format(
                len(coords)))
    annot = None
    # Coordinates are read as consecutive (x1, y1, x2, y2) groups, matching
    # the multiple-of-4 check above.
    # NOTE(review): confirm against BookxNote's actual line format.
    # TODO: line width cannot be set.
    for start in range(0, len(coords), 4):
        x1, y1, x2, y2 = coords[start:start + 4]
        annot = page.addLineAnnot((x1, y1), (x2, y2))
        annot.setColors(stroke=Hex_to_RGB(markup['linecolor']))
        annot.set_opacity(0.7)
        annot.update()
    return annot


def _add_markup(page, markup):
    """Translate one BookxNote markup into PDF annotation(s) on *page*."""
    kind = markup['type']
    annot = None
    if kind == 5:
        # Highlight and underline.
        annot = _add_text_markup(page, markup)
    elif kind in (9, 11):
        # Typewriter text and text box.  TODO: text box arrows.
        stroke = Hex_to_RGB(markup['linecolor'])
        annot = page.addFreetextAnnot(markup['rect'],
                                      markup['originaltext'],
                                      text_color=stroke)
        annot.setColors(stroke=stroke)
        annot.update()
    elif kind == 2:
        # Rectangle.  TODO: line width cannot be set.
        annot = page.addRectAnnot(markup['rect'])
        annot.setColors(stroke=Hex_to_RGB(markup['linecolor']))
        annot.update()
    elif kind == 3:
        # Circle.  TODO: line width cannot be set.
        annot = page.addCircleAnnot(markup['rect'])
        annot.setColors(stroke=Hex_to_RGB(markup['linecolor']))
        annot.update()
    elif kind == 1:
        # Straight line(s).
        annot = _add_line_markup(page, markup)
    else:
        # Every other markup type is not implemented.
        print('Unhandled annotation type:{}'.format(markup['type']))

    if 'content' in markup and annot is not None:
        # For underline, highlight and line markups the note text ends up
        # on the last annotation that was created.
        info = annot.info
        info["content"] = markup['content']
        annot.set_info(info)
        annot.update()


def update_pdf(markup_path, update_log):
    """Write the markups of one notebook into its PDF if they are newer.

    Args:
        markup_path: Path of a notebook's ``markups.json``.
        update_log: Mapping of PDF file name to the newest markup date that
            was already written on a previous run.

    Returns:
        ``(pdf_file, last_update)``: the PDF file name and the newest markup
        date found in the notebook.

    Raises:
        ValueError: The notebook has no markups, or a line markup has a
            malformed coordinate list.
        ImportError: PyMuPDF is needed but not installed.
    """
    with open(markup_path, 'r', encoding='UTF-8') as f:
        markups = json.load(f)
    pdf_file = markups['title'] + '.pdf'
    dates = [entry['date'] for entry in markups['markups']]
    if not dates:
        raise ValueError('No markups found in {}'.format(markup_path))
    last_update = max(dates)

    # Already written and nothing newer: leave the output PDF alone.
    if pdf_file in update_log and not last_update > update_log[pdf_file]:
        return pdf_file, last_update

    if fitz is None:
        raise ImportError('PyMuPDF (fitz) is required to write annotations')

    source_pdf = os.path.join(
        os.path.dirname(markup_path), 'resources', pdf_file)
    doc = fitz.open(source_pdf)
    try:
        for markup in markups['markups']:
            _add_markup(doc[markup['page']], markup)
        doc.save(os.path.join(dst_pdf_path, pdf_file), deflate=True)
    finally:
        doc.close()

    return pdf_file, last_update


def _pause():
    """Keep the console window open; ``pause`` only exists on Windows."""
    if os.name == 'nt':
        os.system("pause")


def main():
    """Process every notebook under ``note_path`` and refresh the log."""
    if fitz is None:
        print('PyMuPDF (fitz) is not installed; no PDF was written.')
        _pause()
        return

    os.makedirs(dst_pdf_path, exist_ok=True)

    # A log is kept in the notebooks directory so that only PDFs whose
    # markups changed are processed again.
    log_path = path_join(note_path, 'pdf_log.json')
    if os.path.exists(log_path):
        with open(log_path, 'r') as f:
            old_update_log = json.load(f)
    else:
        old_update_log = {}
    new_update_log = {}

    notes = glob(path_join(note_path, '*', 'markups.json'))
    for idx, markup_path in enumerate(notes, 1):
        print('{}/{}:\t {}'.format(
            idx, len(notes),
            os.path.basename(os.path.dirname(markup_path))))
        try:
            pdf_file, last_update = update_pdf(markup_path, old_update_log)
        except Exception as e:
            # Best effort: report the failure and continue with the next
            # notebook; a failed PDF is left out of the log and retried.
            print(e)
        else:
            new_update_log[pdf_file] = last_update

    if notes:
        # The log only records PDFs present in the notebooks directory
        # during this run.
        with open(log_path, 'w') as f:
            json.dump(new_update_log, f)
    _pause()


if __name__ == "__main__":
    main()