├── LICENSE
├── README.md
└── write_markup.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <https://unlicense.org>
25 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # BookxNote-Tools
 2 | 
 3 | BookxNote可以看作是win端上的MarginNote，是一款小巧轻便的PDF电子书阅读器。它不仅可以为用户提供多种阅读模式，而且还可以自动记忆上次阅读的页面位置，从最近阅读的文档中打开时会自动跳转到上次位置，有助于学习与思考，让读者的大脑更高效地记忆，界面清爽、干净，便于专注阅读。
 4 | 
 5 | 我的主要需求是读文献，在把BookxNote与知之阅读和liquidText（免费版）比较后倾向于使用BookxNote，因为感觉顺手一些，而且速度很快（在我的电脑上，liquidText的启动速度是硬伤）。
 6 | 但BookxNote对我一个非常致命的问题就是标注没法保存回pdf上，我能理解用这类软件精读后的pdf就是药渣，但在存储不值钱的条件下保留一份原文件并没有什么压力。而且文献这种东西还是很有可能要重新再读的……有个上次阅读的记录会好一些。
 7 | BookxNote的同步也是个问题，对于不在收集篮里的文件都是使用绝对路径定位的，这没法和zotero等文献管理工具联合使用。
 8 | 而且标注不能导出为markdown，（相对）很难和joplin、obsidian之类的软件联合使用。
 9 | 
10 | 好在作者是用json保存的标注，我能写个外置的脚本满足这些需求。
11 | 
12 | 我目前的使用方式：
13 | 
14 |     1. 在zotero里直接用bookxnote打开pdf
15 |     2. 如果需要精读就将pdf保存至笔记文件夹
16 |     3. 阅读完运行写入pdf的脚本
17 |     4. 脚本从笔记文件夹中找到pdf和标注 写在一起后保存在单独的output文件夹里
18 |     5. 使用zotfile的源文件夹功能重新获取到标注过的pdf 再利用重复项合并之类的方式合在一起
19 |     6. 标注导出成html后用pandoc之类的东西再转成markdown 然后丢进joplin或者obsidian
20 | 
21 | 目前完成了写入pdf的脚本 还少个转markdown以及自动删除output文件夹里已有的pdf的脚本
22 | 
23 | ---
24 | 
25 | ## 需求
26 | 
27 | - Python3
28 | - PyMuPDF = 1.18.x
29 | - BookxNote Pro = V2.0.0.1026
30 | 
31 | ---
32 | 
33 | 别的版本的BookxNote我就不知道能不能用了 
34 | release里有个编译了的脚本 丢在笔记数据目录里就能用
35 | 
36 | TODO很多 只写了我常用的一些标注（下划线、高亮、文本框、方形与圆形） 直线标注有些问题（作者似乎把多条直线写在一起了） 而且颜色可能有些问题 线宽也没法调 Bug可能也有不少
37 | 不过对我来说够用了 暂时不打算折腾了 欢迎大佬push
38 | 


--------------------------------------------------------------------------------
/write_markup.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import fitz
  4 | from glob import glob
  5 | 
# This script is expected to live in the BookxNote note-data directory.
# note_path is the folder scanned for per-book markups.json files;
# dst_pdf_path is the folder the annotated PDFs are saved into.
note_path = './notebooks'
dst_pdf_path = './output'
  9 | 
 10 | 
 11 | def path_join(*paths):
 12 |     path = os.path.join(*paths)
 13 |     path = os.path.normcase(path)
 14 |     return path
 15 | 
 16 | 
 17 | def Hex_to_RGB(hex):
 18 |     # 作者的颜色类似于ffffed99 前两位的ff好像没用
 19 |     r = int(hex[2:4], 16) / 255
 20 |     g = int(hex[4:6], 16) / 25
 21 |     b = int(hex[6:8], 16) / 255
 22 |     return (r, g, b)
 23 | 
 24 | 
 25 | def update_pdf(markup_path, update_log):
 26 |     with open(markup_path, 'r', encoding='UTF-8') as f:
 27 |         markups = json.loads(f.read())
 28 |     pdf_file = markups['title'] + '.pdf'
 29 |     last_update = sorted([x['date'] for x in markups['markups']])[-1]
 30 | 
 31 |     if pdf_file not in update_log or last_update > update_log[pdf_file]:
 32 |         doc = fitz.open(
 33 |             os.path.join(os.path.dirname(markup_path), 'resources', pdf_file))
 34 |         for markup in markups['markups']:
 35 |             annot = None
 36 |             page = doc[markup['page']]
 37 |             # 高亮和下划线
 38 |             if markup['type'] == 5:
 39 |                 stext = markup['textblocks'][0]['text']
 40 |                 rl = page.searchFor(stext, quads=True)
 41 |                 while len(rl) == 0 and len(stext) >= 0:
 42 |                     stext = stext[:-1]
 43 |                     rl = page.searchFor(stext, quads=True)
 44 |                 # TODO:多行的情况每行新建一条线
 45 |                 if len(rl) > 0:
 46 |                     for r in rl:
 47 |                         if 'underline' in markup:
 48 |                             annot = page.addUnderlineAnnot(r)
 49 |                             annot.setColors(
 50 |                                 stroke=Hex_to_RGB(markup['linecolor']))
 51 |                             annot.set_border(width=markup['linewidth'])
 52 |                         else:
 53 |                             annot = page.addHighlightAnnot(r)
 54 |                             annot.setColors(
 55 |                                 stroke=Hex_to_RGB(markup['fillcolor']))
 56 |                         annot.update()
 57 |             # 打字机和文本框
 58 |             # TODO: 文本框的箭头
 59 |             elif markup['type'] == 9 or markup['type'] == 11:
 60 |                 annot = page.addFreetextAnnot(markup['rect'],
 61 |                                               markup['originaltext'],
 62 |                                               text_color=Hex_to_RGB(
 63 |                                                   markup['linecolor']))
 64 |                 annot.setColors(stroke=Hex_to_RGB(markup['linecolor']))
 65 |                 annot.update()
 66 |             # 方形
 67 |             elif markup['type'] == 2:
 68 |                 # TODO: 线宽无法设置
 69 |                 annot = page.addRectAnnot(markup['rect'])
 70 |                 annot.setColors(stroke=Hex_to_RGB(markup['linecolor']))
 71 |                 annot.update()
 72 |             # 圆形
 73 |             elif markup['type'] == 3:
 74 |                 # TODO: 线宽无法设置
 75 |                 annot = page.addCircleAnnot(markup['rect'])
 76 |                 annot.setColors(stroke=Hex_to_RGB(markup['linecolor']))
 77 |                 annot.update()
 78 |             # 直线
 79 |             elif markup['type'] == 1:
 80 |                 # TODO: 线宽无法设置
 81 |                 assert len(markup["rect"]) % 4 == 0
 82 |                 for i in range(len(markup["rect"]) // 4):
 83 |                     p1 = (markup["rect"][i * 2 + 0], markup["rect"][i * 2 + 1])
 84 |                     p2 = (markup["rect"][i * 2 + 2], markup["rect"][i * 2 + 3])
 85 |                     annot = page.addLineAnnot(p1, p2)
 86 |                     annot.setColors(stroke=Hex_to_RGB(markup['linecolor']))
 87 |                     annot.set_opacity(0.7)
 88 |                     annot.update()
 89 |             # 其他都没做
 90 |             else:
 91 |                 print('Unhandled annotation type:{}'.format(markup['type']))
 92 |             if 'content' in markup and annot is not None:
 93 |                 # 下划线、高亮和直线的批注都在最后一个里了
 94 |                 info = annot.info
 95 |                 info["content"] = markup['content']
 96 |                 annot.set_info(info)
 97 |                 annot.update()
 98 |         doc.save(os.path.join(dst_pdf_path, pdf_file), deflate=True)
 99 | 
100 |     return pdf_file, last_update
101 | 
102 | 
if __name__ == "__main__":
    # Make sure the output folder exists before any PDF is saved into it.
    if not os.path.exists(dst_pdf_path):
        os.makedirs(dst_pdf_path)
    # A log file is kept inside the notebooks directory so that only PDFs
    # whose markups changed since the last run are processed again.
    log_path = path_join(note_path, 'pdf_log.json')
    if os.path.exists(log_path):
        with open(log_path, 'r') as f:
            old_update_log = json.load(f)
    else:
        old_update_log = {}
    new_update_log = {}

    notes = glob(path_join(note_path, '*', 'markups.json'))
    if notes:
        # Count from 1 so the progress output ends at N/N instead of N-1/N.
        for idx, markup_path in enumerate(notes, 1):
            print('{}/{}:\t {}'.format(
                idx, len(notes),
                os.path.basename(os.path.dirname(markup_path))))
            try:
                pdf_file, last_update = update_pdf(markup_path, old_update_log)
            except Exception as e:
                # Best effort: report the failure and continue with the next
                # notebook. The failed PDF is left out of the new log.
                print(e)
            else:
                new_update_log[pdf_file] = last_update
        # The log only records PDFs found in the notebooks directory
        # during this run.
        with open(log_path, 'w') as f:
            json.dump(new_update_log, f)
    # "pause" is a Windows shell built-in; skip it on other platforms.
    if os.name == 'nt':
        os.system("pause")
129 | 


--------------------------------------------------------------------------------