├── settings
    ├── __init__.py
    ├── settings.py.example
    └── settings.py
├── requirements.txt
├── .idea
    └── .gitignore
├── gunicorn_config.py
├── Dockerfile
├── deploy.sh
├── logger.py
├── README.md
├── .gitignore
├── templates
    └── wereadtonotion.html
├── app.py
├── weread_2_notion.py
├── weread
    └── weread.py
└── notion
    └── notion.py


/settings/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests~=2.26.0
2 | notion-client==1.0.0
3 | retrying==1.3.4
4 | Flask==2.3.2
5 | pywebio==1.8.2
6 | gunicorn==20.1.0
7 | gevent==22.10.2


--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # 默认忽略的文件
2 | /shelf/
3 | /workspace.xml
4 | # 基于编辑器的 HTTP 客户端请求
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 | 


--------------------------------------------------------------------------------
/gunicorn_config.py:
--------------------------------------------------------------------------------
# Gunicorn settings, loaded through `gunicorn --config gunicorn_config.py`.
bind = '0.0.0.0:80'  # IP address and port to bind to
# bind = '127.0.0.1:6000'  # IP address and port to bind to (local alternative, disabled)
workers = 1  # number of worker processes
worker_class = 'gevent'  # use gevent as the worker class
timeout = 12000  # timeout, in seconds
accesslog = '/var/log/weread'  # access log file path; '-' would write to the console instead
# NOTE(review): errorlog uses the same path as accesslog — confirm both streams are meant to share one file.
errorlog = '/var/log/weread'  # error log file path; '-' would write to the console instead
8 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Build the image on top of the official Python image
FROM python:3.9-slim

# Set the working directory
WORKDIR /app

# Copy requirements.txt on its own so Docker's layer cache can skip reinstalling dependencies
COPY ./requirements.txt /app/requirements.txt
RUN pip install -r requirements.txt


# Copy everything from the build context into /app inside the image
COPY . /app
# Port exposed by the container
EXPOSE 80

# Run the application
CMD ["gunicorn", "--config", "gunicorn_config.py", "app:app"]
19 | 


--------------------------------------------------------------------------------
/settings/settings.py.example:
--------------------------------------------------------------------------------
# Template for settings/settings.py: fill in the three values below.

# Notion integration token.
NOTION_TOKEN = ''
# Cookie string copied from a logged-in weread.qq.com browser session.
WEREAD_COOKIE = ''
# ID of the Notion database that receives the synced books.
DATABASE_ID = ''

# Book titles that are skipped during sync (compared against the exact title).
BOOK_BLACKLIST = [
    '从红月开始',
    '修真聊天群（聊天群的日常生活）',
    '第一序列',
    '我真没想重生啊',
    '我有一座恐怖屋',
    '大王饶命',
    '我师兄实在太稳健了',
    '死而替生',
    '中医许阳',
    '这游戏也太真实了',
    '家父汉高祖',
    '吕布的人生模拟器',
    '学霸的黑科技系统',
    '黎明之剑',
    '我的一天有48小时',
    '明克街13号',
]
23 | 


--------------------------------------------------------------------------------
/settings/settings.py:
--------------------------------------------------------------------------------
# Local configuration read by weread_2_notion.py when run as a script.

# Notion integration token.
NOTION_TOKEN = 'xxxxxxxxxxx'
# Cookie string copied from a logged-in weread.qq.com browser session.
# NOTE(review): this looks like a real session cookie committed to the repository —
# confirm, revoke it if so, and keep this file out of version control.
WEREAD_COOKIE = 'wr_fp=2533447432; wr_gid=295032143; wr_vid=23683664; wr_pf=0; wr_rt=web%40QxPpLky5s9q8MDFU8TK_AL; wr_localvid=fb3325d071696250fb37ae2; wr_name=%E8%AF%B6%E9%B8%AD; wr_avatar=https%3A%2F%2Fthirdwx.qlogo.cn%2Fmmopen%2Fvi_32%2FXpfWjDr8uAYiagxaSibFmnMh2LUJXTQrjJ8z2ve6X8J3w2CI79YSvQwic1icyHUoHVgoccckuoLxlt1cQOD87tZLfQ%2F132; wr_gender=1; RK=Ou2xAkMpZ9; ptcz=6d556d39aaf44f088fec74b97d2942db2ea837b96f5c8c603ddacec2b5e1fd45; uin=o1307317886; wr_skey=t1Gw1zJp'
# ID of the Notion database that receives the synced books.
DATABASE_ID = 'd92bb4b8434745baa2061caf67d6ef7a'

# Book titles that are skipped during sync (compared against the exact title).
BOOK_BLACKLIST = [
    '从红月开始',
    '修真聊天群（聊天群的日常生活）',
    '第一序列',
    '我真没想重生啊',
    '我有一座恐怖屋',
    '大王饶命',
    '我师兄实在太稳健了',
    '死而替生',
    '中医许阳',
    '这游戏也太真实了',
    '家父汉高祖',
    '吕布的人生模拟器',
    '学霸的黑科技系统',
    '黎明之剑',
    '我的一天有48小时',
    '明克街13号',
]
23 | 


--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # 获取应用版本参数
 4 | version=$1
 5 | 
 6 | # 默认应用版本
 7 | default_version="latest"
 8 | 
 9 | # 如果没有指定版本参数，则使用默认版本
10 | if [ -z "$version" ]; then
11 |   version=$default_version
12 | fi
13 | 
14 | # 容器名称
15 | container_name="weread-to-notion"
16 | 
17 | # 检查是否存在同名容器
18 | existing_container=$(docker ps -a --filter "name=$container_name" --format "{{.Names}}")
19 | 
20 | if [ ! -z "$existing_container" ]; then
21 |   # 停止并删除同名容器
22 |   echo "Stopping and removing existing container: $existing_container"
23 |   docker stop "$existing_container"
24 |   docker rm "$existing_container"
25 | fi
26 | 
27 | echo "################ Building Docker image: $container_name:$version ################"
28 | # 构建Docker镜像
29 | docker build -t "$container_name:$version" .
30 | 
31 | echo "################ Starting Docker container: $container_name:$version ############"
32 | # 启动容器
33 | docker run -d -p 7000:80 --name "$container_name" "$container_name:$version"
34 | 


--------------------------------------------------------------------------------
/logger.py:
--------------------------------------------------------------------------------
 1 | import logging.handlers
 2 | import os
 3 | 
 4 | _logger = logging.getLogger('weread_to_notion')
 5 | _logger.setLevel(logging.DEBUG)
 6 | 
 7 | _cons_rht = logging.StreamHandler()
 8 | 
 9 | # _log_base_dir = f'/var/log'
10 | _log_base_dir = '.'
11 | 
12 | # _pid = os.getpid()
13 | log_dir = f'{_log_base_dir}/logs'
14 | if not os.path.exists(log_dir):
15 |     os.makedirs(log_dir)
16 | log_file_name = 'weread_to_notion'
17 | 
18 | _file_rht = logging.handlers.RotatingFileHandler(f"{log_dir}/{log_file_name}.log",
19 |                                                  maxBytes=1024 * 1024 * 1024,
20 |                                                  backupCount=100)
21 | 
22 | _fmt = logging.Formatter("%(asctime)s pid:%(process)d %(filename)s:%(lineno)s %(levelname)s: %(message)s",
23 |                          "%Y-%m-%d %H:%M:%S")
24 | 
25 | _file_rht.setFormatter(_fmt)
26 | _cons_rht.setFormatter(_fmt)
27 | 
28 | _logger.addHandler(_file_rht)
29 | _logger.addHandler(_cons_rht)
30 | 
31 | debug = _logger.debug
32 | info = _logger.info
33 | warning = _logger.warning
34 | error = _logger.error
35 | exception = _logger.exception
36 | critical = _logger.critical
37 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # wereadtonotion
 2 | 
 3 | 微信读书笔记、划线等信息同步到notion数据库
 4 | > 效果如下：
 5 | > ![同步效果](https://markdown-mac-work-1306720256.cos.ap-guangzhou.myqcloud.com/png/AzRZUp.png)
 6 | 
 7 | *注意：不存在划线、书评等信息的书籍不会被获取、导入*
 8 | 
 9 | ## 功能
10 | 
11 | ### 已有功能
12 | 
13 | 1. 导入书籍的划线、评论、笔记等信息到 Notion 数据库
14 |    > 注意：不存在划线、书评等信息的书籍不会被获取、导入
15 | 2. 支持 Web 端部署执行和本机 Python 脚本运行
16 | 
17 | ### TODO
18 | 
19 | - [ ] 支持 Notion 增量读写
20 | - [ ] 支持用户选择书籍获取模式 [存在划线 / 书架书籍 / 全部书籍]
21 | - [ ] 单独导出功能。导出为 markdown 文件
22 | - [ ] 考虑支持自定义 Notion 数据库结构
23 | 
24 | ## 使用方法
25 | 
26 | ### 1. 本地脚本运行
27 | 
28 | 1. 安装依赖 `pip install -r requirements.txt`
29 | 2. 配置 settings/settings.py 文件中的信息
30 |    > 1. 获取 **Notion token**
31 |    > - 打开[此页面](https://www.notion.so/my-integrations)并登录
32 |    > - 点击New integration 输入 name 提交.(如果已有，则点击 view integration)
33 |    > - 点击show，然后copy
34 |    > 2. 从微信读书中获取 cookie
35 |    > - 在浏览器中打开 weread.qq.com 并登录
36 |    > - 打开开发者工具(按 F12)，点击 network(网络)，刷新页面, 点击第一个请求，复制 cookie 的值。
37 |    > 3. 准备 Notion Database ID
38 |    > - 复制[此页面](https://www.notion.so/yayya/a9b3a8dfcc0543559005a263103fc81c)到你的
39 |        Notion 中，点击右上角的分享按钮，将页面分享为公开页面
40 |    >- 点击页面右上角三个点，在 connections 中找到选择你的 connections。第一步中创建的 integration 的 name
41 |    >- 通过 URL 找到你的 Database ID 的值。
42 |       >  > 例如：页面 https://www.notion.so/yayya/d92bb4b8434745baa2061caf67d6ef7a?v=b4a5bfb89e8e44868a473179ee60x851 的
43 |       ID 为d92bb4b8434745baa2061caf67d6ef7a
44 | 4. 运行 `python weread_2_notion.py`
45 | 
46 | ### 2. 网页端部署运行
47 | 
48 | #### docker 部署(推荐)
49 | 
50 | 执行脚本: `bash deploy.sh`
51 | 
52 | #### 直接部署
53 | 
54 | 1. 安装依赖：`pip3 install -r requirements.txt`
55 | 2. 启动 Web 端： `python3 app.py`
56 | 
57 | ## PS
58 | 
59 | - 借鉴了大佬的[项目](https://github.com/malinkang/weread_to_notion)，优化了代码结构、增加了web 端。非常感谢大佬的开源项目
60 | - 配合 NotionNext 构建 Blog [效果](https://yaya.run/weread)非常好
61 | 
62 | ## 免责声明
63 | 
64 | 本工具仅作技术研究之用，请勿用于商业或违法用途，由于使用该工具导致的侵权或其它问题，本工具不承担任何责任！
65 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | 
132 | # venv
133 | .cfg


--------------------------------------------------------------------------------
/templates/wereadtonotion.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |     <title>Weread to Notion</title>
 5 |     <style>
 6 |         .container {
 7 |             max-width: 500px;
 8 |             margin: 0 auto;
 9 |             padding: 20px;
10 |         }
11 | 
12 |         label {
13 |             display: block;
14 |             margin-bottom: 10px;
15 |         }
16 | 
17 |         textarea {
18 |             width: 100%;
19 |             height: 100px;
20 |         }
21 | 
22 |         pre {
23 |             background-color: #f5f5f5;
24 |             padding: 10px;
25 |             margin-top: 20px;
26 |             white-space: pre-wrap;
27 |         }
28 |     </style>
29 | </head>
30 | <body>
31 | <div class="container">
32 |     <h2>Weread to Notion</h2>
33 |     <form id="input-form">
34 |         <label for="notion_token">Notion Token:</label>
35 |         <input type="text" id="notion_token" name="notion_token" required><br><br>
36 | 
37 |         <label for="weread_cookie">微信读书 cookie:</label>
38 |         <input type="text" id="weread_cookie" name="weread_cookie" required><br><br>
39 | 
40 |         <label for="database_id">Notion 数据库ID:</label>
41 |         <input type="text" id="database_id" name="database_id" required><br><br>
42 | 
43 |         <label for="book_blacklist">黑名单(以逗号隔开):</label>
44 |         <textarea id="book_blacklist" name="book_blacklist"
45 |                   placeholder="Separate multiple books with commas"></textarea><br><br>
46 | 
47 |         <button type="submit">Submit</button>
48 |     </form>
49 | 
50 |     <pre id="log-container"></pre>
51 | </div>
52 | 
53 | <script>
54 |     var form = document.getElementById('input-form');
55 |     var logContainer = document.getElementById('log-container');
56 | 
57 |     form.addEventListener('submit', function (event) {
58 |         logContainer.innerText = '开始处理，请稍候，请勿重复提交~';
59 |         event.preventDefault();
60 |         var formData = new FormData(form);
61 | 
62 |         var xhr = new XMLHttpRequest();
63 |         xhr.open('POST', '/wereadtonotion');
64 |         xhr.onreadystatechange = function () {
65 |             if (xhr.readyState === XMLHttpRequest.DONE) {
66 |                 if (xhr.status === 200) {
67 |                     var logData = xhr.responseText;
68 |                     logContainer.innerText = logData;
69 |                 } else {
70 |                     logContainer.innerText = 'Request failed: ' + xhr.status;
71 |                 }
72 |             }
73 |         };
74 |         xhr.send(formData);
75 |     });
76 | </script>
77 | </body>
78 | </html>
79 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | from logger import info, exception
  2 | from weread_2_notion import weread_2_notion
  3 | from pywebio.platform.flask import webio_view
  4 | from pywebio.input import *
  5 | from pywebio.output import *
  6 | 
  7 | from flask import Flask
  8 | 
  9 | app = Flask(__name__)
 10 | 
 11 | 
 12 | def output_help_info(scope=None):
 13 |     put_markdown(""" 
 14 |      ## 微信读书书籍同步到 Notion 数据库
 15 |      **项目地址** ：[weread_2_notion](https://github.com/DuckDuck88/weread2notion)
 16 |      """)
 17 |     put_text()
 18 |     put_collapse('使用说明', [
 19 |         put_markdown('''
 20 |         1. 获取 Notion token
 21 |             - 打开[Notion developers](https://www.notion.so/my-integrations)并登录
 22 |             - 点击New integration 输入 name 提交.(如果已有，则点击 view integration)
 23 |             - 点击show，然后copy
 24 |         2. 从微信读书中获取 cookie
 25 |             - 在浏览器中打开[微信读书官网](weread.qq.com)并登录
 26 |             - 打开开发者工具(按 F12)，点击 network(网络)，刷新页面, 点击第一个请求，复制 cookie 的值。
 27 |                 > ![drULBf](https://markdown-mac-work-1306720256.cos.ap-guangzhou.myqcloud.com/png/drULBf.png)
 28 |         3. 准备 Noiton Database ID
 29 |             - 复制这个[Notion 数据库](https://yayya.notion.site/10d8b5402c924498b44302e4ce75385c?v=05c0d43e80564da290f9fa837170013a&pvs=4)到你的 Notion 中，点击右上角的分享按钮，将页面分享为公开页面
 30 |             - 点击页面右上角三个点，在 connections 中找到选择你的 connections。选择第一步中创建的 integration 的 name
 31 |             - 打开你复制的 Notion 页面，通过该页面 URL 找到你的 Database ID 。
 32 |                 > 例如：页面 https://www.notion.so/xxxx/d92bb4b8434745baa2061caf67d6ef7a?v=b4a5bfb89e8e44868a473179ee60x851 的 ID 为d92bb4b8434745baa2061caf67d6ef7a
 33 |                 > ![bU4zTs](https://markdown-mac-work-1306720256.cos.ap-guangzhou.myqcloud.com/png/bU4zTs.png)
 34 |         4. 把如下信息输入到以下表单中
 35 |     ''', scope=scope)
 36 |     ], open=False)
 37 | 
 38 | 
 39 | def update_configs(name, value):
 40 |     return input_update(value, name='configs')
 41 | 
 42 | 
 43 | def input_config_info(scope=None):
 44 |     with use_scope(scope):
 45 |         datas = input_group('configs', [
 46 |             input("Notion token：", type=PASSWORD, placeholder='输入 notion tikon', name='notion_token', required=True),
 47 |             input("微信读书 cookie：", type=TEXT, placeholder='输入 微信读书 cookies', name='weread_cookie',
 48 |                   required=True),
 49 |             input("Database ID:,", type=TEXT, placeholder='输入 需要同步的 Notion 数据库 ID', name='database_id',
 50 |                   required=True),
 51 |             textarea("黑名单列表：", rows=3, placeholder='输入不同步的书籍黑名单，书名以逗号隔开', name='book_blacklist')
 52 |         ])
 53 |     notion_token = datas['notion_token']
 54 |     weread_cookie = datas['weread_cookie']
 55 |     database_id = datas['database_id']
 56 |     datas['book_blacklist'] = datas['book_blacklist'].replace(' ', '').replace('\n', '').replace('\r', '').split(',')
 57 |     book_blacklist = datas['book_blacklist']
 58 |     return notion_token, weread_cookie, database_id, book_blacklist
 59 | 
 60 | 
 61 | def list_to_str(l):
 62 |     res = ""
 63 |     for i in l:
 64 |         res += f'*《{i}》*  '
 65 |     return '> ' + res if res else '> 无'
 66 | 
 67 | 
 68 | def dict_to_str(d):
 69 |     res = ""
 70 |     for k, v in d.items():
 71 |         res += f'书籍：*《{k}》* 失败原因: {v[0]}, 书籍详情: {v[1]}'
 72 |     return '> ' + res if res else '> 无'
 73 | 
 74 | 
 75 | def weread_to_notion():
 76 |     output_help_info(scope='head')
 77 |     # put_column([
 78 |     #     put_scope('head'),
 79 |     #     put_scope('main'),
 80 |     #     put_row([
 81 |     #         put_scope('left'),
 82 |     #         None,
 83 |     #         put_scope('right')
 84 |     #     ], size='25% 4% 69%'),
 85 |     # ])
 86 |     notion_token, weread_cookie, database_id, book_blacklist = input_config_info(scope='main')
 87 |     put_info('注意: 书籍较多时处理时间较长，当前页面可观察同步进度，关闭页面可以继续同步')
 88 |     try:
 89 |         with put_loading(shape='grow', color='primary'):
 90 |             all_book, handled_book, ignore_book, err_book = weread_2_notion(notion_token,
 91 |                                                                             weread_cookie,
 92 |                                                                             database_id,
 93 |                                                                             book_blacklist)
 94 |     except Exception as e:
 95 |         exception(f'失败:{str(e)}')
 96 |         put_error(f'{str(e)}', closable=False)
 97 |         return put_error(f'请刷新当前页面重新输入正确配置', closable=True)
 98 |     put_info(f'共有{len(all_book)}本书籍，其中{len(ignore_book)}本书籍被忽略，{len(handled_book)}本书籍被同步')
 99 |     put_markdown(f'**成功同步以下书籍**:\n{list_to_str(handled_book)}')
100 |     put_markdown(f'**同步以下书籍失败！**:\n{dict_to_str(err_book)}')
101 |     put_markdown(f'**忽略以下书籍**:\n{list_to_str(ignore_book)}')
102 |     return put_success(
103 |         f'共有{len(all_book)}本书籍，其中{len(ignore_book)}本书籍被忽略，{len(handled_book)}本书籍被同步,{len(err_book)}本书籍同步失败！',
104 |         closable=True)
105 | 
106 | 
# Serve the PyWebIO task at the site root.
app.add_url_rule('/', 'webio_view', webio_view(weread_to_notion),
                 methods=['GET', 'POST'])  # need GET,POST and OPTIONS methods

if __name__ == '__main__':
    import os

    # The server listens on every interface, so the Werkzeug debugger (which
    # lets a client run code on the host) must not be on by default. Set
    # FLASK_DEBUG=1 to opt in during local development.
    app.run(host='0.0.0.0', port=80, debug=os.environ.get('FLASK_DEBUG') == '1')
112 | 


--------------------------------------------------------------------------------
/weread_2_notion.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | 
  3 | from pywebio.output import put_text, put_info
  4 | 
  5 | from logger import info, debug, warning, error, exception
  6 | from notion.notion import NotionClient
  7 | from weread.weread import WeRead
  8 | 
  9 | try:
 10 |     from settings.settings import WEREAD_COOKIE, DATABASE_ID, BOOK_BLACKLIST, NOTION_TOKEN
 11 | except Exception as e:
 12 |     error(f'导入配置文件失败，{e} Web 无伤大雅')
 13 | 
 14 | 
 15 | def weread_2_notion(notion_token=NOTION_TOKEN,
 16 |                     weread_cookie=WEREAD_COOKIE,
 17 |                     database_id=DATABASE_ID,
 18 |                     book_blacklist=BOOK_BLACKLIST,
 19 |                     is_web=True):
 20 |     try:
 21 |         weread_ = WeRead(weread_cookie)
 22 |     except Exception as e:
 23 |         raise Exception(f'微信读书登录失败，{e}')
 24 |     try:
 25 |         notion = NotionClient(notion_token, database_id)
 26 |     except Exception as e:
 27 |         raise Exception(f'notion 登录失败，{e}')
 28 |     # 书籍列表
 29 |     try:
 30 |         books = weread_.get_notebooklist()  # TODO 增加逻辑，获取所有书籍列表。 当前是获取存在划线信息的书籍列表
 31 |     except Exception as e:
 32 |         raise Exception(f'获取书籍列表失败，请检查微信读书 cookies 是否正确 {e.__str__()}')
 33 |     all_book = []
 34 |     handled_book = []
 35 |     ignore_book = []
 36 |     err_book = {}
 37 |     info(f'书籍目录：{books}')
 38 |     if books is not None:
 39 |         for book in books:
 40 |             try:
 41 |                 sort = book["sort"]  # 更新时间
 42 |                 book = book.get("book")
 43 |                 title = book.get("title")
 44 |                 info(f' 正在同步：《{title}》, 当前进度 {len(handled_book) + len(ignore_book)}/{len(books)}')
 45 |                 if is_web:
 46 |                     put_text(f' 正在同步：《{title}》, 当前进度 {len(handled_book) + len(ignore_book)}/{len(books)}')
 47 |                 all_book.append(title)
 48 |                 if title in book_blacklist:
 49 |                     info(f'《{title}》在黑名单中，跳过')
 50 |                     ignore_book.append(title)
 51 |                     continue
 52 |                 # 调试用途代码
 53 |                 # if book.get("title") != '黄金时代':
 54 |                 #     ignore_book.append(title)
 55 |                 #     continue
 56 |                 # print(book['123'])
 57 |                 # print(book)
 58 | 
 59 |                 # 跳过是否更新判断，直接更新，否则给其他人使用会导致 notion 更新时间不一致
 60 |                 # if sort <= notion.get_sort():
 61 |                 #     warning(f'当前图书《{title}》没有更新划线、书评等信息，暂不处理')
 62 |                 #     ignore_book.append(title)
 63 |                 #     continue
 64 |                 handled_book.append(title)
 65 |                 cover = book.get("cover", '没有封面')
 66 |                 bookId = book.get("bookId", '-1')
 67 |                 author = book.get("author", '没有作者信息')
 68 |                 info(f'开始处理《{title}》, bookId={bookId}, sort={sort}')
 69 |                 notion.check(bookId)  # TODO 如果自行在 notion 修改，这里会删除重新插入，规避这个逻辑
 70 |                 chapter = weread_.get_chapter_info(bookId)
 71 |                 bookmark_list = weread_.get_bookmark_list(bookId)
 72 |                 summary, reviews = weread_.get_review_list(bookId)
 73 |                 bookmark_list.extend(reviews)
 74 |                 bookmark_list = sorted(bookmark_list, key=lambda x: (
 75 |                     x.get("chapterUid", 1),
 76 |                     0 if (x.get("range", "") == "" or x.get("range").split("-")[0] == "") else int(
 77 |                         x.get("range").split("-")[0])))
 78 |                 # 获取书籍信息
 79 |                 isbn, rating, intro, category = weread_.get_bookinfo(bookId)
 80 |                 # 获取阅读信息
 81 |                 read_info = weread_.get_read_info(bookId)
 82 | 
 83 |                 children, grandchild = notion.get_children(
 84 |                     chapter, summary, bookmark_list)
 85 |                 block_id = notion.insert_to_notion(bookName=title,
 86 |                                                    bookId=bookId,
 87 |                                                    book_str_id=weread_.calculate_book_str_id(bookId),
 88 |                                                    cover=cover,
 89 |                                                    sort=sort,
 90 |                                                    author=author,
 91 |                                                    isbn=isbn,
 92 |                                                    rating=rating,
 93 |                                                    intro=intro,
 94 |                                                    category=category,
 95 |                                                    read_info=read_info)
 96 |                 results = notion.add_children(block_id, children)
 97 |                 if (len(grandchild) > 0 and results != None):
 98 |                     notion.add_grandchild(grandchild, results)
 99 |                 debug(f'结束处理《{title}》, bookId={bookId}, sort={sort}')
100 |             except Exception as e:
101 |                 exception(f'处理书籍《{title}》失败')
102 |                 err_book[title] = [str(e), book]
103 |                 continue
104 |         return all_book, handled_book, ignore_book, err_book
105 | 
106 | 
if __name__ == '__main__':
    # Script entry point: run one sync with the default arguments (the values
    # imported from settings.settings) and without PyWebIO progress output.
    weread_2_notion(is_web=False)
    # Leftover manual debugging snippet:
    # weread = WeRead(WEREAD_COOKIE)
    # print(weread.session.cookies.get_dict())
    # res = weread.session.get(
    #     'https://i.weread.qq.com/users/info')
    # weread.session.
    # print(weread.get_notebooklist())
115 | 


--------------------------------------------------------------------------------
/weread/weread.py:
--------------------------------------------------------------------------------
  1 | import hashlib
  2 | import re
  3 | from http.cookies import SimpleCookie
  4 | import requests
  5 | from requests.utils import cookiejar_from_dict
  6 | from logger import error
  7 | 
  8 | 
  9 | class WeRead:
 10 |     WEREAD_URL = "https://weread.qq.com/"
 11 |     WEREAD_NOTEBOOKS_URL = "https://i.weread.qq.com/user/notebooks"
 12 |     # WEREAD_NOTEBOOKS_URL = "https://i.weread.qq.com/shelf/friendCommon"
 13 |     WEREAD_BOOKMARKLIST_URL = "https://i.weread.qq.com/book/bookmarklist"
 14 |     WEREAD_CHAPTER_INFO = "https://i.weread.qq.com/book/chapterInfos"
 15 |     WEREAD_READ_INFO_URL = "https://i.weread.qq.com/book/readinfo"
 16 |     WEREAD_REVIEW_LIST_URL = "https://i.weread.qq.com/review/list"
 17 |     WEREAD_BOOK_INFO = "https://i.weread.qq.com/book/info"
 18 | 
 19 |     def __init__(self, weread_cookie):
 20 |         self.session = requests.Session()
 21 |         self.session.cookies = self.parse_cookie_string(weread_cookie)
 22 | 
 23 |     def get_bookmark_list(self, bookId):
 24 |         """获取我的划线"""
 25 |         params = dict(bookId=bookId)
 26 |         r = self.session.get(self.WEREAD_BOOKMARKLIST_URL, params=params)
 27 |         if r.ok:
 28 |             updated = r.json().get("updated")
 29 |             updated = sorted(updated, key=lambda x: (
 30 |                 x.get("chapterUid", 1), int(x.get("range").split("-")[0])))
 31 |             return r.json()["updated"]
 32 |         return None
 33 | 
 34 |     def get_bookinfo(self, bookId):
 35 |         """获取书的详情"""
 36 |         params = dict(bookId=bookId)
 37 |         r = self.session.get(self.WEREAD_BOOK_INFO, params=params)
 38 |         isbn = ""
 39 |         newRating = 0
 40 |         intro = ""
 41 |         category = ""
 42 |         if r.ok:
 43 |             data = r.json()
 44 |             isbn = data.get("isbn", "-1")
 45 |             newRating = data.get("newRating", '-1') / 1000
 46 |             intro = data.get('intro', '本书没有介绍！')
 47 |             category = data.get("category", 'None')
 48 |         return (isbn, newRating, intro, category)
 49 | 
 50 |     def get_review_list(self, bookId):
 51 |         """获取笔记"""
 52 |         params = dict(bookId=bookId, listType=11, mine=1, syncKey=0)
 53 |         r = self.session.get(self.WEREAD_REVIEW_LIST_URL, params=params)
 54 |         reviews = r.json().get("reviews")
 55 |         summary = list(filter(lambda x: x.get("review").get("type") == 4, reviews))
 56 |         reviews = list(filter(lambda x: x.get("review").get("type") == 1, reviews))
 57 |         reviews = list(map(lambda x: x.get("review"), reviews))
 58 |         reviews = list(map(lambda x: {**x, "markText": x.pop("content")}, reviews))
 59 |         return summary, reviews
 60 | 
 61 |     def get_read_info(self, bookId):
 62 |         params = dict(bookId=bookId, readingDetail=1,
 63 |                       readingBookIndex=1, finishedDate=1)
 64 |         r = self.session.get(self.WEREAD_READ_INFO_URL, params=params)
 65 |         if r.ok:
 66 |             return r.json()
 67 |         return None
 68 | 
 69 |     def _transform_id(self, book_id):
 70 |         id_length = len(book_id)
 71 | 
 72 |         if re.match("^\d*$", book_id):
 73 |             ary = []
 74 |             for i in range(0, id_length, 9):
 75 |                 ary.append(format(int(book_id[i:min(i + 9, id_length)]), 'x'))
 76 |             return '3', ary
 77 | 
 78 |         result = ''
 79 |         for i in range(id_length):
 80 |             result += format(ord(book_id[i]), 'x')
 81 |         return '4', [result]
 82 | 
 83 |     def calculate_book_str_id(self, book_id):
 84 |         md5 = hashlib.md5()
 85 |         md5.update(book_id.encode('utf-8'))
 86 |         digest = md5.hexdigest()
 87 |         result = digest[0:3]
 88 |         code, transformed_ids = self._transform_id(book_id)
 89 |         result += code + '2' + digest[-2:]
 90 | 
 91 |         for i in range(len(transformed_ids)):
 92 |             hex_length_str = format(len(transformed_ids[i]), 'x')
 93 |             if len(hex_length_str) == 1:
 94 |                 hex_length_str = '0' + hex_length_str
 95 | 
 96 |             result += hex_length_str + transformed_ids[i]
 97 | 
 98 |             if i < len(transformed_ids) - 1:
 99 |                 result += 'g'
100 | 
101 |         if len(result) < 20:
102 |             result += digest[0:20 - len(result)]
103 | 
104 |         md5 = hashlib.md5()
105 |         md5.update(result.encode('utf-8'))
106 |         result += md5.hexdigest()[0:3]
107 |         return result
108 | 
109 |     def get_chapter_info(self, bookId):
110 |         """获取章节信息"""
111 |         body = {
112 |             'bookIds': [bookId],
113 |             'synckeys': [0],
114 |             'teenmode': 0
115 |         }
116 |         r = self.session.post(self.WEREAD_CHAPTER_INFO, json=body)
117 |         if r.ok and "data" in r.json() and len(r.json()["data"]) == 1 and "updated" in r.json()["data"][0]:
118 |             update = r.json()["data"][0]["updated"]
119 |             return {item["chapterUid"]: item for item in update}
120 |         return None
121 | 
122 |     def get_notebooklist(self):
123 |         """获取笔记本列表"""
124 |         # params = dict(userVid=self.session.cookies.get("wr_vid"))
125 |         r = self.session.get(self.WEREAD_NOTEBOOKS_URL)
126 |         if not r.ok:
127 |             error(f'获取图书失败,{r.text}')
128 |             raise RuntimeError(f'获取图书失败,{r.text}')
129 |         data = r.json()
130 |         books = data.get("books")
131 |         books.sort(key=lambda x: x["sort"])
132 |         return books
133 | 
134 |     def parse_cookie_string(self, cookie_string):
135 |         cookie = SimpleCookie()
136 |         cookie.load(cookie_string)
137 |         cookies_dict = {}
138 |         cookiejar = None
139 |         for key, morsel in cookie.items():
140 |             cookies_dict[key] = morsel.value
141 |             cookiejar = cookiejar_from_dict(
142 |                 cookies_dict, cookiejar=None, overwrite=True
143 |             )
144 |         return cookiejar
145 | 


--------------------------------------------------------------------------------
/notion/notion.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import time
  4 | from datetime import datetime
  5 | 
  6 | from retrying import retry
  7 | 
  8 | from logger import info, debug
  9 | 
# Append the process's current working directory to the module search path.
# NOTE(review): presumably so project-local packages resolve when the app is
# started from the repository root — confirm before removing or reordering.
o_path = os.getcwd()
sys.path.append(o_path)
 12 | 
 13 | from notion_client import Client
 14 | 
 15 | 
 16 | class NotionClient(object):
 17 | 
 18 |     def __init__(self, token, database_id):
 19 |         self.client = Client(auth=token)
 20 |         self.database_id = database_id
 21 | 
 22 |     def test_api(self):
 23 |         res = self.client.search(query="收藏夹").get("results")
 24 |         info(res)
 25 | 
 26 |     def check(self, bookId):
 27 |         """检查是否已经插入过 如果已经插入了就删除"""
 28 |         time.sleep(0.3)
 29 |         debug(f"开始检查{bookId}是否已经插入")
 30 |         filter = {
 31 |             "property": "BookId",
 32 |             "rich_text": {
 33 |                 "equals": bookId
 34 |             }
 35 |         }
 36 |         response = self.client.databases.query(database_id=self.database_id, filter=filter)
 37 |         for result in response["results"]:
 38 |             time.sleep(0.3)
 39 |             self.client.blocks.delete(block_id=result["id"])
 40 | 
 41 |     def get_table_of_contents(self):
 42 |         """获取目录"""
 43 |         return {
 44 |             "type": "table_of_contents",
 45 |             "table_of_contents": {
 46 |                 "color": "default"
 47 |             }
 48 |         }
 49 | 
 50 |     def get_heading(self, level, content):
 51 |         if level == 1:
 52 |             heading = "heading_1"
 53 |         elif level == 2:
 54 |             heading = "heading_2"
 55 |         else:
 56 |             heading = "heading_3"
 57 |         return {
 58 |             "type": heading,
 59 |             heading: {
 60 |                 "rich_text": [{
 61 |                     "type": "text",
 62 |                     "text": {
 63 |                         "content": content,
 64 |                     }
 65 |                 }],
 66 |                 "color": "default",
 67 |                 "is_toggleable": False
 68 |             }
 69 |         }
 70 | 
 71 |     def get_quote(self, content):
 72 |         return {
 73 |             "type": "quote",
 74 |             "quote": {
 75 |                 "rich_text": [{
 76 |                     "type": "text",
 77 |                     "text": {
 78 |                         "content": content
 79 |                     },
 80 |                 }],
 81 |                 "color": "default"
 82 |             }
 83 |         }
 84 | 
 85 |     def get_callout(self, content, style, colorStyle, reviewId):
 86 |         # 根据不同的划线样式设置不同的emoji 直线type=0 背景颜色是1 波浪线是2
 87 |         emoji = "🌟"
 88 |         if style == 0:
 89 |             emoji = "💡"
 90 |         elif style == 1:
 91 |             emoji = "⭐"
 92 |         # 如果reviewId不是空说明是笔记
 93 |         if reviewId != None:
 94 |             emoji = "✍️"
 95 |         color = "default"
 96 |         # 根据划线颜色设置文字的颜色
 97 |         if colorStyle == 1:
 98 |             color = "red"
 99 |         elif colorStyle == 2:
100 |             color = "purple"
101 |         elif colorStyle == 3:
102 |             color = "blue"
103 |         elif colorStyle == 4:
104 |             color = "green"
105 |         elif colorStyle == 5:
106 |             color = "yellow"
107 |         return {
108 |             "type": "callout",
109 |             "callout": {
110 |                 "rich_text": [{
111 |                     "type": "text",
112 |                     "text": {
113 |                         "content": content,
114 |                     }
115 |                 }],
116 |                 "icon": {
117 |                     "emoji": emoji
118 |                 },
119 |                 "color": color
120 |             }
121 |         }
122 | 
123 |     def get_children(self, chapter, summary, bookmark_list):
124 |         children = []
125 |         grandchild = {}
126 |         if chapter != None:
127 |             # 添加目录
128 |             children.append(self.get_table_of_contents())
129 |             d = {}
130 |             for data in bookmark_list:
131 |                 chapterUid = data.get("chapterUid", 1)
132 |                 if (chapterUid not in d):
133 |                     d[chapterUid] = []
134 |                 d[chapterUid].append(data)
135 |             for key, value in d.items():
136 |                 if key in chapter:
137 |                     # 添加章节
138 |                     children.append(self.get_heading(
139 |                         chapter.get(key).get("level"), chapter.get(key).get("title")))
140 |                 for i in value:
141 |                     callout = self.get_callout(
142 |                         i.get("markText"), data.get("style"), i.get("colorStyle"), i.get("reviewId"))
143 |                     children.append(callout)
144 |                     if i.get("abstract") != None and i.get("abstract") != "":
145 |                         quote = self.get_quote(i.get("abstract"))
146 |                         grandchild[len(children) - 1] = quote
147 | 
148 |         else:
149 |             # 如果没有章节信息
150 |             for data in bookmark_list:
151 |                 children.append(self.get_callout(data.get("markText"),
152 |                                                  data.get("style"), data.get("colorStyle"), data.get("reviewId")))
153 |         if summary != None and len(summary) > 0:
154 |             children.append(self.get_heading(1, "点评"))
155 |             for i in summary:
156 |                 children.append(self.get_callout(i.get("review").get("content"), i.get(
157 |                     "style"), i.get("colorStyle"), i.get("review").get("reviewId")))
158 |         return children, grandchild
159 | 
160 |     def add_children(self, id, children):
161 |         results = []
162 |         for i in range(0, len(children) // 100 + 1):
163 |             time.sleep(0.3)
164 |             response = self.client.blocks.children.append(
165 |                 block_id=id, children=children[i * 100:(i + 1) * 100])
166 |             results.extend(response.get("results"))
167 |         return results if len(results) == len(children) else None
168 | 
169 |     def add_grandchild(self, grandchild, results):
170 |         for key, value in grandchild.items():
171 |             time.sleep(0.3)
172 |             id = results[key].get("id")
173 |             self.client.blocks.children.append(block_id=id, children=[value])
174 | 
175 |     def insert_to_notion(self, bookName, bookId, book_str_id, cover, sort, author, isbn, rating, intro, category,
176 |                          read_info=None):
177 |         """插入到notion"""
178 |         time.sleep(0.3)
179 |         parent = {
180 |             "database_id": self.database_id,
181 |             "type": "database_id"
182 |         }
183 |         properties = {
184 |             "BookName": {"title": [{"type": "text", "text": {"content": bookName}}]},
185 |             "BookId": {"rich_text": [{"type": "text", "text": {"content": bookId}}]},
186 |             "ISBN": {"rich_text": [{"type": "text", "text": {"content": isbn}}]},
187 |             "URL": {"url": f"https://weread.qq.com/web/reader/{book_str_id}"},
188 |             "Author": {"rich_text": [{"type": "text", "text": {"content": author}}]},
189 |             "Sort": {"number": sort},
190 |             "Rating": {"number": rating},
191 |             "Cover": {"files": [{"type": "external", "name": "Cover", "external": {"url": cover}}]},
192 |             "intro": {"rich_text": [{"type": "text", "text": {"content": intro}}]},
193 |             "category": {"select": {"name": category}}
194 |         }
195 |         if read_info != None:
196 |             markedStatus = read_info.get("markedStatus", 0)
197 |             readingTime = read_info.get("readingTime", 0)
198 |             format_time = ""
199 |             hour = readingTime // 3600
200 |             if hour > 0:
201 |                 format_time += f"{hour}时"
202 |             minutes = readingTime % 3600 // 60
203 |             if minutes > 0:
204 |                 format_time += f"{minutes}分"
205 |             properties["Status"] = {"select": {"name": "读完" if markedStatus == 4 else "在读"}}
206 |             properties["ReadingTime"] = {"rich_text": [{"type": "text", "text": {"content": format_time}}]}
207 |             if "finishedDate" in read_info:
208 |                 properties["Date"] = {"date": {
209 |                     "start": datetime.utcfromtimestamp(read_info.get("finishedDate")).strftime("%Y-%m-%d %H:%M:%S"),
210 |                     "time_zone": "Asia/Shanghai"}}
211 | 
212 |         icon = {
213 |             "type": "external",
214 |             "external": {
215 |                 "url": cover
216 |             }
217 |         }
218 |         # notion api 限制100个block
219 |         response = self.client.pages.create(
220 |             parent=parent, icon=icon, properties=properties)
221 |         id = response["id"]
222 |         return id
223 | 
224 |     @retry(stop_max_attempt_number=3, wait_fixed=1000)
225 |     def get_sort(self):
226 |         """获取database中的上次编辑时间"""
227 |         filter = {
228 |             "property": "Sort",
229 |             "number": {
230 |                 "is_not_empty": True
231 |             }
232 |         }
233 |         sorts = [
234 |             {
235 |                 "property": "Sort",
236 |                 "direction": "descending",
237 |             }
238 |         ]
239 |         response = self.client.databases.query(
240 |             database_id=self.database_id, filter=filter, sorts=sorts, page_size=1)
241 |         if (len(response.get("results")) == 1):
242 |             return response.get("results")[0].get("properties").get("Sort").get("number")
243 |         return 0
244 | 


--------------------------------------------------------------------------------