├── settings ├── __init__.py ├── settings.py.example └── settings.py ├── requirements.txt ├── .idea └── .gitignore ├── gunicorn_config.py ├── Dockerfile ├── deploy.sh ├── logger.py ├── README.md ├── .gitignore ├── templates └── wereadtonotion.html ├── app.py ├── weread_2_notion.py ├── weread └── weread.py └── notion └── notion.py /settings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests~=2.26.0 2 | notion-client==1.0.0 3 | retrying==1.3.4 4 | Flask==2.3.2 5 | pywebio==1.8.2 6 | gunicorn==20.1.0 7 | gevent==22.10.2 -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # 默认忽略的文件 2 | /shelf/ 3 | /workspace.xml 4 | # 基于编辑器的 HTTP 客户端请求 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /gunicorn_config.py: -------------------------------------------------------------------------------- 1 | bind = '0.0.0.0:80' # 绑定的IP地址和端口号 2 | # bind = '127.0.0.1:6000' # 绑定的IP地址和端口号 3 | workers = 1 # 工作进程的数量 4 | worker_class = 'gevent' # 使用gevent作为工作进程的类别 5 | timeout = 12000 # 超时时间,单位为秒 6 | accesslog = '/var/log/weread' # 访问日志的文件路径,'-' 表示输出到标准输出 7 | errorlog = '/var/log/weread' # 错误日志的文件路径,'-' 表示输出到标准输出 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 基于 Python 官方镜像构建 Docker 镜像 2 | FROM python:3.9-slim 3 | 4 | # 设置工作目录 5 | WORKDIR /app 6 | 7 | # 单独复制 requirements.txt,后续可以利用 Docker 缓存避免重复安装依赖 8 | COPY ./requirements.txt /app/requirements.txt 9 | RUN pip install -r requirements.txt 10 | 11 | 12 | # 将当前目录下的所有文件复制到容器的 /app 目录下 13 | COPY . /app 14 | # 暴露容器的端口 15 | EXPOSE 80 16 | 17 | # 运行应用 18 | CMD ["gunicorn", "--config", "gunicorn_config.py", "app:app"] 19 | -------------------------------------------------------------------------------- /settings/settings.py.example: -------------------------------------------------------------------------------- 1 | NOTION_TOKEN = '' 2 | WEREAD_COOKIE = '' 3 | DATABASE_ID = '' 4 | 5 | BOOK_BLACKLIST = [ 6 | '从红月开始', 7 | '修真聊天群(聊天群的日常生活)', 8 | '第一序列', 9 | '我真没想重生啊', 10 | '我有一座恐怖屋', 11 | '大王饶命', 12 | '我师兄实在太稳健了', 13 | '死而替生', 14 | '中医许阳', 15 | '这游戏也太真实了', 16 | '家父汉高祖', 17 | '吕布的人生模拟器', 18 | '学霸的黑科技系统', 19 | '黎明之剑', 20 | '我的一天有48小时', 21 | '明克街13号', 22 | ] 23 | -------------------------------------------------------------------------------- /settings/settings.py: -------------------------------------------------------------------------------- 1 | NOTION_TOKEN = 'xxxxxxxxxxx' 2 | WEREAD_COOKIE = 'wr_fp=2533447432; wr_gid=295032143; wr_vid=23683664; wr_pf=0; wr_rt=web%40QxPpLky5s9q8MDFU8TK_AL; wr_localvid=fb3325d071696250fb37ae2; wr_name=%E8%AF%B6%E9%B8%AD; wr_avatar=https%3A%2F%2Fthirdwx.qlogo.cn%2Fmmopen%2Fvi_32%2FXpfWjDr8uAYiagxaSibFmnMh2LUJXTQrjJ8z2ve6X8J3w2CI79YSvQwic1icyHUoHVgoccckuoLxlt1cQOD87tZLfQ%2F132; wr_gender=1; RK=Ou2xAkMpZ9; ptcz=6d556d39aaf44f088fec74b97d2942db2ea837b96f5c8c603ddacec2b5e1fd45; uin=o1307317886; wr_skey=t1Gw1zJp' 3 | DATABASE_ID = 'd92bb4b8434745baa2061caf67d6ef7a' 4 | 5 | BOOK_BLACKLIST = [ 6 | '从红月开始', 7 | '修真聊天群(聊天群的日常生活)', 8 | '第一序列', 9 | '我真没想重生啊', 10 | '我有一座恐怖屋', 11 | '大王饶命', 12 | '我师兄实在太稳健了', 13 | '死而替生', 14 | '中医许阳', 15 | '这游戏也太真实了', 16 | '家父汉高祖', 17 | '吕布的人生模拟器', 18 | '学霸的黑科技系统', 19 | '黎明之剑', 20 | '我的一天有48小时', 21 | '明克街13号', 22 | ] 23 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 获取应用版本参数 4 | version=$1 5 | 6 | # 默认应用版本 7 | default_version="latest" 8 | 9 | # 如果没有指定版本参数,则使用默认版本 10 | if [ -z "$version" ]; then 11 | version=$default_version 12 | fi 13 | 14 | # 容器名称 15 | container_name="weread-to-notion" 16 | 17 | # 检查是否存在同名容器 18 | existing_container=$(docker ps -a --filter "name=$container_name" --format "{{.Names}}") 19 | 20 | if [ ! -z "$existing_container" ]; then 21 | # 停止并删除同名容器 22 | echo "Stopping and removing existing container: $existing_container" 23 | docker stop "$existing_container" 24 | docker rm "$existing_container" 25 | fi 26 | 27 | echo "################ Building Docker image: $container_name:$version ################" 28 | # 构建Docker镜像 29 | docker build -t "$container_name:$version" . 30 | 31 | echo "################ Starting Docker container: $container_name:$version ############" 32 | # 启动容器 33 | docker run -d -p 7000:80 --name "$container_name" "$container_name:$version" 34 | -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | import logging.handlers 2 | import os 3 | 4 | _logger = logging.getLogger('weread_to_notion') 5 | _logger.setLevel(logging.DEBUG) 6 | 7 | _cons_rht = logging.StreamHandler() 8 | 9 | # _log_base_dir = f'/var/log' 10 | _log_base_dir = '.' 11 | 12 | # _pid = os.getpid() 13 | log_dir = f'{_log_base_dir}/logs' 14 | if not os.path.exists(log_dir): 15 | os.makedirs(log_dir) 16 | log_file_name = 'weread_to_notion' 17 | 18 | _file_rht = logging.handlers.RotatingFileHandler(f"{log_dir}/{log_file_name}.log", 19 | maxBytes=1024 * 1024 * 1024, 20 | backupCount=100) 21 | 22 | _fmt = logging.Formatter("%(asctime)s pid:%(process)d %(filename)s:%(lineno)s %(levelname)s: %(message)s", 23 | "%Y-%m-%d %H:%M:%S") 24 | 25 | _file_rht.setFormatter(_fmt) 26 | _cons_rht.setFormatter(_fmt) 27 | 28 | _logger.addHandler(_file_rht) 29 | _logger.addHandler(_cons_rht) 30 | 31 | debug = _logger.debug 32 | info = _logger.info 33 | warning = _logger.warning 34 | error = _logger.error 35 | exception = _logger.exception 36 | critical = _logger.critical 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # wereadtonotion 2 | 3 | 微信读书笔记、划线等信息同步到notion数据库 4 | > 效果如下: 5 | > ![同步效果](https://markdown-mac-work-1306720256.cos.ap-guangzhou.myqcloud.com/png/AzRZUp.png) 6 | 7 | *注意:不存在划线、书评等信息的书籍不会被获取、导入* 8 | 9 | ## 功能 10 | 11 | ### 已有功能 12 | 13 | 1. 导入书籍的划线、评论、笔记等信息到 Notion 数据库 14 | > 注意:不存在划线、书评等信息的书籍不会被获取、导入 15 | 2. 支持 Web 端部署执行和本机 Python 脚本运行 16 | 17 | ### TODO 18 | 19 | - [ ] 支持 Notion 增量读写 20 | - [ ] 支持用户选择书籍获取模式 [存在划线 / 书架书籍 / 全部书籍] 21 | - [ ] 单独导出功能。导出为 markdown 文件 22 | - [ ] 考虑支持自定义 Noiton 数据库结构 23 | 24 | ## 使用方法 25 | 26 | ### 1. 本地脚本运行 27 | 28 | 1. 安装依赖 `pip install -r requirements.txt` 29 | 2. 配置 settings/settings.py 文件中的信息 30 | > 1. 获取 **Notion token** 31 | > - 打开[此页面](https://www.notion.so/my-integrations)并登录 32 | > - 点击New integration 输入 name 提交.(如果已有,则点击 view integration) 33 | > - 点击show,然后copy 34 | > 2. 从微信读书中获取 cookie 35 | > - 在浏览器中打开 weread.qq.com 并登录 36 | > - 打开开发者工具(按 F12),点击 network(网络),刷新页面, 点击第一个请求,复制 cookie 的值。 37 | > 3. 准备 Noiton Database ID 38 | > - 复制[此页面](https://www.notion.so/yayya/a9b3a8dfcc0543559005a263103fc81c)到你的 39 | Notion 中,点击右上角的分享按钮,将页面分享为公开页面 40 | >- 点击页面右上角三个点,在 connections 中找到选择你的 connections。第一步中创建的 integration 的 name 41 | >- 通过 URL 找到你的 Database ID 的值。 42 | > > 例如:页面 https://www.notion.so/yayya/d92bb4b8434745baa2061caf67d6ef7a?v=b4a5bfb89e8e44868a473179ee60x851 的 43 | ID 为d92bb4b8434745baa2061caf67d6ef7a 44 | 4. 运行 `python weread_2_notion.py` 45 | 46 | ### 2. 网页端部署运行 47 | 48 | #### docker 部署(推荐) 49 | 50 | 执行脚本: `bash deploy.sh` 51 | 52 | #### 直接部署 53 | 54 | 1. 安装依赖:`python3 install -r requirement.txt` 55 | 2. 启动 Web 端: `python3 app.py` 56 | 57 | ## PS 58 | 59 | - 借鉴了大佬的[项目](https://github.com/malinkang/weread_to_notion),优化了代码结构、增加了web 端。非常感谢大佬的开源项目 60 | - 配合 NoitonNext 构建 Blog [效果](https://yaya.run/weread)非常好 61 | 62 | ## 免责申明 63 | 64 | 本工具仅作技术研究之用,请勿用于商业或违法用途,由于使用该工具导致的侵权或其它问题,该本工具不承担任何责任! 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | # venv 133 | .cfg -------------------------------------------------------------------------------- /templates/wereadtonotion.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Weread to Notion 5 | 29 | 30 | 31 |
32 |

Weread to Notion

33 |
34 | 35 |

36 | 37 | 38 |

39 | 40 | 41 |

42 | 43 | 44 |

46 | 47 | 48 |
49 | 50 |

51 | 
52 | 53 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from logger import info, exception 2 | from weread_2_notion import weread_2_notion 3 | from pywebio.platform.flask import webio_view 4 | from pywebio.input import * 5 | from pywebio.output import * 6 | 7 | from flask import Flask 8 | 9 | app = Flask(__name__) 10 | 11 | 12 | def output_help_info(scope=None): 13 | put_markdown(""" 14 | ## 微信读书书籍同步到 Notion 数据库 15 | **项目地址** :[weread_2_notion](https://github.com/DuckDuck88/weread2notion) 16 | """) 17 | put_text() 18 | put_collapse('使用说明', [ 19 | put_markdown(''' 20 | 1. 获取 Notion token 21 | - 打开[Notion developers](https://www.notion.so/my-integrations)并登录 22 | - 点击New integration 输入 name 提交.(如果已有,则点击 view integration) 23 | - 点击show,然后copy 24 | 2. 从微信读书中获取 cookie 25 | - 在浏览器中打开[微信读书官网](weread.qq.com)并登录 26 | - 打开开发者工具(按 F12),点击 network(网络),刷新页面, 点击第一个请求,复制 cookie 的值。 27 | > ![drULBf](https://markdown-mac-work-1306720256.cos.ap-guangzhou.myqcloud.com/png/drULBf.png) 28 | 3. 准备 Noiton Database ID 29 | - 复制这个[Notion 数据库](https://yayya.notion.site/10d8b5402c924498b44302e4ce75385c?v=05c0d43e80564da290f9fa837170013a&pvs=4)到你的 Notion 中,点击右上角的分享按钮,将页面分享为公开页面 30 | - 点击页面右上角三个点,在 connections 中找到选择你的 connections。选择第一步中创建的 integration 的 name 31 | - 打开你复制的 Notion 页面,通过该页面 URL 找到你的 Database ID 。 32 | > 例如:页面 https://www.notion.so/xxxx/d92bb4b8434745baa2061caf67d6ef7a?v=b4a5bfb89e8e44868a473179ee60x851 的 ID 为d92bb4b8434745baa2061caf67d6ef7a 33 | > ![bU4zTs](https://markdown-mac-work-1306720256.cos.ap-guangzhou.myqcloud.com/png/bU4zTs.png) 34 | 4. 把如下信息输入到以下表单中 35 | ''', scope=scope) 36 | ], open=False) 37 | 38 | 39 | def update_configs(name, value): 40 | return input_update(value, name='configs') 41 | 42 | 43 | def input_config_info(scope=None): 44 | with use_scope(scope): 45 | datas = input_group('configs', [ 46 | input("Notion token:", type=PASSWORD, placeholder='输入 notion tikon', name='notion_token', required=True), 47 | input("微信读书 cookie:", type=TEXT, placeholder='输入 微信读书 cookies', name='weread_cookie', 48 | required=True), 49 | input("Database ID:,", type=TEXT, placeholder='输入 需要同步的 Notion 数据库 ID', name='database_id', 50 | required=True), 51 | textarea("黑名单列表:", rows=3, placeholder='输入不同步的书籍黑名单,书名以逗号隔开', name='book_blacklist') 52 | ]) 53 | notion_token = datas['notion_token'] 54 | weread_cookie = datas['weread_cookie'] 55 | database_id = datas['database_id'] 56 | datas['book_blacklist'] = datas['book_blacklist'].replace(' ', '').replace('\n', '').replace('\r', '').split(',') 57 | book_blacklist = datas['book_blacklist'] 58 | return notion_token, weread_cookie, database_id, book_blacklist 59 | 60 | 61 | def list_to_str(l): 62 | res = "" 63 | for i in l: 64 | res += f'*《{i}》* ' 65 | return '> ' + res if res else '> 无' 66 | 67 | 68 | def dict_to_str(d): 69 | res = "" 70 | for k, v in d.items(): 71 | res += f'书籍:*《{k}》* 失败原因: {v[0]}, 书籍详情: {v[1]}' 72 | return '> ' + res if res else '> 无' 73 | 74 | 75 | def weread_to_notion(): 76 | output_help_info(scope='head') 77 | # put_column([ 78 | # put_scope('head'), 79 | # put_scope('main'), 80 | # put_row([ 81 | # put_scope('left'), 82 | # None, 83 | # put_scope('right') 84 | # ], size='25% 4% 69%'), 85 | # ]) 86 | notion_token, weread_cookie, database_id, book_blacklist = input_config_info(scope='main') 87 | put_info('注意: 书籍较多时处理时间较长,当前页面可观察同步进度,关闭页面可以继续同步') 88 | try: 89 | with put_loading(shape='grow', color='primary'): 90 | all_book, handled_book, ignore_book, err_book = weread_2_notion(notion_token, 91 | weread_cookie, 92 | database_id, 93 | book_blacklist) 94 | except Exception as e: 95 | exception(f'失败:{str(e)}') 96 | put_error(f'{str(e)}', closable=False) 97 | return put_error(f'请刷新当前页面重新输入正确配置', closable=True) 98 | put_info(f'共有{len(all_book)}本书籍,其中{len(ignore_book)}本书籍被忽略,{len(handled_book)}本书籍被同步') 99 | put_markdown(f'**成功同步以下书籍**:\n{list_to_str(handled_book)}') 100 | put_markdown(f'**同步以下书籍失败!**:\n{dict_to_str(err_book)}') 101 | put_markdown(f'**忽略以下书籍**:\n{list_to_str(ignore_book)}') 102 | return put_success( 103 | f'共有{len(all_book)}本书籍,其中{len(ignore_book)}本书籍被忽略,{len(handled_book)}本书籍被同步,{len(err_book)}本书籍同步失败!', 104 | closable=True) 105 | 106 | 107 | app.add_url_rule('/', 'webio_view', webio_view(weread_to_notion), 108 | methods=['GET', 'POST']) # need GET,POST and OPTIONS methods 109 | 110 | if __name__ == '__main__': 111 | app.run(host='0.0.0.0', port=80, debug=True) 112 | -------------------------------------------------------------------------------- /weread_2_notion.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from pywebio.output import put_text, put_info 4 | 5 | from logger import info, debug, warning, error, exception 6 | from notion.notion import NotionClient 7 | from weread.weread import WeRead 8 | 9 | try: 10 | from settings.settings import WEREAD_COOKIE, DATABASE_ID, BOOK_BLACKLIST, NOTION_TOKEN 11 | except Exception as e: 12 | error(f'导入配置文件失败,{e} Web 无伤大雅') 13 | 14 | 15 | def weread_2_notion(notion_token=NOTION_TOKEN, 16 | weread_cookie=WEREAD_COOKIE, 17 | database_id=DATABASE_ID, 18 | book_blacklist=BOOK_BLACKLIST, 19 | is_web=True): 20 | try: 21 | weread_ = WeRead(weread_cookie) 22 | except Exception as e: 23 | raise Exception(f'微信读书登录失败,{e}') 24 | try: 25 | notion = NotionClient(notion_token, database_id) 26 | except Exception as e: 27 | raise Exception(f'notion 登录失败,{e}') 28 | # 书籍列表 29 | try: 30 | books = weread_.get_notebooklist() # TODO 增加逻辑,获取所有书籍列表。 当前是获取存在划线信息的书籍列表 31 | except Exception as e: 32 | raise Exception(f'获取书籍列表失败,请检查微信读书 cookies 是否正确 {e.__str__()}') 33 | all_book = [] 34 | handled_book = [] 35 | ignore_book = [] 36 | err_book = {} 37 | info(f'书籍目录:{books}') 38 | if books is not None: 39 | for book in books: 40 | try: 41 | sort = book["sort"] # 更新时间 42 | book = book.get("book") 43 | title = book.get("title") 44 | info(f' 正在同步:《{title}》, 当前进度 {len(handled_book) + len(ignore_book)}/{len(books)}') 45 | if is_web: 46 | put_text(f' 正在同步:《{title}》, 当前进度 {len(handled_book) + len(ignore_book)}/{len(books)}') 47 | all_book.append(title) 48 | if title in book_blacklist: 49 | info(f'《{title}》在黑名单中,跳过') 50 | ignore_book.append(title) 51 | continue 52 | # 调试用途代码 53 | # if book.get("title") != '黄金时代': 54 | # ignore_book.append(title) 55 | # continue 56 | # print(book['123']) 57 | # print(book) 58 | 59 | # 跳过是否更新判断,直接更新,否则给其他人使用会导致 notion 更新时间不一致 60 | # if sort <= notion.get_sort(): 61 | # warning(f'当前图书《{title}》没有更新划线、书评等信息,暂不处理') 62 | # ignore_book.append(title) 63 | # continue 64 | handled_book.append(title) 65 | cover = book.get("cover", '没有封面') 66 | bookId = book.get("bookId", '-1') 67 | author = book.get("author", '没有作者信息') 68 | info(f'开始处理《{title}》, bookId={bookId}, sort={sort}') 69 | notion.check(bookId) # TODO 如果自行在 notion 修改,这里会删除重新插入,规避这个逻辑 70 | chapter = weread_.get_chapter_info(bookId) 71 | bookmark_list = weread_.get_bookmark_list(bookId) 72 | summary, reviews = weread_.get_review_list(bookId) 73 | bookmark_list.extend(reviews) 74 | bookmark_list = sorted(bookmark_list, key=lambda x: ( 75 | x.get("chapterUid", 1), 76 | 0 if (x.get("range", "") == "" or x.get("range").split("-")[0] == "") else int( 77 | x.get("range").split("-")[0]))) 78 | # 获取书籍信息 79 | isbn, rating, intro, category = weread_.get_bookinfo(bookId) 80 | # 获取阅读信息 81 | read_info = weread_.get_read_info(bookId) 82 | 83 | children, grandchild = notion.get_children( 84 | chapter, summary, bookmark_list) 85 | block_id = notion.insert_to_notion(bookName=title, 86 | bookId=bookId, 87 | book_str_id=weread_.calculate_book_str_id(bookId), 88 | cover=cover, 89 | sort=sort, 90 | author=author, 91 | isbn=isbn, 92 | rating=rating, 93 | intro=intro, 94 | category=category, 95 | read_info=read_info) 96 | results = notion.add_children(block_id, children) 97 | if (len(grandchild) > 0 and results != None): 98 | notion.add_grandchild(grandchild, results) 99 | debug(f'结束处理《{title}》, bookId={bookId}, sort={sort}') 100 | except Exception as e: 101 | exception(f'处理书籍《{title}》失败') 102 | err_book[title] = [str(e), book] 103 | continue 104 | return all_book, handled_book, ignore_book, err_book 105 | 106 | 107 | if __name__ == '__main__': 108 | weread_2_notion(is_web=False) 109 | # weread = WeRead(WEREAD_COOKIE) 110 | # print(weread.session.cookies.get_dict()) 111 | # res = weread.session.get( 112 | # 'https://i.weread.qq.com/users/info') 113 | # weread.session. 114 | # print(weread.get_notebooklist()) 115 | -------------------------------------------------------------------------------- /weread/weread.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import re 3 | from http.cookies import SimpleCookie 4 | import requests 5 | from requests.utils import cookiejar_from_dict 6 | from logger import error 7 | 8 | 9 | class WeRead: 10 | WEREAD_URL = "https://weread.qq.com/" 11 | WEREAD_NOTEBOOKS_URL = "https://i.weread.qq.com/user/notebooks" 12 | # WEREAD_NOTEBOOKS_URL = "https://i.weread.qq.com/shelf/friendCommon" 13 | WEREAD_BOOKMARKLIST_URL = "https://i.weread.qq.com/book/bookmarklist" 14 | WEREAD_CHAPTER_INFO = "https://i.weread.qq.com/book/chapterInfos" 15 | WEREAD_READ_INFO_URL = "https://i.weread.qq.com/book/readinfo" 16 | WEREAD_REVIEW_LIST_URL = "https://i.weread.qq.com/review/list" 17 | WEREAD_BOOK_INFO = "https://i.weread.qq.com/book/info" 18 | 19 | def __init__(self, weread_cookie): 20 | self.session = requests.Session() 21 | self.session.cookies = self.parse_cookie_string(weread_cookie) 22 | 23 | def get_bookmark_list(self, bookId): 24 | """获取我的划线""" 25 | params = dict(bookId=bookId) 26 | r = self.session.get(self.WEREAD_BOOKMARKLIST_URL, params=params) 27 | if r.ok: 28 | updated = r.json().get("updated") 29 | updated = sorted(updated, key=lambda x: ( 30 | x.get("chapterUid", 1), int(x.get("range").split("-")[0]))) 31 | return r.json()["updated"] 32 | return None 33 | 34 | def get_bookinfo(self, bookId): 35 | """获取书的详情""" 36 | params = dict(bookId=bookId) 37 | r = self.session.get(self.WEREAD_BOOK_INFO, params=params) 38 | isbn = "" 39 | newRating = 0 40 | intro = "" 41 | category = "" 42 | if r.ok: 43 | data = r.json() 44 | isbn = data.get("isbn", "-1") 45 | newRating = data.get("newRating", '-1') / 1000 46 | intro = data.get('intro', '本书没有介绍!') 47 | category = data.get("category", 'None') 48 | return (isbn, newRating, intro, category) 49 | 50 | def get_review_list(self, bookId): 51 | """获取笔记""" 52 | params = dict(bookId=bookId, listType=11, mine=1, syncKey=0) 53 | r = self.session.get(self.WEREAD_REVIEW_LIST_URL, params=params) 54 | reviews = r.json().get("reviews") 55 | summary = list(filter(lambda x: x.get("review").get("type") == 4, reviews)) 56 | reviews = list(filter(lambda x: x.get("review").get("type") == 1, reviews)) 57 | reviews = list(map(lambda x: x.get("review"), reviews)) 58 | reviews = list(map(lambda x: {**x, "markText": x.pop("content")}, reviews)) 59 | return summary, reviews 60 | 61 | def get_read_info(self, bookId): 62 | params = dict(bookId=bookId, readingDetail=1, 63 | readingBookIndex=1, finishedDate=1) 64 | r = self.session.get(self.WEREAD_READ_INFO_URL, params=params) 65 | if r.ok: 66 | return r.json() 67 | return None 68 | 69 | def _transform_id(self, book_id): 70 | id_length = len(book_id) 71 | 72 | if re.match("^\d*$", book_id): 73 | ary = [] 74 | for i in range(0, id_length, 9): 75 | ary.append(format(int(book_id[i:min(i + 9, id_length)]), 'x')) 76 | return '3', ary 77 | 78 | result = '' 79 | for i in range(id_length): 80 | result += format(ord(book_id[i]), 'x') 81 | return '4', [result] 82 | 83 | def calculate_book_str_id(self, book_id): 84 | md5 = hashlib.md5() 85 | md5.update(book_id.encode('utf-8')) 86 | digest = md5.hexdigest() 87 | result = digest[0:3] 88 | code, transformed_ids = self._transform_id(book_id) 89 | result += code + '2' + digest[-2:] 90 | 91 | for i in range(len(transformed_ids)): 92 | hex_length_str = format(len(transformed_ids[i]), 'x') 93 | if len(hex_length_str) == 1: 94 | hex_length_str = '0' + hex_length_str 95 | 96 | result += hex_length_str + transformed_ids[i] 97 | 98 | if i < len(transformed_ids) - 1: 99 | result += 'g' 100 | 101 | if len(result) < 20: 102 | result += digest[0:20 - len(result)] 103 | 104 | md5 = hashlib.md5() 105 | md5.update(result.encode('utf-8')) 106 | result += md5.hexdigest()[0:3] 107 | return result 108 | 109 | def get_chapter_info(self, bookId): 110 | """获取章节信息""" 111 | body = { 112 | 'bookIds': [bookId], 113 | 'synckeys': [0], 114 | 'teenmode': 0 115 | } 116 | r = self.session.post(self.WEREAD_CHAPTER_INFO, json=body) 117 | if r.ok and "data" in r.json() and len(r.json()["data"]) == 1 and "updated" in r.json()["data"][0]: 118 | update = r.json()["data"][0]["updated"] 119 | return {item["chapterUid"]: item for item in update} 120 | return None 121 | 122 | def get_notebooklist(self): 123 | """获取笔记本列表""" 124 | # params = dict(userVid=self.session.cookies.get("wr_vid")) 125 | r = self.session.get(self.WEREAD_NOTEBOOKS_URL) 126 | if not r.ok: 127 | error(f'获取图书失败,{r.text}') 128 | raise RuntimeError(f'获取图书失败,{r.text}') 129 | data = r.json() 130 | books = data.get("books") 131 | books.sort(key=lambda x: x["sort"]) 132 | return books 133 | 134 | def parse_cookie_string(self, cookie_string): 135 | cookie = SimpleCookie() 136 | cookie.load(cookie_string) 137 | cookies_dict = {} 138 | cookiejar = None 139 | for key, morsel in cookie.items(): 140 | cookies_dict[key] = morsel.value 141 | cookiejar = cookiejar_from_dict( 142 | cookies_dict, cookiejar=None, overwrite=True 143 | ) 144 | return cookiejar 145 | -------------------------------------------------------------------------------- /notion/notion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | from datetime import datetime 5 | 6 | from retrying import retry 7 | 8 | from logger import info, debug 9 | 10 | o_path = os.getcwd() 11 | sys.path.append(o_path) 12 | 13 | from notion_client import Client 14 | 15 | 16 | class NotionClient(object): 17 | 18 | def __init__(self, token, database_id): 19 | self.client = Client(auth=token) 20 | self.database_id = database_id 21 | 22 | def test_api(self): 23 | res = self.client.search(query="收藏夹").get("results") 24 | info(res) 25 | 26 | def check(self, bookId): 27 | """检查是否已经插入过 如果已经插入了就删除""" 28 | time.sleep(0.3) 29 | debug(f"开始检查{bookId}是否已经插入") 30 | filter = { 31 | "property": "BookId", 32 | "rich_text": { 33 | "equals": bookId 34 | } 35 | } 36 | response = self.client.databases.query(database_id=self.database_id, filter=filter) 37 | for result in response["results"]: 38 | time.sleep(0.3) 39 | self.client.blocks.delete(block_id=result["id"]) 40 | 41 | def get_table_of_contents(self): 42 | """获取目录""" 43 | return { 44 | "type": "table_of_contents", 45 | "table_of_contents": { 46 | "color": "default" 47 | } 48 | } 49 | 50 | def get_heading(self, level, content): 51 | if level == 1: 52 | heading = "heading_1" 53 | elif level == 2: 54 | heading = "heading_2" 55 | else: 56 | heading = "heading_3" 57 | return { 58 | "type": heading, 59 | heading: { 60 | "rich_text": [{ 61 | "type": "text", 62 | "text": { 63 | "content": content, 64 | } 65 | }], 66 | "color": "default", 67 | "is_toggleable": False 68 | } 69 | } 70 | 71 | def get_quote(self, content): 72 | return { 73 | "type": "quote", 74 | "quote": { 75 | "rich_text": [{ 76 | "type": "text", 77 | "text": { 78 | "content": content 79 | }, 80 | }], 81 | "color": "default" 82 | } 83 | } 84 | 85 | def get_callout(self, content, style, colorStyle, reviewId): 86 | # 根据不同的划线样式设置不同的emoji 直线type=0 背景颜色是1 波浪线是2 87 | emoji = "🌟" 88 | if style == 0: 89 | emoji = "💡" 90 | elif style == 1: 91 | emoji = "⭐" 92 | # 如果reviewId不是空说明是笔记 93 | if reviewId != None: 94 | emoji = "✍️" 95 | color = "default" 96 | # 根据划线颜色设置文字的颜色 97 | if colorStyle == 1: 98 | color = "red" 99 | elif colorStyle == 2: 100 | color = "purple" 101 | elif colorStyle == 3: 102 | color = "blue" 103 | elif colorStyle == 4: 104 | color = "green" 105 | elif colorStyle == 5: 106 | color = "yellow" 107 | return { 108 | "type": "callout", 109 | "callout": { 110 | "rich_text": [{ 111 | "type": "text", 112 | "text": { 113 | "content": content, 114 | } 115 | }], 116 | "icon": { 117 | "emoji": emoji 118 | }, 119 | "color": color 120 | } 121 | } 122 | 123 | def get_children(self, chapter, summary, bookmark_list): 124 | children = [] 125 | grandchild = {} 126 | if chapter != None: 127 | # 添加目录 128 | children.append(self.get_table_of_contents()) 129 | d = {} 130 | for data in bookmark_list: 131 | chapterUid = data.get("chapterUid", 1) 132 | if (chapterUid not in d): 133 | d[chapterUid] = [] 134 | d[chapterUid].append(data) 135 | for key, value in d.items(): 136 | if key in chapter: 137 | # 添加章节 138 | children.append(self.get_heading( 139 | chapter.get(key).get("level"), chapter.get(key).get("title"))) 140 | for i in value: 141 | callout = self.get_callout( 142 | i.get("markText"), data.get("style"), i.get("colorStyle"), i.get("reviewId")) 143 | children.append(callout) 144 | if i.get("abstract") != None and i.get("abstract") != "": 145 | quote = self.get_quote(i.get("abstract")) 146 | grandchild[len(children) - 1] = quote 147 | 148 | else: 149 | # 如果没有章节信息 150 | for data in bookmark_list: 151 | children.append(self.get_callout(data.get("markText"), 152 | data.get("style"), data.get("colorStyle"), data.get("reviewId"))) 153 | if summary != None and len(summary) > 0: 154 | children.append(self.get_heading(1, "点评")) 155 | for i in summary: 156 | children.append(self.get_callout(i.get("review").get("content"), i.get( 157 | "style"), i.get("colorStyle"), i.get("review").get("reviewId"))) 158 | return children, grandchild 159 | 160 | def add_children(self, id, children): 161 | results = [] 162 | for i in range(0, len(children) // 100 + 1): 163 | time.sleep(0.3) 164 | response = self.client.blocks.children.append( 165 | block_id=id, children=children[i * 100:(i + 1) * 100]) 166 | results.extend(response.get("results")) 167 | return results if len(results) == len(children) else None 168 | 169 | def add_grandchild(self, grandchild, results): 170 | for key, value in grandchild.items(): 171 | time.sleep(0.3) 172 | id = results[key].get("id") 173 | self.client.blocks.children.append(block_id=id, children=[value]) 174 | 175 | def insert_to_notion(self, bookName, bookId, book_str_id, cover, sort, author, isbn, rating, intro, category, 176 | read_info=None): 177 | """插入到notion""" 178 | time.sleep(0.3) 179 | parent = { 180 | "database_id": self.database_id, 181 | "type": "database_id" 182 | } 183 | properties = { 184 | "BookName": {"title": [{"type": "text", "text": {"content": bookName}}]}, 185 | "BookId": {"rich_text": [{"type": "text", "text": {"content": bookId}}]}, 186 | "ISBN": {"rich_text": [{"type": "text", "text": {"content": isbn}}]}, 187 | "URL": {"url": f"https://weread.qq.com/web/reader/{book_str_id}"}, 188 | "Author": {"rich_text": [{"type": "text", "text": {"content": author}}]}, 189 | "Sort": {"number": sort}, 190 | "Rating": {"number": rating}, 191 | "Cover": {"files": [{"type": "external", "name": "Cover", "external": {"url": cover}}]}, 192 | "intro": {"rich_text": [{"type": "text", "text": {"content": intro}}]}, 193 | "category": {"select": {"name": category}} 194 | } 195 | if read_info != None: 196 | markedStatus = read_info.get("markedStatus", 0) 197 | readingTime = read_info.get("readingTime", 0) 198 | format_time = "" 199 | hour = readingTime // 3600 200 | if hour > 0: 201 | format_time += f"{hour}时" 202 | minutes = readingTime % 3600 // 60 203 | if minutes > 0: 204 | format_time += f"{minutes}分" 205 | properties["Status"] = {"select": {"name": "读完" if markedStatus == 4 else "在读"}} 206 | properties["ReadingTime"] = {"rich_text": [{"type": "text", "text": {"content": format_time}}]} 207 | if "finishedDate" in read_info: 208 | properties["Date"] = {"date": { 209 | "start": datetime.utcfromtimestamp(read_info.get("finishedDate")).strftime("%Y-%m-%d %H:%M:%S"), 210 | "time_zone": "Asia/Shanghai"}} 211 | 212 | icon = { 213 | "type": "external", 214 | "external": { 215 | "url": cover 216 | } 217 | } 218 | # notion api 限制100个block 219 | response = self.client.pages.create( 220 | parent=parent, icon=icon, properties=properties) 221 | id = response["id"] 222 | return id 223 | 224 | @retry(stop_max_attempt_number=3, wait_fixed=1000) 225 | def get_sort(self): 226 | """获取database中的上次编辑时间""" 227 | filter = { 228 | "property": "Sort", 229 | "number": { 230 | "is_not_empty": True 231 | } 232 | } 233 | sorts = [ 234 | { 235 | "property": "Sort", 236 | "direction": "descending", 237 | } 238 | ] 239 | response = self.client.databases.query( 240 | database_id=self.database_id, filter=filter, sorts=sorts, page_size=1) 241 | if (len(response.get("results")) == 1): 242 | return response.get("results")[0].get("properties").get("Sort").get("number") 243 | return 0 244 | --------------------------------------------------------------------------------