├── settings
├── __init__.py
├── settings.py.example
└── settings.py
├── requirements.txt
├── .idea
└── .gitignore
├── gunicorn_config.py
├── Dockerfile
├── deploy.sh
├── logger.py
├── README.md
├── .gitignore
├── templates
└── wereadtonotion.html
├── app.py
├── weread_2_notion.py
├── weread
└── weread.py
└── notion
└── notion.py
/settings/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests~=2.26.0
2 | notion-client==1.0.0
3 | retrying==1.3.4
4 | Flask==2.3.2
5 | pywebio==1.8.2
6 | gunicorn==20.1.0
7 | gevent==22.10.2
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # 默认忽略的文件
2 | /shelf/
3 | /workspace.xml
4 | # 基于编辑器的 HTTP 客户端请求
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/gunicorn_config.py:
--------------------------------------------------------------------------------
1 | bind = '0.0.0.0:80' # IP address and port gunicorn binds to
2 | # bind = '127.0.0.1:6000' # alternative bind address and port
3 | workers = 1 # number of worker processes
4 | worker_class = 'gevent' # use gevent as the worker class
5 | timeout = 12000 # worker timeout, in seconds
6 | accesslog = '/var/log/weread' # access log file path; '-' would log to stdout
7 | errorlog = '/var/log/weread' # error log file path (same file as the access log); '-' would log to stderr
8 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build the Docker image on top of the official Python image
2 | FROM python:3.9-slim
3 |
4 | # Set the working directory
5 | WORKDIR /app
6 |
7 | # Copy requirements.txt on its own so Docker's layer cache can skip reinstalling dependencies later
8 | COPY ./requirements.txt /app/requirements.txt
9 | RUN pip install -r requirements.txt
10 |
11 |
12 | # Copy everything in the current directory into the container's /app directory
13 | COPY . /app
14 | # Expose the container port
15 | EXPOSE 80
16 |
17 | # Run the application
18 | CMD ["gunicorn", "--config", "gunicorn_config.py", "app:app"]
19 |
--------------------------------------------------------------------------------
/settings/settings.py.example:
--------------------------------------------------------------------------------
1 | NOTION_TOKEN = ''
2 | WEREAD_COOKIE = ''
3 | DATABASE_ID = ''
4 |
5 | BOOK_BLACKLIST = [
6 | '从红月开始',
7 | '修真聊天群(聊天群的日常生活)',
8 | '第一序列',
9 | '我真没想重生啊',
10 | '我有一座恐怖屋',
11 | '大王饶命',
12 | '我师兄实在太稳健了',
13 | '死而替生',
14 | '中医许阳',
15 | '这游戏也太真实了',
16 | '家父汉高祖',
17 | '吕布的人生模拟器',
18 | '学霸的黑科技系统',
19 | '黎明之剑',
20 | '我的一天有48小时',
21 | '明克街13号',
22 | ]
23 |
--------------------------------------------------------------------------------
/settings/settings.py:
--------------------------------------------------------------------------------
1 | NOTION_TOKEN = 'xxxxxxxxxxx'
2 | WEREAD_COOKIE = 'wr_fp=2533447432; wr_gid=295032143; wr_vid=23683664; wr_pf=0; wr_rt=web%40QxPpLky5s9q8MDFU8TK_AL; wr_localvid=fb3325d071696250fb37ae2; wr_name=%E8%AF%B6%E9%B8%AD; wr_avatar=https%3A%2F%2Fthirdwx.qlogo.cn%2Fmmopen%2Fvi_32%2FXpfWjDr8uAYiagxaSibFmnMh2LUJXTQrjJ8z2ve6X8J3w2CI79YSvQwic1icyHUoHVgoccckuoLxlt1cQOD87tZLfQ%2F132; wr_gender=1; RK=Ou2xAkMpZ9; ptcz=6d556d39aaf44f088fec74b97d2942db2ea837b96f5c8c603ddacec2b5e1fd45; uin=o1307317886; wr_skey=t1Gw1zJp'
3 | DATABASE_ID = 'd92bb4b8434745baa2061caf67d6ef7a'
4 |
5 | BOOK_BLACKLIST = [
6 | '从红月开始',
7 | '修真聊天群(聊天群的日常生活)',
8 | '第一序列',
9 | '我真没想重生啊',
10 | '我有一座恐怖屋',
11 | '大王饶命',
12 | '我师兄实在太稳健了',
13 | '死而替生',
14 | '中医许阳',
15 | '这游戏也太真实了',
16 | '家父汉高祖',
17 | '吕布的人生模拟器',
18 | '学霸的黑科技系统',
19 | '黎明之剑',
20 | '我的一天有48小时',
21 | '明克街13号',
22 | ]
23 |
--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Build the weread-to-notion image and (re)start its container.
#
# Usage: bash deploy.sh [version]
#   version  image tag to build and run; defaults to "latest".

# Abort on the first failing command so a broken build never replaces a
# running container.
set -euo pipefail

# Image tag: first CLI argument, or "latest" when none is given.
version="${1:-latest}"

# Container (and image) name.
container_name="weread-to-notion"

echo "################ Building Docker image: $container_name:$version ################"
# Build first: if the build fails, the currently running container is left untouched.
docker build -t "$container_name:$version" .

# `docker ps --filter name=...` is a substring match and could return several
# containers; `docker container inspect` looks the container up by its exact name.
if docker container inspect "$container_name" >/dev/null 2>&1; then
    echo "Stopping and removing existing container: $container_name"
    # -f stops the container if it is running, then removes it.
    docker rm -f "$container_name"
fi

echo "################ Starting Docker container: $container_name:$version ############"
# Publish container port 80 on host port 7000.
docker run -d -p 7000:80 --name "$container_name" "$container_name:$version"
34 |
--------------------------------------------------------------------------------
/logger.py:
--------------------------------------------------------------------------------
1 | import logging.handlers
2 | import os
3 |
# Shared application logger: every record goes both to the console and to a
# size-rotated file under ./logs.
_logger = logging.getLogger('weread_to_notion')
_logger.setLevel(logging.DEBUG)

_cons_rht = logging.StreamHandler()

# _log_base_dir = f'/var/log'
_log_base_dir = '.'

log_dir = f'{_log_base_dir}/logs'
# exist_ok avoids the check-then-create race when several processes import
# this module at the same time.
os.makedirs(log_dir, exist_ok=True)
log_file_name = 'weread_to_notion'

# Rotate at 1 GiB per file, keeping up to 100 old files.
_file_rht = logging.handlers.RotatingFileHandler(f"{log_dir}/{log_file_name}.log",
                                                 maxBytes=1024 * 1024 * 1024,
                                                 backupCount=100)

_fmt = logging.Formatter("%(asctime)s pid:%(process)d %(filename)s:%(lineno)s %(levelname)s: %(message)s",
                         "%Y-%m-%d %H:%M:%S")

_file_rht.setFormatter(_fmt)
_cons_rht.setFormatter(_fmt)

_logger.addHandler(_file_rht)
_logger.addHandler(_cons_rht)

# Module-level shortcuts so callers can write `from logger import info, error`.
debug = _logger.debug
info = _logger.info
warning = _logger.warning
error = _logger.error
exception = _logger.exception
critical = _logger.critical
37 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # wereadtonotion
2 |
3 | 微信读书笔记、划线等信息同步到notion数据库
4 | > 效果如下:
5 | > 
6 |
7 | *注意:不存在划线、书评等信息的书籍不会被获取、导入*
8 |
9 | ## 功能
10 |
11 | ### 已有功能
12 |
13 | 1. 导入书籍的划线、评论、笔记等信息到 Notion 数据库
14 | > 注意:不存在划线、书评等信息的书籍不会被获取、导入
15 | 2. 支持 Web 端部署执行和本机 Python 脚本运行
16 |
17 | ### TODO
18 |
19 | - [ ] 支持 Notion 增量读写
20 | - [ ] 支持用户选择书籍获取模式 [存在划线 / 书架书籍 / 全部书籍]
21 | - [ ] 单独导出功能。导出为 markdown 文件
22 | - [ ] 考虑支持自定义 Notion 数据库结构
23 |
24 | ## 使用方法
25 |
26 | ### 1. 本地脚本运行
27 |
28 | 1. 安装依赖 `pip install -r requirements.txt`
29 | 2. 配置 settings/settings.py 文件中的信息
30 | > 1. 获取 **Notion token**
31 | > - 打开[此页面](https://www.notion.so/my-integrations)并登录
32 | > - 点击New integration 输入 name 提交.(如果已有,则点击 view integration)
33 | > - 点击show,然后copy
34 | > 2. 从微信读书中获取 cookie
35 | > - 在浏览器中打开 weread.qq.com 并登录
36 | > - 打开开发者工具(按 F12),点击 network(网络),刷新页面, 点击第一个请求,复制 cookie 的值。
37 | > 3. 准备 Notion Database ID
38 | > - 复制[此页面](https://www.notion.so/yayya/a9b3a8dfcc0543559005a263103fc81c)到你的
39 | Notion 中,点击右上角的分享按钮,将页面分享为公开页面
40 | >- 点击页面右上角三个点,在 connections 中找到并选择你的 connection,即第一步中创建的 integration 的 name
41 | >- 通过 URL 找到你的 Database ID 的值。
42 | > > 例如:页面 https://www.notion.so/yayya/d92bb4b8434745baa2061caf67d6ef7a?v=b4a5bfb89e8e44868a473179ee60x851 的
43 | ID 为d92bb4b8434745baa2061caf67d6ef7a
44 | 4. 运行 `python weread_2_notion.py`
45 |
46 | ### 2. 网页端部署运行
47 |
48 | #### docker 部署(推荐)
49 |
50 | 执行脚本: `bash deploy.sh`
51 |
52 | #### 直接部署
53 |
54 | 1. 安装依赖:`pip3 install -r requirements.txt`
55 | 2. 启动 Web 端: `python3 app.py`
56 |
57 | ## PS
58 |
59 | - 借鉴了大佬的[项目](https://github.com/malinkang/weread_to_notion),优化了代码结构、增加了web 端。非常感谢大佬的开源项目
60 | - 配合 NotionNext 构建 Blog [效果](https://yaya.run/weread)非常好
61 |
62 | ## 免责声明
63 |
64 | 本工具仅作技术研究之用,请勿用于商业或违法用途,由于使用本工具导致的侵权或其它问题,本工具不承担任何责任!
65 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 |
132 | # venv
133 | .cfg
--------------------------------------------------------------------------------
/templates/wereadtonotion.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Weread to Notion
5 |
29 |
30 |
31 |
32 |
Weread to Notion
33 |
49 |
50 |
51 |
52 |
53 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | from logger import info, exception
2 | from weread_2_notion import weread_2_notion
3 | from pywebio.platform.flask import webio_view
4 | from pywebio.input import *
5 | from pywebio.output import *
6 |
7 | from flask import Flask
8 |
9 | app = Flask(__name__)
10 |
11 |
def output_help_info(scope=None):
    """Render the page title and a collapsed step-by-step usage guide.

    Args:
        scope: PyWebIO scope name forwarded to the markdown output embedded
            in the collapsible "使用说明" section.
    """
    # NOTE(review): the markdown bodies are indented with the code; this relies
    # on put_markdown stripping leading indentation by default — confirm
    # against the installed PyWebIO version.
    put_markdown("""
    ## 微信读书书籍同步到 Notion 数据库
    **项目地址** :[weread_2_notion](https://github.com/DuckDuck88/weread2notion)
    """)
    put_text()
    put_collapse('使用说明', [
        # The WeRead link carries an explicit https:// scheme; without it the
        # browser resolves "weread.qq.com" relative to the current page.
        put_markdown('''
        1. 获取 Notion token
        - 打开[Notion developers](https://www.notion.so/my-integrations)并登录
        - 点击New integration 输入 name 提交.(如果已有,则点击 view integration)
        - 点击show,然后copy
        2. 从微信读书中获取 cookie
        - 在浏览器中打开[微信读书官网](https://weread.qq.com)并登录
        - 打开开发者工具(按 F12),点击 network(网络),刷新页面, 点击第一个请求,复制 cookie 的值。
        > ![](https://cos.duktig.cn/typora/202306161117391.png)
        3. 准备 Noiton Database ID
        - 复制这个[Notion 数据库](https://yayya.notion.site/10d8b5402c924498b44302e4ce75385c?v=05c0d43e80564da290f9fa837170013a&pvs=4)到你的 Notion 中,点击右上角的分享按钮,将页面分享为公开页面
        - 点击页面右上角三个点,在 connections 中找到选择你的 connections。选择第一步中创建的 integration 的 name
        - 打开你复制的 Notion 页面,通过该页面 URL 找到你的 Database ID 。
        > 例如:页面 https://www.notion.so/xxxx/d92bb4b8434745baa2061caf67d6ef7a?v=b4a5bfb89e8e44868a473179ee60x851 的 ID 为d92bb4b8434745baa2061caf67d6ef7a
        > ![](https://cos.duktig.cn/typora/202306161126356.png)
        4. 把如下信息输入到以下表单中
        ''', scope=scope)
    ], open=False)
37 |
38 |
def update_configs(name, value):
    """Set the value of the input field ``name`` from an ``onchange`` callback.

    The previous body called ``input_update(value, name='configs')``, which
    supplies ``name`` both positionally and by keyword and therefore always
    raised ``TypeError``; it also ignored the ``name`` argument.

    Args:
        name: Name of the input item to update.
        value: New value for that input item.

    Returns:
        Whatever ``input_update`` returns.
    """
    return input_update(name, value=value)
41 |
42 |
def _parse_book_blacklist(raw):
    """Split the free-text blacklist into a list of clean book titles.

    Titles may be separated by ASCII commas, full-width commas or line
    breaks. Surrounding whitespace is trimmed, but spaces inside a title are
    kept so that titles containing spaces can still match. Empty entries are
    dropped, so empty input yields ``[]``.
    """
    if not raw:
        return []
    # Normalise every supported separator to an ASCII comma before splitting.
    unified = raw.replace('\uff0c', ',').replace('\r', '\n').replace('\n', ',')
    return [title.strip() for title in unified.split(',') if title.strip()]


def input_config_info(scope=None):
    """Show the configuration form and return the submitted values.

    Args:
        scope: PyWebIO scope in which the form is displayed.

    Returns:
        tuple: ``(notion_token, weread_cookie, database_id, book_blacklist)``
        where ``book_blacklist`` is a list of book titles.
    """
    with use_scope(scope):
        datas = input_group('configs', [
            input("Notion token:", type=PASSWORD, placeholder='输入 notion tikon', name='notion_token', required=True),
            input("微信读书 cookie:", type=TEXT, placeholder='输入 微信读书 cookies', name='weread_cookie',
                  required=True),
            input("Database ID:,", type=TEXT, placeholder='输入 需要同步的 Notion 数据库 ID', name='database_id',
                  required=True),
            textarea("黑名单列表:", rows=3, placeholder='输入不同步的书籍黑名单,书名以逗号隔开', name='book_blacklist')
        ])
        datas['book_blacklist'] = _parse_book_blacklist(datas['book_blacklist'])
        return (datas['notion_token'], datas['weread_cookie'],
                datas['database_id'], datas['book_blacklist'])
59 |
60 |
def list_to_str(l):
    """Format book titles as one markdown blockquote line.

    Each title is rendered as ``*《title》* `` and the pieces are concatenated;
    an empty input produces the placeholder ``> 无``.
    """
    joined = ''.join(f'*《{title}》* ' for title in l)
    if not joined:
        return '> 无'
    return '> ' + joined
66 |
67 |
def dict_to_str(d):
    """Format failed books as one markdown blockquote line.

    ``d`` maps a book title to a two-item sequence ``(reason, detail)``.
    Entries are concatenated without a separator; an empty mapping produces
    the placeholder ``> 无``.
    """
    pieces = [
        f'书籍:*《{title}》* 失败原因: {failure[0]}, 书籍详情: {failure[1]}'
        for title, failure in d.items()
    ]
    if not pieces:
        return '> 无'
    return '> ' + ''.join(pieces)
73 |
74 |
def weread_to_notion():
    """PyWebIO task: collect the configuration, run the sync and report the result.

    Shows the help text and the configuration form, runs ``weread_2_notion``
    behind a loading indicator, then prints which books were synced, failed
    or ignored. If the sync raises, the error is logged and displayed together
    with a hint to reload the page.
    """
    output_help_info(scope='head')
    notion_token, weread_cookie, database_id, book_blacklist = input_config_info(scope='main')
    put_info('注意: 书籍较多时处理时间较长,当前页面可观察同步进度,关闭页面可以继续同步')

    try:
        with put_loading(shape='grow', color='primary'):
            all_book, handled_book, ignore_book, err_book = weread_2_notion(
                notion_token, weread_cookie, database_id, book_blacklist)
    except Exception as exc:
        reason = str(exc)
        exception(f'失败:{reason}')
        put_error(reason, closable=False)
        return put_error('请刷新当前页面重新输入正确配置', closable=True)

    total = len(all_book)
    ignored = len(ignore_book)
    synced = len(handled_book)
    failed = len(err_book)

    put_info(f'共有{total}本书籍,其中{ignored}本书籍被忽略,{synced}本书籍被同步')
    # One markdown section per outcome category.
    put_markdown(f'**成功同步以下书籍**:\n{list_to_str(handled_book)}')
    put_markdown(f'**同步以下书籍失败!**:\n{dict_to_str(err_book)}')
    put_markdown(f'**忽略以下书籍**:\n{list_to_str(ignore_book)}')
    return put_success(
        f'共有{total}本书籍,其中{ignored}本书籍被忽略,{synced}本书籍被同步,{failed}本书籍同步失败!',
        closable=True)
105 |
106 |
# PyWebIO's Flask integration needs GET, POST and OPTIONS on the view;
# listing OPTIONS explicitly lets webio_view handle those requests itself
# instead of Flask's automatic OPTIONS response.
app.add_url_rule('/', 'webio_view', webio_view(weread_to_notion),
                 methods=['GET', 'POST', 'OPTIONS'])

if __name__ == '__main__':
    import os

    # The server listens on every interface, so the Werkzeug debugger (which
    # allows arbitrary code execution from the browser) must not be on by
    # default. Set FLASK_DEBUG=1 to enable it for local development.
    debug_mode = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(host='0.0.0.0', port=80, debug=debug_mode)
112 |
--------------------------------------------------------------------------------
/weread_2_notion.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from pywebio.output import put_text, put_info
4 |
5 | from logger import info, debug, warning, error, exception
6 | from notion.notion import NotionClient
7 | from weread.weread import WeRead
8 |
# Load the local-run defaults. The web front end passes every value
# explicitly, so a missing or broken settings file must not stop this module
# from importing.
try:
    from settings.settings import WEREAD_COOKIE, DATABASE_ID, BOOK_BLACKLIST, NOTION_TOKEN
except Exception as e:
    error(f'导入配置文件失败,{e} Web 无伤大雅')
    # These names are used as default argument values further down the
    # module; leaving them undefined would raise NameError at import time.
    NOTION_TOKEN = ''
    WEREAD_COOKIE = ''
    DATABASE_ID = ''
    BOOK_BLACKLIST = []
13 |
14 |
def _bookmark_sort_key(mark):
    """Order bookmarks/notes by chapter, then by the start offset of their range.

    Entries without a usable ``range`` (missing, ``None`` or empty) sort first
    within their chapter.
    """
    start = (mark.get("range") or "").split("-")[0]
    return mark.get("chapterUid", 1), int(start) if start else 0


def weread_2_notion(notion_token=NOTION_TOKEN,
                    weread_cookie=WEREAD_COOKIE,
                    database_id=DATABASE_ID,
                    book_blacklist=BOOK_BLACKLIST,
                    is_web=True):
    """Sync every WeRead notebook (highlights, notes, reviews) into a Notion database.

    Args:
        notion_token: Notion integration token.
        weread_cookie: Cookie string copied from a logged-in weread.qq.com session.
        database_id: ID of the target Notion database.
        book_blacklist: Titles that must not be synced.
        is_web: When true, progress lines are also written to the PyWebIO page.

    Returns:
        tuple: ``(all_book, handled_book, ignore_book, err_book)``.
        ``all_book`` lists every title seen, ``handled_book`` the titles
        synced successfully, ``ignore_book`` the blacklisted titles and
        ``err_book`` maps a failed title to ``[error message, book data]``.

    Raises:
        Exception: If a client cannot be created or the notebook list cannot
            be fetched; the original error is chained as ``__cause__``.
    """
    try:
        weread_ = WeRead(weread_cookie)
    except Exception as e:
        raise Exception(f'微信读书登录失败,{e}') from e
    try:
        notion = NotionClient(notion_token, database_id)
    except Exception as e:
        raise Exception(f'notion 登录失败,{e}') from e
    # Notebook list. TODO: fetch all books; currently only books that have
    # highlights/notes are returned.
    try:
        books = weread_.get_notebooklist()
    except Exception as e:
        raise Exception(f'获取书籍列表失败,请检查微信读书 cookies 是否正确 {e}') from e

    all_book = []
    handled_book = []
    ignore_book = []
    err_book = {}
    info(f'书籍目录:{books}')
    if books is None:
        return all_book, handled_book, ignore_book, err_book

    total = len(books)
    for index, entry in enumerate(books):
        # Bound before the try so the except handler never sees an unbound
        # name or a value left over from the previous iteration.
        title = f'未知书籍#{index + 1}'
        book = entry
        try:
            sort = entry["sort"]  # last-update marker of the notebook
            book = entry.get("book")
            title = book.get("title")
            progress = f' 正在同步:《{title}》, 当前进度 {index}/{total}'
            info(progress)
            if is_web:
                put_text(progress)
            all_book.append(title)
            if title in book_blacklist:
                info(f'《{title}》在黑名单中,跳过')
                ignore_book.append(title)
                continue

            # The "unchanged since last sync" shortcut is skipped on purpose:
            # every book is always rewritten.
            cover = book.get("cover", '没有封面')
            bookId = book.get("bookId", '-1')
            author = book.get("author", '没有作者信息')
            info(f'开始处理《{title}》, bookId={bookId}, sort={sort}')
            # TODO: check() removes the existing page and the book is
            # re-inserted, which discards edits made by hand in Notion.
            notion.check(bookId)
            chapter = weread_.get_chapter_info(bookId)
            bookmark_list = weread_.get_bookmark_list(bookId)
            summary, reviews = weread_.get_review_list(bookId)
            bookmark_list.extend(reviews)
            bookmark_list = sorted(bookmark_list, key=_bookmark_sort_key)
            # Book metadata.
            isbn, rating, intro, category = weread_.get_bookinfo(bookId)
            # Reading progress.
            read_info = weread_.get_read_info(bookId)

            children, grandchild = notion.get_children(
                chapter, summary, bookmark_list)
            block_id = notion.insert_to_notion(bookName=title,
                                               bookId=bookId,
                                               book_str_id=weread_.calculate_book_str_id(bookId),
                                               cover=cover,
                                               sort=sort,
                                               author=author,
                                               isbn=isbn,
                                               rating=rating,
                                               intro=intro,
                                               category=category,
                                               read_info=read_info)
            results = notion.add_children(block_id, children)
            if grandchild and results is not None:
                notion.add_grandchild(grandchild, results)
            # Recorded only after everything succeeded, so a failed book is
            # never reported as synced.
            handled_book.append(title)
            debug(f'结束处理《{title}》, bookId={bookId}, sort={sort}')
        except Exception as e:
            exception(f'处理书籍《{title}》失败')
            err_book[title] = [str(e), book]
            continue
    return all_book, handled_book, ignore_book, err_book
105 |
106 |
107 | if __name__ == '__main__':
108 | weread_2_notion(is_web=False)
109 | # weread = WeRead(WEREAD_COOKIE)
110 | # print(weread.session.cookies.get_dict())
111 | # res = weread.session.get(
112 | # 'https://i.weread.qq.com/users/info')
113 | # weread.session.
114 | # print(weread.get_notebooklist())
115 |
--------------------------------------------------------------------------------
/weread/weread.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import re
3 | from http.cookies import SimpleCookie
4 | import requests
5 | from requests.utils import cookiejar_from_dict
6 | from logger import error
7 |
8 |
class WeRead:
    """Small client for the WeRead (微信读书) web API.

    Authentication relies on the cookie string of a logged-in browser session,
    which is loaded into a ``requests.Session``.
    """

    WEREAD_URL = "https://weread.qq.com/"
    WEREAD_NOTEBOOKS_URL = "https://i.weread.qq.com/user/notebooks"
    # WEREAD_NOTEBOOKS_URL = "https://i.weread.qq.com/shelf/friendCommon"
    WEREAD_BOOKMARKLIST_URL = "https://i.weread.qq.com/book/bookmarklist"
    WEREAD_CHAPTER_INFO = "https://i.weread.qq.com/book/chapterInfos"
    WEREAD_READ_INFO_URL = "https://i.weread.qq.com/book/readinfo"
    WEREAD_REVIEW_LIST_URL = "https://i.weread.qq.com/review/list"
    WEREAD_BOOK_INFO = "https://i.weread.qq.com/book/info"

    def __init__(self, weread_cookie):
        """Create a session that sends ``weread_cookie`` with every request."""
        self.session = requests.Session()
        self.session.cookies = self.parse_cookie_string(weread_cookie)

    @staticmethod
    def _range_start(mark):
        """Return the numeric start of a bookmark's ``"start-end"`` range, or 0."""
        start = (mark.get("range") or "").split("-")[0]
        return int(start) if start else 0

    def get_bookmark_list(self, bookId):
        """Return the user's highlights for a book, ordered by chapter and position.

        Returns ``None`` when the request fails.
        """
        r = self.session.get(self.WEREAD_BOOKMARKLIST_URL, params=dict(bookId=bookId))
        if not r.ok:
            return None
        updated = r.json().get("updated") or []
        # Return the sorted list itself; previously the sorted copy was
        # discarded and the unsorted payload was returned.
        return sorted(updated, key=lambda x: (x.get("chapterUid", 1), self._range_start(x)))

    def get_bookinfo(self, bookId):
        """Return ``(isbn, rating, intro, category)`` for a book.

        The rating is the API's ``newRating`` divided by 1000. When the request
        fails, ``("", 0, "", "")`` is returned.
        """
        r = self.session.get(self.WEREAD_BOOK_INFO, params=dict(bookId=bookId))
        isbn = ""
        newRating = 0
        intro = ""
        category = ""
        if r.ok:
            data = r.json()
            isbn = data.get("isbn", "-1")
            # A missing rating counts as 0; the old string default '-1' made
            # the division raise TypeError.
            newRating = (data.get("newRating") or 0) / 1000
            intro = data.get('intro', '本书没有介绍!')
            category = data.get("category", 'None')
        return (isbn, newRating, intro, category)

    def get_review_list(self, bookId):
        """Return ``(summary, reviews)`` for a book.

        ``summary`` holds the raw entries whose review type is 4. ``reviews``
        holds the review dicts of type 1, each with its ``content`` exposed
        under the key ``markText``.
        """
        params = dict(bookId=bookId, listType=11, mine=1, syncKey=0)
        r = self.session.get(self.WEREAD_REVIEW_LIST_URL, params=params)
        entries = r.json().get("reviews") or []
        summary = [x for x in entries if x.get("review").get("type") == 4]
        notes = [x.get("review") for x in entries if x.get("review").get("type") == 1]
        reviews = [{**x, "markText": x.pop("content")} for x in notes]
        return summary, reviews

    def get_read_info(self, bookId):
        """Return the reading-progress payload for a book, or ``None`` on failure."""
        params = dict(bookId=bookId, readingDetail=1,
                      readingBookIndex=1, finishedDate=1)
        r = self.session.get(self.WEREAD_READ_INFO_URL, params=params)
        if r.ok:
            return r.json()
        return None

    def _transform_id(self, book_id):
        """Encode ``book_id`` as hex chunks for ``calculate_book_str_id``.

        All-digit ids are split into groups of up to nine digits, each
        converted to hex, and tagged ``'3'``. Any other id is converted
        character by character into one hex string and tagged ``'4'``.
        """
        id_length = len(book_id)

        if re.match(r"^\d*$", book_id):
            chunks = [format(int(book_id[i:min(i + 9, id_length)]), 'x')
                      for i in range(0, id_length, 9)]
            return '3', chunks

        return '4', [''.join(format(ord(ch), 'x') for ch in book_id)]

    def calculate_book_str_id(self, book_id):
        """Derive the string id used in ``weread.qq.com/web/reader/<id>`` URLs."""
        digest = hashlib.md5(book_id.encode('utf-8')).hexdigest()
        code, transformed_ids = self._transform_id(book_id)
        result = digest[0:3] + code + '2' + digest[-2:]

        last = len(transformed_ids) - 1
        for i, chunk in enumerate(transformed_ids):
            # Each chunk is prefixed with its length as at least two hex digits.
            result += format(len(chunk), '02x') + chunk
            if i < last:
                result += 'g'

        # Pad to 20 characters with the head of the digest.
        if len(result) < 20:
            result += digest[0:20 - len(result)]

        # Trailing three-character checksum over everything built so far.
        result += hashlib.md5(result.encode('utf-8')).hexdigest()[0:3]
        return result

    def get_chapter_info(self, bookId):
        """Return ``{chapterUid: chapter}`` for a book, or ``None`` if unavailable."""
        body = {
            'bookIds': [bookId],
            'synckeys': [0],
            'teenmode': 0
        }
        r = self.session.post(self.WEREAD_CHAPTER_INFO, json=body)
        if not r.ok:
            return None
        payload = r.json()  # parse the body once
        if "data" in payload and len(payload["data"]) == 1 and "updated" in payload["data"][0]:
            return {item["chapterUid"]: item for item in payload["data"][0]["updated"]}
        return None

    def get_notebooklist(self):
        """Return the user's notebooks sorted by their ``sort`` field.

        Raises:
            RuntimeError: If the request fails.
        """
        r = self.session.get(self.WEREAD_NOTEBOOKS_URL)
        if not r.ok:
            error(f'获取图书失败,{r.text}')
            raise RuntimeError(f'获取图书失败,{r.text}')
        # A response without "books" yields an empty list instead of crashing.
        books = r.json().get("books") or []
        books.sort(key=lambda x: x["sort"])
        return books

    def parse_cookie_string(self, cookie_string):
        """Convert a raw ``Cookie`` header string into a requests cookie jar."""
        cookie = SimpleCookie()
        cookie.load(cookie_string)
        cookies_dict = {key: morsel.value for key, morsel in cookie.items()}
        return cookiejar_from_dict(cookies_dict, cookiejar=None, overwrite=True)
145 |
--------------------------------------------------------------------------------
/notion/notion.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 | from datetime import datetime
5 |
6 | from retrying import retry
7 |
8 | from logger import info, debug
9 |
10 | o_path = os.getcwd()
11 | sys.path.append(o_path)
12 |
13 | from notion_client import Client
14 |
15 |
class NotionClient(object):
    """Helper around ``notion_client.Client`` that writes WeRead books into one database.

    Each book becomes a page in the database; chapters, highlights and notes
    become child blocks of that page.
    """

    # Pause before each Notion request, in seconds.
    _REQUEST_PAUSE = 0.3
    # Maximum number of blocks sent in one append request.
    _MAX_BLOCKS_PER_REQUEST = 100
    # Maximum length of one rich-text content string accepted by Notion.
    _MAX_TEXT_LENGTH = 2000
    # Highlight colour code -> Notion colour name.
    _COLOR_BY_STYLE = {1: "red", 2: "purple", 3: "blue", 4: "green", 5: "yellow"}

    def __init__(self, token, database_id):
        """Create the API client for ``token`` and remember the target database."""
        self.client = Client(auth=token)
        self.database_id = database_id

    def test_api(self):
        """Log the results of a sample search; useful for checking the token."""
        res = self.client.search(query="收藏夹").get("results")
        info(res)

    def check(self, bookId):
        """Delete every database entry whose ``BookId`` equals ``bookId``.

        Called before a book is inserted so that it is not duplicated.
        """
        time.sleep(self._REQUEST_PAUSE)
        debug(f"开始检查{bookId}是否已经插入")
        query_filter = {
            "property": "BookId",
            "rich_text": {
                "equals": bookId
            }
        }
        response = self.client.databases.query(database_id=self.database_id, filter=query_filter)
        for result in response["results"]:
            time.sleep(self._REQUEST_PAUSE)
            self.client.blocks.delete(block_id=result["id"])

    def get_table_of_contents(self):
        """Return a table-of-contents block."""
        return {
            "type": "table_of_contents",
            "table_of_contents": {
                "color": "default"
            }
        }

    def get_heading(self, level, content):
        """Return a heading block; any level other than 1 or 2 becomes ``heading_3``."""
        if level == 1:
            heading = "heading_1"
        elif level == 2:
            heading = "heading_2"
        else:
            heading = "heading_3"
        return {
            "type": heading,
            heading: {
                "rich_text": [{
                    "type": "text",
                    "text": {
                        "content": content,
                    }
                }],
                "color": "default",
                "is_toggleable": False
            }
        }

    def get_quote(self, content):
        """Return a quote block containing ``content``."""
        return {
            "type": "quote",
            "quote": {
                "rich_text": [{
                    "type": "text",
                    "text": {
                        "content": content
                    },
                }],
                "color": "default"
            }
        }

    def get_callout(self, content, style, colorStyle, reviewId):
        """Return a callout block for one highlight or note.

        The emoji depends on the highlight style (0 and 1 have dedicated
        icons, anything else uses the default); entries with a ``reviewId``
        are notes and get the writing-hand icon. ``colorStyle`` values 1-5
        select the text colour.
        """
        if reviewId is not None:
            emoji = "✍️"
        elif style == 0:
            emoji = "💡"
        elif style == 1:
            emoji = "⭐"
        else:
            emoji = "🌟"
        color = self._COLOR_BY_STYLE.get(colorStyle, "default")
        return {
            "type": "callout",
            "callout": {
                "rich_text": [{
                    "type": "text",
                    "text": {
                        "content": content,
                    }
                }],
                "icon": {
                    "emoji": emoji
                },
                "color": color
            }
        }

    def get_children(self, chapter, summary, bookmark_list):
        """Build the page content for a book.

        Args:
            chapter: Mapping ``chapterUid -> chapter info``, or ``None`` when
                no chapter information is available.
            summary: Book-level reviews, appended under a "点评" heading.
            bookmark_list: Highlights and notes, already in display order.

        Returns:
            tuple: ``(children, grandchild)``. ``children`` is the list of
            top-level blocks; ``grandchild`` maps an index in ``children`` to
            the quote block that must be nested under that child.
        """
        children = []
        grandchild = {}
        if chapter is not None:
            # Table of contents first.
            children.append(self.get_table_of_contents())
            by_chapter = {}
            for mark in bookmark_list:
                by_chapter.setdefault(mark.get("chapterUid", 1), []).append(mark)
            for chapter_uid, marks in by_chapter.items():
                if chapter_uid in chapter:
                    # Chapter heading.
                    children.append(self.get_heading(
                        chapter.get(chapter_uid).get("level"), chapter.get(chapter_uid).get("title")))
                for mark in marks:
                    # Use this mark's own style; previously the style of the
                    # last bookmark in the whole list leaked in here.
                    children.append(self.get_callout(
                        mark.get("markText"), mark.get("style"), mark.get("colorStyle"), mark.get("reviewId")))
                    abstract = mark.get("abstract")
                    if abstract is not None and abstract != "":
                        grandchild[len(children) - 1] = self.get_quote(abstract)
        else:
            # No chapter information: emit the marks as a flat list.
            for mark in bookmark_list:
                children.append(self.get_callout(mark.get("markText"),
                                                 mark.get("style"), mark.get("colorStyle"), mark.get("reviewId")))
        if summary is not None and len(summary) > 0:
            children.append(self.get_heading(1, "点评"))
            for item in summary:
                children.append(self.get_callout(item.get("review").get("content"), item.get(
                    "style"), item.get("colorStyle"), item.get("review").get("reviewId")))
        return children, grandchild

    def add_children(self, id, children):
        """Append ``children`` to block ``id`` in batches.

        Returns:
            The created blocks in order, or ``None`` if their number does not
            match ``len(children)``.
        """
        results = []
        # Step by batch size so that no empty trailing request is sent when
        # the number of children is zero or an exact multiple of the batch.
        for start in range(0, len(children), self._MAX_BLOCKS_PER_REQUEST):
            time.sleep(self._REQUEST_PAUSE)
            response = self.client.blocks.children.append(
                block_id=id, children=children[start:start + self._MAX_BLOCKS_PER_REQUEST])
            results.extend(response.get("results"))
        return results if len(results) == len(children) else None

    def add_grandchild(self, grandchild, results):
        """Nest each quote in ``grandchild`` under the created block at the same index."""
        for key, value in grandchild.items():
            time.sleep(self._REQUEST_PAUSE)
            id = results[key].get("id")
            self.client.blocks.children.append(block_id=id, children=[value])

    def insert_to_notion(self, bookName, bookId, book_str_id, cover, sort, author, isbn, rating, intro, category,
                         read_info=None):
        """Create the database page for a book and return the new page id.

        ``read_info`` is the optional reading-progress payload; when present
        it fills the ``Status``, ``ReadingTime`` and, if the book has a
        finish timestamp, ``Date`` properties.
        """
        from datetime import timedelta, timezone

        time.sleep(self._REQUEST_PAUSE)
        parent = {
            "database_id": self.database_id,
            "type": "database_id"
        }
        # Cap the introduction at the longest text Notion accepts.
        intro = (intro or "")[:self._MAX_TEXT_LENGTH]
        properties = {
            "BookName": {"title": [{"type": "text", "text": {"content": bookName}}]},
            "BookId": {"rich_text": [{"type": "text", "text": {"content": bookId}}]},
            "ISBN": {"rich_text": [{"type": "text", "text": {"content": isbn}}]},
            "URL": {"url": f"https://weread.qq.com/web/reader/{book_str_id}"},
            "Author": {"rich_text": [{"type": "text", "text": {"content": author}}]},
            "Sort": {"number": sort},
            "Rating": {"number": rating},
            "Cover": {"files": [{"type": "external", "name": "Cover", "external": {"url": cover}}]},
            "intro": {"rich_text": [{"type": "text", "text": {"content": intro}}]},
            "category": {"select": {"name": category}}
        }
        if read_info is not None:
            markedStatus = read_info.get("markedStatus", 0)
            readingTime = read_info.get("readingTime", 0)
            format_time = ""
            hour = readingTime // 3600
            if hour > 0:
                format_time += f"{hour}时"
            minutes = readingTime % 3600 // 60
            if minutes > 0:
                format_time += f"{minutes}分"
            properties["Status"] = {"select": {"name": "读完" if markedStatus == 4 else "在读"}}
            properties["ReadingTime"] = {"rich_text": [{"type": "text", "text": {"content": format_time}}]}
            finished = read_info.get("finishedDate")
            if finished is not None:
                # The value is labelled Asia/Shanghai, so the wall-clock time
                # must be computed in UTC+8 rather than in UTC.
                shanghai = timezone(timedelta(hours=8))
                properties["Date"] = {"date": {
                    "start": datetime.fromtimestamp(finished, tz=shanghai).strftime("%Y-%m-%d %H:%M:%S"),
                    "time_zone": "Asia/Shanghai"}}

        icon = {
            "type": "external",
            "external": {
                "url": cover
            }
        }
        # Only the page is created here; its blocks are appended afterwards
        # in batches because of the per-request block limit.
        response = self.client.pages.create(
            parent=parent, icon=icon, properties=properties)
        return response["id"]

    @retry(stop_max_attempt_number=3, wait_fixed=1000)
    def get_sort(self):
        """Return the largest ``Sort`` value stored in the database, or 0 if none."""
        query_filter = {
            "property": "Sort",
            "number": {
                "is_not_empty": True
            }
        }
        sorts = [
            {
                "property": "Sort",
                "direction": "descending",
            }
        ]
        response = self.client.databases.query(
            database_id=self.database_id, filter=query_filter, sorts=sorts, page_size=1)
        found = response.get("results")
        if len(found) == 1:
            return found[0].get("properties").get("Sort").get("number")
        return 0
244 |
--------------------------------------------------------------------------------