├── api ├── __init__.py ├── weread.py └── notion.py ├── lib ├── __init__.py ├── serverchan.py ├── page_block_list.py ├── test_page_block_list.py ├── db_weread_record.py └── test_db_weread_record.py ├── sync └── weread │ ├── __init__.py │ └── calendar.py ├── .gitignore ├── var └── sync_read.db ├── requirements.txt ├── default.ini ├── config.py ├── main.py ├── LICENSE ├── .github └── workflows │ └── weread.yml ├── README.md ├── README.zh-CN.md ├── sync_trending.py ├── sync_producthunt.py └── sync_read.py /api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sync/weread/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE - VSCode 2 | .vscode/* 3 | __pycache__/ 4 | *.pyc 5 | todo.txt -------------------------------------------------------------------------------- /var/sync_read.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alex-guoba/sync-notion/HEAD/var/sync_read.db -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | notion-client 3 | treelib 4 | fire 5 | pyquery 6 | PyGithub 7 | pysqlite3 8 | -------------------------------------------------------------------------------- /default.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | 3 | [weread.format] 4 | 
DEFAULT_CONFIG_FILE = 'default.ini'


def get_config_file():
    """Return the config file path.

    The ``CONFIG_FILE`` environment variable wins when it is set (even if it
    is set to an empty string); otherwise ``DEFAULT_CONFIG_FILE`` is used.
    """
    if 'CONFIG_FILE' in os.environ:
        return os.environ['CONFIG_FILE']
    return DEFAULT_CONFIG_FILE


# Resolved once at import time; later changes to the environment are ignored.
CONFIG_FILE = get_config_file()


def create_config(config_file=None):
    """Build a ``ConfigParser`` loaded from *config_file*.

    :param config_file: path of the INI file; any falsy value falls back to
        the module-level ``CONFIG_FILE``.
    :return: the populated parser (left empty when the file cannot be read,
        because ``ConfigParser.read`` skips files it cannot open).
    """
    path = config_file if config_file else CONFIG_FILE
    config = configparser.ConfigParser()
    config.read(path)
    return config


# Shared, import-time configuration object.
CONFIG = create_config()
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Sub-command name -> function implementing it. The name is the first
    # CLI argument (e.g. `python main.py sync_trending ...`).
    commands = {
        "sync_read": sync_read,
        "sync_trending": sync_trending,
        "sync_producthunt": sync_producthunt,
    }
    fire.Fire(commands)
def sc_send(sendkey, title, desp="", options=None):
    """Push a message through ServerChan and return the decoded JSON reply.

    :param sendkey: ServerChan send key. Keys starting with ``sctp`` must
        look like ``sctp<digits>t...``; the digits select the push host.
    :param title: message title.
    :param desp: message body, empty by default.
    :param options: optional extra fields merged into the request payload
        (they override ``title``/``desp`` on key collision).
    :return: the response body parsed as JSON.
    :raises ValueError: if an ``sctp`` key does not match the expected format.
    """
    extra = {} if options is None else options

    if not sendkey.startswith("sctp"):
        endpoint = f"https://sctapi.ftqq.com/{sendkey}.send"
    else:
        # The numeric part of the key is the sub-domain of the push host.
        matched = re.match(r"sctp(\d+)t", sendkey)
        if matched is None:
            raise ValueError("Invalid sendkey format for sctp")
        endpoint = f"https://{matched.group(1)}.push.ft07.com/send/{sendkey}.send"

    payload = {"title": title, "desp": desp, **extra}
    reply = requests.post(
        endpoint,
        json=payload,
        headers={"Content-Type": "application/json;charset=utf-8"},
        timeout=10,
    )
    return reply.json()
python -m pip install --upgrade pip 23 | pip install -r requirements.txt 24 | #- name: weread sync 25 | # id: weread-sync 26 | # run: | 27 | # python main.py sync_read "${{secrets.WEREAD_COOKIE}}" "${{secrets.NOTION_TOKEN}}" "${{secrets.NOTION_DATABASE_ID}}" --calendar_db_id="${{secrets.NOTION_DATABASE_CALENDAR}}" --wxnotify_key="${{secrets.SCKEY}}" 28 | - name: trending sync 29 | id: trending-sync 30 | run: | 31 | python main.py sync_trending "${{secrets.NOTION_TOKEN}}" "${{secrets.NOTION_DATABASE_TRENDING}}" --git_token="${{secrets.GIT_TOKEN}}" 32 | - name: product-hunt sync 33 | id: product-hunt-sync 34 | run: | 35 | python main.py sync_producthunt "${{secrets.NOTION_TOKEN}}" "${{secrets.NOTION_DATABASE_PH}}" 36 | - name: Verify Changed files 37 | uses: tj-actions/verify-changed-files@v20 38 | id: verify-changed-files 39 | with: 40 | files: | 41 | ./var/sync_read.db 42 | - name: Commit 43 | if: steps.verify-changed-files.outputs.files_changed == 'true' 44 | run: | 45 | git config user.name github-actions 46 | git config user.email github-actions@github.com 47 | git add ./var/sync_read.db 48 | git commit -m "auto sync" 49 | git push 50 | -------------------------------------------------------------------------------- /lib/page_block_list.py: -------------------------------------------------------------------------------- 1 | """Orgnize the page blocks in a Notion page""" 2 | 3 | def safe_cast(val, to_type, default=None): 4 | """ 5 | 尝试将输入值 `val` 转换为指定类型 `to_type`,如果转换失败则返回默认值 `default`。 6 | """ 7 | try: 8 | return to_type(val) 9 | except (ValueError, TypeError): 10 | return default 11 | 12 | class PageBlockList(object): 13 | """Implements the PageBlockList class.""" 14 | 15 | def __init__(self, store, book_id, blocks): 16 | """Constructor for the PageBlockList class. 
17 | list item format: 18 | { 19 | 'type': 'paragraph / heading / list / image ....', 20 | 'id': '$block_id', 21 | } 22 | """ 23 | self.book_id = book_id 24 | self.blocks = [] 25 | for block in blocks: 26 | bookmark_id = None 27 | _result = store.query_by_block(book_id, block['id']) 28 | if _result: 29 | bookmark_id = _result[0]['bookmark_id'] 30 | self.blocks.append({ 31 | 'type': block['type'], 32 | 'id': block['id'], 33 | 'bookmark_id': bookmark_id, 34 | }) 35 | 36 | def found_chapter_position(self, chapter_uid: int) -> str | None: 37 | """Find the position of a chapter in the list. 38 | return true if found, false if not found 39 | """ 40 | chapter, block_id, block_idx = -1, None, -1 41 | for idx, block in enumerate(self.blocks): 42 | if block['bookmark_id'] is not None and block['type'].startswith('heading_'): 43 | _cuid = safe_cast(block['bookmark_id'], int, 0) 44 | if _cuid < chapter_uid and _cuid > chapter: # find the biggest one in [0, chapter_uid] 45 | chapter = _cuid 46 | block_id = block['id'] 47 | block_idx = idx 48 | 49 | # push to the first block if not found 50 | if not block_id: 51 | return self.blocks[0]['id'] if len(self.blocks) > 0 else None 52 | 53 | # iterate to the end of chapter 54 | while block_idx < len(self.blocks) - 1: 55 | block = self.blocks[block_idx + 1] 56 | if block['type'].startswith('heading_'): 57 | return block_id 58 | block_idx += 1 59 | block_id = block['id'] 60 | return block_id -------------------------------------------------------------------------------- /lib/test_page_block_list.py: -------------------------------------------------------------------------------- 1 | """unit test for DBWeReadRecord""" 2 | import unittest 3 | from datetime import datetime, timedelta 4 | 5 | from lib.db_weread_record import DBWeReadRecord 6 | # from lib.db_weread_record import DBWeReadRecord # 替换your_module_name为实际的模块名 7 | 8 | class TestDBReadRecord(unittest.TestCase): 9 | """unit test for DBWeReadRecord""" 10 | 11 | def setUp(self): 12 | 
# 创建一个临时的数据库用于测试 13 | self.db_name = ":memory:" 14 | # self.db_name = "./var/tutorial.db" 15 | self.db_reader = DBWeReadRecord(self.db_name) 16 | 17 | def tearDown(self): 18 | # 测试结束后关闭数据库连接 19 | del self.db_reader 20 | 21 | def test_insert_and_query(self): 22 | """test insert and query""" 23 | # 测试插入和查询数据 24 | book_id = '12345' 25 | bookmark_id = 'chapter1' 26 | block_id = 'b55c9c91-384d-452b-81db-d1ef79372b75' 27 | 28 | expected_op_time = datetime.now() 29 | 30 | # 插入数据 31 | inserted_id = self.db_reader.insert(book_id, bookmark_id, block_id) 32 | self.assertTrue(inserted_id >= 0) 33 | 34 | # 查询数据 35 | results = self.db_reader.query(book_id, bookmark_id) 36 | self.assertEqual(len(results), 1) 37 | result = results[0] 38 | self.assertEqual(result['book_id'], book_id) 39 | self.assertEqual(result['bookmark_id'], bookmark_id) 40 | self.assertEqual(result['block_id'], block_id) 41 | # 检查op_time在合理范围内 42 | self.assertLessEqual(result['op_time'], expected_op_time + timedelta(seconds=10)) 43 | self.assertGreaterEqual(result['op_time'], expected_op_time - timedelta(seconds=10)) 44 | 45 | def test_insert_duplicate_and_query(self): 46 | """test insert duplicate and query""" 47 | # 测试插入重复数据并查询 48 | book_id = '987652' 49 | bookmark_id = 'chapter3' 50 | block_id = 'b55c9c91-384d-452b-81db-d1ef79379999' 51 | 52 | # 第一次插入 53 | inserted_id_1 = self.db_reader.insert(book_id, bookmark_id, block_id) 54 | self.assertTrue(inserted_id_1 >= 0) 55 | 56 | # 尝试第二次插入相同的数据,应不插入新数据 57 | inserted_id_2 = self.db_reader.insert(book_id, bookmark_id, block_id) 58 | self.assertEqual(inserted_id_1, inserted_id_2) 59 | 60 | # 查询数据,应该只有一条记录 61 | results = self.db_reader.query(book_id, bookmark_id) 62 | self.assertEqual(len(results), 1) 63 | 64 | if __name__ == '__main__': 65 | unittest.main() 66 | -------------------------------------------------------------------------------- /sync/weread/calendar.py: -------------------------------------------------------------------------------- 1 | """sync reading 
def query_filter(book_id: str, date: float):
    """Build the Notion filter matching one book on one reading day.

    :param book_id: value compared against the ``BookId`` rich-text property.
    :param date: POSIX timestamp; it is converted with the local timezone
        and only its ``YYYY-MM-DD`` part is compared against ``ReadDate``.
    :return: a filter dict combining both conditions with ``and``.
    """
    read_day = datetime.fromtimestamp(date).strftime("%Y-%m-%d")
    book_clause = {"property": "BookId", "rich_text": {"equals": book_id}}
    date_clause = {"property": "ReadDate", "date": {"equals": read_day}}
    return {"and": [book_clause, date_clause]}
class DBWeReadRecord(object):
    """SQLite-backed record of what has already been synced to Notion.

    Each row maps one WeRead item (``book_id`` + ``bookmark_id``) to the
    Notion block (``block_id``) written for it, plus the time of the write.
    """

    TabName = "weread_sync_record"
    SqlCreate = f"""create table if not exists {TabName}
        (book_id VARCHAR(255), bookmark_id varchar(255), block_id VARCHAR(255),
        op_time TIMESTAMP, resv VARCHAR(255),
        PRIMARY KEY (book_id, bookmark_id, block_id))"""

    def __init__(self, db_name):
        """Open (or create) the database and make sure the table exists.

        :param db_name: SQLite database path, or ``":memory:"``.
        """
        # Assigned first so close()/__del__ stay safe if connect() raises.
        self.connection = None
        # NOTE(review): op_time round-trips through sqlite3's default
        # datetime adapter/converter, which the sqlite3 docs mark as
        # deprecated since Python 3.12.
        self.connection = sqlite3.connect(
            db_name, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES
        )
        # sqlite3.Row lets callers read columns by name: row["block_id"].
        self.connection.row_factory = sqlite3.Row
        self.create_table()

    def close(self):
        """Close the connection; calling it more than once is harmless."""
        connection = getattr(self, "connection", None)
        if connection is None:
            return
        self.connection = None
        print("closing db connection")
        connection.close()

    def __del__(self):
        """Close the connection when the object is garbage collected."""
        self.close()

    def create_table(self):
        """Create the record table if it does not exist yet."""
        cursor = self.connection.cursor()
        cursor.execute(self.SqlCreate)

    def insert(self, book_id, bookmark_id, block_id):
        """Record that a bookmark/chapter was written to a Notion block.

        An existing (book_id, bookmark_id, block_id) row is left untouched
        (``insert or ignore``). The operation time is set to "now".

        :param book_id: book id.
        :param bookmark_id: bookmark id or chapter id.
        :param block_id: id of the Notion block holding the content.
        :return: ``cursor.lastrowid`` as reported by sqlite3.
        """
        now = datetime.datetime.now()

        sql = (
            f"insert or ignore into {self.TabName}"
            "(book_id, bookmark_id, block_id, op_time) values (?, ?, ?, ?)"
        )
        cursor = self.connection.cursor()
        cursor.execute(sql, (book_id, bookmark_id, block_id, now))
        self.connection.commit()
        return cursor.lastrowid

    def query(self, book_id, bookmark_id):
        """Return the rows recorded for one bookmark/chapter of a book.

        :return: list of rows (book_id, bookmark_id, block_id, op_time);
            empty when nothing was synced yet.
        """
        sql = (
            "select book_id, bookmark_id, block_id, op_time "
            f"from {self.TabName} where book_id=? and bookmark_id=?"
        )
        cursor = self.connection.cursor()
        cursor.execute(sql, (book_id, bookmark_id))
        return cursor.fetchall()

    def query_by_block(self, book_id, block_id):
        """Return the rows recorded for one Notion block of a book.

        :return: list of rows (book_id, bookmark_id, block_id, op_time);
            empty when the block is unknown.
        """
        sql = (
            "select book_id, bookmark_id, block_id, op_time "
            f"from {self.TabName} where book_id=? and block_id=?"
        )
        cursor = self.connection.cursor()
        cursor.execute(sql, (book_id, block_id))
        return cursor.fetchall()

    def delete_book(self, book_id):
        """Delete every record of a book.

        :param book_id: book id.
        """
        sql = f"delete from {self.TabName} where book_id=?"
        cursor = self.connection.cursor()
        cursor.execute(sql, (book_id,))
        self.connection.commit()

    def delete_bookmark(self, book_id, bookmark_id):
        """Delete the records of one bookmark/chapter of a book.

        :param book_id: book id.
        :param bookmark_id: bookmark id or chapter id.
        """
        sql = f"delete from {self.TabName} where book_id=? and bookmark_id=?"
        cursor = self.connection.cursor()
        cursor.execute(sql, (book_id, bookmark_id))
        self.connection.commit()
class WeReadAPI:
    """Small client for the WeRead web API, authenticated by a cookie."""

    # All books that have notes (summary info only).
    WEREAD_NOTEBOOKS_URL = "https://weread.qq.com/api/user/notebook"

    # Chapter list of a book.
    WEREAD_CHAPTER_INFO = "https://weread.qq.com/web/book/chapterInfos"

    # Highlights (bookmarks) of a book.
    WEREAD_BOOKMARKLIST_URL = "https://weread.qq.com/web/book/bookmarklist"

    # Reviews of a book: comments and the recommendation summary.
    WEREAD_REVIEW_LIST_URL = "https://weread.qq.com/web/review/list"

    # Book details.
    WEREAD_BOOK_INFO = "https://weread.qq.com/web/book/info"

    # Reading progress and reading details.
    WEREAD_READ_INFO_URL = "https://weread.qq.com/web/book/readinfo"

    WEREAD_URL = "https://weread.qq.com/"

    def __init__(self, cookie):
        """Create a session carrying *cookie* and touch the home page once.

        :param cookie: raw ``Cookie`` header string copied from the browser.
        """
        session = requests.Session()
        session.cookies = self._parse_cookie(cookie)
        session.get(self.WEREAD_URL)
        self.session = session

    def _parse_cookie(self, cookie_string):
        """Turn a raw cookie header string into a cookie jar.

        The jar is built once from all parsed cookies, so an empty cookie
        string yields an empty jar instead of ``None``.
        """
        parsed = SimpleCookie()
        parsed.load(cookie_string)
        cookies_dict = {key: morsel.value for key, morsel in parsed.items()}
        return cookiejar_from_dict(cookies_dict, cookiejar=None, overwrite=True)

    @staticmethod
    def _bookmark_sort_key(mark):
        """Sort key for a highlight: (chapter uid, start offset of its range).

        ``range`` looks like ``"<start>-<end>"``; a missing or malformed
        value sorts as offset 0 instead of raising.
        """
        start_text = str(mark.get("range") or "").split("-")[0]
        try:
            start = int(start_text)
        except ValueError:
            start = 0
        return (mark.get("chapterUid", 1), start)

    def get_notebooklist(self):
        """Return the books that have notes, ordered by their ``sort`` field.

        Only summary data is included, not the notes themselves. Returns an
        empty list when the request fails or the payload has no ``books``.
        """
        r = self.session.get(self.WEREAD_NOTEBOOKS_URL)
        if not r.ok:
            print(f"get notesbook failed: {r.text}")
            return []
        books = r.json().get("books") or []
        # Ascending by the "sort" field (treated as last-update order).
        books.sort(key=lambda x: x["sort"])
        return books

    def get_chapter_list(self, bookId):
        """Return the chapter list of a book, or ``[]`` on any failure."""
        body = {"bookIds": [bookId]}
        r = self.session.post(self.WEREAD_CHAPTER_INFO, json=body)
        if r.ok:
            payload = r.json()  # parse the body once
            data = payload.get("data") if isinstance(payload, dict) else None
            if isinstance(data, list) and len(data) == 1 and "updated" in data[0]:
                return data[0]["updated"]
        print(r.text)
        return []

    def get_bookmark_list(self, bookId):
        """Return a book's highlights sorted by chapter, then by position.

        Returns ``[]`` when the request fails or there are no highlights.
        """
        params = dict(bookId=bookId)
        r = self.session.get(self.WEREAD_BOOKMARKLIST_URL, params=params)
        if not r.ok:
            print(f"get bookmarklist failed: {r.text}")
            return []
        updated = r.json().get("updated") or []
        # Return the sorted list itself, not the raw payload order.
        return sorted(updated, key=self._bookmark_sort_key)

    def get_review_list(self, bookId):
        """Return ``(summary, reviews)`` for a book.

        ``summary`` holds the raw entries whose review type is 4.
        ``reviews`` holds the review dicts of type 1, each with its
        ``content`` copied to ``markText``. Both are empty lists when the
        request fails or the payload has no reviews.
        """
        params = dict(bookId=bookId, listType=11, mine=1, syncKey=0)
        r = self.session.get(self.WEREAD_REVIEW_LIST_URL, params=params)
        if not r.ok:
            print(r.text)
            return [], []
        entries = r.json().get("reviews") or []
        summary = [e for e in entries if (e.get("review") or {}).get("type") == 4]
        notes = [
            e["review"] for e in entries if (e.get("review") or {}).get("type") == 1
        ]
        reviews = [{**note, "markText": note.get("content", "")} for note in notes]
        return summary, reviews

    def get_bookinfo(self, bookId: str) -> tuple:
        """Return ``(isbn, rating, category, intro)`` for a book.

        ``rating`` is ``newRating / 1000``. Missing fields and failed
        requests fall back to ``""`` / ``0``.
        """
        params = dict(bookId=bookId)
        r = self.session.get(self.WEREAD_BOOK_INFO, params=params)
        isbn = ""
        rating = 0
        category = ""
        intro = ""

        if r.ok:
            data = r.json()
            isbn = data.get("isbn", "")
            rating = data.get("newRating", 0) / 1000
            category = data.get("category", "")
            intro = data.get("intro", "")

        return (isbn, rating, category, intro)

    def get_read_info(self, bookId):
        """Return the reading progress payload of a book, ``{}`` on failure."""
        params = dict(
            bookId=bookId, readingDetail=1, readingBookIndex=1, finishedDate=1
        )
        r = self.session.get(self.WEREAD_READ_INFO_URL, params=params)
        if r.ok:
            return r.json()
        return {}


def str_reading_time(reading_time: int):
    """Format a duration in seconds as hours and minutes.

    Zero-valued parts are omitted, so anything under a minute gives ``""``.
    """
    format_time = ""
    hour = reading_time // 3600
    if hour > 0:
        format_time += f"{hour}时"
    minutes = reading_time % 3600 // 60
    if minutes > 0:
        format_time += f"{minutes}分"
    return format_time

2 | Example Page 3 |

4 | 5 | # 将微信读书笔记、github trending、memos 自动同步到 Notion 6 | 7 | 本项目支持将微信读书笔记(划线及评论)、github trending 同步 Notion。支持本地手工、github action 定期两种方式。可以修改 action 配置自行按需选择。 8 | 9 | [English](./README.md) | 简体中文 10 | 11 | ## Requirements 12 | 13 | Python 3.10 14 | 15 | ## 同步微信读书笔记 16 | 17 | ### 使用 18 | 19 | 1. star 本项目 20 | 21 | 2. fork 这个工程,删除目录中的临时存储文件(./var/sync_read.db) 22 | 23 | 3. 获取微信读书的 Cookie: `WEREAD_COOKIE` 24 | 25 | - 浏览器打开 https://weread.qq.com/ 26 | - 微信扫码登录确认,提示没有权限忽略即可 27 | - 按 F12 进入开发者模式,依次点 Network -> Doc -> Headers-> cookie。复制 Cookie 字符串; 28 | 29 | 4. 获取 NotionToken: `NOTION_TOKEN` 30 | 31 | - 浏览器打开https://www.notion.so/my-integrations 32 | - 点击 New integration 输入 name 提交 33 | - 点击 show,然后 copy 34 | 35 | 5. 复制[这个 Notion 模板](https://gelco.notion.site/67639069c7b84f55b6394f16ecda0c4f?v=b5d09dc635db4b3d8ba13b200b88d823&pvs=25),删掉所有的数据,并点击右上角设置,Connections 添加你创建的 Integration。 36 | 37 | 6. 获取 NotionDatabaseID: `NOTION_DATABASE_ID` 38 | 39 | - 打开 Notion 数据库,点击右上角的 Share,然后点击 Copy link 40 | - 获取链接后比如https://gelco.notion.site/67639069c7b84f55b6394f16ecda0c4f?v=b5d09dc635db4b3d8ba13b200b88d823&pvs=25 中间的**67639069c7b84f55b6394f16ecda0c4f**就是 DatabaseID 41 | 42 | 7. 同步方式 43 | 44 | - **方式一**:在 Github 的 Secrets 中添加以下变量来实现每日自动同步 45 | 46 | - 打开你 fork 的工程,点击 Settings->Secrets and variables->New repository secret 47 | - 添加以下变量(**变量名称自定义,只要与 action 中对应的名称一致即可**) 48 | - `WEREAD_COOKIE` 49 | - `NOTION_TOKEN` 50 | - `NOTION_DATABASE_ID` 51 | 52 | - **方式二**: 也可以本地运行脚本完成同步: 53 | 54 | ```shell 55 | pip install -r requirements.txt 56 | python3 ./main.py sync_read ${WEREAD_COOKIE} ${NOTION_TOKEN} ${NOTION_DATABASE_ID} 57 | ``` 58 | 59 | ### 高级特性 60 | 61 | 1. 可以配合 [next-blogger](https://github.com/alex-guoba/next-blogger) 搭建自己的**读书笔记分享**网站。样式参考 [goroutine.cn](https://goroutine.cn/notes) 62 | 63 | 2. 
【可选】可以指定单独的 Notion 数据库,用于存储每日同步记录,用作 calendar 视图等场景。 64 | 65 | - action 中环境变量`NOTION_DATABASE_CALENDAR`,命令行方式参考[action](./.github/workflows/weread.yml)。模板参考[这个](https://gelco.notion.site/5a17a1f794464652ade156c4c7572736?v=d961ee4d64864620b948b1a18fb1ebdd&pvs=4) 66 | 67 | - 注意:也需要在 Connections 添加你创建的 Integration。流程与上面一致。 68 | 69 | 3. 支持[Server 酱](https://sct.ftqq.com/)微信通知,用于在同步完成时发送微信通知更新读书笔记数。 70 | 71 | - action 中环境变量`SCKEY`,参考[action](./.github/workflows/weread.yml)。申请方式参考[Server 酱](https://sct.ftqq.com/sendkey)。 72 | 73 | ### 增量同步说明 74 | 75 | #### 更新时机 76 | 77 | 1. 微信读书笔记为增量同步,每次同步时,会根据笔记的更新时间进行筛选。仅当微信读书中书籍有**笔记更新**时才会触发同步。 78 | 2. 可以删除 db 中已同步过的书籍页面(page),删除后下次同步时会全同步(注意需要在微信读书中触发一次笔记更新,比如新增、删除任意笔记即可)。 79 | 80 | #### 增量机制 81 | 82 | 1. 已同步到 Notion 的笔记,用户可以在 Notion 中**新增、修改**笔记内容,下次同步时,**不会覆盖已同步过的笔记**。 83 | 2. 增量笔记同步顺序:新增章节会按照微信读书的章节顺序插入;新增笔记会插入到对应的章节下,但不保证同一章节下的笔记顺序与微信读书一致。 84 | 3. 用户可以在 database 中新增字段,用作书籍的自定义标识。但不得修改已有字段。 85 | 4. 已同步到 Notion 的笔记,**用户不得删除章节信息**,否则下次同步同一章节的增量笔记无法精确定位。 86 | 87 | ### 原理说明 88 | 89 | Notion 无法保存微信读书的笔记 id 等信息,所以在仓库中存储了一份微信读书笔记 ID 与[Notion Block ID](https://developers.notion.com/reference/patch-block-children)的映射关系。每次更新完毕后在 git action 中自动提交到仓库。 90 | 所以如果用户 clone 了本仓库,首次运行时可以先删除原仓库中的映射文件(./var/sync_read.db)。 91 | 92 | ### 支持的配置项 93 | 94 | ```ini 95 | [weread.format] 96 | ContentType = list 97 | EnableEmoj = false 98 | EnableReadingDetail = true 99 | ``` 100 | 101 | - ContentType:增加笔记内容 block 组织形式配置,可以将内容展现形态指定为 paragraph/list/callout。 102 | - EnableEmoj:开启、禁用 emoji 103 | - EnableReadingDetail: 开启、禁用阅读明细。 104 | 105 | ## 同步 Github Trending 106 | 107 | ### 使用 108 | 109 | 与微信读书同步方法基本一致。 110 | 111 | 1. 获取 NotionToken(可复用) 112 | 113 | 2. 创建 NotionDatabase,获取 NotionDatabaseID, notion 模板参考[这个](https://gelco.notion.site/77a3c6c8c2fb405e8347a7bde96d51d1?v=5c6464969afa432ea473f07c7b6959e8) 114 | 115 | 3. 
本地运行方式 116 | 117 | ```shell 118 | pip install -r requirements.txt 119 | python3 ./main.py sync_trending ${NOTION_TOKEN} ${NOTION_DATABASE_TRENDING} --git_token=${GIT_TOKEN} 120 | ``` 121 | 122 | 4. 或者在 Github 的 Secrets 中添加以下变量来实现每日自动同步 123 | 124 | - 打开你 fork 的工程,点击 Settings->Secrets and variables->New repository secret 125 | - 添加以下变量 (**变量名称自定义,只要与 action 中对应的名称一致即可**) 126 | - NOTION_TOKEN 127 | - NOTION_DATABASE_TRENDING 128 | - GIT_TOKEN 129 | 如果不需要仓库的其他信息(包括 fork、star、watcher 数量),GIT_TOKEN 可以不配置 130 | 131 | ### 支持的配置项 132 | 133 | ```ini 134 | [trending.language] 135 | languages = python,go 136 | ``` 137 | 138 | - languages: 关注的项目语言,不允许为空 139 | 140 | ## 同步 ProductHunt 产品列表到 Notion 141 | 142 | ### 使用 143 | 144 | 与微信读书同步方法基本一致。产品列表参考[ProductHunt](https://www.producthunt.com/all) 145 | 146 | 1. 获取 NotionToken(可复用) 147 | 148 | 2. 创建 NotionDatabase,获取 NotionDatabaseID, notion 模板参考[这个](https://gelco.notion.site/1467b35a24cd80449eeadf5ed024cef5?v=1a470daa9fc0418d8682aaf789860d40&pvs=73) 149 | 3. 
本地运行 150 | 151 | ```shell 152 | python3 ./main.py sync_producthunt ${NOTION_TOKEN} ${DATABASE_ID} 153 | ``` 154 | 155 | ```ini 156 | [producthunt.filter] 157 | ; 过滤条件,可选 158 | MinVotes = 5 159 | MinComments = 5 160 | ``` 161 | 162 | 也可配置 github action 来实现定期同步,有需要修改 github action 以及配置对应环境即可。 163 | 164 | ```shell 165 | # git中配置好对应的环境变量,设置对应的action run指令即可 166 | python3 ./main.py sync_producthunt "${{secrets.NOTION_TOKEN}}" "${{secrets.NOTION_DATABASE_PH}}" 167 | ``` 168 | 169 | ## 感谢 170 | 171 | - [malinkang / weread_to_notion](https://github.com/malinkang/weread_to_notion) 172 | - [bonfy / github-trending](https://github.com/bonfy/github-trending) 173 | -------------------------------------------------------------------------------- /README.zh-CN.md: -------------------------------------------------------------------------------- 1 | 2 | # Syncing WeChat Book Notes, GitHub Trending, Memos to Notion automatically 3 | 4 | This project supports the synchronization of WeChat book notes, GitHub trending, and memos to Notion. It can be done locally or through scheduled GitHub Actions, which can be customized according to your preferences. 5 | 6 | English | [简体中文](./README.zh-CN.md) 7 | 8 | ## Requirements 9 | 10 | Python 3.10 11 | 12 | ## Synchronizing WeChat Book Notes 13 | 14 | ### Usage 15 | 16 | 1. Star this project. 17 | 2. Fork this repository and delete the temporary mapping file (./var/sync_read.db). 18 | 3. Obtain the WeChat book's Cookie. 19 | * Open https://weread.qq.com/ in your browser. 20 | * Scan the QR code with WeChat and confirm login. Ignore any permission errors. 21 | * Press F12 to enter developer mode, then follow Network -> Doc -> Headers-> cookie. Copy the Cookie string. 22 | 4. Get the Notion Token. 23 | * Open https://www.notion.so/my-integrations in your browser. 24 | * Click "New integration" and enter a name to submit. 25 | * Click "show" and then copy the token. 26 | 5. Copy [this Notion template](https://gelco.notion.site/67639069c7b84f55b6394f16ecda0c4f?v=b5d09dc635db4b3d8ba13b200b88d823&pvs=25), delete all the data, and click the settings button in the top right corner. 
Add the integration you created under Connections. 27 | 6. Get the Notion Database ID. 28 | * Open the Notion database, click the "Share" button in the top right corner, and then click "Copy link". 29 | * The link will look like this: https://www.notion.so/67639069c7b84f55b6394f16ecda0c4f?v=b5d09dc635db4b3d8ba13b200b88d823 . The **67639069c7b84f55b6394f16ecda0c4f** part is the Database ID. 30 | 7. Add the following variables to your GitHub Secrets to enable daily automatic synchronization: 31 | * Open your forked repository, click Settings -> Secrets and variables -> New repository secret. 32 | * Add the following variables (you can customize the variable names as long as they match the names in the action): 33 | + WEREAD_COOKIE 34 | + NOTION_TOKEN 35 | + NOTION_DATABASE_ID 36 | 8. Alternatively, you can run the script locally: 37 | ```shell 38 | pip install -r requirements.txt 39 | python3 ./main.py sync_weread ${WEREAD_COOKIE} ${NOTION_TOKEN} ${NOTION_DATABASE_ID} 40 | ``` 41 | 42 | ### Supported Configuration Options 43 | 44 | ```ini 45 | [weread.format] 46 | ContentType = list 47 | EnableEmoj = false 48 | EnableReadingDetail = true 49 | ``` 50 | 51 | * ContentType: Specifies the organization format of the note content blocks as paragraph/list/callout. 52 | * EnableEmoj: Whether to prefix each note with an emoji that reflects its highlight style (`false` disables emojis). 53 | * EnableReadingDetail: Adds reading detail info to notes. 54 | 55 | ## Synchronizing GitHub Trending 56 | 57 | ### Usage 58 | 59 | The process is similar to synchronizing WeChat book notes. 60 | 61 | 1. Get the Notion Token (can be reused). 62 | 2. Create a Notion Database and get the Notion Database ID. Use this [template](https://gelco.notion.site/77a3c6c8c2fb405e8347a7bde96d51d1?v=5c6464969afa432ea473f07c7b6959e8) for reference. 63 | 3. To run locally: 64 | ```shell 65 | pip install -r requirements.txt 66 | python3 ./main.py sync_trending ${NOTION_TOKEN} ${NOTION_DATABASE_TRENDING} --git_token=${GIT_TOKEN} 67 | ``` 68 | 4. 
Or add the following variables to your GitHub Secrets for daily automatic synchronization: 69 | * Open your forked repository, click Settings -> Secrets and variables -> New repository secret. 70 | * Add the following variables (you can customize the variable names as long as they match the names in the action): 71 | + NOTION_TOKEN 72 | + NOTION_DATABASE_TRENDING 73 | + GIT_TOKEN (optional if you don't need repository information such as forks, stars, and watchers) 74 | 75 | ### Supported Configuration Options 76 | 77 | ```ini 78 | [trending.language] 79 | languages = python,go 80 | ``` 81 | 82 | * languages: The programming languages of the repositories to follow. This field cannot be empty. 83 | 84 | ## Syncing Memos to Notion 85 | 86 | ### Usage 87 | 88 | The process is similar to syncing WeChat book notes. 89 | 90 | 1. Obtain a Notion Token (can be reused) 91 | 2. Create a Notion Database and get its ID. The Notion template can be referenced from [here](https://gelco.notion.site/b840c05d92af44719ee3d9d7f73010f8?v=f0a726764fa3455b9a28f50783eea58a&pvs=4) 92 | 3. Assign a unique [Token](https://usememos.com/docs/access-tokens) to the user on the Memos platform for accessing Memos. 93 | 4. Modify the configuration file to set the Memos host address and the user's UserName for pulling data. Note that the Token assigned to the user must match the UserName to access Private memos. 94 | 5. Run locally with the following command: 95 | ```shell 96 | python3 ./main.py sync_memos ${NOTION_TOKEN} ${DATABASE_ID} ${MEMOS_TOKEN} 97 | ``` 98 | 99 | ```ini 100 | [memos.opts] 101 | MemosHost = http://127.0.0.1:8081 102 | ; Username, not nickname 103 | MemosUserName = memos-demo 104 | ``` 105 | 106 | It is also possible to configure GitHub Actions for regular syncing by modifying the action and configuring the corresponding environment variables. 
class TrendItem:
    """A repository entry scraped from the GitHub trending page.

    Attributes:
        title: Title text as shown on the trending page.
        url: Absolute repository URL; its last two path segments are used
            as the ``owner/repo`` path.
        desc: Repository description.
        watchers_count, forks_count, stargazers_count: Repository statistics.
            They stay ``0`` until ``fullfill_repo_info`` succeeds.
    """

    def __init__(self, title: str, url: str, desc: str) -> None:
        self.title = title
        self.url = url
        self.desc = desc
        self.watchers_count = 0
        self.forks_count = 0
        self.stargazers_count = 0

    def __repr__(self) -> str:
        # Instances are passed straight to logging calls, so give them a
        # readable form instead of the default object address.
        return (
            f"<TrendItem title={self.title!r} url={self.url!r} "
            f"stars={self.stargazers_count} forks={self.forks_count} "
            f"watchers={self.watchers_count}>"
        )

    def _repo_path(self) -> str:
        """Return the ``owner/repo`` path built from the last two URL segments."""
        # Strip a trailing slash first; otherwise the last segment is empty
        # and the result would be "repo/" instead of "owner/repo".
        return "/".join(self.url.rstrip("/").split("/")[-2:])

    def fullfill_repo_info(self, git_token):
        """Fill the watcher/fork/star counters from the GitHub API.

        Args:
            git_token: GitHub access token. When falsy the client is created
                without authentication.

        A failed lookup is logged and leaves the counters untouched.
        """
        if not self.url:
            return

        auth = Auth.Token(git_token) if git_token else None
        git = Github(auth=auth)
        repo_path = self._repo_path()

        try:
            repo = git.get_repo(repo_path)
        # pylint: disable-next=broad-except
        except Exception as _e:
            logging.error("get repo %s error: %s", repo_path, _e)
            return

        self.watchers_count = repo.watchers_count
        self.forks_count = repo.forks_count
        self.stargazers_count = repo.stargazers_count
async def insert_page(
    client: AsyncClient, data_source_id: str, language: str, trend: TrendItem
) -> None | str:
    """Create a Notion page for one trending repository.

    Args:
        client: Notion async client used to create the page.
        data_source_id: Id of the data source the page is created in.
        language: Value stored in the "Language" select property.
        trend: Repository entry supplying title, URL, description and counters.

    Returns:
        The ``id`` field of the create-page response.
    """

    def _text(content):
        # One plain-text segment, the shape used by title/rich_text values.
        return [{"type": "text", "text": {"content": content}}]

    created = await client.pages.create(
        parent={"data_source_id": data_source_id, "type": "data_source_id"},
        properties={
            "Title": {"title": _text(trend.title)},
            "Language": {"select": {"name": language}},
            "URL": {"url": trend.url},
            "Desc": {"rich_text": _text(trend.desc)},
            "WatchersCount": {"number": trend.watchers_count},
            "ForksCount": {"number": trend.forks_count},
            "StargazersCount": {"number": trend.stargazers_count},
        },
    )
    return created["id"]
async def _sync(
    client: AsyncClient,
    data_source_id: str,
    language: str,
    trends: list[TrendItem],
    git_token: str | None = None,
) -> None:
    """Insert every not-yet-synced trending repository into Notion.

    Args:
        client: Notion async client.
        data_source_id: Target data source id.
        language: Language the trends were scraped for; stored on each page.
        trends: Scraped repositories.
        git_token: Optional GitHub token. With a token, repository statistics
            are fetched and the configured minimum thresholds are applied.
            Without one the statistics stay at their initial 0, so the
            thresholds are skipped; applying them would drop every repository
            as soon as any threshold is positive.
    """
    for trend in trends:
        time.sleep(0.3)  # avoid rate limit for notion API
        if await query_page(client, data_source_id, trend.title):
            continue

        # insert to db
        logging.info(trend)

        if git_token:
            trend.fullfill_repo_info(git_token)
            # Thresholds are only meaningful once the counters were fetched.
            if _filter_repo(trend):
                logging.info("ignore %s", trend.title)
                continue

        await insert_page(client, data_source_id, language, trend)
class ProductItem:
    """A product scraped from Product Hunt.

    Attributes:
        name: Product name.
        desc: Product description.
        topics: Topic names attached to the product.
        comments: Number of comments.
        votes: Number of votes.
        cover: Cover image URL as passed in.
        url: Absolute product URL, i.e. the Product Hunt origin followed by
            the relative ``url`` argument.
    """

    def __init__(
        self,
        name: str,
        desc: str,
        topics: list[str],
        comments: int,
        votes: int,
        url: str = "",
        cover: str = "",
    ) -> None:
        self.name = name
        self.desc = desc
        self.topics = topics
        self.comments = comments
        self.votes = votes
        self.cover = cover
        # A missing link (None) must not be rendered as the text "None".
        self.url = f"https://www.producthunt.com{url or ''}"

    def __repr__(self) -> str:
        # Instances are passed straight to logging calls; an empty repr would
        # make those log lines useless.
        return (
            f"<ProductItem name={self.name!r} votes={self.votes} "
            f"comments={self.comments} topics={self.topics!r} url={self.url!r}>"
        )
def _scrape() -> list[ProductItem]:
    """Scrape the Product Hunt listing page.

    Returns:
        One ``ProductItem`` per listing entry that could be parsed completely.
        Entries with a missing name, description or topics, or with
        non-numeric vote/comment counts, are logged and skipped. An empty list
        is returned when the page cannot be fetched.
    """
    headers = {
        # pylint: disable=line-too-long
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip,deflate,sdch",
        "Accept-Language": "zh-CN,zh;q=0.8",
    }
    result = []

    try:
        req = requests.get(
            "https://www.producthunt.com/all", headers=headers, timeout=60
        )
    except requests.RequestException as err:
        # A timeout or connection error is handled like a bad status code
        # instead of aborting the whole run with a traceback.
        logging.error("access product hunt error. %s", err)
        return []
    if req.status_code != 200:
        logging.error("access product hunt error. %d", req.status_code)
        return []

    content = pq(req.content)
    items = content("main div.flex-col div.flex-col div[class^='styles_item']")
    if items.length == 0:
        # Fallback selector, tried when the first one matches nothing.
        items = content("main div.flex-col div.flex-col section")

    for item in items:
        logging.debug("parse product: %s", item)
        i = pq(item)
        href = i('a[href^="/posts/"]').eq(0).attr("href")

        mid = i("div.flex-col a")
        name = mid.eq(0).text()
        description = mid.eq(1).text()

        comments = i("div.flex-col div.flex-row div").eq(0).text()
        if not comments:
            comments = i("button div.flex-col").eq(0).text()

        votes = i('button[data-test="vote-button"]').text()

        cover = i('a[href^="/posts/"] img').eq(0).attr("src")
        if not cover:
            cover = i('a[href^="/posts/"] video').eq(0).attr("poster")

        topics = [
            pq(topic).text()
            for topic in i('div.flex-col div.flex-row a[href^="/topics/"]')
        ]

        if name == "" or description == "" or len(topics) == 0:
            logging.error(
                "parse name or description error: %s-%s-%d",
                name,
                description,
                len(topics),
            )
            continue
        if not votes.isnumeric() or not comments.isnumeric():
            logging.error(
                "parse votes or comments error: %s-%s-%s", name, votes, comments
            )
            continue

        try:
            # isnumeric() also accepts characters int() rejects (e.g. "²").
            votes = int(votes)
            comments = int(comments)
        except ValueError:
            logging.error("parse votes or comments error")
            continue

        # Keyword arguments: ProductItem declares ``comments`` before
        # ``votes``, so passing them positionally as (votes, comments) stored
        # each count under the other's name.
        result.append(
            ProductItem(
                name,
                description,
                topics,
                comments=comments,
                votes=votes,
                url=href or "",
                cover=cover,
            )
        )

    return result
class BlockHelper:
    """Builders for the JSON payloads (blocks and page property values) sent to Notion."""

    # Heading level -> Notion block type.
    headings = {
        1: "heading_1",
        2: "heading_2",
        3: "heading_3",
    }

    table_contents = {
        "type": "table_of_contents",
        "table_of_contents": {"color": "default"},
    }

    # Colour-style code passed by callers -> Notion colour name.
    color_styles = {
        1: "red",
        2: "purple",
        3: "blue",
        4: "green",
        5: "yellow",
    }

    def __init__(self):
        pass

    @staticmethod
    def _text(content):
        """Return a rich-text array holding a single plain-text segment."""
        return [{"type": "text", "text": {"content": content}}]

    @classmethod
    def _emoji(cls, style, review_id, enable_emoj):
        """Return the emoji for a note, or ``""`` when emojis are disabled."""
        return cls.emoj_style(style, review_id) if enable_emoj else ""

    @classmethod
    def table_of_contents(cls):
        """Return the table-of-contents block (the shared class-level dict)."""
        return cls.table_contents

    @classmethod
    def heading(cls, level, content):
        """Return a heading block; levels other than 1-3 fall back to heading_3."""
        heading_type = cls.headings.get(level, "heading_3")
        return {
            "type": heading_type,
            heading_type: {
                "rich_text": cls._text(content),
                "color": "default",
                "is_toggleable": False,
            },
        }

    @classmethod
    def table(
        cls,
        table_width: int,
        cells: list,
        has_column_header: bool = False,
        has_row_header: bool = False,
    ):
        """Return a table block containing one row built from ``cells``."""
        return {
            "type": "table",
            "table": {
                "table_width": table_width,
                "has_column_header": has_column_header,
                "has_row_header": has_row_header,
                "children": [cls.table_row(cells)],
            },
        }

    @classmethod
    def table_row(cls, content_list: list):
        """table row, see https://developers.notion.com/reference/block#table-rows .
        When creating a table block via the Append block children endpoint, the table
        must have at least one table_row whose cells array has the same length as the table_width.
        """
        return {
            "type": "table_row",
            "table_row": {
                # Every cell is itself a rich-text array; values are stringified.
                "cells": [cls._text(str(content)) for content in content_list],
            },
        }

    @classmethod
    def quote(cls, content):
        """Return a quote block."""
        return {
            "type": "quote",
            "quote": {
                "rich_text": cls._text(content),
                "color": "default",
            },
        }

    @classmethod
    def divider(cls):
        """Return a divider block."""
        return {"type": "divider", "divider": {}}

    @classmethod
    def emoj_style(cls, style, review_id):
        """Pick the emoji for a note: style 0 and style 1 each have their own,
        any other style gets the default, and a non-None ``review_id`` (the
        entry is a written note) overrides all of them."""
        emoji = "🌟"
        if style == 0:
            emoji = "💡"
        elif style == 1:
            emoji = "⭐"
        # A non-empty review id means the entry is a written note.
        if review_id is not None:
            emoji = "✍️"
        return emoji

    @classmethod
    def callout(cls, content, style, color, review_id, enable_emoj=False):
        """Return a callout block whose icon is the note's emoji."""
        # NOTE(review): with emojis disabled the icon emoji is an empty
        # string; confirm the Notion API accepts that.
        return {
            "type": "callout",
            "callout": {
                "rich_text": cls._text(content),
                "icon": {"emoji": cls._emoji(style, review_id, enable_emoj)},
                "color": cls.color_styles.get(color, "default"),
            },
        }

    @classmethod
    def paragraph(cls, content, style, color, review_id, enable_emoj=False):
        """Return a paragraph block, prefixed with the note's emoji when enabled."""
        return {
            "type": "paragraph",
            "paragraph": {
                "rich_text": cls._text(
                    cls._emoji(style, review_id, enable_emoj) + content
                ),
                "color": cls.color_styles.get(color, "default"),
            },
        }

    @classmethod
    def bullet_list(cls, content, style, color, review_id, enable_emoj=False):
        """Return a bulleted-list-item block, prefixed with the note's emoji when enabled."""
        return {
            "type": "bulleted_list_item",
            "bulleted_list_item": {
                "rich_text": cls._text(
                    cls._emoji(style, review_id, enable_emoj) + content
                ),
                "color": cls.color_styles.get(color, "default"),
            },
        }

    @classmethod
    def rich_text(cls, content):
        """Return a rich_text property value."""
        return {"rich_text": cls._text(content)}

    @classmethod
    def title(cls, content):
        """Return a title property value."""
        return {"title": cls._text(content)}

    @classmethod
    def url(cls, remoteurl):
        """Return a url property value."""
        return {"url": remoteurl}

    @classmethod
    def number(cls, num):
        """Return a number property value."""
        return {"number": num}

    @classmethod
    def files(cls, name, url):
        """Return a files property value holding one external file."""
        return {"files": [{"type": "external", "name": name, "external": {"url": url}}]}

    @classmethod
    def select(cls, option):
        """Return a select property value."""
        return {"select": {"name": option}}

    @classmethod
    def multi_select(cls, selected_options: list[str]):
        """Return a multi_select property value with one entry per option."""
        return {"multi_select": [{"name": option} for option in selected_options]}

    @classmethod
    def date(cls, d):
        """Return a date property value for the Unix timestamp ``d``.

        The wall-clock string is computed in UTC+8 because the payload labels
        it "Asia/Shanghai". Formatting with the machine's local timezone, as
        before, shifted the stored time on any host not running in that zone.
        """
        # Local import: only the ``datetime`` class is imported at file level.
        # pylint: disable-next=import-outside-toplevel
        from datetime import timedelta, timezone

        shanghai = timezone(timedelta(hours=8))
        return {
            "date": {
                "start": datetime.fromtimestamp(d, tz=shanghai).strftime(
                    "%Y-%m-%d %H:%M:%S"
                ),
                "time_zone": "Asia/Shanghai",
            }
        }

    @classmethod
    def icon(cls, img):
        """Return an external-image icon object."""
        return {"type": "external", "external": {"url": img}}
class BlockItem:
    """Envelope around one block that is about to be appended to a page."""

    def __init__(self, after=None, bookmark=None, block=None, child=None) -> None:
        """
        Args:
            after: Id of the block this one should be appended after.
            bookmark: Bookmark id stored together with the block id.
            block: The block payload itself.
            child: List of child block payloads; any falsy value becomes
                a new empty list.
        """
        self.bid = None  # filled in by set_bid()
        self.block = block
        self.bookmark = bookmark
        self.after = after
        self.child = child or []

    def set_bid(self, bid):
        """Remember the block id assigned to this item."""
        self.bid = bid
def inherit_properties(page):
    """Return the properties of ``page`` that can be written back to Notion.

    Args:
        page (dict | None): Page object whose "properties" entry maps each
            property name to its value object. May be ``None`` or lack the
            entry.

    Returns:
        dict: A new dict with every property except those whose ``type`` is
        computed by Notion. Besides ``formula`` this covers rollups, the
        created/last-edited metadata and unique ids; like formulas they are
        read-only, and sending them back would make the update request fail.
    """
    read_only_types = {
        "formula",
        "rollup",
        "created_by",
        "created_time",
        "last_edited_by",
        "last_edited_time",
        "unique_id",
    }
    if not page:
        return {}
    return {
        name: value
        for name, value in (page.get("properties") or {}).items()
        if value.get("type") not in read_only_types
    }
async def append_children(client: AsyncClient, pid, after, children):
    """Append ``children`` to block ``pid`` in chunks of at most 100 blocks.

    Args:
        client: Notion async client.
        pid: Id of the parent block/page.
        after: Id of the existing block to insert after; falsy appends at
            the end.
        children: Block payloads to append.

    Returns:
        The created block objects in order, or ``[]`` when the number of
        returned blocks does not match ``len(children)``.

    Changes from the previous version:
        * No request is sent for an empty chunk. Before, an empty list or a
          length that is a multiple of 100 triggered one extra call with no
          children.
        * When ``after`` is given, each following chunk is anchored to the
          last block of the previous chunk. Re-using the original anchor
          inserted later chunks in front of earlier ones.
    """
    results = []
    anchor = after
    logging.info("appending %d blocks after %s", len(children), after)
    for start in range(0, len(children), 100):
        time.sleep(0.3)  # stay below the Notion rate limit
        chunk = children[start : start + 100]
        if anchor:
            response = await client.blocks.children.append(
                block_id=pid, children=chunk, after=anchor
            )
        else:
            response = await client.blocks.children.append(
                block_id=pid, children=chunk
            )
        # Notion will return all the blocks start from the appending block.
        # So we need to filter the result.
        appended = (response.get("results") or [])[: len(chunk)]
        results.extend(appended)
        if anchor and appended:
            # Keep the original anchor if the response carries no id.
            anchor = appended[-1].get("id") or anchor
    return results if len(results) == len(children) else []
def gen_chapter_tree(chapter_list):
    """Build the chapter tree.

    Args:
        chapter_list: Chapter dicts in reading order. Each one is read for
            "level" (missing, falsy or non-positive counts as 1; values above
            ``NOTION_MAX_LEVEL`` are clamped) and "chapterUid" (used as both
            node tag and node identifier).

    Returns:
        Tree: A tree rooted at ``ROOT_NODE_ID``. Each chapter hangs below the
        most recent chapter one level above it, or below the root when no
        such chapter is currently open.
    """
    tree = Tree()
    root = tree.create_node(identifier=ROOT_NODE_ID)  # root node
    latest = {}  # level -> most recent node on the currently open branch
    for chapter in chapter_list:
        # ``or 1`` also covers an explicit None, which would break the
        # comparison below.
        level = chapter.get("level") or 1
        if level <= 0:
            level = 1
        elif level > NOTION_MAX_LEVEL:  # only heading 1-3 are supported
            level = NOTION_MAX_LEVEL

        parent = latest.get(level - 1, root)
        chapter_uid = chapter.get("chapterUid")
        latest[level] = tree.create_node(
            tag=chapter_uid, identifier=chapter_uid, parent=parent, data=chapter
        )
        # A chapter at this level closes every deeper branch. Without this,
        # a later chapter that skips a level would attach to a stale node of
        # an earlier, unrelated chapter.
        for deeper in [lvl for lvl in latest if lvl > level]:
            del latest[deeper]
    return tree
def _pending_bookmark_fields(store, book_id, mark):
    """Return BlockItem keyword arguments for a bookmark/review not yet stored.

    Returns ``None`` when the store already holds a record for the mark.
    """
    bookmark_id = mark.get("bookmarkId") or mark.get("reviewId")
    if len(store.query(book_id, bookmark_id)) > 0:
        return None
    abstract = mark.get("abstract")
    return {
        "bookmark": bookmark_id,
        "block": content_block(
            mark.get("markText"),
            mark.get("style"),
            mark.get("colorStyle"),
            mark.get("reviewId"),
        ),
        # The abstract, when present, is nested under the block as a quote.
        "child": [BlockHelper.quote(abstract)] if abstract else None,
    }


def made_page_blocks(
    store, blocks, bookID, chapters_list, bookmark_list
) -> list[BlockItem]:
    """Generate the page blocks that still have to be appended.

    Args:
        store: record store queried with ``(bookID, bookmark id)`` to find
            out which chapters and bookmarks already have a block.
        blocks: blocks currently on the page; when empty, a table of
            contents and a divider are emitted first.
        bookID: id of the book being synced.
        chapters_list: chapter dicts; when empty, bookmarks are emitted
            flat without headings.
        bookmark_list: bookmark/review dicts to render.

    Returns:
        The ``BlockItem`` list to append, in order.
    """
    appending: list[BlockItem] = []

    page_block_list = PageBlockList(store, bookID, blocks)

    # Empty page: start with the table of contents.
    if not blocks:
        appending.append(BlockItem(block=BlockHelper.table_of_contents()))
        appending.append(BlockItem(block=BlockHelper.divider()))

    # no chapter info
    if len(chapters_list) == 0:
        for mark in bookmark_list:
            fields = _pending_bookmark_fields(store, bookID, mark)
            if fields is not None:
                appending.append(BlockItem(**fields))
        return appending

    chapter_tree = gen_chapter_tree(chapters_list)
    mount_bookmarks(chapter_tree, bookmark_list)
    remove_empty_chapter(chapter_tree)

    for n in chapter_tree.expand_tree(mode=Tree.DEPTH):
        node = chapter_tree[n]
        if node.is_root():
            continue

        data = node.data
        chapter_uid = data.get("chapterUid")

        chapter_records = store.query(bookID, chapter_uid)
        if len(chapter_records) > 0:
            # Heading already on the page: new bookmarks go right after it.
            block_id = chapter_records[0]["block_id"]
        else:
            # find a suitable position to insert
            block_id = page_block_list.found_chapter_position(chapter_uid)
            appending.append(
                BlockItem(
                    after=block_id,
                    bookmark=chapter_uid,
                    block=BlockHelper.heading(data.get("level"), data.get("title")),
                )
            )

        for mark in data.get(BOOK_MARK_KEY, []):
            fields = _pending_bookmark_fields(store, bookID, mark)
            if fields is not None:
                appending.append(BlockItem(after=block_id, **fields))

    return appending
async def _discard_recorded_block(client, store, book_id, bookmark_id):
    """Delete the stored record and the Notion block for ``bookmark_id``, if any."""
    records = store.query(book_id, bookmark_id)
    if len(records):
        store.delete_bookmark(book_id, bookmark_id)
        await client.blocks.delete(block_id=records[0]["block_id"])


async def made_readinfo_blocks(
    client: AsyncClient,
    store: DBWeReadRecord,
    book_id: str,
    rinfo: dict,
    bookmark_count: int,
) -> list[BlockItem]:
    """Generate the reading-statistics blocks to append.

    Emits a divider and a heading the first time (no ``_stat_`` record in
    the store), then a summary table and a per-day table. Existing summary
    and per-day tables are deleted from both the store and Notion before
    new ones are generated, so they are rebuilt on every call.

    Args:
        client: Notion async client, used to delete the previous tables.
        store: record store holding the bookmark-to-block mapping.
        book_id: id of the book being synced.
        rinfo: read info dict; its ``readDetail`` entry supplies the numbers.
        bookmark_count: number of notes shown in the summary table.

    Returns:
        The ``BlockItem`` list to append. Empty when there is no read
        detail or ``EnableReadingDetail`` is off.
    """
    appending: list[BlockItem] = []
    rdetail = (rinfo or {}).get("readDetail")

    if not rdetail:
        return appending
    if not CONFIG.getboolean("weread.format", "EnableReadingDetail"):
        return appending

    bookmark_id = "_stat_"
    block_id = None
    _records = store.query(book_id, bookmark_id)
    if len(_records) == 0:
        appending.extend(
            (
                BlockItem(block=BlockHelper.divider()),
                BlockItem(
                    block=BlockHelper.heading(1, "阅读明细"), bookmark=bookmark_id
                ),
            )
        )
    else:
        # Heading already exists: the tables are inserted right after it.
        block_id = _records[0]["block_id"]

    # Summary table
    bookmark_id = "_stat.total_"
    await _discard_recorded_block(client, store, book_id, bookmark_id)

    longest_reading = weread.str_reading_time(rdetail.get("longestReadingTime", 0))
    longest_ts = rdetail.get("longestReadingDate")
    if longest_ts is not None:
        # The date is only shown when present; fromtimestamp(None) would raise.
        longest_date = datetime.fromtimestamp(longest_ts).strftime("%Y/%m/%d")
        longest_reading = f"{longest_reading} ({longest_date})"
    appending.append(
        BlockItem(
            after=block_id,
            bookmark=bookmark_id,
            block=BlockHelper.table(2, ["维度", "指标"], True),
            child=[
                BlockHelper.table_row(
                    ["累积阅读天数", str(rdetail.get("totalReadDay", 0)) + "天"]
                ),
                BlockHelper.table_row(
                    ["最长连续阅读天数", str(rdetail.get("continueReadDays", 0)) + "天"]
                ),
                BlockHelper.table_row(["单日阅读最久", longest_reading]),
                BlockHelper.table_row(["阅读笔记条数", str(bookmark_count) + "条"]),
            ],
        )
    )

    # Per-day table
    bookmark_id = "_stat.detail_"
    await _discard_recorded_block(client, store, book_id, bookmark_id)
    item = BlockItem(
        after=block_id,
        bookmark=bookmark_id,
        block=BlockHelper.table(2, ["日期", "阅读时长"], True),
        child=[],
    )
    for daily in rdetail.get("data") or []:
        item.child.append(
            BlockHelper.table_row(
                [
                    datetime.fromtimestamp(daily.get("readDate")).strftime("%Y/%m/%d"),
                    weread.str_reading_time(daily.get("readTime", 0)),
                ]
            )
        )
    appending.append(item)

    return appending
def _bookmark_sort_key(mark):
    """Return the ``(chapterUid, range start)`` ordering key of a bookmark/review.

    A missing, empty or non-numeric range start sorts as 0.
    """
    start = str(mark.get("range") or "").split("-")[0]
    try:
        offset = int(start) if start else 0
    except ValueError:
        offset = 0
    return mark.get("chapterUid", 1), offset


async def _sync_books(client, weread_cookie, database_id, calendar_db_id, wxnotify_key):
    """Sync every notebook whose ``sort`` is newer than the latest one in Notion."""
    data_source_id = await notion.get_datasource_id(client, database_id)
    if not data_source_id:
        logging.error("database %s has no data source", database_id)
        return

    calendar_data_source_id = ""
    if calendar_db_id:
        calendar_data_source_id = await notion.get_datasource_id(client, calendar_db_id)

    latest_sort = await get_db_latest_sort(client, data_source_id)

    wreader = weread.WeReadAPI(weread_cookie)
    store = DBWeReadRecord("./var/sync_read.db")
    read_stat = []

    for _book in wreader.get_notebooklist():
        sort = _book["sort"]
        if sort <= latest_sort:  # notes not updated since the last sync, skip
            continue

        book_dict = _book.get("book")
        book_id = book_dict.get("bookId")

        logging.info("Start to synch book %s", book_id)

        chapters_list = wreader.get_chapter_list(book_id)
        bookmark_list = wreader.get_bookmark_list(book_id)
        summary, reviews = wreader.get_review_list(book_id)

        # converge bookmark and chapter review
        bookmark_list.extend(reviews)
        bookmark_list = sorted(bookmark_list, key=_bookmark_sort_key)

        isbn, rating, category, intro = wreader.get_bookinfo(book_id)
        read_info = wreader.get_read_info(book_id)

        pageinfo, pid = await get_page_info(client, data_source_id, book_id)
        pid, created = await create_or_update_page(
            client,
            data_source_id,
            pageinfo,
            pid,
            book_name=book_dict.get("title"),
            book_id=book_id,
            cover=book_dict.get("cover"),
            sort=sort,
            author=book_dict.get("author"),
            isbn=isbn,
            rating=rating,
            category=category,
            note_count=_book.get("noteCount"),
            review_count=_book.get("reviewCount"),
            intro=intro,
            read_info=read_info,
        )

        blocks = []
        if not created:
            blocks = await list_page_blocks(client, pid)
        else:
            # Page was just created: local records for this book are dropped.
            store.delete_book(book_id)

        appending = made_page_blocks(
            store, blocks, book_id, chapters_list, bookmark_list
        )
        await append_blocks(client, pid, appending, store, book_id)

        appending = made_comment_blocks(store, book_id, summary)
        await append_blocks(client, pid, appending, store, book_id)

        appending = await made_readinfo_blocks(
            client, store, book_id, read_info, len(bookmark_list)
        )
        await append_blocks(client, pid, appending, store, book_id)
        # NOTE(review): "count" only reflects the reading-detail blocks of the
        # last step, not the note blocks appended above; confirm this is the
        # number the notification is meant to report.
        if len(appending) > 0:
            read_stat.append(
                {
                    "count": len(appending),
                    "book_name": book_dict.get("title"),
                }
            )

        if calendar_data_source_id:
            await sync_to_calener(client, calendar_data_source_id, read_info)

    if wxnotify_key is not None and read_stat:
        send_wxnotify(wxnotify_key, read_stat)


async def sync_read(
    weread_cookie, notion_token, database_id, calendar_db_id=None, wxnotify_key=None
):
    """Sync WeRead reading notes to Notion.

    Args:
        weread_cookie: cookie passed to ``weread.WeReadAPI``.
        notion_token: Notion integration token.
        database_id: Notion database that holds one page per book.
        calendar_db_id: optional database for the reading calendar; when
            given, read info is also passed to ``sync_to_calener``.
        wxnotify_key: optional key; when set and at least one book produced
            blocks, a notification is sent via ``send_wxnotify``.
    """
    client = AsyncClient(auth=notion_token, log_level=logging.ERROR)
    try:
        await _sync_books(
            client, weread_cookie, database_id, calendar_db_id, wxnotify_key
        )
    finally:
        # Release the client's HTTP connection pool even when the sync fails.
        await client.aclose()