├── api
    ├── __init__.py
    ├── weread.py
    └── notion.py
├── lib
    ├── __init__.py
    ├── serverchan.py
    ├── page_block_list.py
    ├── test_page_block_list.py
    ├── db_weread_record.py
    └── test_db_weread_record.py
├── sync
    └── weread
    │   ├── __init__.py
    │   └── calendar.py
├── .gitignore
├── var
    └── sync_read.db
├── requirements.txt
├── default.ini
├── config.py
├── main.py
├── LICENSE
├── .github
    └── workflows
    │   └── weread.yml
├── README.md
├── README.zh-CN.md
├── sync_trending.py
├── sync_producthunt.py
└── sync_read.py


/api/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sync/weread/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # IDE - VSCode
2 | .vscode/*
3 | __pycache__/
4 | *.pyc
5 | todo.txt


--------------------------------------------------------------------------------
/var/sync_read.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alex-guoba/sync-notion/HEAD/var/sync_read.db


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | notion-client
3 | treelib
4 | fire
5 | pyquery
6 | PyGithub
7 | pysqlite3
8 | 


--------------------------------------------------------------------------------
/default.ini:
--------------------------------------------------------------------------------
 1 | [DEFAULT]
 2 | 
 3 | [weread.format]
 4 | ContentType = list 
 5 | EnableEmoj = false
 6 | EnableReadingDetail = true
 7 | 
 8 | [trending.language]
 9 | Languages = typescript,go,python,swift
10 | MinStargazers = 100
11 | MinForks = 100
12 | MinWatchers = 100
13 | 
14 | [memos.opts]
15 | MemosHost = http://127.0.0.1:8081
16 | MemosUserName = memos-demo
17 | 
18 | [producthunt.filter]
19 | MinVotes = 10
20 | MinComments = 10
21 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 封装配置文件读取
 3 | """
 4 | import os
 5 | 
 6 | import configparser
 7 | 
 8 | DEFAULT_CONFIG_FILE = 'default.ini'
 9 | 
10 | def get_config_file():
11 |     '''读取环境配置'''
12 |     return os.environ.get('CONFIG_FILE', DEFAULT_CONFIG_FILE)
13 | 
14 | CONFIG_FILE = get_config_file()
15 | 
16 | def create_config(config_file=None):
17 |     '''创建配置文件'''
18 |     parser = configparser.ConfigParser()
19 |     parser.read(config_file or CONFIG_FILE)
20 |     return parser
21 | 
22 | CONFIG = create_config()
23 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Entrance point of the program.
 3 | """
 4 | 
 5 | import logging
 6 | 
 7 | import fire
 8 | 
 9 | from sync_read import sync_read
10 | from sync_trending import sync_trending
11 | from sync_producthunt import sync_producthunt
12 | 
13 | if __name__ == "__main__":
14 |     logging.basicConfig(level=logging.INFO)
15 | 
16 |     fire.Fire(
17 |         {
18 |             "sync_read": sync_read,
19 |             "sync_trending": sync_trending,
20 |             "sync_producthunt": sync_producthunt,
21 |         }
22 |     )
23 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 guopeng
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/lib/serverchan.py:
--------------------------------------------------------------------------------
 1 | """ see: https://github.com/easychen/serverchan-demo/blob/master/python/send.py """
 2 | 
 3 | import os
 4 | import requests
 5 | import re
 6 | 
 7 | 
 8 | def sc_send(sendkey, title, desp="", options=None):
 9 |     """ServerChan推送消息函数"""
10 |     if options is None:
11 |         options = {}
12 |     # 判断 sendkey 是否以 'sctp' 开头，并提取数字构造 URL
13 |     if sendkey.startswith("sctp"):
14 |         match = re.match(r"sctp(\d+)t", sendkey)
15 |         if match:
16 |             num = match.group(1)
17 |             url = f"https://{num}.push.ft07.com/send/{sendkey}.send"
18 |         else:
19 |             raise ValueError("Invalid sendkey format for sctp")
20 |     else:
21 |         url = f"https://sctapi.ftqq.com/{sendkey}.send"
22 |     params = {"title": title, "desp": desp, **options}
23 |     headers = {"Content-Type": "application/json;charset=utf-8"}
24 |     response = requests.post(url, json=params, headers=headers, timeout=10)
25 |     result = response.json()
26 |     return result
27 | 
28 | 
29 | # data = {}
30 | # with open(os.path.join(os.path.dirname(__file__), "..", ".env"), "r") as f:
31 | #     for line in f:
32 | #         key, value = line.strip().split("=")
33 | #         data[key] = value
34 | # key = data["SENDKEY"]
35 | 
36 | # ret = sc_send(key, "主人服务器宕机了 via python", "第一行\n\n第二行")
37 | # print(ret)
38 | 


--------------------------------------------------------------------------------
/.github/workflows/weread.yml:
--------------------------------------------------------------------------------
 1 | name: weread sync
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   schedule:
 6 |     - cron: "0 0 * * *"
 7 | jobs:
 8 |   sync:
 9 |     name: Sync
10 |     permissions:
11 |       contents: write
12 |     runs-on: ubuntu-22.04
13 |     steps:
14 |       - name: Checkout
15 |         uses: actions/checkout@v4
16 |       - name: Set up Python
17 |         uses: actions/setup-python@v5
18 |         with:
19 |           python-version: "3.10"
20 |       - name: Install dependencies
21 |         run: |
22 |           python -m pip install --upgrade pip
23 |           pip install -r requirements.txt
24 |       #- name: weread sync
25 |       #  id: weread-sync
26 |       #  run: |
27 |       #    python main.py sync_read "${{secrets.WEREAD_COOKIE}}" "${{secrets.NOTION_TOKEN}}" "${{secrets.NOTION_DATABASE_ID}}" --calendar_db_id="${{secrets.NOTION_DATABASE_CALENDAR}}" --wxnotify_key="${{secrets.SCKEY}}"
28 |       - name: trending sync
29 |         id: trending-sync
30 |         run: |
31 |           python main.py sync_trending "${{secrets.NOTION_TOKEN}}" "${{secrets.NOTION_DATABASE_TRENDING}}" --git_token="${{secrets.GIT_TOKEN}}"
32 |       - name: product-hunt sync
33 |         id: product-hunt-sync
34 |         run: |
35 |           python main.py sync_producthunt "${{secrets.NOTION_TOKEN}}" "${{secrets.NOTION_DATABASE_PH}}"
36 |       - name: Verify Changed files
37 |         uses: tj-actions/verify-changed-files@v20
38 |         id: verify-changed-files
39 |         with:
40 |           files: |
41 |             ./var/sync_read.db
42 |       - name: Commit
43 |         if: steps.verify-changed-files.outputs.files_changed == 'true'
44 |         run: |
45 |           git config user.name github-actions
46 |           git config user.email github-actions@github.com
47 |           git add ./var/sync_read.db
48 |           git commit -m "auto sync"
49 |           git push
50 | 


--------------------------------------------------------------------------------
/lib/page_block_list.py:
--------------------------------------------------------------------------------
 1 | """Orgnize the page blocks in a Notion page"""
 2 | 
 3 | def safe_cast(val, to_type, default=None):
 4 |     """
 5 |     尝试将输入值 `val` 转换为指定类型 `to_type`，如果转换失败则返回默认值 `default`。
 6 |     """
 7 |     try:
 8 |         return to_type(val)
 9 |     except (ValueError, TypeError):
10 |         return default
11 | 
12 | class PageBlockList(object):
13 |     """Implements the PageBlockList class."""
14 | 
15 |     def __init__(self, store, book_id, blocks):
16 |         """Constructor for the PageBlockList class.
17 |         list item format:
18 |         {
19 |             'type': 'paragraph / heading / list / image ....',
20 |             'id': '$block_id',
21 |         }
22 |         """
23 |         self.book_id = book_id
24 |         self.blocks = []
25 |         for block in blocks:
26 |             bookmark_id = None
27 |             _result = store.query_by_block(book_id, block['id'])
28 |             if _result:
29 |                 bookmark_id = _result[0]['bookmark_id']
30 |             self.blocks.append({
31 |                 'type': block['type'],
32 |                 'id': block['id'],
33 |                 'bookmark_id': bookmark_id,
34 |             })
35 | 
36 |     def found_chapter_position(self, chapter_uid: int) -> str | None:
37 |         """Find the position of a chapter in the list.
38 |         return true if found, false if not found
39 |         """
40 |         chapter, block_id, block_idx = -1, None, -1
41 |         for idx, block in enumerate(self.blocks):
42 |             if block['bookmark_id'] is not None and block['type'].startswith('heading_'):
43 |                 _cuid = safe_cast(block['bookmark_id'], int, 0)
44 |                 if _cuid < chapter_uid and _cuid > chapter: # find the biggest one in [0, chapter_uid]
45 |                     chapter = _cuid
46 |                     block_id = block['id']
47 |                     block_idx = idx
48 | 
49 |         # push to the first block if not found
50 |         if not block_id:
51 |             return self.blocks[0]['id'] if len(self.blocks) > 0 else None
52 |         
53 |         # iterate to the end of chapter
54 |         while block_idx < len(self.blocks) - 1:
55 |             block = self.blocks[block_idx + 1]
56 |             if block['type'].startswith('heading_'):
57 |                 return block_id
58 |             block_idx += 1
59 |             block_id = block['id']
60 |         return block_id


--------------------------------------------------------------------------------
/lib/test_page_block_list.py:
--------------------------------------------------------------------------------
 1 | """unit test for DBWeReadRecord"""
 2 | import unittest
 3 | from datetime import datetime, timedelta
 4 | 
 5 | from lib.db_weread_record import DBWeReadRecord
 6 | # from lib.db_weread_record import DBWeReadRecord # 替换your_module_name为实际的模块名
 7 | 
 8 | class TestDBReadRecord(unittest.TestCase):
 9 |     """unit test for DBWeReadRecord"""
10 | 
11 |     def setUp(self):
12 |         # 创建一个临时的数据库用于测试
13 |         self.db_name = ":memory:"
14 |         # self.db_name = "./var/tutorial.db"
15 |         self.db_reader = DBWeReadRecord(self.db_name)
16 | 
17 |     def tearDown(self):
18 |         # 测试结束后关闭数据库连接
19 |         del self.db_reader
20 | 
21 |     def test_insert_and_query(self):
22 |         """test insert and query"""
23 |         # 测试插入和查询数据
24 |         book_id = '12345'
25 |         bookmark_id = 'chapter1'
26 |         block_id = 'b55c9c91-384d-452b-81db-d1ef79372b75'
27 | 
28 |         expected_op_time = datetime.now()
29 |         
30 |         # 插入数据
31 |         inserted_id = self.db_reader.insert(book_id, bookmark_id, block_id)
32 |         self.assertTrue(inserted_id >= 0)
33 | 
34 |         # 查询数据
35 |         results = self.db_reader.query(book_id, bookmark_id)
36 |         self.assertEqual(len(results), 1)
37 |         result = results[0]
38 |         self.assertEqual(result['book_id'], book_id)
39 |         self.assertEqual(result['bookmark_id'], bookmark_id)
40 |         self.assertEqual(result['block_id'], block_id)
41 |         # 检查op_time在合理范围内
42 |         self.assertLessEqual(result['op_time'], expected_op_time + timedelta(seconds=10))
43 |         self.assertGreaterEqual(result['op_time'], expected_op_time - timedelta(seconds=10))
44 | 
45 |     def test_insert_duplicate_and_query(self):
46 |         """test insert duplicate and query"""
47 |         # 测试插入重复数据并查询
48 |         book_id = '987652'
49 |         bookmark_id = 'chapter3'
50 |         block_id = 'b55c9c91-384d-452b-81db-d1ef79379999'
51 | 
52 |         # 第一次插入
53 |         inserted_id_1 = self.db_reader.insert(book_id, bookmark_id, block_id)
54 |         self.assertTrue(inserted_id_1 >= 0)
55 | 
56 |         # 尝试第二次插入相同的数据，应不插入新数据
57 |         inserted_id_2 = self.db_reader.insert(book_id, bookmark_id, block_id)
58 |         self.assertEqual(inserted_id_1, inserted_id_2)
59 | 
60 |         # 查询数据，应该只有一条记录
61 |         results = self.db_reader.query(book_id, bookmark_id)
62 |         self.assertEqual(len(results), 1)
63 | 
64 | if __name__ == '__main__':
65 |     unittest.main()
66 | 


--------------------------------------------------------------------------------
/sync/weread/calendar.py:
--------------------------------------------------------------------------------
 1 | """sync reading log to calendar database"""
 2 | 
 3 | from datetime import datetime
 4 | from notion_client import AsyncClient
 5 | from api.notion import BlockHelper
 6 | 
 7 | 
 8 | def query_filter(book_id: str, date: float):
 9 |     """query filter for calendar database"""
10 |     return {
11 |         "and": [
12 |             {
13 |                 "property": "BookId",
14 |                 "rich_text": {
15 |                     "equals": book_id,
16 |                 },
17 |             },
18 |             {
19 |                 "property": "ReadDate",
20 |                 "date": {
21 |                     "equals": datetime.fromtimestamp(date).strftime("%Y-%m-%d"),
22 |                 },
23 |             },
24 |         ]
25 |     }
26 | 
27 | 
28 | async def sync_to_calener(
29 |     client: AsyncClient, calendar_data_source_id: str, read_detail: dict
30 | ):
31 |     """sync reading log to calendar database"""
32 |     if not client or not read_detail:
33 |         return
34 |     rdetail = read_detail.get("readDetail")
35 |     book_info = read_detail.get("bookInfo")
36 | 
37 |     if not rdetail or not book_info:
38 |         return
39 | 
40 |     # from latest to oldest
41 |     records = sorted(
42 |         rdetail.get("data", []), key=lambda x: x.get("readDate") or 0, reverse=True
43 |     )
44 | 
45 |     # No batch-updating API exist😢
46 |     book_id = book_info.get("bookId")
47 |     for record in records:
48 |         date = record.get("readDate")
49 |         read_time = record.get("readTime", 0)
50 | 
51 |         response = await client.data_sources.query(
52 |             data_source_id=calendar_data_source_id,
53 |             filter=query_filter(book_id, date),
54 |         )
55 |         if len(response["results"]) > 0:
56 |             result = response["results"][0]
57 |             _old = result.get("properties", {}).get("ReadTime", {}).get("number", 0)
58 |             if _old < read_time:
59 |                 properties = {"ReadTime": BlockHelper.number(read_time)}
60 |                 await client.pages.update(page_id=result["id"], properties=properties)
61 | 
62 |             # 每日更新，仅更新最近一次即可
63 |             break
64 | 
65 |         properties = {
66 |             "Name": BlockHelper.title(book_info.get("title", "")),
67 |             "BookId": BlockHelper.rich_text(book_id),
68 |             "ReadDate": {
69 |                 "date": {"start": datetime.fromtimestamp(date).strftime("%Y-%m-%d")}
70 |             },  # Discard timezone
71 |             "ReadTime": BlockHelper.number(read_time),
72 |         }
73 |         await client.pages.create(
74 |             parent={
75 |                 "data_source_id": calendar_data_source_id,
76 |                 "type": "data_source_id",
77 |             },
78 |             properties=properties,
79 |         )
80 | 


--------------------------------------------------------------------------------
/lib/db_weread_record.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 封装存储相关操作
  3 | """
  4 | 
  5 | import sqlite3
  6 | import datetime
  7 | 
  8 | 
  9 | class DBWeReadRecord(object):
 10 |     """存储微信读书同步记录"""
 11 | 
 12 |     TabName = "weread_sync_record"
 13 |     SqlCreate = f"""create table if not exists {TabName}
 14 |     (book_id VARCHAR(255), bookmark_id varchar(255), block_id VARCHAR(255),
 15 |     op_time TIMESTAMP, resv VARCHAR(255),
 16 |     PRIMARY KEY (book_id, bookmark_id, block_id))"""
 17 | 
 18 |     def __init__(self, db_name):
 19 |         """
 20 |         :param db_name: 数据库名称
 21 |         """
 22 |         self.connection = sqlite3.connect(
 23 |             db_name, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES
 24 |         )
 25 |         self.connection.row_factory = sqlite3.Row  # use dictionary to return row
 26 |         self.create_table()
 27 | 
 28 |     def __del__(self):
 29 |         """
 30 |         析构函数
 31 |         :return:
 32 |         """
 33 |         print("closing db connection")
 34 |         self.connection.close()
 35 | 
 36 |     def create_table(self):
 37 |         """
 38 |         创建表
 39 |         :return:
 40 |         """
 41 |         cursor = self.connection.cursor()
 42 |         cursor.execute(self.SqlCreate)
 43 | 
 44 |     def insert(self, book_id, bookmark_id, block_id):
 45 |         """
 46 |         插入数据
 47 |         :param book_id: 书籍ID
 48 |         :param bookmark_id: 书签ID/章节ID
 49 |         :param block_id: 章节ID
 50 |         :param op_time: 操作时间
 51 |         :return: 插入后的主键值
 52 |         """
 53 |         now = datetime.datetime.now()
 54 | 
 55 |         sql = f"insert or ignore into {self.TabName}(book_id, bookmark_id, block_id, op_time)\
 56 |               values (?, ?, ?, ?)"
 57 |         cursor = self.connection.cursor()
 58 |         cursor.execute(sql, (book_id, bookmark_id, block_id, now))
 59 |         self.connection.commit()
 60 |         return cursor.lastrowid
 61 | 
 62 |     def query(self, book_id, bookmark_id):
 63 |         """
 64 |         查询是否已经写如果
 65 |         :return: 查询后的主键值
 66 |         """
 67 |         sql = f"select book_id, bookmark_id, block_id, op_time \
 68 |             from {self.TabName} where book_id=? and bookmark_id=?"
 69 |         cursor = self.connection.cursor()
 70 |         cursor.execute(sql, (book_id, bookmark_id))
 71 |         return cursor.fetchall()
 72 | 
 73 |     def query_by_block(self, book_id, block_id):
 74 |         """
 75 |         查询是否已经写如果
 76 |         :return: 查询后的主键值
 77 |         """
 78 |         sql = f"select book_id, bookmark_id, block_id, op_time \
 79 |             from {self.TabName} where book_id=? and block_id=?"
 80 |         cursor = self.connection.cursor()
 81 |         cursor.execute(sql, (book_id, block_id))
 82 |         return cursor.fetchall()
 83 | 
 84 |     def delete_book(self, book_id):
 85 |         """
 86 |         删除书籍记录
 87 |         :param book_id: 书籍ID
 88 |         :return:
 89 |         """
 90 |         sql = f"delete from {self.TabName} where book_id=?"
 91 |         cursor = self.connection.cursor()
 92 |         cursor.execute(sql, (book_id,))
 93 |         self.connection.commit()
 94 | 
 95 |     def delete_bookmark(self, book_id, bookmark_id):
 96 |         """
 97 |         删除book_id, bookmark_id记录
 98 |         :param book_id: 书籍ID
 99 |         :param bookmark_id: 书签ID/章节ID
100 |         :return:
101 |         """
102 |         sql = f"delete from {self.TabName} where book_id=? and bookmark_id=?"
103 |         cursor = self.connection.cursor()
104 |         cursor.execute(sql, (book_id, bookmark_id))
105 |         self.connection.commit()


--------------------------------------------------------------------------------
/lib/test_db_weread_record.py:
--------------------------------------------------------------------------------
  1 | """unit test for PageBlockList"""
  2 | import unittest
  3 | # from datetime import datetime, timedelta
  4 | 
  5 | from lib.db_weread_record import DBWeReadRecord
  6 | from lib.page_block_list import PageBlockList
  7 | # from lib.db_weread_record import DBWeReadRecord # 替换your_module_name为实际的模块名
  8 | 
  9 | class TestPageBlockList(unittest.TestCase):
 10 |     """test PageBlockList"""
 11 | 
 12 |     def setUp(self):
 13 |         # 创建一个临时的数据库用于测试
 14 |         self.db_name = ":memory:"
 15 |         # self.db_name = "./var/tutorial.db"
 16 |         self.store = DBWeReadRecord(self.db_name)
 17 |         self.store.create_table()
 18 | 
 19 |     def tearDown(self):
 20 |         # 测试结束后关闭数据库连接
 21 |         del self.store
 22 | 
 23 |     def test_empty(self):
 24 |         """test append tail block"""
 25 |         book_id = "book_1"
 26 |         bookmark_id = 1
 27 |         block_id = "block_id_1"
 28 |         self.store.insert(book_id, bookmark_id, block_id)
 29 | 
 30 |         blocks = []
 31 |         page_block_list = PageBlockList(self.store, book_id, blocks)
 32 |         
 33 |         appended_block = page_block_list.found_chapter_position(bookmark_id)
 34 |         self.assertEqual(appended_block, None)
 35 |         
 36 |     def test_append_tail(self):
 37 |         """test append tail block"""
 38 |         book_id = "book_1"
 39 |         bookmark_id = 1
 40 |         block_id = "block_id_1"
 41 |         self.store.insert(book_id, bookmark_id, block_id)
 42 | 
 43 |         blocks = [
 44 |             {
 45 |                 'id': block_id,
 46 |                 'type': 'heading_1',
 47 |             }
 48 |         ]
 49 |         page_block_list = PageBlockList(self.store, book_id, blocks)
 50 |         
 51 |         appended_block = page_block_list.found_chapter_position(bookmark_id + 1)
 52 |         self.assertEqual(appended_block, block_id)
 53 | 
 54 | 
 55 |     def test_append_header(self):
 56 |         """test append tail block"""
 57 |         book_id = "book_2"
 58 |         bookmark_id = 2
 59 |         block_id = "block_id_2"
 60 |         self.store.insert(book_id, bookmark_id, block_id)
 61 | 
 62 |         blocks = [
 63 |             {
 64 |                 'id': 'toc_id',
 65 |                 'type': 'table_of_contents'
 66 |             },
 67 |             {
 68 |                 'id': block_id,
 69 |                 'type': 'heading_3',
 70 |             }
 71 |         ]
 72 |         page_block_list = PageBlockList(self.store, book_id, blocks)
 73 | 
 74 |         appended_block = page_block_list.found_chapter_position(bookmark_id - 1)
 75 |         self.assertEqual(appended_block, 'toc_id')
 76 | 
 77 |     def test_insert_mid(self):
 78 |         """test append tail block"""
 79 |         book_id = "book_1"
 80 | 
 81 |         self.store.insert(book_id, 3, 'block_3')
 82 |         self.store.insert(book_id, 5, 'block_5')
 83 |         self.store.insert(book_id, 7, 'block_7')
 84 | 
 85 |         blocks = [
 86 |             {
 87 |                 'id': 'toc_id',
 88 |                 'type': 'table_of_contents'
 89 |             },
 90 |             {
 91 |                 'id': 'block_3',
 92 |                 'type': 'heading_3',
 93 |             },
 94 |             {
 95 |                 'id': 'block_5',
 96 |                 'type': 'heading_5',
 97 |             },
 98 |             {
 99 |                 'id': 'block_7',
100 |                 'type': 'heading_7',
101 |             },
102 |         ]
103 |         page_block_list = PageBlockList(self.store, book_id, blocks)
104 | 
105 |         appended_block = page_block_list.found_chapter_position(4)
106 |         self.assertEqual(appended_block, 'block_3')
107 | 


--------------------------------------------------------------------------------
/api/weread.py:
--------------------------------------------------------------------------------
  1 | """封装微信api的调用"""
  2 | 
  3 | from http.cookies import SimpleCookie
  4 | from requests.utils import cookiejar_from_dict
  5 | import requests
  6 | 
  7 | 
  8 | class WeReadAPI:
  9 |     """微信读书API"""
 10 | 
 11 |     # 全量书籍笔记信息列表
 12 |     WEREAD_NOTEBOOKS_URL = "https://weread.qq.com/api/user/notebook"
 13 | 
 14 |     # 章节信息列表
 15 |     WEREAD_CHAPTER_INFO = "https://weread.qq.com/web/book/chapterInfos"
 16 | 
 17 |     # 书籍划线
 18 |     WEREAD_BOOKMARKLIST_URL = "https://weread.qq.com/web/book/bookmarklist"
 19 | 
 20 |     # 获取笔记列表，包括笔记、推荐总结
 21 |     WEREAD_REVIEW_LIST_URL = "https://weread.qq.com/web/review/list"
 22 | 
 23 |     # 数据详情
 24 |     WEREAD_BOOK_INFO = "https://weread.qq.com/web/book/info"
 25 | 
 26 |     # 读取进度等
 27 |     WEREAD_READ_INFO_URL = "https://weread.qq.com/web/book/readinfo"
 28 | 
 29 |     WEREAD_URL = "https://weread.qq.com/"
 30 | 
 31 |     def __init__(self, cookie):
 32 |         session = requests.Session()
 33 |         session.cookies = self._parse_cookie(cookie)
 34 |         session.get(self.WEREAD_URL)
 35 |         self.session = session
 36 | 
 37 |     def _parse_cookie(self, cookie_string):
 38 |         cookie = SimpleCookie()
 39 |         cookie.load(cookie_string)
 40 |         cookies_dict = {}
 41 |         cookiejar = None
 42 |         for key, morsel in cookie.items():
 43 |             cookies_dict[key] = morsel.value
 44 |             cookiejar = cookiejar_from_dict(
 45 |                 cookies_dict, cookiejar=None, overwrite=True
 46 |             )
 47 |         return cookiejar
 48 | 
 49 |     def get_notebooklist(self):
 50 |         """全量书籍笔记信息列表，仅包括笔记更新时间、数量等，不包括笔记明细"""
 51 |         r = self.session.get(self.WEREAD_NOTEBOOKS_URL)
 52 |         if r.ok:
 53 |             data = r.json()
 54 |             books = data.get("books")
 55 |             books.sort(
 56 |                 key=lambda x: x["sort"]
 57 |             )  # 最近更新（划线、评语以及推荐都算更新）时间
 58 |             return books
 59 |         else:
 60 |             print(f"get notesbook failed: {r.text}")
 61 |             return []
 62 | 
 63 |     def get_chapter_list(self, bookId):
 64 |         """获取章节信息列表"""
 65 |         body = {"bookIds": [bookId]}
 66 |         r = self.session.post(self.WEREAD_CHAPTER_INFO, json=body)
 67 |         if (
 68 |             r.ok
 69 |             and "data" in r.json()
 70 |             and len(r.json()["data"]) == 1
 71 |             and "updated" in r.json()["data"][0]
 72 |         ):
 73 |             update = r.json()["data"][0]["updated"]
 74 |             # d = {item["chapterUid"]: item for item in update}
 75 |             return update
 76 |         else:
 77 |             print(r.text)
 78 |         return []
 79 | 
 80 |     def get_bookmark_list(self, bookId):
 81 |         """获取书籍划线列表"""
 82 |         params = dict(bookId=bookId)
 83 |         r = self.session.get(self.WEREAD_BOOKMARKLIST_URL, params=params)
 84 |         if r.ok:
 85 |             updated = r.json().get("updated")
 86 |             updated = sorted(
 87 |                 updated,
 88 |                 key=lambda x: (
 89 |                     x.get("chapterUid", 1),
 90 |                     int(x.get("range").split("-")[0]),
 91 |                 ),
 92 |             )
 93 |             return r.json()["updated"]
 94 |         else:
 95 |             print("get bookmarklist failed: {r.text}")
 96 |         return []
 97 | 
 98 |     def get_review_list(self, bookId):
 99 |         """获取笔记列表，包括笔记评论、推荐总结"""
100 |         params = dict(bookId=bookId, listType=11, mine=1, syncKey=0)
101 |         r = self.session.get(self.WEREAD_REVIEW_LIST_URL, params=params)
102 |         if r.ok:
103 |             reviews = r.json().get("reviews")
104 |             # 总结
105 |             summary = list(filter(lambda x: x.get("review").get("type") == 4, reviews))
106 |             # 笔记（评语）
107 |             reviews = list(filter(lambda x: x.get("review").get("type") == 1, reviews))
108 |             reviews = list(map(lambda x: x.get("review"), reviews))
109 |             reviews = list(map(lambda x: {**x, "markText": x.pop("content")}, reviews))
110 |             return summary, reviews
111 |         else:
112 |             print(r.text)
113 |             return [], []
114 | 
115 |     def get_bookinfo(self, bookId: str) -> list:
116 |         """获取书的详情"""
117 |         params = dict(bookId=bookId)
118 |         r = self.session.get(self.WEREAD_BOOK_INFO, params=params)
119 |         isbn = ""
120 |         rating = 0
121 |         category = ""
122 |         intro = ""
123 | 
124 |         if r.ok:
125 |             data = r.json()
126 |             isbn = data["isbn"]
127 |             rating = data["newRating"] / 1000
128 |             category = data.get("category", "")
129 |             intro = data.get("intro", "")
130 | 
131 |         return (isbn, rating, category, intro)
132 | 
133 |     def get_read_info(self, bookId):
134 |         """获取书籍的进度"""
135 |         params = dict(
136 |             bookId=bookId, readingDetail=1, readingBookIndex=1, finishedDate=1
137 |         )
138 |         r = self.session.get(self.WEREAD_READ_INFO_URL, params=params)
139 |         if r.ok:
140 |             return r.json()
141 |         return {}
142 | 
143 | 
144 | def str_reading_time(reading_time: int):
145 |     "convert reading time to str"
146 |     format_time = ""
147 |     hour = reading_time // 3600
148 |     if hour > 0:
149 |         format_time += f"{hour}时"
150 |     minutes = reading_time % 3600 // 60
151 |     if minutes > 0:
152 |         format_time += f"{minutes}分"
153 |     return format_time
154 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <p align="center">
  2 |   <img alt="Example Page" src="https://github.com/alex-guoba/sync-notion/assets/2872637/4bcb0692-8881-4f39-abce-22495c8a3fcc" width="689">
  3 | </p>
  4 | 
  5 | # 将微信读书笔记、github trending、memos 自动同步到 Notion
  6 | 
  7 | 本项目支持将微信读书笔记（划线及评论）、github trending 同步 Notion。支持本地手工、github action 定期两种方式。可以修改 action 配置自行按需选择。
  8 | 
  9 | [English](./README.md) | 简体中文
 10 | 
 11 | ## Requirements
 12 | 
 13 | Python 3.10
 14 | 
 15 | ## 同步微信读书笔记
 16 | 
 17 | ### 使用
 18 | 
 19 | 1. star 本项目
 20 | 
 21 | 2. fork 这个工程，删除目录中的临时存储文件(./var/sync_read.db)
 22 | 
 23 | 3. 获取微信读书的 Cookie: `WEREAD_COOKIE`
 24 | 
 25 | - 浏览器打开 https://weread.qq.com/
 26 | - 微信扫码登录确认，提示没有权限忽略即可
 27 | - 按 F12 进入开发者模式，依次点 Network -> Doc -> Headers-> cookie。复制 Cookie 字符串;
 28 | 
 29 | 4. 获取 NotionToken: `NOTION_TOKEN`
 30 | 
 31 | - 浏览器打开https://www.notion.so/my-integrations
 32 | - 点击 New integration 输入 name 提交
 33 | - 点击 show，然后 copy
 34 | 
 35 | 5. 复制[这个 Notion 模板](https://gelco.notion.site/67639069c7b84f55b6394f16ecda0c4f?v=b5d09dc635db4b3d8ba13b200b88d823&pvs=25)，删掉所有的数据，并点击右上角设置，Connections 添加你创建的 Integration。
 36 | 
 37 | 6. 获取 NotionDatabaseID: `NOTION_DATABASE_ID`
 38 | 
 39 | - 打开 Notion 数据库，点击右上角的 Share，然后点击 Copy link
 40 | - 获取链接后比如https://gelco.notion.site/67639069c7b84f55b6394f16ecda0c4f?v=b5d09dc635db4b3d8ba13b200b88d823&pvs=25 中间的**67639069c7b84f55b6394f16ecda0c4f**就是 DatabaseID
 41 | 
 42 | 7. 同步方式
 43 | 
 44 | - **方式一**：在 Github 的 Secrets 中添加以下变量来实现每日自动同步
 45 | 
 46 |   - 打开你 fork 的工程，点击 Settings->Secrets and variables->New repository secret
 47 |   - 添加以下变量(**变量名称自定义，只要与 action 中对应的名称一致即可**)
 48 |     - `WEREAD_COOKIE`
 49 |     - `NOTION_TOKEN`
 50 |     - `NOTION_DATABASE_ID`
 51 | 
 52 | - **方式二**： 也可以本地运行脚本完成同步:
 53 | 
 54 | ```shell
 55 | pip install -r requirements.txt
 56 | python3 ./main.py sync_read ${WEREAD_COOKIE} ${NOTION_TOKEN} ${NOTION_DATABASE_ID}
 57 | ```
 58 | 
 59 | ### 高级特性
 60 | 
 61 | 1. 可以配合 [next-blogger](https://github.com/alex-guoba/next-blogger) 搭建自己的**读书笔记分享**网站。样式参考 [goroutine.cn](https://goroutine.cn/notes)
 62 | 
 63 | 2. 【可选】可以指定单独的 Notion 数据库，用于存储每日同步记录，用作 calender 视图等场景。
 64 | 
 65 | - action 中环境变量`NOTION_DATABASE_CALENDAR`，命令行方式参考[action](./.github/workflows/weread.yml)。模板参考[这个](https://gelco.notion.site/5a17a1f794464652ade156c4c7572736?v=d961ee4d64864620b948b1a18fb1ebdd&pvs=4)
 66 | 
 67 | - 注意：也需要在 Connections 添加你创建的 Integration。流程与上面一致。
 68 | 
 69 | 3. 支持[Server 酱](https://sct.ftqq.com/)微信通知，用于在同步完成时发送微信通知更新读书笔记数。
 70 | 
 71 | - action 中环境变量`SCKEY`，参考[action](./.github/workflows/weread.yml)。申请方式参考[Server 酱](https://sct.ftqq.com/sendkey)。
 72 | 
 73 | ### 增量同步说明
 74 | 
 75 | #### 更新时机
 76 | 
 77 | 1. 微信读书笔记为增量同步，每次同步时，会根据笔记的更新时间进行筛选。仅当微信读书中书籍有**笔记更新**时才会触发同步。
 78 | 2. 可以删除 db 中已同步过的书籍页面(page)，删除后下次同步时会全同步（注意需要在微信读书中触发一次笔记更新，比如新增、删除任意笔记即可）。
 79 | 
 80 | #### 增量机制
 81 | 
 82 | 1. 已同步到 Notion 的笔记，用户可以在 Notion 中**新增、修改**笔记内容，下次同步时，**不会覆盖已同步过的笔记**。
 83 | 2. 增量笔记同步顺序：新增章节会按照微信读书的章节顺序插入；新增笔记会插入到对应的章节下，但不保证同一章节下的笔记与微信读书一致。
 84 | 3. 用户可以在 database 中新增字段，用做书籍的自定义标识。但不得修改已有字段。
 85 | 4. 已同步到 Notion 的笔记，**用户不得删除章节信息**，否则下次同步同一章节的增量笔记无法精确定位。
 86 | 
 87 | ### 原理说明
 88 | 
 89 | Notion 无法保存微信读书的笔记 id 等信息，所以在仓库中存储了一份微信读书笔记 ID 与[Notion Block ID](https://developers.notion.com/reference/patch-block-children)的映射关系。每次更新完毕后在 git action 中自动提交到仓库。
 90 | 所以如果用户 clone 了本仓库到，首次运行时可以先删除原仓库中的映射文件(./var/sync_read.db)。
 91 | 
 92 | ### 支持的配置项
 93 | 
 94 | ```ini
 95 | [weread.format]
 96 | ContentType = list
 97 | EnableEmoj = false
 98 | EnableReadingDetail = true
 99 | ```
100 | 
101 | - ContentType：增加笔记内容 block 组织形式配置，可以将内容展现形态指定为 paragraph/list/callout。
102 | - EnableEmoj：开启、禁用 emoj
103 | - EnableReadingDetail: 开启、禁用阅读明细。
104 | 
105 | ## 同步 Github Trending
106 | 
107 | ### 使用
108 | 
109 | 与微信读书同步方法基本一致。
110 | 
111 | 1. 获取 NotionToken（可复用）
112 | 
113 | 2. 创建 NotionDatabase，获取 NotionDatabaseID， notion 模板参考[这个](https://gelco.notion.site/77a3c6c8c2fb405e8347a7bde96d51d1?v=5c6464969afa432ea473f07c7b6959e8)
114 | 
115 | 3. 本地运行方式
116 | 
117 | ```shell
118 | pip install -r requirements.txt
119 | python3 ./main.py sync_trending ${NOTION_TOKEN} ${NOTION_DATABASE_TRENDING} --git_token=${GIT_TOKEN}
120 | ```
121 | 
122 | 4. 或者在 Github 的 Secrets 中添加以下变量来实现每日自动同步
123 | 
124 | - 打开你 fork 的工程，点击 Settings->Secrets and variables->New repository secret
125 | - 添加以下变量 (**变量名称自定义，只要与 action 中对应的名称一致即可**)
126 |   - NOTION_TOKEN
127 |   - NOTION_DATABASE_TRENDING
128 |   - GIT_TOKEN
129 |     如果不需要仓库的其他信息（包括 fork、star、watcher 数量），GIT_TOKEN 可以不配置
130 | 
131 | ### 支持的配置项
132 | 
133 | ```ini
134 | [trending.language]
135 | languages = python,go
136 | ```
137 | 
138 | - languages: 关注的项目语言，不允许为空
139 | 
140 | ## 同步 ProductHunt 产品列表到 Notion
141 | 
142 | ### 使用
143 | 
144 | 与微信读书同步方法基本一致。产品列表参考[ProductHunt](https://www.producthunt.com/all)
145 | 
146 | 1. 获取 NotionToken（可复用）
147 | 
148 | 2. 创建 NotionDatabase，获取 NotionDatabaseID， notion 模板参考[这个](https://gelco.notion.site/1467b35a24cd80449eeadf5ed024cef5?v=1a470daa9fc0418d8682aaf789860d40&pvs=73)
149 | 3. 本地运行
150 | 
151 | ```shell
152 | python3 ./main.py sync_producthunt ${NOTION_TOKEN} ${DATABASE_ID}
153 | ```
154 | 
155 | ```ini
156 | [producthunt.filter]
157 | ; 过滤条件，可选
158 | MinVotes = 5
159 | MinComments = 5
160 | ```
161 | 
162 | 也可配置 github action 来实现定期同步，有需要修改 github action 以及配置对应环境即可。
163 | 
164 | ```shell
165 | # git中配置好对应的环境变量，设置对应的action run指令即可
166 | python3 ./main.py sync_producthunt "${{secrets.NOTION_TOKEN}}" "${{DATABASE_ID_PH}}"
167 | ```
168 | 
169 | ## 感谢
170 | 
171 | - [malinkang / weread_to_notion](https://github.com/malinkang/weread_to_notion)
172 | - [bonfy / github-trending](https://github.com/bonfy/github-trending)
173 | 


--------------------------------------------------------------------------------
/README.zh-CN.md:
--------------------------------------------------------------------------------
  1 | 
  2 | #  Syncing Wechat Book Notes, Github Trending, Memos to Notion automatically
  3 | 
  4 | This project supports the synchronization of WeChat book notes, GitHub trending, and memos to Notion. It can be done locally or through regular GitHub actions, which can be customized according to your preferences.
  5 | 
  6 | English | [简体中文](./README.zh-CN.md)
  7 | 
  8 | ## Requirements
  9 | 
 10 | Python 3.10
 11 | 
 12 | ## Synchronizing WeChat Book Notes
 13 | 
 14 | ### Usage
 15 | 
 16 | 1. Star this project.
 17 | 2. Fork this repository.
 18 | 3. Obtain the WeChat book's Cookie.
 19 | 	* Open <https://weread.qq.com/> in your browser.
 20 | 	* Scan the QR code with WeChat and confirm login. Ignore any permission errors.
 21 | 	* Press F12 to enter developer mode, then follow Network -> Doc -> Headers-> cookie. Copy the Cookie string.
 22 | 4. Get the Notion Token.
 23 | 	* Open <https://www.notion.so/my-integrations> in your browser.
 24 | 	* Click "New integration" and enter a name to submit.
 25 | 	* Click "show" and then copy the token.
 26 | 5. Copy [this Notion template](https://gelco.notion.site/67639069c7b84f55b6394f16ecda0c4f?v=b5d09dc635db4b3d8ba13b200b88d823&pvs=25), delete all the data, and click the settings button in the top right corner. Add the integration you created under Connections.
 27 | 6. Get the Notion Database ID.
 28 | 	* Open the Notion database, click the "Share" button in the top right corner, and then click "Copy link".
 29 | 	* The link will look like this: <https://gelco.notion.site/67639069c7b84f55b6394f16ecda0c4f?v=b5d09dc635db4b3d8ba13b200b88d823&pvs=25>. The **67639069c7b84f55b6394f16ecda0c4f** part is the Database ID.
 30 | 7. Add the following variables to your GitHub Secrets to enable daily automatic synchronization:
 31 | 	* Open your forked repository, click Settings -> Secrets and variables -> New repository secret.
 32 | 	* Add the following variables (you can customize the variable names as long as they match the names in the action):
 33 | 		+ WEREAD_COOKIE
 34 | 		+ NOTION_TOKEN
 35 | 		+ NOTION_DATABASE_ID
 36 | 8. Alternatively, you can run the script locally:
 37 | ```shell
 38 | pip install -r requirements.txt
 39 | python3 ./main.py sync_weread ${WEREAD_COOKIE} ${NOTION_TOKEN} ${NOTION_DATABASE_ID}
 40 | ```
 41 | 
 42 | ### Supported Configuration Options
 43 | 
 44 | ```ini
 45 | [weread.format]
 46 | ContentType = list
 47 | EnableEmoj = false
 48 | EnableReadingDetail = true
 49 | ```
 50 | 
 51 | * ContentType: Specifies the organization format of the note content blocks as paragraph/list/callout.
 52 | * EnableEmoj: Disables emojis.
 53 | * EnableReadingDetail: Add reading detail info to notes
 54 | 
 55 | ## Synchronizing GitHub Trending
 56 | 
 57 | ### Usage
 58 | 
 59 | The process is similar to synchronizing WeChat book notes.
 60 | 
 61 | 1. Get the Notion Token (can be reused).
 62 | 2. Create a Notion Database and get the Notion Database ID. Use this [template](https://gelco.notion.site/77a3c6c8c2fb405e8347a7bde96d51d1?v=5c6464969afa432ea473f07c7b6959e8) for reference.
 63 | 3. To run locally:
 64 | ```shell
 65 | pip install -r requirements.txt
 66 | python3 ./main.py sync_trending ${NOTION_TOKEN} ${NOTION_DATABASE_TRENDING} --git_token=${GIT_TOKEN}
 67 | ```
 68 | 4. Or add the following variables to your GitHub Secrets for daily automatic synchronization:
 69 | 	* Open your forked repository, click Settings -> Secrets and variables -> New repository secret.
 70 | 	* Add the following variables (you can customize the variable names as long as they match the names in the action):
 71 | 		+ NOTION_TOKEN
 72 | 		+ NOTION_DATABASE_TRENDING
 73 | 		+ GIT_TOKEN (optional if you don't need repository information such as forks, stars, and watchers)
 74 | 
 75 | ### Supported Configuration Options
 76 | 
 77 | ```ini
 78 | [trending.language]
 79 | languages = python,go
 80 | ```
 81 | 
 82 | * languages: The programming languages of the repositories to follow. This field cannot be empty.
 83 | 
 84 | ## Syncing Memos to Notion
 85 | 
 86 | ### Usage
 87 | 
 88 | The process is similar to syncing WeChat book notes.
 89 | 
 90 | 1. Obtain a Notion Token (can be reused)
 91 | 2. Create a Notion Database and get its ID. The Notion template can be referenced from [here](https://gelco.notion.site/b840c05d92af44719ee3d9d7f73010f8?v=f0a726764fa3455b9a28f50783eea58a&pvs=4)
 92 | 3. Assign a unique [Token](https://usememos.com/docs/access-tokens) to the user on the Memos platform for accessing Memos.
 93 | 4. Modify the configuration file to set the Memos host address and the user's UserName for pulling data. Note that the Token assigned to the user must match the UserName to access Private memos.
 94 | 5. Run locally with the following command:
 95 | ```shell
 96 | python3 ./main.py sync_memos ${NOTION_TOKEN} ${DATABASE_ID} ${MEMOS_TOKEN}
 97 | ```
 98 | 
 99 | ```ini
100 | [memos.opts]
101 | MemosHost = http://127.0.0.1:8081
102 | ; Username, not nickname
103 | MemosUserName = memos-demo
104 | ```
105 | 
106 | It is also possible to configure GitHub Actions for regular syncing by modifying the action and configuring the corresponding environment variables.
107 | 
108 | ```shell
109 | # Configure the corresponding environment variables in git and set the action run command
110 | python3 ./main.py sync_memos "${{secrets.NOTION_TOKEN}}" "${{DATABASE_ID}}" "${{MEMOS_TOKEN}}"
111 | ```
112 | 
113 | ## Acknowledgments
114 | - [malinkang / weread_to_notion](https://github.com/malinkang/weread_to_notion)
115 | - [bonfy / github-trending](https://github.com/bonfy/github-trending)
116 | - [usememos / memos](https://github.com/usememos/memos)
117 | 


--------------------------------------------------------------------------------
/sync_trending.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 同步github trending到notion
  3 | """
  4 | 
  5 | import logging
  6 | import time
  7 | import requests
  8 | from pyquery import PyQuery as pq
  9 | from github import Github
 10 | from github import Auth
 11 | 
 12 | from notion_client import AsyncClient
 13 | 
 14 | from config import CONFIG
 15 | from api import notion
 16 | 
 17 | 
 18 | class TrendItem:
 19 |     """trend item"""
 20 | 
 21 |     def __init__(self, title: str, url: str, desc: str) -> None:
 22 |         self.title = title
 23 |         self.url = url
 24 |         self.desc = desc
 25 |         self.watchers_count = 0
 26 |         self.forks_count = 0
 27 |         self.stargazers_count = 0
 28 | 
 29 |     def _repo_path(self) -> str:
 30 |         items = self.url.split("/")[-2:]
 31 |         return "/".join(items)
 32 | 
 33 |     def fullfill_repo_info(self, git_token):
 34 |         "fullfill basic info from repo"
 35 |         if not self.url:
 36 |             return
 37 |         auth = None
 38 |         if git_token:
 39 |             auth = Auth.Token(git_token)
 40 |         git = Github(auth=auth)
 41 | 
 42 |         try:
 43 |             repo = git.get_repo(self._repo_path())
 44 |         # pylint: disable-next=broad-except
 45 |         except Exception as _e:
 46 |             logging.error("get repo %s error: %s", self._repo_path(), _e)
 47 |             return
 48 | 
 49 |         self.watchers_count = repo.watchers_count
 50 |         self.forks_count = repo.forks_count
 51 |         self.stargazers_count = repo.stargazers_count
 52 | 
 53 | 
 54 | async def query_page(client: AsyncClient, data_source_id: str, title: str) -> bool:
 55 |     """检查是否已经插入过 如果已经插入了就忽略"""
 56 |     time.sleep(0.3)
 57 | 
 58 |     response = await client.data_sources.query(
 59 |         data_source_id=data_source_id,
 60 |         filter={"property": "Title", "rich_text": {"equals": title}},
 61 |     )
 62 |     if len(response["results"]):
 63 |         return True
 64 |     return False
 65 | 
 66 | 
 67 | async def insert_page(
 68 |     client: AsyncClient, data_source_id: str, language: str, trend: TrendItem
 69 | ) -> None | str:
 70 |     """插入page"""
 71 |     parent = {"data_source_id": data_source_id, "type": "data_source_id"}
 72 |     properties = {
 73 |         "Title": {"title": [{"type": "text", "text": {"content": trend.title}}]},
 74 |         "Language": {"select": {"name": language}},
 75 |         "URL": {"url": trend.url},
 76 |         "Desc": {"rich_text": [{"type": "text", "text": {"content": trend.desc}}]},
 77 |         "WatchersCount": {"number": trend.watchers_count},
 78 |         "ForksCount": {"number": trend.forks_count},
 79 |         "StargazersCount": {"number": trend.stargazers_count},
 80 |     }
 81 |     response = await client.pages.create(parent=parent, properties=properties)
 82 |     return response["id"]
 83 | 
 84 | 
 85 | def _scrape(language: str) -> list[TrendItem]:
 86 |     headers = {
 87 |         # pylint: disable=line-too-long
 88 |         "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0",
 89 |         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
 90 |         "Accept-Encoding": "gzip,deflate,sdch",
 91 |         "Accept-Language": "zh-CN,zh;q=0.8",
 92 |     }
 93 |     result = []
 94 | 
 95 |     url = f"https://github.com/trending/{language}".format(language=language)
 96 |     req = requests.get(url, headers=headers, timeout=10)
 97 |     if req.status_code != 200:
 98 |         logging.error("git trending error. %d", req.status_code)
 99 |         return result
100 | 
101 |     content = pq(req.content)
102 |     items = content("div.Box article.Box-row")
103 | 
104 |     # codecs to solve the problem utf-8 codec like chinese
105 |     # with codecs.open(filename, "a", "utf-8") as f:
106 |     #     # f.write('\n#### {language}\n'.format(language=language))
107 | 
108 |     for item in items:
109 |         i = pq(item)
110 |         title = i(".lh-condensed a").text()
111 |         description = i("p.col-9").text()
112 |         url = "https://github.com" + str(i(".lh-condensed a").attr("href"))
113 | 
114 |         result.append(TrendItem(str(title), url, str(description)))
115 | 
116 |     return result
117 | 
118 | 
119 | def _filter_repo(trend: TrendItem) -> bool:
120 |     filters = {
121 |         "MinStargazers": "stargazers_count",
122 |         "MinForks": "forks_count",
123 |         "MinWatchers": "watchers_count",
124 |     }
125 |     for k, v in filters.items():
126 |         thresh_hold = CONFIG.getint("trending.language", k)
127 |         current = getattr(trend, v, 0)
128 |         if thresh_hold > 0 and current < thresh_hold:
129 |             return True
130 |     return False
131 | 
132 | 
133 | # pylint: disable=line-too-long
134 | async def _sync(
135 |     client: AsyncClient,
136 |     data_source_id: str,
137 |     language: str,
138 |     trends: list[TrendItem],
139 |     git_token: str | None = None,
140 | ) -> None:
141 |     for trend in trends:
142 |         time.sleep(0.3)  # avoid rate limit for notion API
143 |         exist = await query_page(client, data_source_id, trend.title)
144 |         if exist:
145 |             continue
146 |         # insert to db
147 |         logging.info(trend)
148 | 
149 |         if git_token:
150 |             trend.fullfill_repo_info(git_token)
151 | 
152 |         if _filter_repo(trend):
153 |             logging.info("ignore %s", trend.title)
154 |             continue
155 | 
156 |         await insert_page(client, data_source_id, language, trend)
157 | 
158 | 
159 | async def sync_trending(notion_token, database_id, git_token=None):
160 |     """sync github trending to notion"""
161 |     client = AsyncClient(auth=notion_token, log_level=logging.ERROR)
162 | 
163 |     data_sources_id = await notion.get_datasource_id(client, database_id)
164 |     if not data_sources_id:
165 |         logging.error("database %s has no data source", database_id)
166 |         return
167 | 
168 |     languages = list(
169 |         map(
170 |             lambda x: x.strip(), CONFIG.get("trending.language", "Languages").split(",")
171 |         )
172 |     )
173 |     for language in languages:
174 |         if not language:
175 |             continue
176 | 
177 |         logging.info("sync %s", language)
178 | 
179 |         trends = _scrape(language)
180 |         if not trends:
181 |             logging.error("language [%s] error", language)
182 |             continue
183 | 
184 |         await _sync(client, data_sources_id, language, trends, git_token)
185 | 


--------------------------------------------------------------------------------
/sync_producthunt.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 同步product hunt到notion
  3 | """
  4 | 
  5 | import logging
  6 | import time
  7 | import requests
  8 | from pyquery import PyQuery as pq
  9 | from notion_client import AsyncClient
 10 | 
 11 | from api import notion
 12 | from config import CONFIG
 13 | from api.notion import BlockHelper
 14 | 
 15 | 
 16 | class ProductItem:
 17 |     """product item"""
 18 | 
 19 |     def __init__(
 20 |         self,
 21 |         name: str,
 22 |         desc: str,
 23 |         topics: list[str],
 24 |         comments: int,
 25 |         votes: int,
 26 |         url: str = "",
 27 |         cover: str = "",
 28 |     ) -> None:
 29 |         self.name = name
 30 |         self.desc = desc
 31 |         self.topics = topics
 32 |         self.comments = comments
 33 |         self.votes = votes
 34 |         self.cover = cover
 35 |         self.url = f"https://www.producthunt.com{url}"
 36 | 
 37 |     # def fullfill_repo_info(self, git_token):
 38 |     #     pass
 39 |     def __repr__(self) -> str:
 40 |         return f"""<ProductItem name={self.name} desc={self.desc} topics={self.topics} \
 41 | comments={self.comments} votes={self.votes} url={self.url} cover={self.cover}>"""
 42 | 
 43 | 
 44 | async def query_page(client: AsyncClient, data_source_id: str, name: str) -> bool:
 45 |     """check page exist or not"""
 46 |     time.sleep(0.3)
 47 | 
 48 |     response = await client.data_sources.query(
 49 |         data_source_id=data_source_id,
 50 |         filter={"property": "Name", "rich_text": {"equals": name}},
 51 |     )
 52 |     if len(response["results"]):
 53 |         return True
 54 |     return False
 55 | 
 56 | 
 57 | async def _append_page(
 58 |     client: AsyncClient, data_source_id: str, prod: ProductItem
 59 | ) -> None | str:
 60 |     """插入page"""
 61 |     parent = {"data_source_id": data_source_id, "type": "data_source_id"}
 62 |     properties = {
 63 |         "Name": BlockHelper.title(prod.name),
 64 |         "Description": BlockHelper.rich_text(prod.desc),
 65 |         "Topics": BlockHelper.multi_select(prod.topics),
 66 |         "Comments": BlockHelper.number(prod.comments),
 67 |         "Votes": BlockHelper.number(prod.votes),
 68 |         "URL": BlockHelper.url(prod.url),
 69 |         "Cover": BlockHelper.files("Cover", prod.cover),
 70 |     }
 71 |     response = await client.pages.create(
 72 |         parent=parent, icon=BlockHelper.icon(prod.cover), properties=properties
 73 |     )
 74 |     return response["id"]
 75 | 
 76 | 
 77 | def _scrape() -> list[ProductItem]:
 78 |     headers = {
 79 |         # pylint: disable=line-too-long
 80 |         "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0",
 81 |         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
 82 |         "Accept-Encoding": "gzip,deflate,sdch",
 83 |         "Accept-Language": "zh-CN,zh;q=0.8",
 84 |     }
 85 |     result = []
 86 | 
 87 |     url = "https://www.producthunt.com/all"
 88 |     req = requests.get(url, headers=headers, timeout=60)
 89 |     if req.status_code != 200:
 90 |         logging.error("access product hunt error. %d", req.status_code)
 91 |         return []
 92 | 
 93 |     content = pq(req.content)
 94 |     items = content("main div.flex-col div.flex-col div[class^='styles_item']")
 95 |     if items.length == 0:
 96 |         items = content("main div.flex-col div.flex-col section")
 97 | 
 98 |     for item in items:
 99 |         logging.debug("parse product: %s", item)
100 |         i = pq(item)
101 |         url = i('a[href^="/posts/"]').eq(0).attr("href")
102 | 
103 |         mid = i("div.flex-col a")
104 |         name = mid.eq(0).text()
105 |         description = mid.eq(1).text()
106 |         # name = i("div.flex-col a strong").text()
107 |         # description = i("div.flex-col a").text()
108 | 
109 |         comments = i("div.flex-col div.flex-row div").eq(0).text()
110 |         if not comments:
111 |             comments = i("button div.flex-col").eq(0).text()
112 | 
113 |         votes = i('button[data-test="vote-button"]').text()
114 | 
115 |         cover = i('a[href^="/posts/"] img').eq(0).attr("src")
116 |         if not cover:
117 |             cover = i('a[href^="/posts/"] video').eq(0).attr("poster")
118 | 
119 |         _topics = i('div.flex-col div.flex-row a[href^="/topics/"]')
120 |         topics = []
121 |         for topic in _topics:
122 |             topic = pq(topic).text()
123 |             topics.append(topic)
124 | 
125 |         if name == "" or description == "" or len(topics) == 0:
126 |             logging.error(
127 |                 "parse name or description error: %s-%s-%d",
128 |                 name,
129 |                 description,
130 |                 len(topics),
131 |             )
132 |             continue
133 |         if not votes.isnumeric() or not comments.isnumeric():
134 |             logging.error(
135 |                 "parse votes or comments error: %s-%s-%s", name, votes, comments
136 |             )
137 |             continue
138 | 
139 |         try:
140 |             votes = int(votes)
141 |             comments = int(comments)
142 |         except ValueError:
143 |             logging.error("parse votes or comments error")
144 |             continue
145 | 
146 |         result.append(
147 |             ProductItem(
148 |                 name, description, topics, votes, comments, url=url, cover=cover
149 |             )
150 |         )
151 | 
152 |     return result
153 | 
154 | 
155 | def _filter_product(prod: ProductItem) -> bool:
156 |     filters = {
157 |         "MinVotes": "votes",
158 |         "MinComments": "comments",
159 |     }
160 |     for k, v in filters.items():
161 |         thresh_hold = CONFIG.getint("producthunt.filter", k)
162 |         current = getattr(prod, v, 0)
163 |         if thresh_hold > 0 and current < thresh_hold:
164 |             return True
165 |     return False
166 | 
167 | 
168 | # pylint: disable=line-too-long
169 | async def _sync(
170 |     client: AsyncClient,
171 |     data_source_id: str,
172 |     products: list[ProductItem],
173 | ) -> None:
174 |     for prod in products:
175 |         if _filter_product(prod):
176 |             logging.info("filter product: %s", prod.name)
177 |             continue
178 | 
179 |         time.sleep(0.3)  # avoid rate limit for notion API
180 |         if await query_page(client, data_source_id, prod.name):
181 |             continue
182 | 
183 |         # insert to db
184 |         logging.info(prod)
185 | 
186 |         _id = await _append_page(client, data_source_id, prod)
187 |         print(_id)
188 | 
189 | 
190 | async def sync_producthunt(notion_token, database_id):
191 |     """sync product hunt to notion"""
192 |     client = AsyncClient(auth=notion_token, log_level=logging.ERROR)
193 |     data_sources_id = await notion.get_datasource_id(client, database_id)
194 |     if not data_sources_id:
195 |         logging.error("database %s has no data source", database_id)
196 |         return
197 | 
198 |     products = _scrape()
199 |     if not products:
200 |         logging.error(
201 |             "ph scape error",
202 |         )
203 |         return
204 | 
205 |     logging.info("ph scape total num [%s]", len(products))
206 |     await _sync(client, data_sources_id, products)
207 | 


--------------------------------------------------------------------------------
/api/notion.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 封装notion相关操作
  3 | """
  4 | 
  5 | from datetime import datetime
  6 | 
  7 | from notion_client import AsyncClient
  8 | 
  9 | 
 10 | # class NotionAPI:
 11 | #     """暂未启用"""
 12 | 
 13 | #     def __init__(self, token):
 14 | #         self.token = token
 15 | 
 16 | #     def dumy(self):
 17 | #         """pass"""
 18 | #         pass
 19 | 
 20 | 
 21 | class BlockHelper:
 22 |     """生成notion格式的工具函数"""
 23 | 
 24 |     headings = {
 25 |         1: "heading_1",
 26 |         2: "heading_2",
 27 |         3: "heading_3",
 28 |     }
 29 | 
 30 |     table_contents = {
 31 |         "type": "table_of_contents",
 32 |         "table_of_contents": {"color": "default"},
 33 |     }
 34 | 
 35 |     color_styles = {
 36 |         1: "red",
 37 |         2: "purple",
 38 |         3: "blue",
 39 |         4: "green",
 40 |         5: "yellow",
 41 |     }
 42 | 
 43 |     def __init__(self):
 44 |         pass
 45 | 
 46 |     @classmethod
 47 |     def table_of_contents(cls):
 48 |         """获取目录"""
 49 |         return cls.table_contents
 50 | 
 51 |     @classmethod
 52 |     def heading(cls, level, content):
 53 |         """取heading格式""" ""
 54 |         heading_type = cls.headings.get(level, "heading_3")
 55 |         return {
 56 |             "type": heading_type,
 57 |             heading_type: {
 58 |                 "rich_text": [
 59 |                     {
 60 |                         "type": "text",
 61 |                         "text": {
 62 |                             "content": content,
 63 |                         },
 64 |                     }
 65 |                 ],
 66 |                 "color": "default",
 67 |                 "is_toggleable": False,
 68 |             },
 69 |         }
 70 | 
 71 |     @classmethod
 72 |     def table(
 73 |         cls,
 74 |         table_width: int,
 75 |         cells: list,
 76 |         has_column_header: bool = False,
 77 |         has_row_header: bool = False,
 78 |     ):
 79 |         """table""" ""
 80 |         # heading_type = cls.headings.get(level, "heading_3")
 81 |         table = {
 82 |             "type": "table",
 83 |             "table": {
 84 |                 "table_width": table_width,
 85 |                 "has_column_header": has_column_header,
 86 |                 "has_row_header": has_row_header,
 87 |             },
 88 |         }
 89 |         table["table"]["children"] = [cls.table_row(cells)]
 90 | 
 91 |         return table
 92 | 
 93 |     @classmethod
 94 |     def table_row(cls, content_list: list):
 95 |         """table row, see https://developers.notion.com/reference/block#table-rows .
 96 |         When creating a table block via the Append block children endpoint, the table
 97 |         must have at least one table_row whose cells array has the same length as the table_width.
 98 |         """
 99 |         table_row = {
100 |             "type": "table_row",
101 |             "table_row": {
102 |                 "cells": [],
103 |             },
104 |         }
105 |         for content in content_list:
106 |             item = [
107 |                 {
108 |                     "type": "text",
109 |                     "text": {
110 |                         "content": str(content),
111 |                     },
112 |                 }
113 |             ]
114 |             table_row["table_row"]["cells"].append(item)
115 |         return table_row
116 | 
117 |     @classmethod
118 |     def quote(cls, content):
119 |         """取引用格式"""
120 |         return {
121 |             "type": "quote",
122 |             "quote": {
123 |                 "rich_text": [
124 |                     {
125 |                         "type": "text",
126 |                         "text": {"content": content},
127 |                     }
128 |                 ],
129 |                 "color": "default",
130 |             },
131 |         }
132 | 
133 |     @classmethod
134 |     def divider(cls):
135 |         """ "divier"""
136 |         return {"type": "divider", "divider": {}}
137 | 
138 |     @classmethod
139 |     def emoj_style(cls, style, review_id):
140 |         """根据不同的划线样式设置不同的emoji 直线type=0 背景颜色是1 波浪线是2"""
141 |         emoji = "🌟"
142 |         if style == 0:
143 |             emoji = "💡"
144 |         elif style == 1:
145 |             emoji = "⭐"
146 |         # 如果reviewId不是空说明是笔记
147 |         if review_id is not None:
148 |             emoji = "✍️"
149 |         return emoji
150 | 
151 |     @classmethod
152 |     def callout(cls, content, style, color, review_id, enable_emoj=False):
153 |         """取callout格式"""
154 |         emoji = ""
155 |         if enable_emoj:
156 |             emoji = cls.emoj_style(style, review_id)
157 |         return {
158 |             "type": "callout",
159 |             "callout": {
160 |                 "rich_text": [
161 |                     {
162 |                         "type": "text",
163 |                         "text": {
164 |                             "content": content,
165 |                         },
166 |                     }
167 |                 ],
168 |                 "icon": {"emoji": emoji},
169 |                 "color": cls.color_styles.get(color, "default"),
170 |             },
171 |         }
172 | 
173 |     @classmethod
174 |     def paragraph(cls, content, style, color, review_id, enable_emoj=False):
175 |         """取text格式"""
176 |         emoji = ""
177 |         if enable_emoj:
178 |             emoji = cls.emoj_style(style, review_id)
179 |         return {
180 |             "type": "paragraph",
181 |             "paragraph": {
182 |                 "rich_text": [
183 |                     {
184 |                         "type": "text",
185 |                         "text": {
186 |                             "content": emoji + content,
187 |                         },
188 |                     }
189 |                 ],
190 |                 "color": cls.color_styles.get(color, "default"),
191 |             },
192 |         }
193 | 
194 |     @classmethod
195 |     def bullet_list(cls, content, style, color, review_id, enable_emoj=False):
196 |         """取callout格式"""
197 |         emoji = ""
198 |         if enable_emoj:
199 |             emoji = cls.emoj_style(style, review_id)
200 |         return {
201 |             "type": "bulleted_list_item",
202 |             "bulleted_list_item": {
203 |                 "rich_text": [
204 |                     {
205 |                         "type": "text",
206 |                         "text": {
207 |                             "content": emoji + content,
208 |                         },
209 |                     }
210 |                 ],
211 |                 "color": cls.color_styles.get(color, "default"),
212 |             },
213 |         }
214 | 
215 |     @classmethod
216 |     def rich_text(cls, content):
217 |         "generate rich text"
218 |         return {"rich_text": [{"type": "text", "text": {"content": content}}]}
219 | 
220 |     @classmethod
221 |     def title(cls, content):
222 |         "generate title block"
223 |         return {"title": [{"type": "text", "text": {"content": content}}]}
224 | 
225 |     @classmethod
226 |     def url(cls, remoteurl):
227 |         "generate url block"
228 |         return {"url": remoteurl}
229 | 
230 |     @classmethod
231 |     def number(cls, num):
232 |         "generate number block"
233 |         return {"number": num}
234 | 
235 |     @classmethod
236 |     def files(cls, name, url):
237 |         "generate external file & media block"
238 |         return {"files": [{"type": "external", "name": name, "external": {"url": url}}]}
239 | 
240 |     @classmethod
241 |     def select(cls, option):
242 |         "generate select block"
243 |         return {"select": {"name": option}}
244 | 
245 |     @classmethod
246 |     def multi_select(cls, selected_options: list[any]):
247 |         "generate multi-select block"
248 |         return {"multi_select": [{"name": option} for option in selected_options]}
249 | 
250 |     @classmethod
251 |     def date(cls, d):
252 |         "generate date block"
253 |         return {
254 |             "date": {
255 |                 "start": datetime.fromtimestamp(d).strftime("%Y-%m-%d %H:%M:%S"),
256 |                 "time_zone": "Asia/Shanghai",
257 |             }
258 |         }
259 | 
260 |     @classmethod
261 |     def icon(cls, img):
262 |         """generate icon block"""
263 |         return {"type": "external", "external": {"url": img}}
264 | 
265 | 
266 | async def get_datasource_id(client: AsyncClient, database_id: str) -> str:
267 |     """获取data source"""
268 |     db = await client.databases.retrieve(database_id=database_id)
269 |     data_sources = db.get("data_sources", [])
270 |     if not data_sources:
271 |         return ""
272 |     return data_sources[0]["id"]
273 | 


--------------------------------------------------------------------------------
/sync_read.py:
--------------------------------------------------------------------------------
  1 | """sync wereading history to private notion database & pages
  2 | author: alex-guoba
  3 | """
  4 | 
  5 | import logging
  6 | import re
  7 | import time
  8 | from datetime import datetime
  9 | import hashlib
 10 | from collections import defaultdict
 11 | 
 12 | from treelib.tree import Tree
 13 | from notion_client import AsyncClient
 14 | 
 15 | from api import notion, weread
 16 | from api.notion import BlockHelper
 17 | 
 18 | from lib.db_weread_record import DBWeReadRecord
 19 | from lib.page_block_list import PageBlockList
 20 | from lib.serverchan import sc_send
 21 | 
 22 | from config import CONFIG
 23 | from sync.weread.calendar import sync_to_calener
 24 | 
 25 | ROOT_NODE_ID = "#root"
 26 | BOOK_MARK_KEY = "#bookmarks"
 27 | NOTION_MAX_LEVEL = 3
 28 | 
 29 | 
 30 | class BlockItem:
 31 |     """Just for enveloping the child block"""
 32 | 
 33 |     def __init__(self, after=None, bookmark=None, block=None, child=None) -> None:
 34 |         """
 35 |         初始化方法，用于创建一个新的Block对象。
 36 |         Args:
 37 |             after (str, optional): 用于追加到该block之后时使用。
 38 |             bookmark (str, optional): 对应的bookmarkid，需要与bid一起写入db时使用
 39 |             block (str, optional): Block的内容
 40 |             child (list, optional): 子Block对象的列表，默认为None。
 41 |         Returns:
 42 |             None
 43 | 
 44 |         """
 45 |         self.after = after
 46 |         self.bookmark = bookmark
 47 |         self.block = block
 48 |         self.child = child if child else []
 49 |         self.bid = None
 50 | 
 51 |     def set_bid(self, bid):
 52 |         """set block id after appending to notion success"""
 53 |         self.bid = bid
 54 | 
 55 | 
 56 | async def get_page_info(client: AsyncClient, data_source_id: str, book_id: str):
 57 |     """查询原page信息，并返回pageinfo和pid"""
 58 |     time.sleep(0.3)
 59 |     response = await client.data_sources.query(
 60 |         data_source_id=data_source_id,
 61 |         filter={"property": "BookId", "rich_text": {"equals": book_id}},
 62 |     )
 63 |     pageinfo = None
 64 |     pid = None
 65 |     for result in response["results"]:
 66 |         pageinfo = result
 67 |         pid = result["id"]
 68 |         break
 69 | 
 70 |     return pageinfo, pid
 71 | 
 72 | 
 73 | def inherit_properties(page):
 74 |     """
 75 |     从传入的 page 字典中提取 properties 字段，并返回一个新的字典，其中不包含类型为 'formula' 的属性。
 76 |     Args:
 77 |         page (dict): 包含页面信息的字典，其中包含名为 'properties' 的字段，该字段是一个字典，包含页面属性的键值对。
 78 |     Returns:
 79 |         dict: 一个新的字典，包含从原始 'properties' 字段中提取的、类型不为 'formula' 的页面属性键值对。
 80 |     """
 81 |     properties = {}
 82 |     if page:
 83 |         for k, v in page["properties"].items():
 84 |             if v.get("type") == "formula":
 85 |                 continue
 86 |             properties[k] = v
 87 |     return properties
 88 | 
 89 | 
 90 | async def create_or_update_page(
 91 |     client: AsyncClient,
 92 |     data_source_id: str,
 93 |     pageinfo,
 94 |     pid,
 95 |     book_name="",
 96 |     book_id="",
 97 |     cover="",
 98 |     sort=0,
 99 |     author="",
100 |     isbn="",
101 |     rating=0,
102 |     category="",
103 |     note_count=0,
104 |     review_count=0,
105 |     intro="",
106 |     read_info=None,
107 | ):
108 |     """插入到notion"""
109 |     parent = {"data_source_id": data_source_id, "type": "data_source_id"}
110 | 
111 |     properties = inherit_properties(pageinfo)
112 | 
113 |     properties.update(
114 |         {
115 |             "BookName": BlockHelper.title(book_name),
116 |             "BookId": BlockHelper.rich_text(book_id),
117 |             "ISBN": BlockHelper.rich_text(isbn),
118 |             "URL": BlockHelper.url(
119 |                 f"https://weread.qq.com/web/reader/{calculate_book_str_id(book_id)}"
120 |             ),
121 |             "Author": BlockHelper.rich_text(author),
122 |             "Sort": BlockHelper.number(sort),
123 |             "Rating": BlockHelper.number(rating),
124 |             "Cover": BlockHelper.files("Cover", cover),
125 |             "NoteCount": BlockHelper.number(note_count),
126 |             "ReviewCount": BlockHelper.number(review_count),
127 |             "Category": BlockHelper.rich_text(category),
128 |             "Intro": BlockHelper.rich_text(intro),
129 |         }
130 |     )
131 | 
132 |     if read_info:
133 |         marked_status = read_info.get("markedStatus", 0)
134 |         properties["Status"] = BlockHelper.select(
135 |             "读完" if marked_status == 4 else "在读"
136 |         )
137 | 
138 |         format_time = weread.str_reading_time(read_info.get("readingTime", 0))
139 |         properties["ReadingTime"] = BlockHelper.rich_text(format_time)
140 | 
141 |         # 最近阅读
142 |         detail = read_info.get("readDetail", {})
143 |         if detail.get("lastReadingDate"):
144 |             properties["lastReadingDate"] = BlockHelper.date(
145 |                 detail.get("lastReadingDate")
146 |             )
147 | 
148 |         # 完成时间
149 |         if read_info.get("finishedDate"):
150 |             properties["FinishAt"] = BlockHelper.date(read_info.get("finishedDate"))
151 | 
152 |     if pid is None:
153 |         response = await client.pages.create(
154 |             parent=parent, icon=BlockHelper.icon(cover), properties=properties
155 |         )
156 |         return response["id"], True
157 | 
158 |     response = await client.pages.update(
159 |         page_id=pid, icon=BlockHelper.icon(cover), properties=properties
160 |     )
161 |     return pid, False
162 | 
163 | 
164 | async def list_page_blocks(client: AsyncClient, pid: str):
165 |     """query page blocks (children not included)"""
166 |     response = await client.blocks.children.list(block_id=pid)
167 |     children = response["results"] if len(response.get("results")) > 0 else []
168 |     while response.get("has_more"):
169 |         response = await client.blocks.children.list(
170 |             block_id=pid, start_cursor=response["next_cursor"]
171 |         )
172 |         children += response["results"] if len(response.get("results")) > 0 else []
173 |     # remove other fileds in blocks
174 |     tailor = list(map(lambda x: {"id": x.get("id"), "type": x.get("type")}, children))
175 |     return tailor
176 | 
177 | 
178 | async def append_children(client: AsyncClient, pid, after, children):
179 |     """append child block to page. Notion API limit 100 blocker per appending"""
180 |     results = []
181 |     print("appending ", len(children), " blocks after ", after)
182 |     for i in range(0, len(children) // 100 + 1):
183 |         time.sleep(0.3)
184 |         subchild = children[i * 100 : (i + 1) * 100]
185 |         response = None
186 |         if after:
187 |             response = await client.blocks.children.append(
188 |                 block_id=pid, children=subchild, after=after
189 |             )
190 |         else:
191 |             response = await client.blocks.children.append(
192 |                 block_id=pid, children=subchild
193 |             )
194 |         # Notion will return all the blocks start from the appending block. So we need to filter the result.
195 |         results.extend(response.get("results")[: len(subchild)])
196 |     return results if len(results) == len(children) else []
197 | 
198 | 
199 | async def append_blocks(
200 |     client: AsyncClient,
201 |     pid: str,
202 |     appending: list[BlockItem],
203 |     store: DBWeReadRecord,
204 |     book_id: str,
205 | ):
206 |     """append child block to page by group"""
207 |     batch = []
208 |     block_id = None
209 |     result = []
210 |     for item in appending:
211 |         if not batch:
212 |             block_id = item.after
213 |             batch.append(item.block)
214 |             continue
215 |         if block_id == item.after:
216 |             batch.append(item.block)
217 |             continue
218 |         _result = await append_children(client, pid, block_id, batch)
219 |         result.extend(_result)
220 | 
221 |         block_id = item.after
222 |         batch = [item.block]
223 | 
224 |     if len(batch) > 0:
225 |         _result = await append_children(client, pid, block_id, batch)
226 |         result.extend(_result)
227 | 
228 |     for idx, item in enumerate(appending):
229 |         bid = result[idx].get("id")
230 |         item.set_bid(bid)
231 |         if item.child:
232 |             await append_children(client, bid, None, item.child)
233 | 
234 |     # write to db
235 |     for block in appending:
236 |         if block.bookmark and block.bid:
237 |             store.insert(book_id, block.bookmark, block.bid)
238 | 
239 | 
240 | async def get_db_latest_sort(client: AsyncClient, data_source_id: str) -> int:
241 |     """获取database中的最新更新时间"""
242 |     db_filter = {"property": "Sort", "number": {"is_not_empty": True}}
243 |     sorts = [
244 |         {
245 |             "property": "Sort",
246 |             "direction": "descending",
247 |         }
248 |     ]
249 |     response = await client.data_sources.query(
250 |         data_source_id=data_source_id, filter=db_filter, sorts=sorts, page_size=1
251 |     )
252 |     if len(response.get("results")) == 1:
253 |         return response.get("results")[0].get("properties").get("Sort").get("number")
254 |     return 0
255 | 
256 | 
257 | def gen_chapter_tree(chapter_list):
258 |     """生成章节树"""
259 |     tree = Tree()
260 |     root = tree.create_node(identifier=ROOT_NODE_ID)  # root node
261 |     p = {}
262 |     for chapter in chapter_list:
263 |         level = chapter.get("level", 1)
264 |         if level <= 0:
265 |             level = 1
266 |         elif level > NOTION_MAX_LEVEL:  # 目前仅支持header1-3
267 |             level = NOTION_MAX_LEVEL
268 | 
269 |         parent = p.get(level - 1, root)  # 取最近一次更新节点
270 |         chapter_uid = chapter.get("chapterUid")
271 |         p[level] = tree.create_node(
272 |             tag=chapter_uid, identifier=chapter_uid, parent=parent, data=chapter
273 |         )
274 |     return tree
275 | 
276 | 
277 | def mount_bookmarks(chapter_tree, bookmark_list):
278 |     """挂载划线、评论到对应的树节点"""
279 |     d = defaultdict(list)
280 |     for data in bookmark_list:
281 |         uid = data.get("chapterUid", 1)
282 |         d[uid].append(data)
283 | 
284 |     for key, value in d.items():
285 |         node = chapter_tree.get_node(key)
286 |         if not node:
287 |             logging.error("chapter info not found [%s].", key)
288 |             continue
289 | 
290 |         # mount bookmark list to chapter list
291 |         node.data[BOOK_MARK_KEY] = value
292 | 
293 | 
294 | def remove_empty_chapter(chapter_tree):
295 |     """从底向上，删除章节树中的空节点"""
296 |     max_depth = chapter_tree.depth()
297 |     for d in range(max_depth, 0, -1):
298 |         nodes = list(chapter_tree.filter_nodes(lambda x: chapter_tree.depth(x) == d))
299 | 
300 |         for n in nodes:
301 |             if n.data.get(BOOK_MARK_KEY) is None and n.is_leaf():
302 |                 chapter_tree.remove_node(n.identifier)
303 | 
304 | 
305 | def content_block(text: str, style: str, color: str, review_id: str) -> dict:
306 |     """
307 |     根据配置选择内容block形态
308 |     """
309 |     enable_emoj = CONFIG.getboolean("weread.format", "EnableEmoj")
310 |     match CONFIG.get("weread.format", "ContentType"):
311 |         case "callout":
312 |             return BlockHelper.callout(
313 |                 text, style, color, review_id, enable_emoj=enable_emoj
314 |             )
315 | 
316 |         case "list":
317 |             return BlockHelper.bullet_list(
318 |                 text, style, color, review_id, enable_emoj=enable_emoj
319 |             )
320 | 
321 |         case _:
322 |             return BlockHelper.paragraph(
323 |                 text, style, color, review_id, enable_emoj=enable_emoj
324 |             )
325 | 
326 | 
327 | def made_page_blocks(
328 |     store, blocks, bookID, chapters_list, bookmark_list
329 | ) -> list[BlockItem]:
330 |     """generate page blocks to appending"""
331 |     appending: list[BlockItem] = []
332 | 
333 |     page_block_list = PageBlockList(store, bookID, blocks)
334 | 
335 |     # 添加目录
336 |     if not blocks:
337 |         # child format: [after_blockid, bookmarkd_id, block_data]
338 |         appending.append(BlockItem(block=BlockHelper.table_of_contents()))
339 |         appending.append(BlockItem(block=BlockHelper.divider()))
340 | 
341 |     if len(chapters_list) > 0:
342 |         chapter_tree = gen_chapter_tree(chapters_list)
343 |         mount_bookmarks(chapter_tree, bookmark_list)
344 |         remove_empty_chapter(chapter_tree)
345 | 
346 |         for n in chapter_tree.expand_tree(mode=Tree.DEPTH):
347 |             if chapter_tree[n].is_root():
348 |                 continue
349 | 
350 |             data = chapter_tree[n].data
351 |             chapter_uid = data.get("chapterUid")
352 | 
353 |             block_id = None
354 |             _records = store.query(bookID, chapter_uid)
355 |             if len(_records) > 0:
356 |                 block_id = _records[0]["block_id"]
357 |             else:
358 |                 # find a suitable position to insert
359 |                 block_id = page_block_list.found_chapter_position(chapter_uid)
360 |                 appending.append(
361 |                     BlockItem(
362 |                         after=block_id,
363 |                         bookmark=chapter_uid,
364 |                         block=BlockHelper.heading(data.get("level"), data.get("title")),
365 |                     )
366 |                 )
367 | 
368 |             for i in data.get(BOOK_MARK_KEY, []):
369 |                 bookmark_id = i.get("bookmarkId") or i.get("reviewId")
370 |                 _records = store.query(bookID, bookmark_id)
371 |                 if len(_records) > 0:
372 |                     continue
373 |                 appending.append(
374 |                     BlockItem(
375 |                         after=block_id,
376 |                         bookmark=bookmark_id,
377 |                         block=content_block(
378 |                             i.get("markText"),
379 |                             i.get("style"),
380 |                             i.get("colorStyle"),
381 |                             i.get("reviewId"),
382 |                         ),
383 |                         child=(
384 |                             [BlockHelper.quote(i.get("abstract"))]
385 |                             if i.get("abstract")
386 |                             else None
387 |                         ),
388 |                     )
389 |                 )
390 |     else:
391 |         # no chapter info
392 |         for data in bookmark_list:
393 |             bookmark_id = data.get("bookmarkId") or data.get("reviewId")
394 |             _records = store.query(bookID, bookmark_id)
395 |             if len(_records) > 0:
396 |                 continue
397 |             appending.append(
398 |                 BlockItem(
399 |                     bookmark=bookmark_id,
400 |                     block=content_block(
401 |                         data.get("markText"),
402 |                         data.get("style"),
403 |                         data.get("colorStyle"),
404 |                         data.get("reviewId"),
405 |                     ),
406 |                     child=(
407 |                         [BlockHelper.quote(data.get("abstract"))]
408 |                         if data.get("abstract")
409 |                         else None
410 |                     ),
411 |                 )
412 |             )
413 | 
414 |     return appending
415 | 
416 | 
417 | def made_comment_blocks(
418 |     store: DBWeReadRecord, book_id: str, summary: list
419 | ) -> list[BlockItem]:
420 |     """generate extra stat blocks to appending"""
421 |     appending: list[BlockItem] = []
422 | 
423 |     # 追加推荐评语
424 |     if not summary:
425 |         return appending
426 | 
427 |     bookmark_id = "_comment_"
428 |     block_id = None
429 |     _records = store.query(book_id, bookmark_id)
430 |     if len(_records) == 0:
431 |         appending.extend(
432 |             (
433 |                 BlockItem(block=BlockHelper.divider()),
434 |                 BlockItem(block=BlockHelper.heading(1, "点评"), bookmark=bookmark_id),
435 |             )
436 |         )
437 |     else:
438 |         block_id = _records[0]["block_id"]
439 | 
440 |     for i in summary:
441 |         # print("summary:", i)
442 |         bookmark_id = i.get("review").get("reviewId")
443 |         _records = store.query(book_id, bookmark_id)
444 |         if len(_records) > 0:
445 |             continue
446 |         appending.append(
447 |             BlockItem(
448 |                 after=block_id,
449 |                 bookmark=bookmark_id,
450 |                 block=content_block(
451 |                     i.get("review").get("content"),
452 |                     i.get("style"),
453 |                     i.get("colorStyle"),
454 |                     i.get("review").get("reviewId"),
455 |                 ),
456 |             )
457 |         )
458 | 
459 |     return appending
460 | 
461 | 
462 | async def made_readinfo_blocks(
463 |     client: AsyncClient,
464 |     store: DBWeReadRecord,
465 |     book_id: str,
466 |     rinfo: dict,
467 |     bookmark_count: int,
468 | ) -> list[BlockItem]:
469 |     """generate extra stat blocks to appending"""
470 |     appending: list[BlockItem] = []
471 |     rdetail = rinfo.get("readDetail")
472 | 
473 |     if not rdetail:
474 |         return appending
475 |     if not CONFIG.getboolean("weread.format", "EnableReadingDetail"):
476 |         return appending
477 | 
478 |     bookmark_id = "_stat_"
479 |     block_id = None
480 |     _records = store.query(book_id, bookmark_id)
481 |     if len(_records) == 0:
482 |         appending.extend(
483 |             (
484 |                 BlockItem(block=BlockHelper.divider()),
485 |                 BlockItem(
486 |                     block=BlockHelper.heading(1, "阅读明细"), bookmark=bookmark_id
487 |                 ),
488 |             )
489 |         )
490 |     else:
491 |         block_id = _records[0]["block_id"]
492 | 
493 |     # 总计
494 |     bookmark_id = "_stat.total_"
495 |     _records = store.query(book_id, bookmark_id)
496 |     if len(_records):
497 |         store.delete_bookmark(book_id, bookmark_id)
498 |         await client.blocks.delete(block_id=_records[0]["block_id"])
499 | 
500 |     longest_reading_time = weread.str_reading_time(rdetail.get("longestReadingTime", 0))
501 |     longest_reading_date = datetime.fromtimestamp(
502 |         rdetail.get("longestReadingDate")
503 |     ).strftime("%Y/%m/%d")
504 |     appending.append(
505 |         BlockItem(
506 |             after=block_id,
507 |             bookmark=bookmark_id,
508 |             block=BlockHelper.table(2, ["维度", "指标"], True),
509 |             child=[
510 |                 BlockHelper.table_row(
511 |                     ["累积阅读天数", str(rdetail.get("totalReadDay", 0)) + "天"]
512 |                 ),
513 |                 BlockHelper.table_row(
514 |                     ["最长连续阅读天数", str(rdetail.get("continueReadDays", 0)) + "天"]
515 |                 ),
516 |                 BlockHelper.table_row(
517 |                     ["单日阅读最久", f"{longest_reading_time} ({longest_reading_date})"]
518 |                 ),
519 |                 BlockHelper.table_row(["阅读笔记条数", str(bookmark_count) + "条"]),
520 |             ],
521 |         )
522 |     )
523 | 
524 |     # 明细
525 |     bookmark_id = "_stat.detail_"
526 |     _records = store.query(book_id, bookmark_id)
527 |     if len(_records):
528 |         store.delete_bookmark(book_id, bookmark_id)
529 |         await client.blocks.delete(block_id=_records[0]["block_id"])
530 |     item = BlockItem(
531 |         after=block_id,
532 |         bookmark=bookmark_id,
533 |         block=BlockHelper.table(2, ["日期", "阅读时长"], True),
534 |         child=[],
535 |     )
536 |     for daily in rdetail.get("data"):
537 |         item.child.append(
538 |             BlockHelper.table_row(
539 |                 [
540 |                     datetime.fromtimestamp(daily.get("readDate")).strftime("%Y/%m/%d"),
541 |                     weread.str_reading_time(daily.get("readTime", 0)),
542 |                 ]
543 |             )
544 |         )
545 |     appending.append(item)
546 | 
547 |     return appending
548 | 
549 | 
550 | def transform_id(book_id):
551 |     """transform book id to hex string"""
552 |     id_length = len(book_id)
553 |     if re.match(r"^\d*$", book_id):
554 |         ary = []
555 |         for i in range(0, id_length, 9):
556 |             ary.append(format(int(book_id[i : min(i + 9, id_length)]), "x"))
557 |         return "3", ary
558 | 
559 |     result = ""
560 |     for i in range(id_length):
561 |         result += format(ord(book_id[i]), "x")
562 |     return "4", [result]
563 | 
564 | 
565 | def calculate_book_str_id(book_id):
566 |     """calculate book id string"""
567 |     md5 = hashlib.md5()
568 |     md5.update(book_id.encode("utf-8"))
569 |     digest = md5.hexdigest()
570 |     result = digest[0:3]
571 |     code, transformed_ids = transform_id(book_id)
572 |     result += code + "2" + digest[-2:]
573 | 
574 |     for i in range(len(transformed_ids)):
575 |         hex_length_str = format(len(transformed_ids[i]), "x")
576 |         if len(hex_length_str) == 1:
577 |             hex_length_str = "0" + hex_length_str
578 | 
579 |         result += hex_length_str + transformed_ids[i]
580 | 
581 |         if i < len(transformed_ids) - 1:
582 |             result += "g"
583 | 
584 |     if len(result) < 20:
585 |         result += digest[0 : 20 - len(result)]
586 | 
587 |     md5 = hashlib.md5()
588 |     md5.update(result.encode("utf-8"))
589 |     result += md5.hexdigest()[0:3]
590 |     return result
591 | 
592 | 
593 | def send_wxnotify(wxnotify_key, read_stat):
594 |     """send wechat notify"""
595 |     if not wxnotify_key or len(read_stat) == 0:
596 |         return
597 | 
598 |     content = "阅读进度更新啦: ~\n\n"
599 |     for stat in read_stat:
600 |         content += f"{stat.get('book_name')} : {stat.get('count')}条\n\n"
601 | 
602 |     sc_send(wxnotify_key, "Sync-Notion阅读笔记通知", content)
603 | 
604 | 
605 | async def sync_read(
606 |     weread_cookie, notion_token, database_id, calendar_db_id=None, wxnotify_key=None
607 | ):
608 |     """sync weread reading notes to notion"""
609 |     client = AsyncClient(auth=notion_token, log_level=logging.ERROR)
610 | 
611 |     data_source_id = await notion.get_datasource_id(client, database_id)
612 |     if not data_source_id:
613 |         logging.error("database %s has no data source", database_id)
614 |         return
615 | 
616 |     calendar_data_source_id = ""
617 |     if calendar_db_id:
618 |         calendar_data_source_id = await notion.get_datasource_id(client, calendar_db_id)
619 | 
620 |     latest_sort = await get_db_latest_sort(client, data_source_id)
621 | 
622 |     wreader = weread.WeReadAPI(weread_cookie)
623 |     store = DBWeReadRecord("./var/sync_read.db")
624 |     read_stat = []
625 | 
626 |     books = wreader.get_notebooklist()
627 |     for _book in books:
628 |         sort = _book["sort"]
629 |         if sort <= latest_sort:  # 笔记无更新，跳过
630 |             continue
631 | 
632 |         book_dict = _book.get("book")
633 |         book_id = book_dict.get("bookId")
634 | 
635 |         logging.info("Start to synch book %s", book_id)
636 | 
637 |         chapters_list = wreader.get_chapter_list(book_id)
638 |         bookmark_list = wreader.get_bookmark_list(book_id)
639 |         summary, reviews = wreader.get_review_list(book_id)
640 | 
641 |         # converge bookmark and chapter review
642 |         bookmark_list.extend(reviews)
643 |         bookmark_list = sorted(
644 |             bookmark_list,
645 |             key=lambda x: (
646 |                 x.get("chapterUid", 1),
647 |                 (
648 |                     0
649 |                     if (x.get("range", "") == "" or x.get("range").split("-")[0] == "")
650 |                     else int(x.get("range").split("-")[0])
651 |                 ),
652 |             ),
653 |         )
654 | 
655 |         isbn, rating, category, intro = wreader.get_bookinfo(book_id)
656 |         read_info = wreader.get_read_info(book_id)
657 | 
658 |         # delete before insert again
659 |         pageinfo, pid = await get_page_info(client, data_source_id, book_id)
660 |         pid, created = await create_or_update_page(
661 |             client,
662 |             data_source_id,
663 |             pageinfo,
664 |             pid,
665 |             book_name=book_dict.get("title"),
666 |             book_id=book_id,
667 |             cover=book_dict.get("cover"),
668 |             sort=sort,
669 |             author=book_dict.get("author"),
670 |             isbn=isbn,
671 |             rating=rating,
672 |             category=category,
673 |             note_count=_book.get("noteCount"),
674 |             review_count=_book.get("reviewCount"),
675 |             intro=intro,
676 |             read_info=read_info,
677 |         )
678 | 
679 |         blocks = []
680 |         if not created:
681 |             blocks = await list_page_blocks(client, pid)
682 |         else:
683 |             store.delete_book(book_id)
684 | 
685 |         appending = made_page_blocks(
686 |             store,
687 |             blocks,
688 |             book_id,
689 |             chapters_list,
690 |             bookmark_list,
691 |         )
692 |         await append_blocks(client, pid, appending, store, book_id)
693 | 
694 |         appending = made_comment_blocks(
695 |             store,
696 |             book_id,
697 |             summary,
698 |         )
699 |         await append_blocks(client, pid, appending, store, book_id)
700 | 
701 |         appending = await made_readinfo_blocks(
702 |             client, store, book_id, read_info, len(bookmark_list)
703 |         )
704 |         await append_blocks(client, pid, appending, store, book_id)
705 |         if len(appending) > 0:
706 |             read_stat.append(
707 |                 {
708 |                     "count": len(appending),
709 |                     "book_name": book_dict.get("title"),
710 |                 }
711 |             )
712 | 
713 |         if calendar_data_source_id:
714 |             await sync_to_calener(client, calendar_data_source_id, read_info)
715 | 
716 |     if wxnotify_key is not None and len(read_stat) != 0:
717 |         send_wxnotify(wxnotify_key, read_stat)
718 | 


--------------------------------------------------------------------------------